some jit updates

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@240 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-17 22:26:42 +00:00
parent 4faa685def
commit ba684cabcd
7 changed files with 82 additions and 13 deletions

View File

@ -114,6 +114,8 @@ namespace Jit64
void reg_imm(UGeckoInstruction inst);
// Handlers for paired-single (OPTYPE_PS) opcodes, registered in the opcode tables.
// ps_sel handles opcode ps_sel; ps_mr handles opcode ps_mr.
void ps_sel(UGeckoInstruction inst);
void ps_mr(UGeckoInstruction inst);
// "aggregate" = one handler shared by several opcodes: the tables route
// ps_nabs/ps_abs to ps_sign and ps_div/ps_sub/ps_add/ps_mul to ps_arith.
void ps_sign(UGeckoInstruction inst); //aggregate
void ps_arith(UGeckoInstruction inst); //aggregate
// Shared by the ps_merge00/ps_merge01 table entries (the "XX" stands for the variant).
void ps_mergeXX(UGeckoInstruction inst);

View File

@ -149,6 +149,8 @@ namespace Jit64
void RegCache::FlushR(X64Reg reg)
{
if (reg >= NUMXREGS)
PanicAlert("Flushing non existent reg");
if (!xregs[reg].free)
{
StoreFromX64(xregs[reg].ppcReg);

View File

@ -101,7 +101,9 @@ void lfs(UGeckoInstruction inst)
void lfd(UGeckoInstruction inst)
{
INSTRUCTION_START;
DISABLE_32BIT;
if (!cpu_info.bSSSE3) {
DISABLE_32BIT;
}
int d = inst.RD;
int a = inst.RA;
if (!a)
@ -117,9 +119,18 @@ void lfd(UGeckoInstruction inst)
fpr.Lock(d);
if (cpu_info.bSSSE3) {
X64Reg xd = fpr.RX(d);
#ifdef _M_X64
MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
#else
MOV(32, R(EAX), R(ABI_PARAM1));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
#endif
PSHUFB(xd, M((void *)bswapShuffle1x8Dupe));
} else {
#ifndef _M_X64
PanicAlert("lfd - wtf");
#endif
MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
BSWAP(64, EAX);
MOV(64, M(&temp64), R(EAX));
@ -153,7 +164,7 @@ void stfd(UGeckoInstruction inst)
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
#endif
if (cpu_info.bSSSE3) {
MOVAPS(XMM0, fpr.R(s));
MOVAPD(XMM0, fpr.R(s));
PSHUFB(XMM0, M((void *)bswapShuffle1x8));
#ifdef _M_X64
MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0);
@ -227,6 +238,15 @@ void stfs(UGeckoInstruction inst)
}
// stfsx has no native implementation yet: every call is handed to Default().
void stfsx(UGeckoInstruction inst)
{
    INSTRUCTION_START;
    // TODO: emit this natively. A shortcut should be possible here, since it is
    // not likely that a hardware access would use this instruction.
    Default(inst);
    return;
}
void lfsx(UGeckoInstruction inst)
{
INSTRUCTION_START;

View File

@ -214,7 +214,7 @@ void psq_st(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update && offset)
MOV(32, gpr.R(a), R(ABI_PARAM2));
MOVAPS(XMM0, fpr.R(s));
MOVAPD(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1));
CVTPD2DQ(XMM0, R(XMM0));
@ -247,7 +247,7 @@ void psq_st(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update)
MOV(32, gpr.R(a), R(ABI_PARAM2));
MOVAPS(XMM0, fpr.R(s));
MOVAPD(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1));
SHUFPD(XMM0, R(XMM0), 1);
@ -317,7 +317,7 @@ void psq_l(UGeckoInstruction inst)
CVTPS2PD(r, M(&psTemp[0]));
SHUFPD(r, R(r), 1);
}
if (update)
if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
#else
@ -347,7 +347,7 @@ void psq_l(UGeckoInstruction inst)
CVTPS2PD(r, M(&psTemp[0]));
gpr.UnlockAllX();
}
if (update)
if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
#endif
@ -373,7 +373,7 @@ void psq_l(UGeckoInstruction inst)
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0));
if (update)
if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;
@ -399,7 +399,7 @@ void psq_l(UGeckoInstruction inst)
MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0));
SHUFPD(r, R(r), 1);
if (update)
if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;

View File

@ -30,7 +30,9 @@
// ps_madds0
// ps_muls0
// ps_madds1
// ps_sel
// cmppd, andpd, andnpd, or
// lfsx, ps_merge01 etc
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
@ -46,6 +48,46 @@ namespace Jit64
// Two-element constants used as memory operands by the paired-single code.
// Each holds the same value twice, one entry per 64-bit lane.

// Only the top bit (the IEEE-754 double sign bit) set in each lane.
const u64 GC_ALIGNED16(psSignBits[2]) = {
    0x8000000000000000ULL,
    0x8000000000000000ULL
};

// Every bit except the top (sign) bit set in each lane.
const u64 GC_ALIGNED16(psAbsMask[2]) = {
    0x7FFFFFFFFFFFFFFFULL,
    0x7FFFFFFFFFFFFFFFULL
};

// 1.0 in both lanes.
const double GC_ALIGNED16(psOneOne[2]) = {
    1.0,
    1.0
};

// 0.0 in both lanes; ps_sel compares against this.
const double GC_ALIGNED16(psZeroZero[2]) = {
    0.0,
    0.0
};
// ps_mr: moves the whole paired-single register FB into FD.
void ps_mr(UGeckoInstruction inst)
{
    INSTRUCTION_START;
    const int dst = inst.FD;
    const int src = inst.FB;

    // A move of a register onto itself needs no code at all.
    if (dst != src)
    {
        // Second argument false: presumably skips loading dst's old value,
        // which is about to be overwritten anyway - confirm against RegCache.
        fpr.LoadToX64(dst, false);
        MOVAPD(fpr.RX(dst), fpr.R(src));
    }
    // NOTE(review): the opcode table lists ps_mr with FL_RC_BIT, but nothing
    // here looks at a record bit - confirm whether that case needs handling.
}
// ps_sel: per-lane select, FD = (FA >= 0.0) ? FC : FB.
//
// The native path is still disabled and every call goes to Default(): the
// earlier version produced artifacts in the D.O.N. intro. That version
// blended with fpr.R(d) - a register bound without loading its value - and
// never read FB at all. The blend below now takes FB for the "FA < 0" lanes.
void ps_sel(UGeckoInstruction inst)
{
    INSTRUCTION_START;
    // Keep the fallback until the native path below has been validated in-game.
    Default(inst);
    return;

    int d = inst.FD;
    int a = inst.FA;
    int b = inst.FB;
    int c = inst.FC;

    // XMM6/XMM7 are used as scratch registers for the mask and its copy.
    fpr.FlushLockX(XMM7);
    fpr.FlushLockX(XMM6);
    fpr.Lock(a, b, c, d);
    fpr.LoadToX64(a, true, false);
    // NOTE(review): d is bound without loading; if d aliases b or c the source
    // value may be lost before it is read below - confirm against RegCache.
    fpr.LoadToX64(d, false, true);

    // BLENDPD would have been nice...
    // XMM7 = per-lane mask, all ones where FA < 0.0.
    // NOTE(review): a NaN in FA compares false here and so selects FC; check
    // that this matches the interpreter before enabling.
    MOVAPD(XMM7, fpr.R(a));
    CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
    MOVAPD(XMM6, R(XMM7));
    ANDPD(XMM7, fpr.R(b));   // lanes with FA <  0 take FB (was fpr.R(d))
    ANDNPD(XMM6, fpr.R(c));  // lanes with FA >= 0 take FC
    MOVAPD(fpr.RX(d), R(XMM7));
    ORPD(fpr.RX(d), R(XMM6));

    fpr.UnlockAll();
    fpr.UnlockAllX();
}
void ps_sign(UGeckoInstruction inst)
{

View File

@ -213,7 +213,7 @@ GekkoOPTemplate table4[] =
{136, CInterpreter::ps_nabs, Jit64::ps_sign, {"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, CInterpreter::ps_abs, Jit64::ps_sign, {"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, CInterpreter::ps_cmpu1, Jit64::Default, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, CInterpreter::ps_mr, Jit64::Default, {"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{72, CInterpreter::ps_mr, Jit64::ps_mr, {"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, CInterpreter::ps_cmpo1, Jit64::Default, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, CInterpreter::ps_merge00, Jit64::ps_mergeXX, {"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, CInterpreter::ps_merge01, Jit64::ps_mergeXX, {"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
@ -234,7 +234,7 @@ GekkoOPTemplate table4_2[] =
{18, CInterpreter::ps_div, Jit64::ps_arith, {"ps_div", OPTYPE_PS, 0, 16}},
{20, CInterpreter::ps_sub, Jit64::ps_arith, {"ps_sub", OPTYPE_PS, 0}},
{21, CInterpreter::ps_add, Jit64::ps_arith, {"ps_add", OPTYPE_PS, 0}},
{23, CInterpreter::ps_sel, Jit64::Default, {"ps_sel", OPTYPE_PS, 0}},
{23, CInterpreter::ps_sel, Jit64::ps_sel, {"ps_sel", OPTYPE_PS, 0}},
{24, CInterpreter::ps_res, Jit64::Default, {"ps_res", OPTYPE_PS, 0}},
{25, CInterpreter::ps_mul, Jit64::ps_arith, {"ps_mul", OPTYPE_PS, 0}},
{26, CInterpreter::ps_rsqrte, Jit64::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, 0}},

View File

@ -29,14 +29,17 @@
namespace PowerPC
{
// align to a 16-byte boundary (ResetRegisters checks this at runtime)
GC_ALIGNED64_DECL(PowerPCState ppcState);
PowerPCState GC_ALIGNED16(ppcState);
ICPUCore* m_pCore = NULL;
volatile CPUState state = CPU_STEPPING;
void ResetRegisters()
{
if (((u64)&ppcState & 0xf) != 0) {
PanicAlert("The compiler misaligned ppcState in memory. Likely to cause crashes.");
}
for (int i = 0; i < 32; i++)
{
ppcState.gpr[i] = 0;