Fixed JIT register allocator.

Now it only writes back dirty registers when flushed.
	* Fixed KillImmediate.
	* Renamed LoadToX64 and StoreFromX64 to BindToRegister and StoreFromRegister, respectively (as suggested by ector).
	* Cleaned up the code in calls to the register allocator.


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6053 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
dok.slade 2010-08-04 19:34:47 +00:00
parent a8f6904ec7
commit 9b6180ab27
9 changed files with 175 additions and 207 deletions

View File

@ -128,7 +128,7 @@ X64Reg RegCache::GetFreeXReg()
int preg = xregs[xr].ppcReg; int preg = xregs[xr].ppcReg;
if (!locks[preg]) if (!locks[preg])
{ {
StoreFromX64(preg); StoreFromRegister(preg);
return xr; return xr;
} }
} }
@ -159,7 +159,7 @@ void RegCache::FlushR(X64Reg reg)
PanicAlert("Flushing non existent reg"); PanicAlert("Flushing non existent reg");
if (!xregs[reg].free) if (!xregs[reg].free)
{ {
StoreFromX64(xregs[reg].ppcReg); StoreFromRegister(xregs[reg].ppcReg);
} }
} }
@ -185,9 +185,12 @@ void RegCache::DiscardRegContentsIfCached(int preg)
{ {
if (regs[preg].away && regs[preg].location.IsSimpleReg()) if (regs[preg].away && regs[preg].location.IsSimpleReg())
{ {
xregs[regs[preg].location.GetSimpleReg()].free = true; X64Reg xr = regs[preg].location.GetSimpleReg();
xregs[regs[preg].location.GetSimpleReg()].dirty = false; xregs[xr].free = true;
xregs[xr].dirty = false;
xregs[xr].ppcReg = -1;
regs[preg].away = false; regs[preg].away = false;
regs[preg].location = GetDefaultLocation(preg);
} }
} }
@ -252,15 +255,18 @@ OpArg FPURegCache::GetDefaultLocation(int reg) const
return M(&ppcState.ps[reg][0]); return M(&ppcState.ps[reg][0]);
} }
void RegCache::KillImmediate(int preg) void RegCache::KillImmediate(int preg, bool doLoad, bool makeDirty)
{ {
if (regs[preg].away && regs[preg].location.IsImm()) if (regs[preg].away)
{ {
LoadToX64(preg, true, true); if (regs[preg].location.IsImm())
BindToRegister(preg, doLoad, makeDirty);
else if (regs[preg].location.IsSimpleReg())
xregs[RX(preg)].dirty |= makeDirty;
} }
} }
void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty) void GPRRegCache::BindToRegister(int i, bool doLoad, bool makeDirty)
{ {
if (!regs[i].away && regs[i].location.IsImm()) if (!regs[i].away && regs[i].location.IsImm())
PanicAlert("Bad immediate"); PanicAlert("Bad immediate");
@ -297,7 +303,7 @@ void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty)
} }
} }
void GPRRegCache::StoreFromX64(int i) void GPRRegCache::StoreFromRegister(int i)
{ {
if (regs[i].away) if (regs[i].away)
{ {
@ -316,14 +322,14 @@ void GPRRegCache::StoreFromX64(int i)
doStore = true; doStore = true;
} }
OpArg newLoc = GetDefaultLocation(i); OpArg newLoc = GetDefaultLocation(i);
// if (doStore) //<-- Breaks JIT compilation if (doStore)
emit->MOV(32, newLoc, regs[i].location); emit->MOV(32, newLoc, regs[i].location);
regs[i].location = newLoc; regs[i].location = newLoc;
regs[i].away = false; regs[i].away = false;
} }
} }
void FPURegCache::LoadToX64(int i, bool doLoad, bool makeDirty) void FPURegCache::BindToRegister(int i, bool doLoad, bool makeDirty)
{ {
_assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - load - imm"); _assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - load - imm");
if (!regs[i].away) if (!regs[i].away)
@ -351,7 +357,7 @@ void FPURegCache::LoadToX64(int i, bool doLoad, bool makeDirty)
} }
} }
void FPURegCache::StoreFromX64(int i) void FPURegCache::StoreFromRegister(int i)
{ {
_assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - store - imm"); _assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - store - imm");
if (regs[i].away) if (regs[i].away)
@ -389,12 +395,12 @@ void RegCache::Flush(FlushMode mode)
if (regs[i].location.IsSimpleReg()) if (regs[i].location.IsSimpleReg())
{ {
X64Reg xr = RX(i); X64Reg xr = RX(i);
StoreFromX64(i); StoreFromRegister(i);
xregs[xr].dirty = false; xregs[xr].dirty = false;
} }
else if (regs[i].location.IsImm()) else if (regs[i].location.IsImm())
{ {
StoreFromX64(i); StoreFromRegister(i);
} }
else else
{ {

View File

@ -96,12 +96,12 @@ public:
virtual void Flush(FlushMode mode); virtual void Flush(FlushMode mode);
virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);} virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);}
int SanityCheck() const; int SanityCheck() const;
void KillImmediate(int preg); void KillImmediate(int preg, bool doLoad, bool makeDirty);
//TODO - instead of doload, use "read", "write" //TODO - instead of doload, use "read", "write"
//read only will not set dirty flag //read only will not set dirty flag
virtual void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true) = 0; virtual void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true) = 0;
virtual void StoreFromX64(int preg) = 0; virtual void StoreFromRegister(int preg) = 0;
const OpArg &R(int preg) const {return regs[preg].location;} const OpArg &R(int preg) const {return regs[preg].location;}
X64Reg RX(int preg) const X64Reg RX(int preg) const
@ -131,8 +131,8 @@ class GPRRegCache : public RegCache
{ {
public: public:
void Start(PPCAnalyst::BlockRegStats &stats); void Start(PPCAnalyst::BlockRegStats &stats);
void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true); void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true);
void StoreFromX64(int preg); void StoreFromRegister(int preg);
OpArg GetDefaultLocation(int reg) const; OpArg GetDefaultLocation(int reg) const;
const int *GetAllocationOrder(int &count); const int *GetAllocationOrder(int &count);
void SetImmediate32(int preg, u32 immValue); void SetImmediate32(int preg, u32 immValue);
@ -143,8 +143,8 @@ class FPURegCache : public RegCache
{ {
public: public:
void Start(PPCAnalyst::BlockRegStats &stats); void Start(PPCAnalyst::BlockRegStats &stats);
void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true); void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true);
void StoreFromX64(int preg); void StoreFromRegister(int preg);
const int *GetAllocationOrder(int &count); const int *GetAllocationOrder(int &count);
OpArg GetDefaultLocation(int reg) const; OpArg GetDefaultLocation(int reg) const;
}; };

View File

@ -34,24 +34,24 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm
fpr.Lock(d, a, b); fpr.Lock(d, a, b);
if (d == a) if (d == a)
{ {
fpr.LoadToX64(d, true); fpr.BindToRegister(d, true);
(this->*op)(fpr.RX(d), fpr.R(b)); (this->*op)(fpr.RX(d), fpr.R(b));
} }
else if (d == b && reversible) else if (d == b && reversible)
{ {
fpr.LoadToX64(d, true); fpr.BindToRegister(d, true);
(this->*op)(fpr.RX(d), fpr.R(a)); (this->*op)(fpr.RX(d), fpr.R(a));
} }
else if (a != d && b != d) else if (a != d && b != d)
{ {
// Sources different from d, can use rather quick solution // Sources different from d, can use rather quick solution
fpr.LoadToX64(d, !dupe); fpr.BindToRegister(d, !dupe);
MOVSD(fpr.RX(d), fpr.R(a)); MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), fpr.R(b)); (this->*op)(fpr.RX(d), fpr.R(b));
} }
else if (b != d) else if (b != d)
{ {
fpr.LoadToX64(d, !dupe); fpr.BindToRegister(d, !dupe);
MOVSD(XMM0, fpr.R(b)); MOVSD(XMM0, fpr.R(b));
MOVSD(fpr.RX(d), fpr.R(a)); MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0)); (this->*op)(fpr.RX(d), Gen::R(XMM0));
@ -60,7 +60,7 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm
{ {
MOVSD(XMM0, fpr.R(a)); MOVSD(XMM0, fpr.R(a));
MOVSD(XMM1, fpr.R(b)); MOVSD(XMM1, fpr.R(b));
fpr.LoadToX64(d, !dupe); fpr.BindToRegister(d, !dupe);
(this->*op)(XMM0, Gen::R(XMM1)); (this->*op)(XMM0, Gen::R(XMM1));
MOVSD(fpr.RX(d), Gen::R(XMM0)); MOVSD(fpr.RX(d), Gen::R(XMM0));
} }
@ -87,7 +87,7 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
int d = inst.FD; int d = inst.FD;
int b = inst.FB; int b = inst.FB;
fpr.Lock(b, d); fpr.Lock(b, d);
fpr.LoadToX64(d, true, true); fpr.BindToRegister(d, true, true);
MOVSD(XMM0, M((void *)&one_const)); MOVSD(XMM0, M((void *)&one_const));
SQRTSD(XMM1, fpr.R(b)); SQRTSD(XMM1, fpr.R(b));
DIVSD(XMM0, R(XMM1)); DIVSD(XMM0, R(XMM1));
@ -160,7 +160,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
XORPD(XMM0, M((void*)&psSignBits2)); XORPD(XMM0, M((void*)&psSignBits2));
break; break;
} }
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
//YES it is necessary to dupe the result :( //YES it is necessary to dupe the result :(
//TODO : analysis - does the top reg get used? If so, dupe, if not, don't. //TODO : analysis - does the top reg get used? If so, dupe, if not, don't.
if (single_precision) { if (single_precision) {
@ -186,7 +186,7 @@ void Jit64::fsign(UGeckoInstruction inst)
int d = inst.FD; int d = inst.FD;
int b = inst.FB; int b = inst.FB;
fpr.Lock(b, d); fpr.Lock(b, d);
fpr.LoadToX64(d, true, true); fpr.BindToRegister(d, true, true);
MOVSD(XMM0, fpr.R(b)); MOVSD(XMM0, fpr.R(b));
switch (inst.SUBOP10) { switch (inst.SUBOP10) {
case 40: // fnegx case 40: // fnegx
@ -216,7 +216,7 @@ void Jit64::fmrx(UGeckoInstruction inst)
int d = inst.FD; int d = inst.FD;
int b = inst.FB; int b = inst.FB;
fpr.Lock(b, d); fpr.Lock(b, d);
fpr.LoadToX64(d, true, true); fpr.BindToRegister(d, true, true);
MOVSD(XMM0, fpr.R(b)); MOVSD(XMM0, fpr.R(b));
MOVSD(fpr.R(d), XMM0); MOVSD(fpr.R(d), XMM0);
fpr.UnlockAll(); fpr.UnlockAll();
@ -238,7 +238,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
int crf = inst.CRFD; int crf = inst.CRFD;
fpr.Lock(a,b); fpr.Lock(a,b);
if (a != b) fpr.LoadToX64(a, true); if (a != b) fpr.BindToRegister(a, true);
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception? // Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b)); UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));

View File

@ -84,8 +84,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
} }
else else
{ {
if (gpr.R(d).IsImm()) gpr.KillImmediate(d, true, true);
gpr.LoadToX64(d, false);
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
if (carry) if (carry)
GenerateCarry(); GenerateCarry();
@ -93,7 +92,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
} }
else else
{ {
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
if (carry) if (carry)
@ -107,7 +106,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
#ifdef __APPLE__ #ifdef __APPLE__
// XXX soren // XXX soren
// FIXME: Seems to be required on OS X (see r5799) // FIXME: Seems to be required on OS X (see r5799)
gpr.StoreFromX64(d); gpr.StoreFromRegister(d);
#endif #endif
} }
else else
@ -135,7 +134,7 @@ void Jit64::reg_imm(UGeckoInstruction inst)
gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)(s32)(s16)inst.SIMM_16); gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)(s32)(s16)inst.SIMM_16);
} else if (inst.SIMM_16 == 0 && d != a && a != 0) { } else if (inst.SIMM_16 == 0 && d != a && a != 0) {
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(d, false, true); gpr.BindToRegister(d, false, true);
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
gpr.UnlockAll(); gpr.UnlockAll();
} else { } else {
@ -150,7 +149,7 @@ void Jit64::reg_imm(UGeckoInstruction inst)
gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) + (u32)(s32)js.next_inst.SIMM_16); gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) + (u32)(s32)js.next_inst.SIMM_16);
#ifdef __APPLE__ #ifdef __APPLE__
// FIXME: Seems to be required on OS X (see r5799) // FIXME: Seems to be required on OS X (see r5799)
gpr.StoreFromX64(d); gpr.StoreFromRegister(d);
#endif #endif
js.downcountAmount++; js.downcountAmount++;
js.skipnext = true; js.skipnext = true;
@ -160,7 +159,7 @@ void Jit64::reg_imm(UGeckoInstruction inst)
gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) | (u32)js.next_inst.UIMM); gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) | (u32)js.next_inst.UIMM);
#ifdef __APPLE__ #ifdef __APPLE__
// FIXME: Seems to be required on OS X (see r5799) // FIXME: Seems to be required on OS X (see r5799)
gpr.StoreFromX64(d); gpr.StoreFromRegister(d);
#endif #endif
js.downcountAmount++; js.downcountAmount++;
js.skipnext = true; js.skipnext = true;
@ -221,7 +220,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
OpArg comparand; OpArg comparand;
if (inst.OPCD == 31) { if (inst.OPCD == 31) {
gpr.Lock(a, b); gpr.Lock(a, b);
gpr.LoadToX64(a, true, false); gpr.BindToRegister(a, true, false);
comparand = gpr.R(b); comparand = gpr.R(b);
if (inst.SUBOP10 == 32) { if (inst.SUBOP10 == 32) {
//cmpl //cmpl
@ -234,7 +233,8 @@ void Jit64::cmpXX(UGeckoInstruction inst)
} }
} }
else { else {
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference gpr.Lock(a);
gpr.KillImmediate(a, true, false); // todo, optimize instead, but unlikely to make a difference
if (inst.OPCD == 10) { if (inst.OPCD == 10) {
//cmpli //cmpli
less_than = CC_B; less_than = CC_B;
@ -355,14 +355,14 @@ void Jit64::orx(UGeckoInstruction inst)
if (s == b && s != a) if (s == b && s != a)
{ {
gpr.Lock(a,s); gpr.Lock(a,s);
gpr.LoadToX64(a, false); gpr.BindToRegister(a, false);
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
gpr.UnlockAll(); gpr.UnlockAll();
} }
else else
{ {
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.LoadToX64(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
if (a == s) if (a == s)
OR(32, gpr.R(a), gpr.R(b)); OR(32, gpr.R(a), gpr.R(b));
else if (a == b) else if (a == b)
@ -385,12 +385,8 @@ void Jit64::orcx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, s = inst.RS, b = inst.RB; int a = inst.RA, s = inst.RS, b = inst.RB;
if (a != s && a != b) {
gpr.LoadToX64(a, false, true);
} else {
gpr.LoadToX64(a, true, true);
}
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(b)); MOV(32, R(EAX), gpr.R(b));
NOT(32, R(EAX)); NOT(32, R(EAX));
OR(32, R(EAX), gpr.R(s)); OR(32, R(EAX), gpr.R(s));
@ -414,7 +410,7 @@ void Jit64::norx(UGeckoInstruction inst)
if (s == b && s != a) if (s == b && s != a)
{ {
gpr.Lock(a,s); gpr.Lock(a,s);
gpr.LoadToX64(a, false); gpr.BindToRegister(a, false);
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
NOT(32, gpr.R(a)); NOT(32, gpr.R(a));
gpr.UnlockAll(); gpr.UnlockAll();
@ -422,7 +418,7 @@ void Jit64::norx(UGeckoInstruction inst)
else else
{ {
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.LoadToX64(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
if (a == s) if (a == s)
OR(32, gpr.R(a), gpr.R(b)); OR(32, gpr.R(a), gpr.R(b));
else if (a == b) else if (a == b)
@ -456,8 +452,8 @@ void Jit64::xorx(UGeckoInstruction inst)
} }
else else
{ {
gpr.LoadToX64(a, a == s || a == b, true);
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, a == s || a == b, true);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
XOR(32, R(EAX), gpr.R(b)); XOR(32, R(EAX), gpr.R(b));
MOV(32, gpr.R(a), R(EAX)); MOV(32, gpr.R(a), R(EAX));
@ -483,8 +479,8 @@ void Jit64::eqvx(UGeckoInstruction inst)
} }
else else
{ {
gpr.LoadToX64(a, a == s || a == b, true);
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, a == s || a == b, true);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
XOR(32, R(EAX), gpr.R(b)); XOR(32, R(EAX), gpr.R(b));
NOT(32, R(EAX)); NOT(32, R(EAX));
@ -503,12 +499,8 @@ void Jit64::andx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, s = inst.RS, b = inst.RB; int a = inst.RA, s = inst.RS, b = inst.RB;
if (a != s && a != b) {
gpr.LoadToX64(a, false, true);
} else {
gpr.LoadToX64(a, true, true);
}
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
AND(32, R(EAX), gpr.R(b)); AND(32, R(EAX), gpr.R(b));
MOV(32, gpr.R(a), R(EAX)); MOV(32, gpr.R(a), R(EAX));
@ -525,12 +517,8 @@ void Jit64::nandx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, s = inst.RS, b = inst.RB; int a = inst.RA, s = inst.RS, b = inst.RB;
if (a != s && a != b) {
gpr.LoadToX64(a, false, true);
} else {
gpr.LoadToX64(a, true, true);
}
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
AND(32, R(EAX), gpr.R(b)); AND(32, R(EAX), gpr.R(b));
NOT(32, R(EAX)); NOT(32, R(EAX));
@ -548,12 +536,8 @@ void Jit64::andcx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, s = inst.RS, b = inst.RB; int a = inst.RA, s = inst.RS, b = inst.RB;
if (a != s && a != b) {
gpr.LoadToX64(a, false, true);
} else {
gpr.LoadToX64(a, true, true);
}
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(b)); MOV(32, R(EAX), gpr.R(b));
NOT(32, R(EAX)); NOT(32, R(EAX));
AND(32, R(EAX), gpr.R(s)); AND(32, R(EAX), gpr.R(s));
@ -570,9 +554,9 @@ void Jit64::extsbx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, int a = inst.RA, s = inst.RS;
s = inst.RS; gpr.Lock(a, s);
gpr.LoadToX64(a, a == s, true); gpr.BindToRegister(a, a == s, true);
// Always force moving to EAX because it isn't possible // Always force moving to EAX because it isn't possible
// to refer to the lowest byte of some registers, at least in // to refer to the lowest byte of some registers, at least in
// 32-bit mode. // 32-bit mode.
@ -581,6 +565,7 @@ void Jit64::extsbx(UGeckoInstruction inst)
if (inst.Rc) { if (inst.Rc) {
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a));
} }
gpr.UnlockAll();
} }
void Jit64::extshx(UGeckoInstruction inst) void Jit64::extshx(UGeckoInstruction inst)
@ -588,8 +573,9 @@ void Jit64::extshx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, s = inst.RS; int a = inst.RA, s = inst.RS;
gpr.KillImmediate(s); gpr.Lock(a, s);
gpr.LoadToX64(a, a == s, true); gpr.KillImmediate(s, true, false);
gpr.BindToRegister(a, a == s, true);
// This looks a little dangerous, but it's safe because // This looks a little dangerous, but it's safe because
// every 32-bit register has a 16-bit half at the same index // every 32-bit register has a 16-bit half at the same index
// as the 32-bit register. // as the 32-bit register.
@ -597,6 +583,7 @@ void Jit64::extshx(UGeckoInstruction inst)
if (inst.Rc) { if (inst.Rc) {
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a));
} }
gpr.UnlockAll();
} }
void Jit64::subfic(UGeckoInstruction inst) void Jit64::subfic(UGeckoInstruction inst)
@ -605,7 +592,7 @@ void Jit64::subfic(UGeckoInstruction inst)
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, d = inst.RD; int a = inst.RA, d = inst.RD;
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(d, a == d, true); gpr.BindToRegister(d, a == d, true);
int imm = inst.SIMM_16; int imm = inst.SIMM_16;
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
NOT(32, R(EAX)); NOT(32, R(EAX));
@ -622,10 +609,7 @@ void Jit64::subfcx(UGeckoInstruction inst)
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if(d != a && d != b) gpr.BindToRegister(d, (d == a || d == b), true);
gpr.LoadToX64(d, false, true);
else
gpr.LoadToX64(d, true, true);
// For some reason, I could not get the jit versions of sub* // For some reason, I could not get the jit versions of sub*
// working with x86 sub...so we use the ~a + b + 1 method // working with x86 sub...so we use the ~a + b + 1 method
@ -656,10 +640,7 @@ void Jit64::subfex(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if(d != a && d != b) gpr.BindToRegister(d, (d == a || d == b), true);
gpr.LoadToX64(d, false, true);
else
gpr.LoadToX64(d, true, true);
// Get CA // Get CA
MOV(32, R(ECX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(ECX), M(&PowerPC::ppcState.spr[SPR_XER]));
@ -701,8 +682,8 @@ void Jit64::subfmex(UGeckoInstruction inst)
if (d == a) if (d == a)
{ {
gpr.Lock(a, d); gpr.Lock(d);
gpr.LoadToX64(d, true); gpr.BindToRegister(d, true);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
@ -713,7 +694,7 @@ void Jit64::subfmex(UGeckoInstruction inst)
else else
{ {
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
@ -738,8 +719,8 @@ void Jit64::subfzex(UGeckoInstruction inst)
if (d == a) if (d == a)
{ {
gpr.Lock(a, d); gpr.Lock(d);
gpr.LoadToX64(d, true); gpr.BindToRegister(d, true);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
@ -750,7 +731,7 @@ void Jit64::subfzex(UGeckoInstruction inst)
else else
{ {
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
@ -772,11 +753,7 @@ void Jit64::subfx(UGeckoInstruction inst)
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (d != a && d != b) { gpr.BindToRegister(d, (d == a || d == b), true);
gpr.LoadToX64(d, false, true);
} else {
gpr.LoadToX64(d, true, true);
}
MOV(32, R(EAX), gpr.R(b)); MOV(32, R(EAX), gpr.R(b));
SUB(32, R(EAX), gpr.R(a)); SUB(32, R(EAX), gpr.R(a));
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
@ -794,8 +771,8 @@ void Jit64::mulli(UGeckoInstruction inst)
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, d = inst.RD; int a = inst.RA, d = inst.RD;
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(d, (d == a), true); gpr.BindToRegister(d, (d == a), true);
gpr.KillImmediate(a); gpr.KillImmediate(a, true, false);
IMUL(32, gpr.RX(d), gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); IMUL(32, gpr.RX(d), gpr.R(a), Imm32((u32)(s32)inst.SIMM_16));
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -806,7 +783,7 @@ void Jit64::mullwx(UGeckoInstruction inst)
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.LoadToX64(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
if (d == a) { if (d == a) {
IMUL(32, gpr.RX(d), gpr.R(b)); IMUL(32, gpr.RX(d), gpr.R(b));
} else if (d == b) { } else if (d == b) {
@ -828,15 +805,11 @@ void Jit64::mulhwux(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(EDX); gpr.FlushLockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (d != a && d != b) { gpr.BindToRegister(d, (d == a || d == b), true);
gpr.LoadToX64(d, false, true);
} else {
gpr.LoadToX64(d, true, true);
}
if (gpr.RX(d) == EDX) if (gpr.RX(d) == EDX)
PanicAlert("mulhwux : WTF"); PanicAlert("mulhwux : WTF");
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
gpr.KillImmediate(b); gpr.KillImmediate(b, true, false);
MUL(32, gpr.R(b)); MUL(32, gpr.R(b));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -852,14 +825,10 @@ void Jit64::divwux(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(EDX); gpr.FlushLockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (d != a && d != b) { gpr.BindToRegister(d, (d == a || d == b), true);
gpr.LoadToX64(d, false, true);
} else {
gpr.LoadToX64(d, true, true);
}
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
XOR(32, R(EDX), R(EDX)); XOR(32, R(EDX), R(EDX));
gpr.KillImmediate(b); gpr.KillImmediate(b, true, false);
CMP(32, gpr.R(b), Imm32(0)); CMP(32, gpr.R(b), Imm32(0));
// doesn't handle if OE is set, but int doesn't either... // doesn't handle if OE is set, but int doesn't either...
FixupBranch not_div_by_zero = J_CC(CC_NZ); FixupBranch not_div_by_zero = J_CC(CC_NZ);
@ -887,7 +856,7 @@ void Jit64::addx(UGeckoInstruction inst)
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0)); LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0));
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -895,14 +864,14 @@ void Jit64::addx(UGeckoInstruction inst)
{ {
int operand = ((d == a) ? b : a); int operand = ((d == a) ? b : a);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.LoadToX64(d, true); gpr.BindToRegister(d, true);
ADD(32, gpr.R(d), gpr.R(operand)); ADD(32, gpr.R(d), gpr.R(operand));
gpr.UnlockAll(); gpr.UnlockAll();
} }
else else
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADD(32, gpr.R(d), gpr.R(b)); ADD(32, gpr.R(d), gpr.R(b));
gpr.UnlockAll(); gpr.UnlockAll();
@ -924,7 +893,7 @@ void Jit64::addex(UGeckoInstruction inst)
if ((d == a) || (d == b)) if ((d == a) || (d == b))
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.LoadToX64(d, true); gpr.BindToRegister(d, true);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); ADC(32, gpr.R(d), gpr.R((d == a) ? b : a));
@ -934,7 +903,7 @@ void Jit64::addex(UGeckoInstruction inst)
else else
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
@ -960,7 +929,7 @@ void Jit64::addcx(UGeckoInstruction inst)
{ {
int operand = ((d == a) ? b : a); int operand = ((d == a) ? b : a);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.LoadToX64(d, true); gpr.BindToRegister(d, true);
ADD(32, gpr.R(d), gpr.R(operand)); ADD(32, gpr.R(d), gpr.R(operand));
GenerateCarry(); GenerateCarry();
gpr.UnlockAll(); gpr.UnlockAll();
@ -968,7 +937,7 @@ void Jit64::addcx(UGeckoInstruction inst)
else else
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADD(32, gpr.R(d), gpr.R(b)); ADD(32, gpr.R(d), gpr.R(b));
GenerateCarry(); GenerateCarry();
@ -990,8 +959,8 @@ void Jit64::addmex(UGeckoInstruction inst)
if (d == a) if (d == a)
{ {
gpr.Lock(a, d); gpr.Lock(d);
gpr.LoadToX64(d, true); gpr.BindToRegister(d, true);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
@ -1001,7 +970,7 @@ void Jit64::addmex(UGeckoInstruction inst)
else else
{ {
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
@ -1025,8 +994,8 @@ void Jit64::addzex(UGeckoInstruction inst)
if (d == a) if (d == a)
{ {
gpr.Lock(a, d); gpr.Lock(d);
gpr.LoadToX64(d, true); gpr.BindToRegister(d, true);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
ADC(32, gpr.R(d), Imm8(0)); ADC(32, gpr.R(d), Imm8(0));
@ -1036,7 +1005,7 @@ void Jit64::addzex(UGeckoInstruction inst)
else else
{ {
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
@ -1068,7 +1037,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
} }
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.LoadToX64(a, a == s); gpr.BindToRegister(a, a == s);
if (a != s) if (a != s)
{ {
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
@ -1112,18 +1081,9 @@ void Jit64::rlwimix(UGeckoInstruction inst)
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int s = inst.RS;
if (gpr.R(a).IsImm() || gpr.R(s).IsImm())
{
Default(inst);
return;
}
if (a != s)
{
gpr.Lock(a, s);
gpr.LoadToX64(a, true);
}
gpr.Lock(a, s);
gpr.KillImmediate(a, true, true);
u32 mask = Helper_Mask(inst.MB, inst.ME); u32 mask = Helper_Mask(inst.MB, inst.ME);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
AND(32, gpr.R(a), Imm32(~mask)); AND(32, gpr.R(a), Imm32(~mask));
@ -1143,15 +1103,11 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA, b = inst.RB, s = inst.RS; int a = inst.RA, b = inst.RB, s = inst.RS;
if (gpr.R(a).IsImm())
{
Default(inst);
return;
}
u32 mask = Helper_Mask(inst.MB, inst.ME); u32 mask = Helper_Mask(inst.MB, inst.ME);
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.KillImmediate(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
AND(32, R(ECX), Imm32(0x1f)); AND(32, R(ECX), Imm32(0x1f));
@ -1173,7 +1129,7 @@ void Jit64::negx(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int d = inst.RD; int d = inst.RD;
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(d, a == d, true); gpr.BindToRegister(d, a == d, true);
if (a != d) if (a != d)
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
NEG(32, gpr.R(d)); NEG(32, gpr.R(d));
@ -1193,7 +1149,7 @@ void Jit64::srwx(UGeckoInstruction inst)
int s = inst.RS; int s = inst.RS;
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.LoadToX64(a, a == s || a == b || s == b, true); gpr.BindToRegister(a, a == s || a == b || s == b, true);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
XOR(32, R(EAX), R(EAX)); XOR(32, R(EAX), R(EAX));
TEST(32, R(ECX), Imm32(32)); TEST(32, R(ECX), Imm32(32));
@ -1219,7 +1175,7 @@ void Jit64::slwx(UGeckoInstruction inst)
int s = inst.RS; int s = inst.RS;
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.LoadToX64(a, a == s || a == b || s == b, true); gpr.BindToRegister(a, a == s || a == b || s == b, true);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
XOR(32, R(EAX), R(EAX)); XOR(32, R(EAX), R(EAX));
TEST(32, R(ECX), Imm32(32)); TEST(32, R(ECX), Imm32(32));
@ -1244,9 +1200,9 @@ void Jit64::srawx(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int b = inst.RB; int b = inst.RB;
int s = inst.RS; int s = inst.RS;
gpr.Lock(a, s); gpr.Lock(a, s, b);
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.LoadToX64(a, a == s || a == b, true); gpr.BindToRegister(a, a == s || a == b, true);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
TEST(32, R(ECX), Imm32(32)); TEST(32, R(ECX), Imm32(32));
FixupBranch topBitSet = J_CC(CC_NZ); FixupBranch topBitSet = J_CC(CC_NZ);
@ -1290,7 +1246,7 @@ void Jit64::srawix(UGeckoInstruction inst)
if (amount != 0) if (amount != 0)
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.LoadToX64(a, a == s, true); gpr.BindToRegister(a, a == s, true);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
MOV(32, gpr.R(a), R(EAX)); MOV(32, gpr.R(a), R(EAX));
SAR(32, gpr.R(a), Imm8(amount)); SAR(32, gpr.R(a), Imm8(amount));
@ -1311,7 +1267,7 @@ void Jit64::srawix(UGeckoInstruction inst)
Default(inst); return; Default(inst); return;
gpr.Lock(a, s); gpr.Lock(a, s);
JitClearCA(); JitClearCA();
gpr.LoadToX64(a, a == s, true); gpr.BindToRegister(a, a == s, true);
if (a != s) if (a != s)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1329,13 +1285,10 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
JITDISABLE(Integer) JITDISABLE(Integer)
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int s = inst.RS;
if (gpr.R(a).IsImm() || gpr.R(s).IsImm() || s == a)
{
Default(inst);
return;
}
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.LoadToX64(a, false); gpr.KillImmediate(s, true, false);
gpr.BindToRegister(a, (a == s), true);
BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s)); BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s));
FixupBranch gotone = J_CC(CC_NZ); FixupBranch gotone = J_CC(CC_NZ);
MOV(32, gpr.R(a), Imm32(63)); MOV(32, gpr.R(a), Imm32(63));

View File

@ -44,11 +44,9 @@ void Jit64::lbzx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b)); MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a) if (a)
{ {
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(ABI_PARAM1), gpr.R(a));
} }
@ -57,7 +55,7 @@ void Jit64::lbzx(UGeckoInstruction inst)
MEMCHECK_START MEMCHECK_START
gpr.Lock(d); gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true); gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
@ -73,11 +71,9 @@ void Jit64::lhax(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b)); MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a) if (a)
{ {
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(ABI_PARAM1), gpr.R(a));
} }
@ -87,7 +83,7 @@ void Jit64::lhax(UGeckoInstruction inst)
MEMCHECK_START MEMCHECK_START
gpr.Lock(d); gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true); gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
@ -103,11 +99,9 @@ void Jit64::lwzx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b)); MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a) if (a)
{ {
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(ABI_PARAM1), gpr.R(a));
} }
@ -116,7 +110,7 @@ void Jit64::lwzx(UGeckoInstruction inst)
MEMCHECK_START MEMCHECK_START
gpr.Lock(d); gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true); gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
@ -157,10 +151,10 @@ void Jit64::lXz(UGeckoInstruction inst)
// do our job at first // do our job at first
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a); gpr.Lock(d);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, 32, offset); SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
gpr.LoadToX64(d, false, true); gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -214,8 +208,8 @@ void Jit64::lXz(UGeckoInstruction inst)
{ {
// Fast and daring // Fast and daring
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.LoadToX64(a, true, false); gpr.BindToRegister(a, true, false);
gpr.LoadToX64(d, a == d, true); gpr.BindToRegister(d, a == d, true);
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset)); MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
BSWAP(32, gpr.R(d).GetSimpleReg()); BSWAP(32, gpr.R(d).GetSimpleReg());
gpr.UnlockAll(); gpr.UnlockAll();
@ -223,16 +217,13 @@ void Jit64::lXz(UGeckoInstruction inst)
else else
{ {
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
gpr.LoadToX64(a, true, false);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
MEMCHECK_START MEMCHECK_START
gpr.Lock(d); gpr.Lock(d);
gpr.LoadToX64(d, a == d, true); gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
@ -252,14 +243,13 @@ void Jit64::lha(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
// Safe and boring // Safe and boring
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true); SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
MEMCHECK_START MEMCHECK_START
gpr.Lock(d); gpr.Lock(d);
gpr.LoadToX64(d, d == a, true); gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
@ -280,7 +270,7 @@ void Jit64::lwzux(UGeckoInstruction inst)
return; return;
} }
gpr.Lock(a); gpr.Lock(a);
gpr.LoadToX64(a, true, true); gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b)); ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
SafeLoadRegToEAX(EAX, 32, 0, false); SafeLoadRegToEAX(EAX, 32, 0, false);
@ -288,7 +278,7 @@ void Jit64::lwzux(UGeckoInstruction inst)
MEMCHECK_START MEMCHECK_START
gpr.Lock(d); gpr.Lock(d);
gpr.LoadToX64(d, b == d, true); gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
@ -392,7 +382,11 @@ void Jit64::stX(UGeckoInstruction inst)
MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX)); MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
#endif #endif
if (update) if (update)
{
gpr.Lock(a);
gpr.KillImmediate(a, true, true);
ADD(32, gpr.R(a), Imm32(offset)); ADD(32, gpr.R(a), Imm32(offset));
}
gpr.UnlockAllX(); gpr.UnlockAllX();
return; return;
} }
@ -403,7 +397,7 @@ void Jit64::stX(UGeckoInstruction inst)
{ {
// Fast and daring - requires 64-bit // Fast and daring - requires 64-bit
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
gpr.LoadToX64(a, true, false); gpr.BindToRegister(a, true, false);
BSWAP(32, EAX); BSWAP(32, EAX);
MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX)); MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
return; return;
@ -415,7 +409,7 @@ void Jit64::stX(UGeckoInstruction inst)
gpr.FlushLockX(ECX, EDX); gpr.FlushLockX(ECX, EDX);
gpr.Lock(s, a); gpr.Lock(s, a);
if (update && offset) if (update && offset)
gpr.LoadToX64(a, true, true); gpr.BindToRegister(a, true, true);
MOV(32, R(EDX), gpr.R(a)); MOV(32, R(EDX), gpr.R(a));
MOV(32, R(ECX), gpr.R(s)); MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, offset); SafeWriteRegToReg(ECX, EDX, accessSize, offset);
@ -453,7 +447,7 @@ void Jit64::stXx(UGeckoInstruction inst)
gpr.FlushLockX(ECX, EDX); gpr.FlushLockX(ECX, EDX);
if (inst.SUBOP10 & 32) { if (inst.SUBOP10 & 32) {
gpr.LoadToX64(a, true, true); gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b)); ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(EDX), gpr.R(a)); MOV(32, R(EDX), gpr.R(a));
} else { } else {
@ -496,7 +490,7 @@ void Jit64::lmw(UGeckoInstruction inst)
{ {
MOV(32, R(ECX), MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4)); MOV(32, R(ECX), MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4));
BSWAP(32, ECX); BSWAP(32, ECX);
gpr.LoadToX64(i, false, true); gpr.BindToRegister(i, false, true);
MOV(32, gpr.R(i), R(ECX)); MOV(32, gpr.R(i), R(ECX));
} }
gpr.UnlockAllX(); gpr.UnlockAllX();

View File

@ -77,7 +77,7 @@ void Jit64::lfs(UGeckoInstruction inst)
MOV(32, M(&temp32), R(EAX)); MOV(32, M(&temp32), R(EAX));
fpr.Lock(d); fpr.Lock(d);
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
CVTSS2SD(fpr.RX(d), M(&temp32)); CVTSS2SD(fpr.RX(d), M(&temp32));
MOVDDUP(fpr.RX(d), fpr.R(d)); MOVDDUP(fpr.RX(d), fpr.R(d));
@ -107,8 +107,8 @@ void Jit64::lfd(UGeckoInstruction inst)
gpr.Lock(a); gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
// TODO - optimize. This has to load the previous value - upper double should stay unmodified. // TODO - optimize. This has to load the previous value - upper double should stay unmodified.
fpr.LoadToX64(d, true);
fpr.Lock(d); fpr.Lock(d);
fpr.BindToRegister(d, true);
X64Reg xd = fpr.RX(d); X64Reg xd = fpr.RX(d);
if (cpu_info.bSSSE3) { if (cpu_info.bSSSE3) {
#ifdef _M_X64 #ifdef _M_X64
@ -184,7 +184,7 @@ void Jit64::stfd(UGeckoInstruction inst)
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
gpr.LoadToX64(a, true, false); gpr.BindToRegister(a, true, false);
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset)); LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
TEST(32, R(ABI_PARAM1), Imm32(0x0c000000)); TEST(32, R(ABI_PARAM1), Imm32(0x0c000000));
FixupBranch not_ram = J_CC(CC_Z); FixupBranch not_ram = J_CC(CC_Z);
@ -222,7 +222,7 @@ void Jit64::stfd(UGeckoInstruction inst)
#endif #endif
} else { } else {
#ifdef _M_X64 #ifdef _M_X64
fpr.LoadToX64(s, true, false); fpr.BindToRegister(s, true, false);
MOVSD(M(&temp64), fpr.RX(s)); MOVSD(M(&temp64), fpr.RX(s));
MEMCHECK_START MEMCHECK_START
@ -233,7 +233,7 @@ void Jit64::stfd(UGeckoInstruction inst)
MEMCHECK_END MEMCHECK_END
#else #else
fpr.LoadToX64(s, true, false); fpr.BindToRegister(s, true, false);
MOVSD(M(&temp64), fpr.RX(s)); MOVSD(M(&temp64), fpr.RX(s));
MEMCHECK_START MEMCHECK_START
@ -301,6 +301,7 @@ void Jit64::stfs(UGeckoInstruction inst)
{ {
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(a, false, true);
MOV(32, gpr.R(a), R(ABI_PARAM2)); MOV(32, gpr.R(a), R(ABI_PARAM2));
MEMCHECK_END MEMCHECK_END
@ -345,7 +346,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
} }
if (cpu_info.bSSSE3 && !js.memcheck) { if (cpu_info.bSSSE3 && !js.memcheck) {
fpr.Lock(inst.RS); fpr.Lock(inst.RS);
fpr.LoadToX64(inst.RS, false, true); fpr.BindToRegister(inst.RS, false, true);
X64Reg r = fpr.R(inst.RS).GetSimpleReg(); X64Reg r = fpr.R(inst.RS).GetSimpleReg();
#ifdef _M_IX86 #ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
@ -368,7 +369,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
MOV(32, M(&temp32), R(EAX)); MOV(32, M(&temp32), R(EAX));
CVTSS2SD(XMM0, M(&temp32)); CVTSS2SD(XMM0, M(&temp32));
fpr.Lock(inst.RS); fpr.Lock(inst.RS);
fpr.LoadToX64(inst.RS, false, true); fpr.BindToRegister(inst.RS, false, true);
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0)); MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
MEMCHECK_END MEMCHECK_END

View File

@ -99,8 +99,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
gpr.FlushLockX(EAX, EDX); gpr.FlushLockX(EAX, EDX);
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
if (update) if (update)
gpr.LoadToX64(inst.RA, true, true); gpr.BindToRegister(inst.RA, true, true);
fpr.LoadToX64(inst.RS, true); fpr.BindToRegister(inst.RS, true, false);
MOV(32, R(ECX), gpr.R(inst.RA)); MOV(32, R(ECX), gpr.R(inst.RA));
if (offset) if (offset)
ADD(32, R(ECX), Imm32((u32)offset)); ADD(32, R(ECX), Imm32((u32)offset));
@ -159,8 +159,8 @@ void Jit64::psq_l(UGeckoInstruction inst)
gpr.FlushLockX(EAX, EDX); gpr.FlushLockX(EAX, EDX);
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.LoadToX64(inst.RA, true, true); gpr.BindToRegister(inst.RA, true, update && offset);
fpr.LoadToX64(inst.RS, false, true); fpr.BindToRegister(inst.RS, false, true);
if (offset) if (offset)
LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset)); LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset));
else else

View File

@ -50,7 +50,7 @@ void Jit64::ps_mr(UGeckoInstruction inst)
int b = inst.FB; int b = inst.FB;
if (d == b) if (d == b)
return; return;
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), fpr.R(b)); MOVAPD(fpr.RX(d), fpr.R(b));
} }
@ -72,8 +72,8 @@ void Jit64::ps_sel(UGeckoInstruction inst)
fpr.FlushLockX(XMM7); fpr.FlushLockX(XMM7);
fpr.FlushLockX(XMM6); fpr.FlushLockX(XMM6);
fpr.Lock(a, b, c, d); fpr.Lock(a, b, c, d);
fpr.LoadToX64(a, true, false); fpr.BindToRegister(a, true, false);
fpr.LoadToX64(d, false, true); fpr.BindToRegister(d, false, true);
// BLENDPD would have been nice... // BLENDPD would have been nice...
MOVAPD(XMM7, fpr.R(a)); MOVAPD(XMM7, fpr.R(a));
CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111 CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
@ -99,12 +99,12 @@ void Jit64::ps_sign(UGeckoInstruction inst)
fpr.Lock(d, b); fpr.Lock(d, b);
if (d != b) if (d != b)
{ {
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), fpr.R(b)); MOVAPD(fpr.RX(d), fpr.R(b));
} }
else else
{ {
fpr.LoadToX64(d, true); fpr.BindToRegister(d, true);
} }
switch (inst.SUBOP10) switch (inst.SUBOP10)
@ -133,6 +133,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
int d = inst.FD; int d = inst.FD;
int b = inst.FB; int b = inst.FB;
fpr.Lock(d, b); fpr.Lock(d, b);
fpr.BindToRegister(d, (d == b), true);
SQRTPD(XMM0, fpr.R(b)); SQRTPD(XMM0, fpr.R(b));
MOVAPD(XMM1, M((void*)&psOneOne)); MOVAPD(XMM1, M((void*)&psOneOne));
DIVPD(XMM1, R(XMM0)); DIVPD(XMM1, R(XMM0));
@ -161,24 +162,24 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X6
if (d == a) if (d == a)
{ {
fpr.LoadToX64(d, true); fpr.BindToRegister(d, true);
(this->*op)(fpr.RX(d), fpr.R(b)); (this->*op)(fpr.RX(d), fpr.R(b));
} }
else if (d == b && reversible) else if (d == b && reversible)
{ {
fpr.LoadToX64(d, true); fpr.BindToRegister(d, true);
(this->*op)(fpr.RX(d), fpr.R(a)); (this->*op)(fpr.RX(d), fpr.R(a));
} }
else if (a != d && b != d) else if (a != d && b != d)
{ {
//sources different from d, can use rather quick solution //sources different from d, can use rather quick solution
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), fpr.R(a)); MOVAPD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), fpr.R(b)); (this->*op)(fpr.RX(d), fpr.R(b));
} }
else if (b != d) else if (b != d)
{ {
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
MOVAPD(XMM0, fpr.R(b)); MOVAPD(XMM0, fpr.R(b));
MOVAPD(fpr.RX(d), fpr.R(a)); MOVAPD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0)); (this->*op)(fpr.RX(d), Gen::R(XMM0));
@ -187,7 +188,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X6
{ {
MOVAPD(XMM0, fpr.R(a)); MOVAPD(XMM0, fpr.R(a));
MOVAPD(XMM1, fpr.R(b)); MOVAPD(XMM1, fpr.R(b));
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
(this->*op)(XMM0, Gen::R(XMM1)); (this->*op)(XMM0, Gen::R(XMM1));
MOVAPD(fpr.RX(d), Gen::R(XMM0)); MOVAPD(fpr.RX(d), Gen::R(XMM0));
} }
@ -231,7 +232,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
int b = inst.FB; int b = inst.FB;
int c = inst.FC; int c = inst.FC;
fpr.Lock(a,b,c,d); fpr.Lock(a,b,c,d);
fpr.LoadToX64(d, d == a || d == b || d == c, true); fpr.BindToRegister(d, d == a || d == b || d == c, true);
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 10: case 10:
@ -271,7 +272,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
int a = inst.FA; int a = inst.FA;
int c = inst.FC; int c = inst.FC;
fpr.Lock(a, c, d); fpr.Lock(a, c, d);
fpr.LoadToX64(d, d == a || d == c, true); fpr.BindToRegister(d, d == a || d == c, true);
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 12: case 12:
@ -329,7 +330,7 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst)
default: default:
_assert_msg_(DYNA_REC, 0, "ps_merge - invalid op"); _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
} }
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), Gen::R(XMM0)); MOVAPD(fpr.RX(d), Gen::R(XMM0));
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -387,7 +388,7 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
//fpr.UnlockAll(); //fpr.UnlockAll();
return; return;
} }
fpr.LoadToX64(d, false); fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), Gen::R(XMM0)); MOVAPD(fpr.RX(d), Gen::R(XMM0));
ForceSinglePrecisionP(fpr.RX(d)); ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();

View File

@ -72,8 +72,11 @@ void Jit64::mtspr(UGeckoInstruction inst)
} }
// OK, this is easy. // OK, this is easy.
gpr.Lock(d); if (!gpr.R(d).IsImm())
gpr.LoadToX64(d, true); {
gpr.Lock(d);
gpr.BindToRegister(d, true, false);
}
MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -98,7 +101,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
// fall through // fall through
default: default:
gpr.Lock(d); gpr.Lock(d);
gpr.LoadToX64(d, false); gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
gpr.UnlockAll(); gpr.UnlockAll();
break; break;
@ -113,8 +116,13 @@ void Jit64::mtmsr(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(SystemRegisters) JITDISABLE(SystemRegisters)
gpr.LoadToX64(inst.RS, true, false); if (!gpr.R(inst.RS).IsImm())
{
gpr.Lock(inst.RS);
gpr.BindToRegister(inst.RS, true, false);
}
MOV(32, M(&MSR), gpr.R(inst.RS)); MOV(32, M(&MSR), gpr.R(inst.RS));
gpr.UnlockAll();
gpr.Flush(FLUSH_ALL); gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL);
WriteExit(js.compilerPC + 4, 0); WriteExit(js.compilerPC + 4, 0);
@ -127,8 +135,10 @@ void Jit64::mfmsr(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(SystemRegisters) JITDISABLE(SystemRegisters)
//Privileged? //Privileged?
gpr.LoadToX64(inst.RD, false); gpr.Lock(inst.RD);
gpr.BindToRegister(inst.RD, false, true);
MOV(32, gpr.R(inst.RD), M(&MSR)); MOV(32, gpr.R(inst.RD), M(&MSR));
gpr.UnlockAll();
} }
void Jit64::mftb(UGeckoInstruction inst) void Jit64::mftb(UGeckoInstruction inst)
@ -144,7 +154,8 @@ void Jit64::mfcr(UGeckoInstruction inst)
JITDISABLE(SystemRegisters) JITDISABLE(SystemRegisters)
// USES_CR // USES_CR
int d = inst.RD; int d = inst.RD;
gpr.LoadToX64(d, false, true); gpr.Lock(d);
gpr.KillImmediate(d, false, true);
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
for (int i = 1; i < 8; i++) { for (int i = 1; i < 8; i++) {
SHL(32, R(EAX), Imm8(4)); SHL(32, R(EAX), Imm8(4));
@ -175,7 +186,8 @@ void Jit64::mtcrf(UGeckoInstruction inst)
} }
else else
{ {
gpr.LoadToX64(inst.RS, true); gpr.Lock(inst.RS);
gpr.BindToRegister(inst.RS, true, false);
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
if ((crm & (0x80 >> i)) != 0) if ((crm & (0x80 >> i)) != 0)
@ -186,6 +198,7 @@ void Jit64::mtcrf(UGeckoInstruction inst)
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(EAX)); MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(EAX));
} }
} }
gpr.UnlockAll();
} }
} }
} }