Some JIT cleanup. Have not been able to figure out the "dirty-flag" mystery yet :(

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1269 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-11-23 12:33:02 +00:00
parent 3e7c80ab69
commit 117a05fa64
5 changed files with 50 additions and 74 deletions

View File

@ -96,7 +96,7 @@ namespace Jit64
void RegCache::UnlockAllX() void RegCache::UnlockAllX()
{ {
for (int i = 0; i < 16; i++) for (int i = 0; i < NUMXREGS; i++)
xlocks[i] = false; xlocks[i] = false;
} }
@ -175,24 +175,6 @@ namespace Jit64
} }
} }
bool GPRRegCache::IsXRegVolatile(X64Reg reg) const
{
#ifdef _WIN32
switch (reg)
{
case RAX: case RCX: case RDX: case R8: case R9: case R10: case R11:
#ifdef _M_IX86
case RBX:
#endif
return true;
default:
return false;
}
#else
return true;
#endif
}
void RegCache::DiscardRegContentsIfCached(int preg) void RegCache::DiscardRegContentsIfCached(int preg)
{ {
if (regs[preg].away && regs[preg].location.IsSimpleReg()) if (regs[preg].away && regs[preg].location.IsSimpleReg())
@ -211,23 +193,11 @@ namespace Jit64
regs[preg].location = Imm32(immValue); regs[preg].location = Imm32(immValue);
} }
bool FPURegCache::IsXRegVolatile(X64Reg reg) const
{
#ifdef _WIN32
// return true;
if (reg < 6)
return true;
else
return false;
#else
return true;
#endif
}
void GPRRegCache::Start(PPCAnalyst::BlockRegStats &stats) void GPRRegCache::Start(PPCAnalyst::BlockRegStats &stats)
{ {
RegCache::Start(stats); RegCache::Start(stats);
} }
void FPURegCache::Start(PPCAnalyst::BlockRegStats &stats) void FPURegCache::Start(PPCAnalyst::BlockRegStats &stats)
{ {
RegCache::Start(stats); RegCache::Start(stats);
@ -275,20 +245,23 @@ namespace Jit64
return M(&ppcState.ps[reg][0]); return M(&ppcState.ps[reg][0]);
} }
// eheh, this was a dupe.
void RegCache::KillImmediate(int preg) void RegCache::KillImmediate(int preg)
{ {
if (regs[preg].away && regs[preg].location.IsImm()) if (regs[preg].away && regs[preg].location.IsImm())
{ {
StoreFromX64(preg); LoadToX64(preg, true, true);
} }
} }
void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty) void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty)
{ {
if (!regs[i].away && regs[i].location.IsImm())
PanicAlert("Bad immedaite");
if (!regs[i].away || (regs[i].away && regs[i].location.IsImm())) if (!regs[i].away || (regs[i].away && regs[i].location.IsImm()))
{ {
X64Reg xr = GetFreeXReg(); X64Reg xr = GetFreeXReg();
if (xregs[xr].dirty) PanicAlert("Xreg already dirty");
if (xlocks[xr]) PanicAlert("GetFreeXReg returned locked register"); if (xlocks[xr]) PanicAlert("GetFreeXReg returned locked register");
xregs[xr].free = false; xregs[xr].free = false;
xregs[xr].ppcReg = i; xregs[xr].ppcReg = i;
@ -310,9 +283,11 @@ namespace Jit64
{ {
// HERE HOLDS: regs[i].away == true // HERE HOLDS: regs[i].away == true
// //
//reg location must be simplereg //reg location must be simplereg or immediate
if (regs[i].location.IsSimpleReg()) {
xregs[RX(i)].dirty |= makeDirty; xregs[RX(i)].dirty |= makeDirty;
} }
}
if (xlocks[RX(i)]) { if (xlocks[RX(i)]) {
PanicAlert("Seriously WTF, this reg should have been flushed"); PanicAlert("Seriously WTF, this reg should have been flushed");
} }
@ -322,7 +297,7 @@ namespace Jit64
{ {
if (regs[i].away) if (regs[i].away)
{ {
bool doStore = true; bool doStore;
if (regs[i].location.IsSimpleReg()) if (regs[i].location.IsSimpleReg())
{ {
X64Reg xr = RX(i); X64Reg xr = RX(i);
@ -334,10 +309,10 @@ namespace Jit64
else else
{ {
//must be immediate - do nothing //must be immediate - do nothing
doStore = true;
} }
OpArg newLoc = GetDefaultLocation(i); OpArg newLoc = GetDefaultLocation(i);
// if (doStore) //<-- Breaks JIT compilation
//if (doStore) <-- Breaks JIT compilation
MOV(32, newLoc, regs[i].location); MOV(32, newLoc, regs[i].location);
regs[i].location = newLoc; regs[i].location = newLoc;
regs[i].away = false; regs[i].away = false;
@ -349,6 +324,7 @@ namespace Jit64
_assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - load - imm"); _assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - load - imm");
if (!regs[i].away) if (!regs[i].away)
{ {
// Reg is at home in the memory register file. Let's pull it out.
X64Reg xr = GetFreeXReg(); X64Reg xr = GetFreeXReg();
_assert_msg_(DYNA_REC, xr >= 0 && xr < NUMXREGS, "WTF - load - invalid reg"); _assert_msg_(DYNA_REC, xr >= 0 && xr < NUMXREGS, "WTF - load - invalid reg");
xregs[xr].ppcReg = i; xregs[xr].ppcReg = i;
@ -363,6 +339,9 @@ namespace Jit64
} }
regs[i].location = newloc; regs[i].location = newloc;
regs[i].away = true; regs[i].away = true;
} else {
// There are no immediates in the FPR reg file, so we already had this in a register. Make dirty as necessary.
xregs[RX(i)].dirty |= makeDirty;
} }
} }
@ -389,11 +368,15 @@ namespace Jit64
void RegCache::Flush(FlushMode mode) void RegCache::Flush(FlushMode mode)
{ {
for (int i = 0; i < NUMXREGS; i++) {
if (xlocks[i])
PanicAlert("Somone forgot to unlock X64 reg %i.", i);
}
for (int i = 0; i < 32; i++) for (int i = 0; i < 32; i++)
{ {
if (locks[i]) if (locks[i])
{ {
_assert_msg_(DYNA_REC,0,"Somebody forgot some register locks."); PanicAlert("Somebody forgot to unlock PPC reg %i.", i);
} }
if (regs[i].away) if (regs[i].away)
{ {

View File

@ -64,9 +64,9 @@ namespace Jit64
private: private:
bool locks[32]; bool locks[32];
bool saved_locks[32]; bool saved_locks[32];
bool saved_xlocks[16]; bool saved_xlocks[NUMXREGS];
protected: protected:
bool xlocks[16]; bool xlocks[NUMXREGS];
PPCCachedReg regs[32]; PPCCachedReg regs[32];
X64CachedReg xregs[NUMXREGS]; X64CachedReg xregs[NUMXREGS];
@ -102,7 +102,7 @@ namespace Jit64
{ {
if (regs[preg].away && regs[preg].location.IsSimpleReg()) if (regs[preg].away && regs[preg].location.IsSimpleReg())
return regs[preg].location.GetSimpleReg(); return regs[preg].location.GetSimpleReg();
_assert_msg_(DYNA_REC,0,"Not so simple"); PanicAlert("Not so simple - %i", preg);
return (X64Reg)-1; return (X64Reg)-1;
} }
virtual OpArg GetDefaultLocation(int reg) const = 0; virtual OpArg GetDefaultLocation(int reg) const = 0;
@ -115,9 +115,6 @@ namespace Jit64
bool IsFreeX(int xreg) const; bool IsFreeX(int xreg) const;
X64Reg GetFreeXReg(); X64Reg GetFreeXReg();
int GetNumXRegs(){return 16;}
virtual bool IsXRegVolatile(X64Reg reg) const = 0;
void SaveState(); void SaveState();
void LoadState(); void LoadState();
@ -131,7 +128,6 @@ namespace Jit64
void StoreFromX64(int preg); void StoreFromX64(int preg);
OpArg GetDefaultLocation(int reg) const; OpArg GetDefaultLocation(int reg) const;
const int *GetAllocationOrder(int &count); const int *GetAllocationOrder(int &count);
bool IsXRegVolatile(X64Reg reg) const;
void SetImmediate32(int preg, u32 immValue); void SetImmediate32(int preg, u32 immValue);
}; };
@ -143,7 +139,6 @@ namespace Jit64
void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true); void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true);
void StoreFromX64(int preg); void StoreFromX64(int preg);
const int *GetAllocationOrder(int &count); const int *GetAllocationOrder(int &count);
bool IsXRegVolatile(X64Reg reg) const;
OpArg GetDefaultLocation(int reg) const; OpArg GetDefaultLocation(int reg) const;
}; };

View File

@ -373,7 +373,6 @@ namespace Jit64
int a = inst.RA, int a = inst.RA,
s = inst.RS; s = inst.RS;
gpr.LoadToX64(a, a == s, true); gpr.LoadToX64(a, a == s, true);
gpr.KillImmediate(s);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends
if (inst.Rc) { if (inst.Rc) {
@ -391,8 +390,8 @@ namespace Jit64
INSTRUCTION_START; INSTRUCTION_START;
int a = inst.RA, s = inst.RS; int a = inst.RA, s = inst.RS;
gpr.LoadToX64(a, a == s, true); gpr.LoadToX64(a, a == s, true);
gpr.KillImmediate(s); MOV(32, R(EAX), gpr.R(s));
MOVSX(32, 16, gpr.RX(a), gpr.R(s)); MOVSX(32, 16, gpr.RX(a), R(EAX));
if (inst.Rc) { if (inst.Rc) {
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
CALL((u8*)Asm::computeRc); CALL((u8*)Asm::computeRc);

View File

@ -339,8 +339,6 @@ namespace Jit64
BSWAP(accessSize, EAX); BSWAP(accessSize, EAX);
WriteToConstRamAddress(accessSize, R(EAX), addr); WriteToConstRamAddress(accessSize, R(EAX), addr);
return; return;
// PanicAlert("yum yum");
// This may be quite beneficial.
} }
// Other IO not worth the trouble. // Other IO not worth the trouble.
} }

View File

@ -134,6 +134,7 @@ void psq_st(UGeckoInstruction inst)
{ {
case QUANTIZE_FLOAT: case QUANTIZE_FLOAT:
{ {
// This one has quite a bit of optimization potential.
if (gpr.R(a).IsImm()) if (gpr.R(a).IsImm())
{ {
PanicAlert("Imm: %08x", gpr.R(a).offset); PanicAlert("Imm: %08x", gpr.R(a).offset);
@ -216,7 +217,7 @@ void psq_st(UGeckoInstruction inst)
CALL(ProtectFunction((void *)&WriteDual32, 0)); CALL(ProtectFunction((void *)&WriteDual32, 0));
#else #else
FixupBranch argh = J_CC(CC_NZ); FixupBranch argh = J_CC(CC_NZ);
MOV(32, R(ABI_PARAM1), M(((char*)&temp64)+4)); MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
BSWAP(32, ABI_PARAM1); BSWAP(32, ABI_PARAM1);
AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1)); MOV(32, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1));
@ -225,7 +226,7 @@ void psq_st(UGeckoInstruction inst)
MOV(32, MDisp(ABI_PARAM2, 4+(u32)Memory::base), R(ABI_PARAM1)); MOV(32, MDisp(ABI_PARAM2, 4+(u32)Memory::base), R(ABI_PARAM1));
FixupBranch arg2 = J(); FixupBranch arg2 = J();
SetJumpTarget(argh); SetJumpTarget(argh);
MOV(32, R(ABI_PARAM1), M(((char*)&temp64)+4)); MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
MOV(32, R(ABI_PARAM1), M(((char*)&temp64))); MOV(32, R(ABI_PARAM1), M(((char*)&temp64)));
ADD(32, R(ABI_PARAM2), Imm32(4)); ADD(32, R(ABI_PARAM2), Imm32(4));
@ -370,7 +371,7 @@ void psq_l(UGeckoInstruction inst)
CVTPS2PD(xd, R(xd)); CVTPS2PD(xd, R(xd));
} else { } else {
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.LoadToX64(inst.RA); gpr.LoadToX64(inst.RA, true, update);
// This can probably be optimized somewhat. // This can probably be optimized somewhat.
LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
@ -380,7 +381,7 @@ void psq_l(UGeckoInstruction inst)
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4)); MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
BSWAP(32, RAX); BSWAP(32, RAX);
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX)); MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
fpr.LoadToX64(inst.RS, false); fpr.LoadToX64(inst.RS, false, true);
X64Reg r = fpr.R(inst.RS).GetSimpleReg(); X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0])); CVTPS2PD(r, M(&psTemp[0]));
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -392,7 +393,7 @@ void psq_l(UGeckoInstruction inst)
} }
case QUANTIZE_U8: case QUANTIZE_U8:
{ {
gpr.LoadToX64(inst.RA); gpr.LoadToX64(inst.RA, true, update);
#ifdef _M_X64 #ifdef _M_X64
MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#else #else
@ -407,7 +408,7 @@ void psq_l(UGeckoInstruction inst)
PUNPCKLBW(XMM0, R(XMM1)); PUNPCKLBW(XMM0, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1)); PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PD(XMM0, R(XMM0)); CVTDQ2PD(XMM0, R(XMM0));
fpr.LoadToX64(inst.RS, false); fpr.LoadToX64(inst.RS, false, true);
X64Reg r = fpr.R(inst.RS).GetSimpleReg(); X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale])); MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0)); MULPD(r, R(XMM0));
@ -417,7 +418,7 @@ void psq_l(UGeckoInstruction inst)
break; break;
case QUANTIZE_S16: case QUANTIZE_S16:
{ {
gpr.LoadToX64(inst.RA); gpr.LoadToX64(inst.RA, true, update);
#ifdef _M_X64 #ifdef _M_X64
MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#else #else
@ -428,7 +429,7 @@ void psq_l(UGeckoInstruction inst)
BSWAP(32, EAX); BSWAP(32, EAX);
MOV(32, M(&temp64), R(EAX)); MOV(32, M(&temp64), R(EAX));
//INT3(); //INT3();
fpr.LoadToX64(inst.RS, false); fpr.LoadToX64(inst.RS, false, true);
X64Reg r = fpr.R(inst.RS).GetSimpleReg(); X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVD_xmm(XMM0, M(&temp64)); MOVD_xmm(XMM0, M(&temp64));
PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword.. PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword..