Save only the registers that need to be saved rather than going through ProtectFunction.

comex 2013-09-25 00:29:00 -04:00
parent 2a339c926e
commit ebe4448749
14 changed files with 193 additions and 58 deletions
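
The idea: instead of routing every memory helper call through ProtectFunction (a thunk that saves a fixed, worst-case register set), each call site now passes a bitmask of the registers that are actually live — bits 0-15 for GPRs, bits 16-31 for XMMs — and the emitted code saves exactly those. A minimal sketch of the resulting call pattern, using the emitter APIs this commit adds (EmitHelperCall itself is a hypothetical wrapper, not part of the commit):

	// Sketch only: wrap a host-function call so that precisely the live
	// registers survive it. PushRegistersAndAlignStack also re-aligns RSP
	// and reserves Win64 shadow space before the CALL.
	void EmitHelperCall(Gen::XEmitter &e, void *helper, u32 registersInUse)
	{
		e.PushRegistersAndAlignStack(registersInUse); // PUSH live GPRs, MOVAPD live XMMs
		e.CALL(helper);                               // plain call, no thunk indirection
		e.PopRegistersAndAlignStack(registersInUse);  // exact inverse, POPs in reverse order
	}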

View File

@@ -1634,6 +1634,74 @@ void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u
	CALLptr(M(impptr));
}
+void XEmitter::PushRegistersAndAlignStack(u32 mask)
+{
+	int shadow = 0;
+#ifdef _WIN32
+	shadow = 0x20;
+#endif
+	int count = 0;
+	for (int r = 0; r < 16; r++)
+	{
+		if (mask & (1 << r))
+		{
+			PUSH((X64Reg) r);
+			count++;
+		}
+	}
+	int size = (count & 1) ? 0 : 8;
+	for (int x = 0; x < 16; x++)
+	{
+		if (mask & (1 << (16 + x)))
+			size += 16;
+	}
+	size += shadow;
+	if (size)
+		SUB(64, R(RSP), size >= 0x100 ? Imm32(size) : Imm8(size));
+	int offset = shadow;
+	for (int x = 0; x < 16; x++)
+	{
+		if (mask & (1 << (16 + x)))
+		{
+			MOVAPD(MDisp(RSP, offset), (X64Reg) x);
+			offset += 16;
+		}
+	}
+}
+void XEmitter::PopRegistersAndAlignStack(u32 mask)
+{
+	int size = 0;
+#ifdef _WIN32
+	size += 0x20;
+#endif
+	for (int x = 0; x < 16; x++)
+	{
+		if (mask & (1 << (16 + x)))
+		{
+			MOVAPD((X64Reg) x, MDisp(RSP, size));
+			size += 16;
+		}
+	}
+	int count = 0;
+	for (int r = 0; r < 16; r++)
+	{
+		if (mask & (1 << r))
+			count++;
+	}
+	size += (count & 1) ? 0 : 8;
+	if (size)
+		ADD(64, R(RSP), size >= 0x100 ? Imm32(size) : Imm8(size));
+	for (int r = 15; r >= 0; r--)
+	{
+		if (mask & (1 << r))
+		{
+			POP((X64Reg) r);
+		}
+	}
+}
#endif
}
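
The `(count & 1) ? 0 : 8` pad is x86-64 alignment bookkeeping: the CALL into this code leaves RSP at 8 mod 16, every PUSH toggles the alignment, MOVAPD faults on unaligned slots, and Win64 additionally wants 0x20 bytes of shadow space above RSP. A worked example under those assumptions (the specific mask is illustrative):

	// mask with 3 GPRs (RAX, RBX, RSI) and 2 XMMs (bits 16 and 17), on Win64:
	//   3 PUSHes: RSP goes from 8 mod 16 to 0 mod 16 -> odd count, pad = 0
	//   2 XMM save slots: size += 2 * 16 = 32
	//   shadow space:     size += 0x20
	// => SUB(64, R(RSP), Imm8(0x40)); the XMM saves land at RSP+0x20 and
	//    RSP+0x30, both 16-byte aligned, and RSP stays aligned for the CALL.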

View File

@@ -691,6 +691,9 @@ public:
#define DECLARE_IMPORT(x) extern "C" void *__imp_##x
+	void PushRegistersAndAlignStack(u32 mask);
+	void PopRegistersAndAlignStack(u32 mask);
#endif
}; // class XEmitter

View File

@@ -737,3 +737,21 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
	return normalEntry;
}
+u32 Jit64::RegistersInUse()
+{
+#ifdef _M_X64
+	u32 result = 0;
+	for (int i = 0; i < NUMXREGS; i++)
+	{
+		if (!gpr.IsFreeX(i))
+			result |= (1 << i);
+		if (!fpr.IsFreeX(i))
+			result |= (1 << (16 + i));
+	}
+	return result;
+#else
+	// not needed
+	return 0;
+#endif
+}
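
RegistersInUse packs both register files of the cache into a single u32 — bit i for GPR i, bit 16+i for XMM i — the same encoding PushRegistersAndAlignStack consumes. A hypothetical snapshot, purely illustrative:

	u32 mask = RegistersInUse();
	// mask == 0x00030009 would decode as:
	//   bits 0, 3   -> RAX and RBX hold live values
	//   bits 16, 17 -> XMM0 and XMM1 hold live values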

View File

@@ -72,6 +72,8 @@ public:
	void Jit(u32 em_address);
	const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);
+	u32 RegistersInUse();
	JitBlockCache *GetBlockCache() { return &blocks; }
	void Trace();

View File

@@ -76,11 +76,6 @@ void RegCache::LockX(int x1, int x2, int x3, int x4)
	if (x4 != 0xFF) xlocks[x4] = true;
}
-bool RegCache::IsFreeX(int xreg) const
-{
-	return xregs[xreg].free && !xlocks[xreg];
-}
void RegCache::UnlockAll()
{
	for (int i = 0; i < 32; i++)

View File

@@ -106,7 +106,11 @@ public:
	void UnlockAll();
	void UnlockAllX();
-	bool IsFreeX(int xreg) const;
+	bool IsFreeX(int xreg) const
+	{
+		return xregs[xreg].free && !xlocks[xreg];
+	}
	X64Reg GetFreeXReg();

View File

@@ -121,7 +121,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
	// do our job at first
	s32 offset = (s32)(s16)inst.SIMM_16;
	gpr.Lock(d);
-	SafeLoadToEAX(gpr.R(a), accessSize, offset, signExtend);
+	SafeLoadToEAX(gpr.R(a), accessSize, offset, RegistersInUse(), signExtend);
	gpr.KillImmediate(d, false, true);
	MOV(32, gpr.R(d), R(EAX));
	gpr.UnlockAll();
@@ -193,7 +193,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
		}
	}
-	SafeLoadToEAX(opAddress, accessSize, 0, signExtend);
+	SafeLoadToEAX(opAddress, accessSize, 0, RegistersInUse(), signExtend);
	// We must flush immediate values from the following registers because
	// they may change at runtime if no MMU exception has been raised
@@ -373,7 +373,7 @@ void Jit64::stX(UGeckoInstruction inst)
	gpr.Lock(s, a);
	MOV(32, R(EDX), gpr.R(a));
	MOV(32, R(ECX), gpr.R(s));
-	SafeWriteRegToReg(ECX, EDX, accessSize, offset);
+	SafeWriteRegToReg(ECX, EDX, accessSize, offset, RegistersInUse());
	if (update && offset)
	{
@@ -429,7 +429,7 @@ void Jit64::stXx(UGeckoInstruction inst)
	}
	MOV(32, R(ECX), gpr.R(s));
-	SafeWriteRegToReg(ECX, EDX, accessSize, 0);
+	SafeWriteRegToReg(ECX, EDX, accessSize, 0, RegistersInUse());
	gpr.UnlockAll();
	gpr.UnlockAllX();

View File

@@ -50,7 +50,7 @@ void Jit64::lfs(UGeckoInstruction inst)
	}
	s32 offset = (s32)(s16)inst.SIMM_16;
-	SafeLoadToEAX(gpr.R(a), 32, offset, false);
+	SafeLoadToEAX(gpr.R(a), 32, offset, RegistersInUse(), false);
	MEMCHECK_START
@@ -207,10 +207,10 @@ void Jit64::stfd(UGeckoInstruction inst)
		MOVAPD(XMM0, fpr.R(s));
		PSRLQ(XMM0, 32);
		MOVD_xmm(R(EAX), XMM0);
-		SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
+		SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));
		LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
-		SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);
+		SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());
	SetJumpTarget(exit);
@@ -282,7 +282,7 @@ void Jit64::stfs(UGeckoInstruction inst)
		MEMCHECK_END
	}
	CVTSD2SS(XMM0, fpr.R(s));
-	SafeWriteFloatToReg(XMM0, ABI_PARAM2);
+	SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
@@ -302,7 +302,7 @@ void Jit64::stfsx(UGeckoInstruction inst)
	ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
	CVTSD2SS(XMM0, fpr.R(inst.RS));
	MOVD_xmm(R(EAX), XMM0);
-	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
+	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
	gpr.UnlockAllX();
	fpr.UnlockAll();
@@ -337,7 +337,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
		MEMCHECK_END
	} else {
-		SafeLoadToEAX(R(EAX), 32, 0, false);
+		SafeLoadToEAX(R(EAX), 32, 0, RegistersInUse(), false);
		MEMCHECK_START

View File

@@ -77,6 +77,23 @@ struct RegInfo {
	RegInfo(RegInfo&); // DO NOT IMPLEMENT
};
+static u32 regsInUse(RegInfo& R) {
+#ifdef _M_X64
+	u32 result = 0;
+	for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++)
+	{
+		if (R.regs[i] != 0)
+			result |= (1 << i);
+		if (R.fregs[i] != 0)
+			result |= (1 << (16 + i));
+	}
+	return result;
+#else
+	// not needed
+	return 0;
+#endif
+}
static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) {
	unsigned& info = R.IInfo[Op - R.FirstI];
	if (info == 0) R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1);
@@ -634,7 +651,7 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
	if (RI.MakeProfile) {
		RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
	}
-	RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0);
+	RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI));
	if (RI.IInfo[I - RI.FirstI] & 4)
		regClearInst(RI, getOp1(I));
}
@@ -1337,7 +1354,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
		Jit->MOV(32, R(EAX), loc1);
	}
	Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
-	RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0);
+	RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI));
	if (RI.IInfo[I - RI.FirstI] & 4)
		fregClearInst(RI, getOp1(I));
	if (RI.IInfo[I - RI.FirstI] & 8)
@@ -1400,12 +1417,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
	Jit->PSRLQ(XMM0, 32);
	Jit->MOVD_xmm(R(EAX), XMM0);
	Jit->MOV(32, R(ECX), address);
-	RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0);
+	RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI));
	Jit->MOVAPD(XMM0, value);
	Jit->MOVD_xmm(R(EAX), XMM0);
	Jit->MOV(32, R(ECX), address);
-	RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4);
+	RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4, regsInUse(RI));
	Jit->SetJumpTarget(exit);
	if (RI.IInfo[I - RI.FirstI] & 4)

View File

@@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
	PACKSSDW(XMM0, R(XMM0));
	PACKUSWB(XMM0, R(XMM0));
	MOVD_xmm(R(EAX), XMM0);
-	SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
	RET();
@@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
	PACKSSWB(XMM0, R(XMM0));
	MOVD_xmm(R(EAX), XMM0);
-	SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
	RET();
@@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
	MOV(16, R(AX), M((char*)psTemp + 4));
	BSWAP(32, EAX);
-	SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
	RET();
@@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
	MOVD_xmm(R(EAX), XMM0);
	BSWAP(32, EAX);
	ROL(32, R(EAX), Imm8(16));
-	SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
	RET();
@@ -295,7 +295,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
	// Easy!
	const u8* storeSingleFloat = AlignCode4();
-	SafeWriteFloatToReg(XMM0, ECX, SAFE_WRITE_NO_FASTMEM);
+	SafeWriteFloatToReg(XMM0, ECX, 0, SAFE_WRITE_NO_FASTMEM);
	RET();
	/*
	if (cpu_info.bSSSE3) {
@@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
	MAXSS(XMM0, R(XMM1));
	MINSS(XMM0, M((void *)&m_255));
	CVTTSS2SI(EAX, R(XMM0));
-	SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
	RET();
	const u8* storeSingleS8 = AlignCode4();
@@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
	MAXSS(XMM0, M((void *)&m_m128));
	MINSS(XMM0, M((void *)&m_127));
	CVTTSS2SI(EAX, R(XMM0));
-	SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
	RET();
	const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
@@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
	MAXSS(XMM0, R(XMM1));
	MINSS(XMM0, M((void *)&m_65535));
	CVTTSS2SI(EAX, R(XMM0));
-	SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
	RET();
	const u8* storeSingleS16 = AlignCode4();
@@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
	MAXSS(XMM0, M((void *)&m_m32768));
	MINSS(XMM0, M((void *)&m_32767));
	CVTTSS2SI(EAX, R(XMM0));
-	SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
	RET();
	singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));

View File

@@ -56,7 +56,7 @@ void TrampolineCache::Shutdown()
}
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
-const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
+const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse)
{
	if (GetSpaceLeft() < 1024)
		PanicAlert("Trampoline cache full");
@@ -76,17 +76,18 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
	if (info.displacement) {
		ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
	}
+	PushRegistersAndAlignStack(registersInUse);
	switch (info.operandSize)
	{
	case 4:
-		CALL(thunks.ProtectFunction((void *)&Memory::Read_U32, 1));
+		CALL((void *)&Memory::Read_U32);
		break;
	case 2:
-		CALL(thunks.ProtectFunction((void *)&Memory::Read_U16, 1));
+		CALL((void *)&Memory::Read_U16);
		SHL(32, R(EAX), Imm8(16));
		break;
	case 1:
-		CALL(thunks.ProtectFunction((void *)&Memory::Read_U8, 1));
+		CALL((void *)&Memory::Read_U8);
		break;
	}
@@ -95,13 +96,14 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
		MOV(32, R(dataReg), R(EAX));
	}
+	PopRegistersAndAlignStack(registersInUse);
	RET();
#endif
	return trampoline;
}
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
-const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
+const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse)
{
	if (GetSpaceLeft() < 1024)
		PanicAlert("Trampoline cache full");
@@ -135,25 +137,24 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
		ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
	}
-	SUB(64, R(RSP), Imm8(8));
+	PushRegistersAndAlignStack(registersInUse);
	switch (info.operandSize)
	{
	case 8:
-		CALL(thunks.ProtectFunction((void *)&Memory::Write_U64, 2));
+		CALL((void *)&Memory::Write_U64);
		break;
	case 4:
-		CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2));
+		CALL((void *)&Memory::Write_U32);
		break;
	case 2:
-		CALL(thunks.ProtectFunction((void *)&Memory::Write_U16, 2));
+		CALL((void *)&Memory::Write_U16);
		break;
	case 1:
-		CALL(thunks.ProtectFunction((void *)&Memory::Write_U8, 2));
+		CALL((void *)&Memory::Write_U8);
		break;
	}
-	ADD(64, R(RSP), Imm8(8));
+	PopRegistersAndAlignStack(registersInUse);
	RET();
#endif
@@ -182,6 +183,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
		PanicAlert("BackPatch : Base reg not RBX."
		           "\n\nAttempted to access %08x.", emAddress);
+	auto it = registersInUseAtLoc.find(codePtr);
+	if (it == registersInUseAtLoc.end())
+		PanicAlert("BackPatch: no register use entry for address %p", codePtr);
+	u32 registersInUse = it->second;
	if (!info.isMemoryWrite)
	{
		XEmitter emitter(codePtr);
@@ -191,7 +197,8 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
			bswapNopCount = 3;
		else
			bswapNopCount = 2;
-		const u8 *trampoline = trampolines.GetReadTrampoline(info);
+		const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse);
		emitter.CALL((void *)trampoline);
		emitter.NOP((int)info.instructionSize + bswapNopCount - 5);
		return codePtr;
@@ -223,7 +230,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
		u8 *start = codePtr - bswapSize;
		XEmitter emitter(start);
-		const u8 *trampoline = trampolines.GetWriteTrampoline(info);
+		const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse);
		emitter.CALL((void *)trampoline);
		emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr());
		return start;
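
End to end, the read path now works like this: the fastmem MOV faults, the handler passes the faulting address in as codePtr, and that address is exactly the key the emitter recorded, so the replacement trampoline saves precisely the right set. A condensed sketch using only names from this commit (error handling elided):

	// codePtr is the faulting MOVZX recorded in registersInUseAtLoc at emit time
	u32 registersInUse = registersInUseAtLoc.find(codePtr)->second;
	const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse);
	emitter.CALL((void *)trampoline); // overwrite the fastmem access with the slow path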

View File

@@ -232,8 +232,8 @@ public:
	void Init();
	void Shutdown();
-	const u8 *GetReadTrampoline(const InstructionInfo &info);
-	const u8 *GetWriteTrampoline(const InstructionInfo &info);
+	const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
+	const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse);
private:
	ThunkManager thunks;
};

View File

@@ -58,21 +58,25 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
#endif
}
-void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
+u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
{
+	u8 *result;
#ifdef _M_X64
	if (opAddress.IsSimpleReg())
	{
+		result = GetWritableCodePtr();
		MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
	}
	else
	{
		MOV(32, R(EAX), opAddress);
+		result = GetWritableCodePtr();
		MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset));
	}
#else
	if (opAddress.IsImm())
	{
+		result = GetWritableCodePtr();
		MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
	}
	else
@@ -80,6 +84,7 @@ void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize,
		if (!opAddress.IsSimpleReg(EAX))
			MOV(32, R(EAX), opAddress);
		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
+		result = GetWritableCodePtr();
		MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base + offset));
	}
#endif
@@ -105,9 +110,10 @@ void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize,
		// TODO: bake 8-bit into the original load.
		MOVSX(32, accessSize, EAX, R(EAX));
	}
+	return result;
}
-void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
+void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend)
{
#if defined(_M_X64)
#ifdef ENABLE_MEM_CHECK
@@ -116,7 +122,11 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
	if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem)
#endif
	{
-		UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
+		u8 *mov = UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
+		// XXX: are these dead anyway?
+		registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
+		registersInUseAtLoc[mov] = registersInUse;
	}
	else
#endif
@@ -208,22 +218,26 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
	}
}
-void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
+u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{
+	u8 *result;
	if (accessSize == 8 && reg_value >= 4) {
		PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
	}
	if (swap) BSWAP(accessSize, reg_value);
#ifdef _M_X64
+	result = GetWritableCodePtr();
	MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
#else
	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
+	result = GetWritableCodePtr();
	MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#endif
+	return result;
}
// Destroys both arg registers
-void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, int flags)
+void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags)
{
#if defined(_M_X64)
	if (!Core::g_CoreStartupParameter.bMMU &&
@@ -234,12 +248,16 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
#endif
		)
	{
-		UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
+		u8 *mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
		if (accessSize == 8)
		{
			NOP(1);
			NOP(1);
		}
+		// XXX: are these dead anyway?
+		registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
+		registersInUseAtLoc[mov] = registersInUse;
		return;
	}
#endif
@@ -278,7 +296,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
	SetJumpTarget(exit);
}
-void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int flags)
+void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
{
	if (false && cpu_info.bSSSE3) {
		// This path should be faster but for some reason it causes errors so I've disabled it.
@@ -311,7 +329,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int fl
	} else {
		MOVSS(M(&float_buffer), xmm_value);
		MOV(32, R(EAX), M(&float_buffer));
-		SafeWriteRegToReg(EAX, reg_addr, 32, 0, flags);
+		SafeWriteRegToReg(EAX, reg_addr, 32, 0, registersInUse, flags);
	}
}
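
The other half of that handshake happens here at emit time: the Unsafe* helpers return the address of the bare MOV — the only instruction in the sequence that can fault — and the Safe* wrappers file the live mask under that address, minus the ABI scratch registers the helper call may clobber anyway. Condensed from the code above:

	u8 *mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset,
	                              !(flags & SAFE_WRITE_NO_SWAP));
	registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
	registersInUseAtLoc[mov] = registersInUse; // BackPatch looks this up by fault PC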

View File

@@ -7,25 +7,27 @@
#include "x64Emitter.h"
#include "Thunk.h"
+#include <unordered_map>
// Like XCodeBlock but has some utilities for memory access.
class EmuCodeBlock : public Gen::XCodeBlock {
public:
	void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
	void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
-	void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
-	void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
-	void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
+	// these return the address of the MOV, for backpatching
+	u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
+	u8 *UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
+	void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend);
	enum SafeWriteFlags
	{
		SAFE_WRITE_NO_SWAP = 1,
		SAFE_WRITE_NO_PROLOG = 2,
		SAFE_WRITE_NO_FASTMEM = 4
	};
-	void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, int flags = 0);
+	void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
	// Trashes both inputs and EAX.
-	void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, int flags = 0);
+	void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, u32 registersInUse, int flags = 0);
	void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
	void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
@@ -37,6 +39,7 @@ public:
	void ForceSinglePrecisionP(Gen::X64Reg xmm);
protected:
	ThunkManager thunks;
+	std::unordered_map<u8 *, u32> registersInUseAtLoc;
};
#endif // _JITUTIL_H