Save only the registers that need to be saved rather than going through ProtectFunction.

This commit is contained in:
comex 2013-09-25 00:29:00 -04:00
parent 2a339c926e
commit ebe4448749
14 changed files with 193 additions and 58 deletions

View File

@ -1634,6 +1634,74 @@ void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u
CALLptr(M(impptr));
}
// Saves the host registers named by |mask| (bits 0-15 = GPRs to PUSH,
// bits 16-31 = XMMs to store) and adjusts RSP so that it is 16-byte
// aligned at the end, with Win64 shadow space reserved below the XMM
// save area. Must be paired with PopRegistersAndAlignStack(mask).
void XEmitter::PushRegistersAndAlignStack(u32 mask)
{
	int shadow = 0;
#ifdef _WIN32
	// Win64 ABI: callees may clobber 0x20 bytes of shadow space above RSP.
	shadow = 0x20;
#endif
	int count = 0;
	for (int r = 0; r < 16; r++)
	{
		if (mask & (1 << r))
		{
			PUSH((X64Reg) r);
			count++;
		}
	}
	// On entry (after a CALL) RSP is 8 mod 16; an odd number of pushes
	// restores 16-byte alignment, an even number needs 8 padding bytes.
	int size = (count & 1) ? 0 : 8;
	for (int x = 0; x < 16; x++)
	{
		if (mask & (1 << (16 + x)))
			size += 16;
	}
	size += shadow;
	if (size)
	{
		// imm8 is sign-extended by SUB r64, imm8 (opcode 0x83), so only
		// values below 0x80 fit; larger sizes must use the imm32 form.
		SUB(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
	}
	// Spill the XMMs above the shadow space; RSP is 16-aligned here, so
	// the aligned MOVAPD stores are safe.
	int offset = shadow;
	for (int x = 0; x < 16; x++)
	{
		if (mask & (1 << (16 + x)))
		{
			MOVAPD(MDisp(RSP, offset), (X64Reg) x);
			offset += 16;
		}
	}
}
// Restores the registers saved by PushRegistersAndAlignStack(mask):
// reloads the XMMs from their stack slots, releases the stack
// adjustment, then POPs the GPRs in reverse order.
void XEmitter::PopRegistersAndAlignStack(u32 mask)
{
	int size = 0;
#ifdef _WIN32
	// Skip over the Win64 shadow space reserved below the XMM save area.
	size += 0x20;
#endif
	for (int x = 0; x < 16; x++)
	{
		if (mask & (1 << (16 + x)))
		{
			MOVAPD((X64Reg) x, MDisp(RSP, size));
			size += 16;
		}
	}
	// Recompute the alignment padding the push side added: an even number
	// of GPR pushes required 8 extra bytes.
	int count = 0;
	for (int r = 0; r < 16; r++)
	{
		if (mask & (1 << r))
			count++;
	}
	size += (count & 1) ? 0 : 8;
	if (size)
	{
		// imm8 is sign-extended by ADD r64, imm8 (opcode 0x83), so only
		// values below 0x80 fit; larger sizes must use the imm32 form.
		ADD(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
	}
	for (int r = 15; r >= 0; r--)
	{
		if (mask & (1 << r))
		{
			POP((X64Reg) r);
		}
	}
}
#endif
}

View File

@ -691,6 +691,9 @@ public:
#define DECLARE_IMPORT(x) extern "C" void *__imp_##x
void PushRegistersAndAlignStack(u32 mask);
void PopRegistersAndAlignStack(u32 mask);
#endif
}; // class XEmitter

View File

@ -737,3 +737,21 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
return normalEntry;
}
// Returns a bitmask of the host registers currently holding live data:
// bit i is set for occupied GPR i, bit 16+i for occupied XMM i. The
// layout matches the mask consumed by Push/PopRegistersAndAlignStack.
u32 Jit64::RegistersInUse()
{
#ifdef _M_X64
	u32 inUse = 0;
	for (int i = 0; i < NUMXREGS; i++)
	{
		if (!gpr.IsFreeX(i))
			inUse |= 1 << i;
		if (!fpr.IsFreeX(i))
			inUse |= 1 << (16 + i);
	}
	return inUse;
#else
	// The x86-32 backpatch path does not use this information.
	return 0;
#endif
}

View File

@ -72,6 +72,8 @@ public:
void Jit(u32 em_address);
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);
u32 RegistersInUse();
JitBlockCache *GetBlockCache() { return &blocks; }
void Trace();

View File

@ -76,11 +76,6 @@ void RegCache::LockX(int x1, int x2, int x3, int x4)
if (x4 != 0xFF) xlocks[x4] = true;
}
// A host register is available only when it is both unlocked and
// not currently allocated to a guest register.
bool RegCache::IsFreeX(int xreg) const
{
	return !xlocks[xreg] && xregs[xreg].free;
}
void RegCache::UnlockAll()
{
for (int i = 0; i < 32; i++)

View File

@ -106,7 +106,11 @@ public:
void UnlockAll();
void UnlockAllX();
bool IsFreeX(int xreg) const;
// True when host register |xreg| is neither allocated nor locked,
// i.e. it is safe to hand out.
bool IsFreeX(int xreg) const
{
return xregs[xreg].free && !xlocks[xreg];
}
X64Reg GetFreeXReg();

View File

@ -121,7 +121,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
// do our job at first
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.Lock(d);
SafeLoadToEAX(gpr.R(a), accessSize, offset, signExtend);
SafeLoadToEAX(gpr.R(a), accessSize, offset, RegistersInUse(), signExtend);
gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
@ -193,7 +193,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
}
}
SafeLoadToEAX(opAddress, accessSize, 0, signExtend);
SafeLoadToEAX(opAddress, accessSize, 0, RegistersInUse(), signExtend);
// We must flush immediate values from the following registers because
// they may change at runtime if no MMU exception has been raised
@ -373,7 +373,7 @@ void Jit64::stX(UGeckoInstruction inst)
gpr.Lock(s, a);
MOV(32, R(EDX), gpr.R(a));
MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, offset);
SafeWriteRegToReg(ECX, EDX, accessSize, offset, RegistersInUse());
if (update && offset)
{
@ -429,7 +429,7 @@ void Jit64::stXx(UGeckoInstruction inst)
}
MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
SafeWriteRegToReg(ECX, EDX, accessSize, 0, RegistersInUse());
gpr.UnlockAll();
gpr.UnlockAllX();

View File

@ -50,7 +50,7 @@ void Jit64::lfs(UGeckoInstruction inst)
}
s32 offset = (s32)(s16)inst.SIMM_16;
SafeLoadToEAX(gpr.R(a), 32, offset, false);
SafeLoadToEAX(gpr.R(a), 32, offset, RegistersInUse(), false);
MEMCHECK_START
@ -207,10 +207,10 @@ void Jit64::stfd(UGeckoInstruction inst)
MOVAPD(XMM0, fpr.R(s));
PSRLQ(XMM0, 32);
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());
SetJumpTarget(exit);
@ -282,7 +282,7 @@ void Jit64::stfs(UGeckoInstruction inst)
MEMCHECK_END
}
CVTSD2SS(XMM0, fpr.R(s));
SafeWriteFloatToReg(XMM0, ABI_PARAM2);
SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
@ -302,7 +302,7 @@ void Jit64::stfsx(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
CVTSD2SS(XMM0, fpr.R(inst.RS));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
gpr.UnlockAllX();
fpr.UnlockAll();
@ -337,7 +337,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
MEMCHECK_END
} else {
SafeLoadToEAX(R(EAX), 32, 0, false);
SafeLoadToEAX(R(EAX), 32, 0, RegistersInUse(), false);
MEMCHECK_START

View File

@ -77,6 +77,23 @@ struct RegInfo {
RegInfo(RegInfo&); // DO NOT IMPLEMENT
};
// Builds a bitmask of host registers the IR register allocator currently
// has occupied: bit i for GPR i, bit 16+i for FPR/XMM i. Matches the
// mask format used by SafeWriteRegToReg / the backpatch trampolines.
static u32 regsInUse(RegInfo& R) {
#ifdef _M_X64
	u32 inUse = 0;
	for (unsigned slot = 0; slot < MAX_NUMBER_OF_REGS; slot++)
	{
		if (R.regs[slot] != 0)
			inUse |= 1 << slot;
		if (R.fregs[slot] != 0)
			inUse |= 1 << (16 + slot);
	}
	return inUse;
#else
	// The x86-32 path does not consume this mask.
	return 0;
#endif
}
static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) {
unsigned& info = R.IInfo[Op - R.FirstI];
if (info == 0) R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1);
@ -634,7 +651,7 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
if (RI.MakeProfile) {
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
}
RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0);
RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI));
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(I));
}
@ -1337,7 +1354,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
Jit->MOV(32, R(EAX), loc1);
}
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI));
if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8)
@ -1400,12 +1417,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
Jit->PSRLQ(XMM0, 32);
Jit->MOVD_xmm(R(EAX), XMM0);
Jit->MOV(32, R(ECX), address);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI));
Jit->MOVAPD(XMM0, value);
Jit->MOVD_xmm(R(EAX), XMM0);
Jit->MOV(32, R(ECX), address);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4);
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4, regsInUse(RI));
Jit->SetJumpTarget(exit);
if (RI.IInfo[I - RI.FirstI] & 4)

View File

@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
PACKSSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOV(16, R(AX), M((char*)psTemp + 4));
BSWAP(32, EAX);
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOVD_xmm(R(EAX), XMM0);
BSWAP(32, EAX);
ROL(32, R(EAX), Imm8(16));
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
@ -295,7 +295,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
// Easy!
const u8* storeSingleFloat = AlignCode4();
SafeWriteFloatToReg(XMM0, ECX, SAFE_WRITE_NO_FASTMEM);
SafeWriteFloatToReg(XMM0, ECX, 0, SAFE_WRITE_NO_FASTMEM);
RET();
/*
if (cpu_info.bSSSE3) {
@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_255));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
const u8* storeSingleS8 = AlignCode4();
@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m128));
MINSS(XMM0, M((void *)&m_127));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_65535));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
const u8* storeSingleS16 = AlignCode4();
@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MAXSS(XMM0, M((void *)&m_m32768));
MINSS(XMM0, M((void *)&m_32767));
CVTTSS2SI(EAX, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
RET();
singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));

View File

@ -56,7 +56,7 @@ void TrampolineCache::Shutdown()
}
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
@ -76,17 +76,18 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
if (info.displacement) {
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
}
PushRegistersAndAlignStack(registersInUse);
switch (info.operandSize)
{
case 4:
CALL(thunks.ProtectFunction((void *)&Memory::Read_U32, 1));
CALL((void *)&Memory::Read_U32);
break;
case 2:
CALL(thunks.ProtectFunction((void *)&Memory::Read_U16, 1));
CALL((void *)&Memory::Read_U16);
SHL(32, R(EAX), Imm8(16));
break;
case 1:
CALL(thunks.ProtectFunction((void *)&Memory::Read_U8, 1));
CALL((void *)&Memory::Read_U8);
break;
}
@ -95,13 +96,14 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
MOV(32, R(dataReg), R(EAX));
}
PopRegistersAndAlignStack(registersInUse);
RET();
#endif
return trampoline;
}
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
@ -135,25 +137,24 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
}
SUB(64, R(RSP), Imm8(8));
PushRegistersAndAlignStack(registersInUse);
switch (info.operandSize)
{
case 8:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U64, 2));
CALL((void *)&Memory::Write_U64);
break;
case 4:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2));
CALL((void *)&Memory::Write_U32);
break;
case 2:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U16, 2));
CALL((void *)&Memory::Write_U16);
break;
case 1:
CALL(thunks.ProtectFunction((void *)&Memory::Write_U8, 2));
CALL((void *)&Memory::Write_U8);
break;
}
ADD(64, R(RSP), Imm8(8));
PopRegistersAndAlignStack(registersInUse);
RET();
#endif
@ -182,6 +183,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
PanicAlert("BackPatch : Base reg not RBX."
"\n\nAttempted to access %08x.", emAddress);
auto it = registersInUseAtLoc.find(codePtr);
if (it == registersInUseAtLoc.end())
PanicAlert("BackPatch: no register use entry for address %p", codePtr);
u32 registersInUse = it->second;
if (!info.isMemoryWrite)
{
XEmitter emitter(codePtr);
@ -191,7 +197,8 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
bswapNopCount = 3;
else
bswapNopCount = 2;
const u8 *trampoline = trampolines.GetReadTrampoline(info);
const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse);
emitter.CALL((void *)trampoline);
emitter.NOP((int)info.instructionSize + bswapNopCount - 5);
return codePtr;
@ -223,7 +230,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
u8 *start = codePtr - bswapSize;
XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info);
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse);
emitter.CALL((void *)trampoline);
emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr());
return start;

View File

@ -232,8 +232,8 @@ public:
void Init();
void Shutdown();
const u8 *GetReadTrampoline(const InstructionInfo &info);
const u8 *GetWriteTrampoline(const InstructionInfo &info);
const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse);
private:
ThunkManager thunks;
};

View File

@ -58,21 +58,25 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
#endif
}
void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
{
u8 *result;
#ifdef _M_X64
if (opAddress.IsSimpleReg())
{
result = GetWritableCodePtr();
MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
}
else
{
MOV(32, R(EAX), opAddress);
result = GetWritableCodePtr();
MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset));
}
#else
if (opAddress.IsImm())
{
result = GetWritableCodePtr();
MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
}
else
@ -80,6 +84,7 @@ void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize,
if (!opAddress.IsSimpleReg(EAX))
MOV(32, R(EAX), opAddress);
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
result = GetWritableCodePtr();
MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base + offset));
}
#endif
@ -105,9 +110,10 @@ void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize,
// TODO: bake 8-bit into the original load.
MOVSX(32, accessSize, EAX, R(EAX));
}
return result;
}
void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend)
{
#if defined(_M_X64)
#ifdef ENABLE_MEM_CHECK
@ -116,7 +122,11 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem)
#endif
{
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
u8 *mov = UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
// XXX: are these dead anyway?
registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
registersInUseAtLoc[mov] = registersInUse;
}
else
#endif
@ -208,22 +218,26 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
}
}
void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{
u8 *result;
if (accessSize == 8 && reg_value >= 4) {
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
}
if (swap) BSWAP(accessSize, reg_value);
#ifdef _M_X64
result = GetWritableCodePtr();
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
#else
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
result = GetWritableCodePtr();
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#endif
return result;
}
// Destroys both arg registers
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, int flags)
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags)
{
#if defined(_M_X64)
if (!Core::g_CoreStartupParameter.bMMU &&
@ -234,12 +248,16 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
#endif
)
{
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
u8 *mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
if (accessSize == 8)
{
NOP(1);
NOP(1);
}
// XXX: are these dead anyway?
registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
registersInUseAtLoc[mov] = registersInUse;
return;
}
#endif
@ -278,7 +296,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
SetJumpTarget(exit);
}
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int flags)
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
{
if (false && cpu_info.bSSSE3) {
// This path should be faster but for some reason it causes errors so I've disabled it.
@ -311,7 +329,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int fl
} else {
MOVSS(M(&float_buffer), xmm_value);
MOV(32, R(EAX), M(&float_buffer));
SafeWriteRegToReg(EAX, reg_addr, 32, 0, flags);
SafeWriteRegToReg(EAX, reg_addr, 32, 0, registersInUse, flags);
}
}

View File

@ -7,25 +7,27 @@
#include "x64Emitter.h"
#include "Thunk.h"
#include <unordered_map>
// Like XCodeBlock but has some utilities for memory access.
class EmuCodeBlock : public Gen::XCodeBlock {
public:
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
// these return the address of the MOV, for backpatching
u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
u8 *UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend);
enum SafeWriteFlags
{
SAFE_WRITE_NO_SWAP = 1,
SAFE_WRITE_NO_PROLOG = 2,
SAFE_WRITE_NO_FASTMEM = 4
};
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, int flags = 0);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
// Trashes both inputs and EAX.
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, int flags = 0);
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, u32 registersInUse, int flags = 0);
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
@ -37,6 +39,7 @@ public:
void ForceSinglePrecisionP(Gen::X64Reg xmm);
protected:
ThunkManager thunks;
std::unordered_map<u8 *, u32> registersInUseAtLoc;
};
#endif // _JITUTIL_H