MMU: handle exception checks in fastmem

Inspired by a patch by magumagu.
Fiora 2015-01-02 13:11:01 -08:00
parent 190312e1a6
commit 0ff6ad5734
10 changed files with 121 additions and 83 deletions
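In outline: with MMU emulation enabled, every JITed load/store used to be followed by an inline TEST of PPCSTATE(Exceptions) against EXCEPTION_DSI, even when the access took the fastmem fast path. After this change, a fastmem load/store carries no inline check at all; if the access faults, the backpatch trampoline performs the check itself and jumps to a per-instruction exception handler recorded in exceptionHandlerAtLoc. The sketch below models that control flow in plain C++ (hypothetical stand-in names, not Dolphin's emitter API):

#include <cstdint>
#include <cstdio>

constexpr uint32_t EXCEPTION_DSI = 0x1; // same flag the diff tests

struct PPCState { uint32_t Exceptions = 0; };

// Stand-in for the far-code handler that DoJit now records per load/store.
void far_handler(PPCState&) { std::puts("DSI -> flush registers, exit block"); }

// Stand-in for the trampoline: do the slow access, then (new in this commit)
// test for a DSI and divert to the handler instead of returning.
uint32_t trampoline_read(PPCState& ppc, uint32_t addr, void (*handler)(PPCState&))
{
    if (addr >= 0x1000)                  // pretend high addresses fault
        ppc.Exceptions |= EXCEPTION_DSI;
    if (ppc.Exceptions & EXCEPTION_DSI)
    {
        handler(ppc);                    // the real code J_CCs here and never RETs
        return 0;
    }
    return addr * 2;                     // pretend loaded value; real code RETs
}

int main()
{
    PPCState ppc;
    trampoline_read(ppc, 0x2000, far_handler); // faulting access
}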


@@ -178,6 +178,7 @@ void Jit64::Init()
jo.optimizeGatherPipe = true;
jo.accurateSinglePrecision = true;
js.memcheck = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU;
js.fastmemLoadStore = NULL;
gpr.SetEmitter(this);
fpr.SetEmitter(this);
@@ -612,6 +613,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
const GekkoOPInfo *opinfo = ops[i].opinfo;
js.downcountAmount += opinfo->numCycles;
js.fastmemLoadStore = NULL;
if (i == (code_block.m_num_instructions - 1))
{
@@ -761,19 +763,28 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
Jit64Tables::CompileInstruction(ops[i]);
// If we have a register that will never be used again, flush it.
for (int j : ~ops[i].gprInUse)
gpr.StoreFromRegister(j);
for (int j : ~ops[i].fprInUse)
fpr.StoreFromRegister(j);
if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
FixupBranch memException = J_CC(CC_NZ, true);
// If we have a fastmem loadstore, we can omit the exception check and let fastmem handle it.
FixupBranch memException;
if (!js.fastmemLoadStore)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
memException = J_CC(CC_NZ, true);
}
SwitchToFarCode();
SetJumpTarget(memException);
if (!js.fastmemLoadStore)
{
exceptionHandlerAtLoc[js.fastmemLoadStore] = NULL;
SetJumpTarget(memException);
}
else
{
exceptionHandlerAtLoc[js.fastmemLoadStore] = GetWritableCodePtr();
// the fastmem trampoline is jumping here, so we need to pop the return stack
ADD(64, R(RSP), Imm8(8));
}
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
@@ -785,6 +796,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
SwitchToNearCode();
}
// If we have a register that will never be used again, flush it.
for (int j : ~ops[i].gprInUse)
gpr.StoreFromRegister(j);
for (int j : ~ops[i].fprInUse)
fpr.StoreFromRegister(j);
if (opinfo->flags & FL_LOADSTORE)
++jit->js.numLoadStoreInst;
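The hunk above is the heart of the change. When the just-compiled instruction used fastmem (js.fastmemLoadStore is set), DoJit emits no inline test; instead it records the far-code entry point so the trampoline can jump there, and the far code pops the return address left over from the CALL that entered the trampoline. Conceptually, the two emitted shapes look like this (illustrative mnemonics, not actual emitter output):

// Non-fastmem load/store (inline check, as before):
//   test dword [ppcState.Exceptions], EXCEPTION_DSI
//   jnz  far_code                  ; handled in this block's far code
//
// Fastmem load/store (no inline check). If the access faults, BackPatch
// rewrites it into "call trampoline", and the trampoline now ends with:
//   test dword [ppcState.Exceptions], EXCEPTION_DSI
//   jnz  exceptionHandler          ; the far-code address recorded above
//   ret                            ; no exception: resume after the call
//
// The jnz enters far code while the call's return address is still on the
// stack, hence the far code's first instruction:
//   add rsp, 8                     ; ADD(64, R(RSP), Imm8(8)) in the hunk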


@@ -246,19 +246,19 @@ void Jit64::lXXx(UGeckoInstruction inst)
}
gpr.Lock(a, b, d);
if (update && storeAddress)
gpr.BindToRegister(a, true, true);
gpr.BindToRegister(d, js.memcheck, true);
BitSet32 registersInUse = CallerSavedRegistersInUse();
// We need to save the (usually scratch) address register for the update.
if (update && storeAddress)
{
// We need to save the (usually scratch) address register for the update.
registersInUse[RSCRATCH2] = true;
}
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
if (update && storeAddress)
{
gpr.BindToRegister(a, true, true);
MEMCHECK_START(false)
MEMCHECK_START
MOV(32, gpr.R(a), opAddress);
MEMCHECK_END
}
@@ -266,7 +266,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
// TODO: support no-swap in SafeLoadToReg instead
if (byte_reversed)
{
MEMCHECK_START(false)
MEMCHECK_START
BSWAP(accessSize, gpr.RX(d));
MEMCHECK_END
}
@@ -372,7 +372,7 @@ void Jit64::stX(UGeckoInstruction inst)
else
{
gpr.KillImmediate(a, true, true);
MEMCHECK_START(false)
MEMCHECK_START
ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END
}
@@ -404,7 +404,7 @@ void Jit64::stX(UGeckoInstruction inst)
if (update)
{
MEMCHECK_START(false)
MEMCHECK_START
ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END
}
@@ -425,12 +425,9 @@ void Jit64::stXx(UGeckoInstruction inst)
gpr.Lock(a, b, s);
if (update)
{
gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(RSCRATCH2), gpr.R(a));
}
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{
LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
}
@@ -462,7 +459,10 @@ void Jit64::stXx(UGeckoInstruction inst)
if (gpr.R(s).IsImm())
{
SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update)
registersInUse[RSCRATCH2] = true;
SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, registersInUse, byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
}
else
{
@@ -477,14 +477,16 @@ void Jit64::stXx(UGeckoInstruction inst)
gpr.BindToRegister(s, true, false);
reg_value = gpr.RX(s);
}
SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update)
registersInUse[RSCRATCH2] = true;
SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, registersInUse, byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
}
if (update && js.memcheck)
if (update)
{
// revert the address change if an exception occurred
MEMCHECK_START(true)
SUB(32, gpr.R(a), gpr.R(b));
MEMCHECK_START
MOV(32, gpr.R(a), R(RSCRATCH2));
MEMCHECK_END;
}
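Note the inversion in the update handling: previously ra was updated before the store, and the inverted MEMCHECK block SUBtracted rb back out if a DSI occurred; now the updated address lives in RSCRATCH2 (kept alive across the call via registersInUse) and is only MOVed into ra after the store is known to have succeeded. Since a faulting fastmem store jumps straight into far code, the MOV is simply never reached on the exception path. A minimal model of the new ordering (hypothetical try_store helper, not Dolphin code):

#include <cstdint>

// Pretend store that reports whether a DSI was raised.
static bool try_store(uint32_t addr, uint32_t value)
{
    (void)value;
    return addr < 0x1000; // pretend high addresses fault
}

// stwux-style "store with update": commit the new address only on success.
static void stux_model(uint32_t& ra, uint32_t rb, uint32_t rs)
{
    const uint32_t addr = ra + rb; // updated EA, held in a scratch (RSCRATCH2)
    if (try_store(addr, rs))
        ra = addr;                 // the MOV inside MEMCHECK_START/END
    // on a fault ra is untouched; no revert-by-SUB needed
}

int main()
{
    uint32_t ra = 0x0F00, rb = 0x200, rs = 42;
    stux_model(ra, rb, rs); // faults: ra stays 0x0F00
}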


@@ -46,9 +46,9 @@ void Jit64::lfXXX(UGeckoInstruction inst)
}
else
{
addr = R(RSCRATCH);
addr = R(RSCRATCH2);
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
{
MOV(32, addr, gpr.R(b));
@@ -65,14 +65,14 @@ void Jit64::lfXXX(UGeckoInstruction inst)
offset = (s16)inst.SIMM_16;
}
fpr.Lock(d);
fpr.BindToRegister(d, js.memcheck || !single);
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update && js.memcheck)
registersInUse[RSCRATCH2] = true;
SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false);
fpr.Lock(d);
fpr.BindToRegister(d, js.memcheck || !single);
MEMCHECK_START(false)
MEMCHECK_START
if (single)
{
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
@@ -141,7 +141,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
else
{
gpr.KillImmediate(a, true, true);
MEMCHECK_START(false)
MEMCHECK_START
ADD(32, gpr.R(a), Imm32((u32)imm));
MEMCHECK_END
}
@@ -152,47 +152,43 @@ void Jit64::stfXXX(UGeckoInstruction inst)
}
s32 offset = 0;
if (update)
gpr.BindToRegister(a, true, true);
if (indexed)
{
if (update)
{
gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(RSCRATCH2), gpr.R(a));
}
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
{
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
{
MOV(32, R(RSCRATCH2), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH2), gpr.R(a));
}
MOV(32, R(RSCRATCH2), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH2), gpr.R(a));
}
}
else
{
if (update)
{
gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), Imm32(imm));
LEA(32, RSCRATCH2, MDisp(gpr.RX(a), imm));
}
else
{
offset = imm;
MOV(32, R(RSCRATCH2), gpr.R(a));
}
MOV(32, R(RSCRATCH2), gpr.R(a));
}
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, CallerSavedRegistersInUse());
BitSet32 registersInUse = CallerSavedRegistersInUse();
// We need to save the (usually scratch) address register for the update.
if (update)
registersInUse[RSCRATCH2] = true;
if (js.memcheck && update)
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
if (update)
{
// revert the address change if an exception occurred
MEMCHECK_START(true)
SUB(32, gpr.R(a), indexed ? gpr.R(b) : Imm32(imm));
MEMCHECK_START
MOV(32, gpr.R(a), R(RSCRATCH2));
MEMCHECK_END
}


@@ -78,7 +78,7 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
if (update && js.memcheck)
{
MEMCHECK_START(false)
MEMCHECK_START
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
@@ -137,7 +137,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));
MEMCHECK_START(false)
MEMCHECK_START
CVTPS2PD(fpr.RX(s), R(XMM0));
if (update && js.memcheck)
{


@@ -73,6 +73,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
BitSet32 registersInUse = it->second;
u8* exceptionHandler = NULL;
if (jit->js.memcheck)
{
auto it2 = exceptionHandlerAtLoc.find(codePtr);
if (it2 != exceptionHandlerAtLoc.end())
exceptionHandler = it2->second;
}
if (!info.isMemoryWrite)
{
XEmitter emitter(codePtr);
@@ -101,7 +109,7 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
totalSize += 3;
}
const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse);
const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse, exceptionHandler);
emitter.CALL((void *)trampoline);
int padding = totalSize - BACKPATCH_SIZE;
if (padding > 0)
@@ -113,14 +121,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
else
{
// TODO: special case FIFO writes. Also, support 32-bit mode.
auto it2 = pcAtLoc.find(codePtr);
if (it2 == pcAtLoc.end())
auto it3 = pcAtLoc.find(codePtr);
if (it3 == pcAtLoc.end())
{
PanicAlert("BackPatch: no pc entry for address %p", codePtr);
return nullptr;
}
u32 pc = it2->second;
u32 pc = it3->second;
u8 *start;
if (info.byteSwap || info.hasImmediate)
@@ -154,7 +162,7 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
start = codePtr - bswapSize;
}
XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse, pc);
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse, exceptionHandler, pc);
emitter.CALL((void *)trampoline);
ptrdiff_t padding = (codePtr - emitter.GetCodePtr()) + info.instructionSize;
if (padding > 0)


@@ -73,6 +73,7 @@ protected:
int downcountAmount;
u32 numLoadStoreInst;
u32 numFloatingPointInst;
u8* fastmemLoadStore;
bool firstFPInstructionFound;
bool isLastInstruction;


@@ -307,6 +307,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
u8 *mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
registersInUseAtLoc[mov] = registersInUse;
jit->js.fastmemLoadStore = mov;
}
else
{
@@ -349,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
MEMCHECK_START(false)
MEMCHECK_START
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
@@ -399,7 +400,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
}
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
MEMCHECK_START(false)
MEMCHECK_START
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
@@ -565,6 +566,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
registersInUseAtLoc[mov] = registersInUse;
pcAtLoc[mov] = jit->js.compilerPC;
jit->js.fastmemLoadStore = mov;
return;
}
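js.fastmemLoadStore is the handshake between the memory emitters and DoJit: reset to NULL before each instruction, set to the address of the patchable MOV when Safe{Load,Write} takes the fastmem path, and consumed both by DoJit (to record the far-code handler) and by MEMCHECK_START/END (to suppress the inline test). Condensed into one compilable sketch (stub emitters, hypothetical names, not the real API):

#include <unordered_map>
using u8 = unsigned char;

static u8 fake_code[2];
static u8* emit_fastmem_access() { return &fake_code[0]; } // patchable MOV site
static u8* emit_far_handler()    { return &fake_code[1]; } // far-code entry

struct JitState { u8* fastmemLoadStore = nullptr; };
static std::unordered_map<u8*, u8*> exceptionHandlerAtLoc;

static void compile_loadstore(JitState& js)
{
    js.fastmemLoadStore = nullptr;               // DoJit resets per instruction
    // CompileInstruction may take the fastmem path, marking the MOV location:
    js.fastmemLoadStore = emit_fastmem_access();
    if (js.fastmemLoadStore)                     // fastmem owns the check:
        exceptionHandlerAtLoc[js.fastmemLoadStore] = emit_far_handler();
    // else: emit the inline TEST/J_CC as before
}

int main() { JitState js; compile_loadstore(js); }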


@@ -12,16 +12,14 @@
namespace MMIO { class Mapping; }
// If inv is true, invert the check (i.e. skip over the associated code if an exception hits,
// instead of skipping over the code if an exception isn't hit).
#define MEMCHECK_START(inv) \
#define MEMCHECK_START \
Gen::FixupBranch memException; \
if (jit->js.memcheck) \
if (jit->js.memcheck && !jit->js.fastmemLoadStore) \
{ TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); \
memException = J_CC((inv) ? Gen::CC_Z : Gen::CC_NZ, true); }
memException = J_CC(Gen::CC_NZ, true); }
#define MEMCHECK_END \
if (jit->js.memcheck) \
if (jit->js.memcheck && !jit->js.fastmemLoadStore) \
SetJumpTarget(memException);
// We offset by 0x80 because the range of one byte memory offsets is
// [-0x80, 0x7f].
@@ -141,4 +139,5 @@ public:
protected:
std::unordered_map<u8 *, BitSet32> registersInUseAtLoc;
std::unordered_map<u8 *, u32> pcAtLoc;
std::unordered_map<u8 *, u8 *> exceptionHandlerAtLoc;
};
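The inv parameter could be dropped because its only users, the revert-style address updates in stXx and stfXXX, were rewritten above to commit-on-success, so the inverted (CC_Z) form no longer has a caller. With the new !fastmemLoadStore condition, a guarded update such as the MOV in stXx roughly expands to the following (non-fastmem case; when js.fastmemLoadStore is set, neither the test nor the jump target is emitted, and a faulting access bypasses the guarded code by entering far code directly):

// MEMCHECK_START
Gen::FixupBranch memException;
if (jit->js.memcheck && !jit->js.fastmemLoadStore)
{
    TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
    memException = J_CC(Gen::CC_NZ, true);  // skip the commit on a DSI
}
MOV(32, gpr.R(a), R(RSCRATCH2));            // the guarded code
// MEMCHECK_END
if (jit->js.memcheck && !jit->js.fastmemLoadStore)
    SetJumpTarget(memException);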


@@ -36,20 +36,20 @@ void TrampolineCache::Shutdown()
cachedTrampolines.clear();
}
const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse)
const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler)
{
TrampolineCacheKey key = { registersInUse, 0, info };
TrampolineCacheKey key = { registersInUse, exceptionHandler, 0, info };
auto it = cachedTrampolines.find(key);
if (it != cachedTrampolines.end())
return it->second;
const u8* trampoline = GenerateReadTrampoline(info, registersInUse);
const u8* trampoline = GenerateReadTrampoline(info, registersInUse, exceptionHandler);
cachedTrampolines[key] = trampoline;
return trampoline;
}
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse)
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
@@ -90,24 +90,29 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B
MOV(dataRegSize, R(dataReg), R(ABI_RETURN));
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
if (exceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
RET();
return trampoline;
}
const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc)
const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc)
{
TrampolineCacheKey key = { registersInUse, pc, info };
TrampolineCacheKey key = { registersInUse, exceptionHandler, pc, info };
auto it = cachedTrampolines.find(key);
if (it != cachedTrampolines.end())
return it->second;
const u8* trampoline = GenerateWriteTrampoline(info, registersInUse, pc);
const u8* trampoline = GenerateWriteTrampoline(info, registersInUse, exceptionHandler, pc);
cachedTrampolines[key] = trampoline;
return trampoline;
}
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc)
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
@@ -174,6 +179,11 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info,
}
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
if (exceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
RET();
return trampoline;
@@ -191,6 +201,7 @@ size_t TrampolineCacheKeyHasher::operator()(const TrampolineCacheKey& k) const
res ^= std::hash<bool>()(k.info.signExtend) << 2;
res ^= std::hash<bool>()(k.info.hasImmediate) << 3;
res ^= std::hash<bool>()(k.info.isMemoryWrite) << 4;
res ^= std::hash<u8*>()(k.exceptionHandler) << 5;
return res;
}
@@ -199,5 +210,6 @@ bool TrampolineCacheKey::operator==(const TrampolineCacheKey &other) const
{
return pc == other.pc &&
registersInUse == other.registersInUse &&
exceptionHandler == other.exceptionHandler &&
info == other.info;
}


@@ -17,6 +17,7 @@ const int BACKPATCH_SIZE = 5;
struct TrampolineCacheKey
{
BitSet32 registersInUse;
u8* exceptionHandler;
u32 pc;
InstructionInfo info;
@@ -34,13 +35,13 @@ public:
void Init();
void Shutdown();
const u8* GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse);
const u8* GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc);
const u8* GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler);
const u8* GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc);
void ClearCodeSpace();
private:
const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse);
const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc);
const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler);
const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc);
std::unordered_map<TrampolineCacheKey, const u8*, TrampolineCacheKeyHasher> cachedTrampolines;
};
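Since trampolines now embed a jump to a specific far-code handler, the handler address has to take part in the cache key, the hash, and operator==; otherwise two call sites differing only in their handler would share one trampoline and DSIs would be routed into the wrong block's far code. A small self-contained analogue of the keying (field types simplified from the real BitSet32/InstructionInfo):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <unordered_map>
using u8 = unsigned char;

struct Key
{
    uint32_t registersInUse; // simplified stand-in for BitSet32
    u8* exceptionHandler;    // new field: distinct handler => distinct trampoline
    uint32_t pc;
    bool operator==(const Key& o) const
    {
        return registersInUse == o.registersInUse &&
               exceptionHandler == o.exceptionHandler && pc == o.pc;
    }
};

struct KeyHasher
{
    size_t operator()(const Key& k) const
    {
        size_t res = std::hash<uint32_t>()(k.pc);
        res ^= std::hash<uint32_t>()(k.registersInUse) << 1;
        res ^= std::hash<u8*>()(k.exceptionHandler) << 5; // shift as in the diff
        return res;
    }
};

int main()
{
    std::unordered_map<Key, const u8*, KeyHasher> cache;
    static u8 h1, h2, t1, t2;
    cache[{0xFF, &h1, 0x8000}] = &t1; // same site, different handlers:
    cache[{0xFF, &h2, 0x8000}] = &t2; // two distinct cached trampolines
}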