Fastmem: jump to trampolines instead of calling them
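Jumping should be slightly faster than calling, and it also lets us skip the NOP padding on the way back, because each trampoline now jumps directly to the instruction after the padding. Remove the trampoline cache: every trampoline now ends in a jump to the return address of one specific backpatched site, so it can no longer be shared between sites and caching is no longer useful.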
This commit is contained in:
parent
9923d705df
commit
2a8936312e
|
@ -785,8 +785,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
exceptionHandlerAtLoc[js.fastmemLoadStore] = GetWritableCodePtr();
|
exceptionHandlerAtLoc[js.fastmemLoadStore] = GetWritableCodePtr();
|
||||||
// the fastmem trampoline is jumping here, so we need to pop the return stack
|
|
||||||
ADD(64, R(RSP), Imm8(8));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||||
|
|
|
@ -83,7 +83,6 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
|
|
||||||
if (!info.isMemoryWrite)
|
if (!info.isMemoryWrite)
|
||||||
{
|
{
|
||||||
XEmitter emitter(codePtr);
|
|
||||||
int bswapNopCount;
|
int bswapNopCount;
|
||||||
if (info.byteSwap || info.operandSize == 1)
|
if (info.byteSwap || info.operandSize == 1)
|
||||||
bswapNopCount = 0;
|
bswapNopCount = 0;
|
||||||
|
@ -109,9 +108,11 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
totalSize += 3;
|
totalSize += 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse, exceptionHandler);
|
XEmitter emitter(codePtr);
|
||||||
emitter.CALL((void *)trampoline);
|
|
||||||
int padding = totalSize - BACKPATCH_SIZE;
|
int padding = totalSize - BACKPATCH_SIZE;
|
||||||
|
u8* returnPtr = codePtr + 5 + padding;
|
||||||
|
const u8* trampoline = trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr);
|
||||||
|
emitter.JMP(trampoline, true);
|
||||||
if (padding > 0)
|
if (padding > 0)
|
||||||
{
|
{
|
||||||
emitter.NOP(padding);
|
emitter.NOP(padding);
|
||||||
|
@ -162,9 +163,10 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
start = codePtr - bswapSize;
|
start = codePtr - bswapSize;
|
||||||
}
|
}
|
||||||
XEmitter emitter(start);
|
XEmitter emitter(start);
|
||||||
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse, exceptionHandler, pc);
|
ptrdiff_t padding = (codePtr - (start + 5)) + info.instructionSize;
|
||||||
emitter.CALL((void *)trampoline);
|
u8* returnPtr = start + 5 + padding;
|
||||||
ptrdiff_t padding = (codePtr - emitter.GetCodePtr()) + info.instructionSize;
|
const u8* trampoline = trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc);
|
||||||
|
emitter.JMP(trampoline, true);
|
||||||
if (padding > 0)
|
if (padding > 0)
|
||||||
{
|
{
|
||||||
emitter.NOP(padding);
|
emitter.NOP(padding);
|
||||||
|
|
|
@ -27,29 +27,14 @@ void TrampolineCache::Init()
|
||||||
void TrampolineCache::ClearCodeSpace()
|
void TrampolineCache::ClearCodeSpace()
|
||||||
{
|
{
|
||||||
X64CodeBlock::ClearCodeSpace();
|
X64CodeBlock::ClearCodeSpace();
|
||||||
cachedTrampolines.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void TrampolineCache::Shutdown()
|
void TrampolineCache::Shutdown()
|
||||||
{
|
{
|
||||||
FreeCodeSpace();
|
FreeCodeSpace();
|
||||||
cachedTrampolines.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler)
|
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr)
|
||||||
{
|
|
||||||
TrampolineCacheKey key = { registersInUse, exceptionHandler, 0, info };
|
|
||||||
|
|
||||||
auto it = cachedTrampolines.find(key);
|
|
||||||
if (it != cachedTrampolines.end())
|
|
||||||
return it->second;
|
|
||||||
|
|
||||||
const u8* trampoline = GenerateReadTrampoline(info, registersInUse, exceptionHandler);
|
|
||||||
cachedTrampolines[key] = trampoline;
|
|
||||||
return trampoline;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler)
|
|
||||||
{
|
{
|
||||||
if (GetSpaceLeft() < 1024)
|
if (GetSpaceLeft() < 1024)
|
||||||
PanicAlert("Trampoline cache full");
|
PanicAlert("Trampoline cache full");
|
||||||
|
@ -60,9 +45,7 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B
|
||||||
registersInUse[addrReg] = true;
|
registersInUse[addrReg] = true;
|
||||||
registersInUse[dataReg] = false;
|
registersInUse[dataReg] = false;
|
||||||
|
|
||||||
// It's a read. Easy.
|
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||||
// RSP alignment here is 8 due to the call.
|
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
|
|
||||||
|
|
||||||
int dataRegSize = info.operandSize == 8 ? 64 : 32;
|
int dataRegSize = info.operandSize == 8 ? 64 : 32;
|
||||||
MOVTwo(dataRegSize, ABI_PARAM1, addrReg, ABI_PARAM2, dataReg);
|
MOVTwo(dataRegSize, ABI_PARAM1, addrReg, ABI_PARAM2, dataReg);
|
||||||
|
@ -89,30 +72,17 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B
|
||||||
if (dataReg != ABI_RETURN)
|
if (dataReg != ABI_RETURN)
|
||||||
MOV(dataRegSize, R(dataReg), R(ABI_RETURN));
|
MOV(dataRegSize, R(dataReg), R(ABI_RETURN));
|
||||||
|
|
||||||
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
|
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||||
if (exceptionHandler)
|
if (exceptionHandler)
|
||||||
{
|
{
|
||||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
||||||
J_CC(CC_NZ, exceptionHandler);
|
J_CC(CC_NZ, exceptionHandler);
|
||||||
}
|
}
|
||||||
RET();
|
JMP(returnPtr, true);
|
||||||
return trampoline;
|
return trampoline;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc)
|
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr, u32 pc)
|
||||||
{
|
|
||||||
TrampolineCacheKey key = { registersInUse, exceptionHandler, pc, info };
|
|
||||||
|
|
||||||
auto it = cachedTrampolines.find(key);
|
|
||||||
if (it != cachedTrampolines.end())
|
|
||||||
return it->second;
|
|
||||||
|
|
||||||
const u8* trampoline = GenerateWriteTrampoline(info, registersInUse, exceptionHandler, pc);
|
|
||||||
cachedTrampolines[key] = trampoline;
|
|
||||||
return trampoline;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc)
|
|
||||||
{
|
{
|
||||||
if (GetSpaceLeft() < 1024)
|
if (GetSpaceLeft() < 1024)
|
||||||
PanicAlert("Trampoline cache full");
|
PanicAlert("Trampoline cache full");
|
||||||
|
@ -122,15 +92,13 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info,
|
||||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||||
|
|
||||||
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
|
|
||||||
// hardware access - we can take shortcuts.
|
|
||||||
// Don't treat FIFO writes specially for now because they require a burst
|
// Don't treat FIFO writes specially for now because they require a burst
|
||||||
// check anyway.
|
// check anyway.
|
||||||
|
|
||||||
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
|
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
|
||||||
MOV(32, PPCSTATE(pc), Imm32(pc));
|
MOV(32, PPCSTATE(pc), Imm32(pc));
|
||||||
|
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
|
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||||
|
|
||||||
if (info.hasImmediate)
|
if (info.hasImmediate)
|
||||||
{
|
{
|
||||||
|
@ -178,38 +146,13 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
|
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||||
if (exceptionHandler)
|
if (exceptionHandler)
|
||||||
{
|
{
|
||||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
||||||
J_CC(CC_NZ, exceptionHandler);
|
J_CC(CC_NZ, exceptionHandler);
|
||||||
}
|
}
|
||||||
RET();
|
JMP(returnPtr, true);
|
||||||
|
|
||||||
return trampoline;
|
return trampoline;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t TrampolineCacheKeyHasher::operator()(const TrampolineCacheKey& k) const
|
|
||||||
{
|
|
||||||
size_t res = std::hash<int>()(k.registersInUse.m_val);
|
|
||||||
res ^= std::hash<int>()(k.info.operandSize) >> 1;
|
|
||||||
res ^= std::hash<int>()(k.info.regOperandReg) >> 2;
|
|
||||||
res ^= std::hash<int>()(k.info.scaledReg) >> 3;
|
|
||||||
res ^= std::hash<u64>()(k.info.immediate) >> 4;
|
|
||||||
res ^= std::hash<int>()(k.pc) >> 5;
|
|
||||||
res ^= std::hash<int>()(k.info.displacement) << 1;
|
|
||||||
res ^= std::hash<bool>()(k.info.signExtend) << 2;
|
|
||||||
res ^= std::hash<bool>()(k.info.hasImmediate) << 3;
|
|
||||||
res ^= std::hash<bool>()(k.info.isMemoryWrite) << 4;
|
|
||||||
res ^= std::hash<u8*>()(k.exceptionHandler) << 5;
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool TrampolineCacheKey::operator==(const TrampolineCacheKey &other) const
|
|
||||||
{
|
|
||||||
return pc == other.pc &&
|
|
||||||
registersInUse == other.registersInUse &&
|
|
||||||
exceptionHandler == other.exceptionHandler &&
|
|
||||||
info == other.info;
|
|
||||||
}
|
|
||||||
|
|
|
@ -14,34 +14,13 @@
|
||||||
// We need at least this many bytes for backpatching.
|
// We need at least this many bytes for backpatching.
|
||||||
const int BACKPATCH_SIZE = 5;
|
const int BACKPATCH_SIZE = 5;
|
||||||
|
|
||||||
struct TrampolineCacheKey
|
|
||||||
{
|
|
||||||
BitSet32 registersInUse;
|
|
||||||
u8* exceptionHandler;
|
|
||||||
u32 pc;
|
|
||||||
InstructionInfo info;
|
|
||||||
|
|
||||||
bool operator==(const TrampolineCacheKey &other) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct TrampolineCacheKeyHasher
|
|
||||||
{
|
|
||||||
size_t operator()(const TrampolineCacheKey& k) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
class TrampolineCache : public Gen::X64CodeBlock
|
class TrampolineCache : public Gen::X64CodeBlock
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
void Init();
|
void Init();
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
|
|
||||||
const u8* GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler);
|
const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr);
|
||||||
const u8* GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc);
|
const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr, u32 pc);
|
||||||
void ClearCodeSpace();
|
void ClearCodeSpace();
|
||||||
|
|
||||||
private:
|
|
||||||
const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler);
|
|
||||||
const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc);
|
|
||||||
|
|
||||||
std::unordered_map<TrampolineCacheKey, const u8*, TrampolineCacheKeyHasher> cachedTrampolines;
|
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue