JIT: fix handling of PC in dispatcher/block cache.
Specifically, don't make any assumptions about what effective addresses are used for code, and correctly handle changes to MSR.DR/MSR.IR. (Split off from dynamic-bat.)
This commit is contained in:
parent
fa5a2474f4
commit
758e6406cd
|
@ -41,10 +41,8 @@ void CachedInterpreter::Run()
|
||||||
|
|
||||||
void CachedInterpreter::SingleStep()
|
void CachedInterpreter::SingleStep()
|
||||||
{
|
{
|
||||||
int block = GetBlockNumberFromStartAddress(PC);
|
const u8* normalEntry = jit->GetBlockCache()->Dispatch();
|
||||||
if (block >= 0)
|
const Instruction* code = reinterpret_cast<const Instruction*>(normalEntry);
|
||||||
{
|
|
||||||
Instruction* code = (Instruction*)GetCompiledCodeFromBlock(block);
|
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
|
@ -68,9 +66,6 @@ void CachedInterpreter::SingleStep()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Jit(PC);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void EndBlock(UGeckoInstruction data)
|
static void EndBlock(UGeckoInstruction data)
|
||||||
{
|
{
|
||||||
PC = NPC;
|
PC = NPC;
|
||||||
|
|
|
@ -396,29 +396,12 @@ void Jit64::JustWriteExit(u32 destination, bool bl, u32 after)
|
||||||
linkData.exitAddress = destination;
|
linkData.exitAddress = destination;
|
||||||
linkData.linkStatus = false;
|
linkData.linkStatus = false;
|
||||||
|
|
||||||
// Link opportunity!
|
|
||||||
int block;
|
|
||||||
if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
|
|
||||||
{
|
|
||||||
// It exists! Joy of joy!
|
|
||||||
JitBlock* jb = blocks.GetBlock(block);
|
|
||||||
const u8* addr = jb->checkedEntry;
|
|
||||||
linkData.exitPtrs = GetWritableCodePtr();
|
|
||||||
if (bl)
|
|
||||||
CALL(addr);
|
|
||||||
else
|
|
||||||
JMP(addr, true);
|
|
||||||
linkData.linkStatus = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV(32, PPCSTATE(pc), Imm32(destination));
|
MOV(32, PPCSTATE(pc), Imm32(destination));
|
||||||
linkData.exitPtrs = GetWritableCodePtr();
|
linkData.exitPtrs = GetWritableCodePtr();
|
||||||
if (bl)
|
if (bl)
|
||||||
CALL(asm_routines.dispatcher);
|
CALL(asm_routines.dispatcher);
|
||||||
else
|
else
|
||||||
JMP(asm_routines.dispatcher, true);
|
JMP(asm_routines.dispatcher, true);
|
||||||
}
|
|
||||||
|
|
||||||
b->linkData.push_back(linkData);
|
b->linkData.push_back(linkData);
|
||||||
|
|
||||||
|
|
|
@ -64,7 +64,7 @@ void Jit64AsmRoutineManager::Generate()
|
||||||
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0);
|
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ResetStack();
|
ResetStack(*this);
|
||||||
|
|
||||||
SUB(32, PPCSTATE(downcount), R(RSCRATCH2));
|
SUB(32, PPCSTATE(downcount), R(RSCRATCH2));
|
||||||
|
|
||||||
|
@ -102,31 +102,15 @@ void Jit64AsmRoutineManager::Generate()
|
||||||
MOV(64, R(RMEM), Imm64((u64)Memory::logical_base));
|
MOV(64, R(RMEM), Imm64((u64)Memory::logical_base));
|
||||||
SetJumpTarget(membaseend);
|
SetJumpTarget(membaseend);
|
||||||
|
|
||||||
|
// The following is a translation of JitBaseBlockCache::Dispatch into assembly.
|
||||||
|
|
||||||
|
// Fast block number lookup.
|
||||||
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
||||||
|
u64 icache = reinterpret_cast<u64>(jit->GetBlockCache()->GetICache());
|
||||||
// TODO: We need to handle code which executes the same PC with
|
AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2));
|
||||||
// different values of MSR.IR. It probably makes sense to handle
|
|
||||||
// MSR.DR here too, to allow IsOptimizableRAMAddress-based
|
|
||||||
// optimizations safe, because IR and DR are usually set/cleared together.
|
|
||||||
// TODO: Branching based on the 20 most significant bits of instruction
|
|
||||||
// addresses without translating them is wrong.
|
|
||||||
u64 icache = (u64)jit->GetBlockCache()->iCache.data();
|
|
||||||
u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data();
|
|
||||||
u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data();
|
|
||||||
u32 mask = 0;
|
|
||||||
FixupBranch no_mem;
|
|
||||||
FixupBranch exit_mem;
|
|
||||||
FixupBranch exit_vmem;
|
|
||||||
if (SConfig::GetInstance().bWii)
|
|
||||||
mask = JIT_ICACHE_EXRAM_BIT;
|
|
||||||
mask |= JIT_ICACHE_VMEM_BIT;
|
|
||||||
TEST(32, R(RSCRATCH), Imm32(mask));
|
|
||||||
no_mem = J_CC(CC_NZ);
|
|
||||||
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
|
|
||||||
|
|
||||||
if (icache <= INT_MAX)
|
if (icache <= INT_MAX)
|
||||||
{
|
{
|
||||||
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache));
|
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, static_cast<s32>(icache)));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -134,73 +118,46 @@ void Jit64AsmRoutineManager::Generate()
|
||||||
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
|
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
|
||||||
}
|
}
|
||||||
|
|
||||||
exit_mem = J();
|
// Check whether the block we found matches the current state.
|
||||||
SetJumpTarget(no_mem);
|
u64 blocks = reinterpret_cast<u64>(jit->GetBlockCache()->GetBlocks());
|
||||||
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
|
IMUL(32, RSCRATCH, R(RSCRATCH), Imm32(sizeof(JitBlock)));
|
||||||
FixupBranch no_vmem = J_CC(CC_Z);
|
if (blocks <= INT_MAX)
|
||||||
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
|
|
||||||
if (icacheVmem <= INT_MAX)
|
|
||||||
{
|
{
|
||||||
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem));
|
ADD(64, R(RSCRATCH), Imm32(static_cast<s32>(blocks)));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOV(64, R(RSCRATCH2), Imm64(icacheVmem));
|
MOV(64, R(RSCRATCH2), Imm64(blocks));
|
||||||
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
|
ADD(64, R(RSCRATCH), R(RSCRATCH2));
|
||||||
}
|
|
||||||
|
|
||||||
if (SConfig::GetInstance().bWii)
|
|
||||||
exit_vmem = J();
|
|
||||||
SetJumpTarget(no_vmem);
|
|
||||||
if (SConfig::GetInstance().bWii)
|
|
||||||
{
|
|
||||||
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
|
|
||||||
FixupBranch no_exram = J_CC(CC_Z);
|
|
||||||
AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));
|
|
||||||
|
|
||||||
if (icacheEx <= INT_MAX)
|
|
||||||
{
|
|
||||||
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV(64, R(RSCRATCH2), Imm64(icacheEx));
|
|
||||||
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
|
|
||||||
}
|
|
||||||
|
|
||||||
SetJumpTarget(no_exram);
|
|
||||||
}
|
|
||||||
SetJumpTarget(exit_mem);
|
|
||||||
if (SConfig::GetInstance().bWii)
|
|
||||||
SetJumpTarget(exit_vmem);
|
|
||||||
|
|
||||||
TEST(32, R(RSCRATCH), R(RSCRATCH));
|
|
||||||
FixupBranch notfound = J_CC(CC_L);
|
|
||||||
// grab from list and jump to it
|
|
||||||
u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers();
|
|
||||||
if (codePointers <= INT_MAX)
|
|
||||||
{
|
|
||||||
JMPptr(MScaled(RSCRATCH, SCALE_8, (s32)codePointers));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV(64, R(RSCRATCH2), Imm64(codePointers));
|
|
||||||
JMPptr(MComplex(RSCRATCH2, RSCRATCH, SCALE_8, 0));
|
|
||||||
}
|
}
|
||||||
|
// Check both block.effectiveAddress and block.msrBits.
|
||||||
|
MOV(32, R(RSCRATCH2), PPCSTATE(msr));
|
||||||
|
AND(32, R(RSCRATCH2), Imm32(JitBlock::JIT_CACHE_MSR_MASK));
|
||||||
|
SHL(64, R(RSCRATCH2), Imm8(32));
|
||||||
|
MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
|
||||||
|
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
|
||||||
|
CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, effectiveAddress))));
|
||||||
|
FixupBranch notfound = J_CC(CC_NE);
|
||||||
|
// Success; branch to the block we found.
|
||||||
|
JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, normalEntry))));
|
||||||
SetJumpTarget(notfound);
|
SetJumpTarget(notfound);
|
||||||
|
|
||||||
|
// Failure; call into the block cache to update the state, then try again.
|
||||||
|
// (We need to loop because Jit() might not actually generate a block
|
||||||
|
// if we hit an ISI.)
|
||||||
|
|
||||||
// We reset the stack because Jit might clear the code cache.
|
// We reset the stack because Jit might clear the code cache.
|
||||||
// Also if we are in the middle of disabling BLR optimization on windows
|
// Also if we are in the middle of disabling BLR optimization on windows
|
||||||
// we need to reset the stack before _resetstkoflw() is called in Jit
|
// we need to reset the stack before _resetstkoflw() is called in Jit
|
||||||
// otherwise we will generate a second stack overflow exception during DoJit()
|
// otherwise we will generate a second stack overflow exception during DoJit()
|
||||||
ResetStack();
|
ResetStack(*this);
|
||||||
|
|
||||||
// Ok, no block, let's jit
|
// Ok, no block, let's call the slow dispatcher
|
||||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||||
ABI_CallFunctionA(32, (void*)&Jit, PPCSTATE(pc));
|
ABI_CallFunction(reinterpret_cast<void*>(&JitBase::Dispatch));
|
||||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||||
|
// JMPptr(R(ABI_RETURN));
|
||||||
JMP(dispatcherNoCheck, true); // no point in special casing this
|
JMP(dispatcherNoCheck, true);
|
||||||
|
|
||||||
SetJumpTarget(bail);
|
SetJumpTarget(bail);
|
||||||
doTiming = GetCodePtr();
|
doTiming = GetCodePtr();
|
||||||
|
@ -217,7 +174,7 @@ void Jit64AsmRoutineManager::Generate()
|
||||||
// Landing pad for drec space
|
// Landing pad for drec space
|
||||||
if (SConfig::GetInstance().bEnableDebugging)
|
if (SConfig::GetInstance().bEnableDebugging)
|
||||||
SetJumpTarget(dbg_exit);
|
SetJumpTarget(dbg_exit);
|
||||||
ResetStack();
|
ResetStack(*this);
|
||||||
if (m_stack_top)
|
if (m_stack_top)
|
||||||
{
|
{
|
||||||
ADD(64, R(RSP), Imm8(0x18));
|
ADD(64, R(RSP), Imm8(0x18));
|
||||||
|
@ -232,12 +189,12 @@ void Jit64AsmRoutineManager::Generate()
|
||||||
GenerateCommon();
|
GenerateCommon();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64AsmRoutineManager::ResetStack()
|
void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter)
|
||||||
{
|
{
|
||||||
if (m_stack_top)
|
if (m_stack_top)
|
||||||
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
|
emitter.MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
|
||||||
else
|
else
|
||||||
MOV(64, R(RSP), M(&s_saved_rsp));
|
emitter.MOV(64, R(RSP), M(&s_saved_rsp));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64AsmRoutineManager::GenerateCommon()
|
void Jit64AsmRoutineManager::GenerateCommon()
|
||||||
|
|
|
@ -25,7 +25,6 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
void Generate();
|
void Generate();
|
||||||
void ResetStack();
|
|
||||||
void GenerateCommon();
|
void GenerateCommon();
|
||||||
u8* m_stack_top;
|
u8* m_stack_top;
|
||||||
|
|
||||||
|
@ -41,4 +40,5 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
void Shutdown() { FreeCodeSpace(); }
|
void Shutdown() { FreeCodeSpace(); }
|
||||||
|
void ResetStack(X64CodeBlock& emitter);
|
||||||
};
|
};
|
||||||
|
|
|
@ -310,6 +310,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
||||||
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
||||||
ABI_CallFunction((void*)JitInterface::InvalidateICache);
|
ABI_CallFunction((void*)JitInterface::InvalidateICache);
|
||||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||||
|
asm_routines.ResetStack(*this);
|
||||||
c = J(true);
|
c = J(true);
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
SetJumpTarget(c);
|
SetJumpTarget(c);
|
||||||
|
|
|
@ -391,6 +391,10 @@ void Jit64::mtmsr(UGeckoInstruction inst)
|
||||||
gpr.Flush();
|
gpr.Flush();
|
||||||
fpr.Flush();
|
fpr.Flush();
|
||||||
|
|
||||||
|
// Our jit cache also stores some MSR bits; as they have changed, we either
|
||||||
|
// have to validate them in the BLR/RET check, or just flush the stack here.
|
||||||
|
asm_routines.ResetStack(*this);
|
||||||
|
|
||||||
// If some exceptions are pending and EE are now enabled, force checking
|
// If some exceptions are pending and EE are now enabled, force checking
|
||||||
// external exceptions when going out of mtmsr in order to execute delayed
|
// external exceptions when going out of mtmsr in order to execute delayed
|
||||||
// interrupts as soon as possible.
|
// interrupts as soon as possible.
|
||||||
|
|
|
@ -373,19 +373,9 @@ void JitIL::WriteExit(u32 destination)
|
||||||
linkData.exitPtrs = GetWritableCodePtr();
|
linkData.exitPtrs = GetWritableCodePtr();
|
||||||
linkData.linkStatus = false;
|
linkData.linkStatus = false;
|
||||||
|
|
||||||
// Link opportunity!
|
|
||||||
int block;
|
|
||||||
if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
|
|
||||||
{
|
|
||||||
// It exists! Joy of joy!
|
|
||||||
JMP(blocks.GetBlock(block)->checkedEntry, true);
|
|
||||||
linkData.linkStatus = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV(32, PPCSTATE(pc), Imm32(destination));
|
MOV(32, PPCSTATE(pc), Imm32(destination));
|
||||||
JMP(asm_routines.dispatcher, true);
|
JMP(asm_routines.dispatcher, true);
|
||||||
}
|
|
||||||
b->linkData.push_back(linkData);
|
b->linkData.push_back(linkData);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@ void JitArm64BlockCache::WriteLinkBlock(u8* location, const JitBlock& block)
|
||||||
{
|
{
|
||||||
emit.B(CC_LE, block.normalEntry);
|
emit.B(CC_LE, block.normalEntry);
|
||||||
|
|
||||||
// We can't write DISPATCHER_PC here, as blink linking is only for 8bytes.
|
// We can't write DISPATCHER_PC here, as block linking may only use 8 bytes.
|
||||||
// So we'll hit two jumps when calling Advance.
|
// So we'll hit two jumps when calling Advance.
|
||||||
emit.B(block.checkedEntry);
|
emit.B(block.checkedEntry);
|
||||||
}
|
}
|
||||||
|
|
|
@ -791,11 +791,6 @@ void JitArm64::dcbz(UGeckoInstruction inst)
|
||||||
|
|
||||||
int a = inst.RA, b = inst.RB;
|
int a = inst.RA, b = inst.RB;
|
||||||
|
|
||||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
|
||||||
|
|
||||||
// The following masks the region used by the GC/Wii virtual memory lib
|
|
||||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
|
||||||
|
|
||||||
gpr.Lock(W0);
|
gpr.Lock(W0);
|
||||||
|
|
||||||
ARM64Reg addr_reg = W0;
|
ARM64Reg addr_reg = W0;
|
||||||
|
|
|
@ -48,54 +48,54 @@ void JitArm64::GenerateAsm()
|
||||||
|
|
||||||
dispatcherNoCheck = GetCodePtr();
|
dispatcherNoCheck = GetCodePtr();
|
||||||
|
|
||||||
FixupBranch exram, vmem, not_exram, not_vmem;
|
bool assembly_dispatcher = true;
|
||||||
|
|
||||||
|
if (assembly_dispatcher)
|
||||||
|
{
|
||||||
|
// iCache[(address >> 2) & iCache_Mask];
|
||||||
ARM64Reg pc_masked = W25;
|
ARM64Reg pc_masked = W25;
|
||||||
ARM64Reg cache_base = X27;
|
ARM64Reg cache_base = X27;
|
||||||
|
ARM64Reg block_num = W27;
|
||||||
|
ANDI2R(pc_masked, DISPATCHER_PC, JitBaseBlockCache::iCache_Mask << 2);
|
||||||
|
MOVP2R(cache_base, jit->GetBlockCache()->GetICache());
|
||||||
|
LDR(block_num, cache_base, EncodeRegTo64(pc_masked));
|
||||||
|
|
||||||
// VMEM
|
// blocks[block_num]
|
||||||
not_vmem = TBZ(DISPATCHER_PC, IntLog2(JIT_ICACHE_VMEM_BIT));
|
ARM64Reg block = X30;
|
||||||
ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHE_MASK);
|
ARM64Reg jit_block_size = W24;
|
||||||
MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCacheVMEM.data());
|
MOVI2R(jit_block_size, sizeof(JitBlock));
|
||||||
vmem = B();
|
MUL(block_num, block_num, jit_block_size);
|
||||||
SetJumpTarget(not_vmem);
|
MOVP2R(block, jit->GetBlockCache()->GetBlocks());
|
||||||
|
ADD(block, block, EncodeRegTo64(block_num));
|
||||||
|
|
||||||
if (SConfig::GetInstance().bWii)
|
// b.effectiveAddress != addr || b.msrBits != msr
|
||||||
{
|
ARM64Reg pc_and_msr = W25;
|
||||||
// Wii EX-RAM
|
ARM64Reg pc_and_msr2 = W24;
|
||||||
not_exram = TBZ(DISPATCHER_PC, IntLog2(JIT_ICACHE_EXRAM_BIT));
|
LDR(INDEX_UNSIGNED, pc_and_msr, block, offsetof(JitBlock, effectiveAddress));
|
||||||
ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHEEX_MASK);
|
CMP(pc_and_msr, DISPATCHER_PC);
|
||||||
MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCacheEx.data());
|
FixupBranch pc_missmatch = B(CC_NEQ);
|
||||||
exram = B();
|
|
||||||
SetJumpTarget(not_exram);
|
LDR(INDEX_UNSIGNED, pc_and_msr2, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
|
ANDI2R(pc_and_msr2, pc_and_msr2, JitBlock::JIT_CACHE_MSR_MASK);
|
||||||
|
LDR(INDEX_UNSIGNED, pc_and_msr, block, offsetof(JitBlock, msrBits));
|
||||||
|
CMP(pc_and_msr, pc_and_msr2);
|
||||||
|
FixupBranch msr_missmatch = B(CC_NEQ);
|
||||||
|
|
||||||
|
// return blocks[block_num].normalEntry;
|
||||||
|
LDR(INDEX_UNSIGNED, block, block, offsetof(JitBlock, normalEntry));
|
||||||
|
BR(block);
|
||||||
|
SetJumpTarget(pc_missmatch);
|
||||||
|
SetJumpTarget(msr_missmatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Common memory
|
// Call C version of Dispatch().
|
||||||
ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHE_MASK);
|
// FIXME: Implement this in inline assembly.
|
||||||
MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCache.data());
|
|
||||||
|
|
||||||
SetJumpTarget(vmem);
|
|
||||||
if (SConfig::GetInstance().bWii)
|
|
||||||
SetJumpTarget(exram);
|
|
||||||
|
|
||||||
LDR(W27, cache_base, EncodeRegTo64(pc_masked));
|
|
||||||
|
|
||||||
FixupBranch JitBlock = TBNZ(W27, 7); // Test the 7th bit
|
|
||||||
// Success, it is our Jitblock.
|
|
||||||
MOVI2R(X30, (u64)jit->GetBlockCache()->GetCodePointers());
|
|
||||||
UBFM(X27, X27, 61, 60); // Same as X27 << 3
|
|
||||||
LDR(X30, X30, X27); // Load the block address in to R14
|
|
||||||
BR(X30);
|
|
||||||
// No need to jump anywhere after here, the block will go back to dispatcher start
|
|
||||||
|
|
||||||
SetJumpTarget(JitBlock);
|
|
||||||
|
|
||||||
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
||||||
MOVI2R(X30, (u64) & ::Jit);
|
MOVP2R(X30, reinterpret_cast<void*>(&JitBase::Dispatch));
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
|
|
||||||
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
// Jump to next block.
|
||||||
|
BR(X0);
|
||||||
B(dispatcherNoCheck);
|
|
||||||
|
|
||||||
SetJumpTarget(bail);
|
SetJumpTarget(bail);
|
||||||
doTiming = GetCodePtr();
|
doTiming = GetCodePtr();
|
||||||
|
|
|
@ -55,6 +55,10 @@
|
||||||
#define JITDISABLE(setting) \
|
#define JITDISABLE(setting) \
|
||||||
FALLBACK_IF(SConfig::GetInstance().bJITOff || SConfig::GetInstance().setting)
|
FALLBACK_IF(SConfig::GetInstance().bJITOff || SConfig::GetInstance().setting)
|
||||||
|
|
||||||
|
class JitBase;
|
||||||
|
|
||||||
|
extern JitBase* jit;
|
||||||
|
|
||||||
class JitBase : public CPUCoreBase
|
class JitBase : public CPUCoreBase
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
|
@ -125,6 +129,7 @@ public:
|
||||||
JitOptions jo;
|
JitOptions jo;
|
||||||
JitState js;
|
JitState js;
|
||||||
|
|
||||||
|
static const u8* Dispatch() { return jit->GetBlockCache()->Dispatch(); };
|
||||||
virtual JitBaseBlockCache* GetBlockCache() = 0;
|
virtual JitBaseBlockCache* GetBlockCache() = 0;
|
||||||
|
|
||||||
virtual void Jit(u32 em_address) = 0;
|
virtual void Jit(u32 em_address) = 0;
|
||||||
|
@ -147,8 +152,6 @@ public:
|
||||||
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
|
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern JitBase* jit;
|
|
||||||
|
|
||||||
void Jit(u32 em_address);
|
void Jit(u32 em_address);
|
||||||
|
|
||||||
// Merged routines that should be moved somewhere better
|
// Merged routines that should be moved somewhere better
|
||||||
|
|
|
@ -34,26 +34,15 @@ bool JitBaseBlockCache::IsFull() const
|
||||||
|
|
||||||
void JitBaseBlockCache::Init()
|
void JitBaseBlockCache::Init()
|
||||||
{
|
{
|
||||||
if (m_initialized)
|
|
||||||
{
|
|
||||||
PanicAlert("JitBaseBlockCache::Init() - iCache is already initialized");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
JitRegister::Init(SConfig::GetInstance().m_perfDir);
|
JitRegister::Init(SConfig::GetInstance().m_perfDir);
|
||||||
|
|
||||||
iCache.fill(JIT_ICACHE_INVALID_BYTE);
|
iCache.fill(0);
|
||||||
iCacheEx.fill(JIT_ICACHE_INVALID_BYTE);
|
|
||||||
iCacheVMEM.fill(JIT_ICACHE_INVALID_BYTE);
|
|
||||||
Clear();
|
Clear();
|
||||||
|
|
||||||
m_initialized = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitBaseBlockCache::Shutdown()
|
void JitBaseBlockCache::Shutdown()
|
||||||
{
|
{
|
||||||
num_blocks = 0;
|
num_blocks = 0;
|
||||||
m_initialized = false;
|
|
||||||
|
|
||||||
JitRegister::Shutdown();
|
JitRegister::Shutdown();
|
||||||
}
|
}
|
||||||
|
@ -80,7 +69,8 @@ void JitBaseBlockCache::Clear()
|
||||||
valid_block.ClearAll();
|
valid_block.ClearAll();
|
||||||
|
|
||||||
num_blocks = 0;
|
num_blocks = 0;
|
||||||
blockCodePointers.fill(nullptr);
|
blocks[0].msrBits = 0xFFFFFFFF;
|
||||||
|
blocks[0].invalid = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitBaseBlockCache::Reset()
|
void JitBaseBlockCache::Reset()
|
||||||
|
@ -103,7 +93,9 @@ int JitBaseBlockCache::AllocateBlock(u32 em_address)
|
||||||
{
|
{
|
||||||
JitBlock& b = blocks[num_blocks];
|
JitBlock& b = blocks[num_blocks];
|
||||||
b.invalid = false;
|
b.invalid = false;
|
||||||
b.originalAddress = em_address;
|
b.effectiveAddress = em_address;
|
||||||
|
b.physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address;
|
||||||
|
b.msrBits = MSR & JitBlock::JIT_CACHE_MSR_MASK;
|
||||||
b.linkData.clear();
|
b.linkData.clear();
|
||||||
num_blocks++; // commit the current block
|
num_blocks++; // commit the current block
|
||||||
return num_blocks - 1;
|
return num_blocks - 1;
|
||||||
|
@ -111,13 +103,23 @@ int JitBaseBlockCache::AllocateBlock(u32 em_address)
|
||||||
|
|
||||||
void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8* code_ptr)
|
void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8* code_ptr)
|
||||||
{
|
{
|
||||||
blockCodePointers[block_num] = code_ptr;
|
|
||||||
JitBlock& b = blocks[block_num];
|
JitBlock& b = blocks[block_num];
|
||||||
|
if (start_block_map.count(b.physicalAddress))
|
||||||
|
{
|
||||||
|
// We already have a block at this address; invalidate the old block.
|
||||||
|
// This should be very rare. This will only happen if the same block
|
||||||
|
// is called both with DR/IR enabled and with DR/IR disabled.
|
||||||
|
WARN_LOG(DYNA_REC, "Invalidating compiled block at same address %08x", b.physicalAddress);
|
||||||
|
int old_block_num = start_block_map[b.physicalAddress];
|
||||||
|
const JitBlock& old_b = blocks[old_block_num];
|
||||||
|
block_map.erase(
|
||||||
|
std::make_pair(old_b.physicalAddress + 4 * old_b.originalSize - 1, old_b.physicalAddress));
|
||||||
|
DestroyBlock(old_block_num, true);
|
||||||
|
}
|
||||||
|
start_block_map[b.physicalAddress] = block_num;
|
||||||
|
FastLookupEntryForAddress(b.effectiveAddress) = block_num;
|
||||||
|
|
||||||
std::memcpy(GetICachePtr(b.originalAddress), &block_num, sizeof(u32));
|
u32 pAddr = b.physicalAddress;
|
||||||
|
|
||||||
// Convert the logical address to a physical address for the block map
|
|
||||||
u32 pAddr = b.originalAddress & 0x1FFFFFFF;
|
|
||||||
|
|
||||||
for (u32 block = pAddr / 32; block <= (pAddr + (b.originalSize - 1) * 4) / 32; ++block)
|
for (u32 block = pAddr / 32; block <= (pAddr + (b.originalSize - 1) * 4) / 32; ++block)
|
||||||
valid_block.Set(block);
|
valid_block.Set(block);
|
||||||
|
@ -135,46 +137,62 @@ void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8*
|
||||||
LinkBlockExits(block_num);
|
LinkBlockExits(block_num);
|
||||||
}
|
}
|
||||||
|
|
||||||
JitRegister::Register(blockCodePointers[block_num], b.codeSize, "JIT_PPC_%08x",
|
JitRegister::Register(b.checkedEntry, b.codeSize, "JIT_PPC_%08x", b.physicalAddress);
|
||||||
b.originalAddress);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8** JitBaseBlockCache::GetCodePointers()
|
int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr, u32 msr)
|
||||||
{
|
{
|
||||||
return blockCodePointers.data();
|
u32 translated_addr = addr;
|
||||||
}
|
if (UReg_MSR(msr).IR)
|
||||||
|
|
||||||
u8* JitBaseBlockCache::GetICachePtr(u32 addr)
|
|
||||||
{
|
{
|
||||||
if (addr & JIT_ICACHE_VMEM_BIT)
|
auto translated = PowerPC::JitCache_TranslateAddress(addr);
|
||||||
return &jit->GetBlockCache()->iCacheVMEM[addr & JIT_ICACHE_MASK];
|
if (!translated.valid)
|
||||||
|
|
||||||
if (addr & JIT_ICACHE_EXRAM_BIT)
|
|
||||||
return &jit->GetBlockCache()->iCacheEx[addr & JIT_ICACHEEX_MASK];
|
|
||||||
|
|
||||||
return &jit->GetBlockCache()->iCache[addr & JIT_ICACHE_MASK];
|
|
||||||
}
|
|
||||||
|
|
||||||
int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr)
|
|
||||||
{
|
{
|
||||||
u32 inst;
|
|
||||||
std::memcpy(&inst, GetICachePtr(addr), sizeof(u32));
|
|
||||||
|
|
||||||
if (inst & 0xfc000000) // definitely not a JIT block
|
|
||||||
return -1;
|
return -1;
|
||||||
|
}
|
||||||
if ((int)inst >= num_blocks)
|
translated_addr = translated.address;
|
||||||
return -1;
|
|
||||||
|
|
||||||
if (blocks[inst].originalAddress != addr)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
return inst;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CompiledCode JitBaseBlockCache::GetCompiledCodeFromBlock(int block_num)
|
auto map_result = start_block_map.find(translated_addr);
|
||||||
|
if (map_result == start_block_map.end())
|
||||||
|
return -1;
|
||||||
|
int block_num = map_result->second;
|
||||||
|
const JitBlock& b = blocks[block_num];
|
||||||
|
if (b.invalid)
|
||||||
|
return -1;
|
||||||
|
if (b.effectiveAddress != addr)
|
||||||
|
return -1;
|
||||||
|
if (b.msrBits != (msr & JitBlock::JIT_CACHE_MSR_MASK))
|
||||||
|
return -1;
|
||||||
|
return block_num;
|
||||||
|
}
|
||||||
|
|
||||||
|
void JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
|
||||||
{
|
{
|
||||||
return (CompiledCode)blockCodePointers[block_num];
|
int block_num = GetBlockNumberFromStartAddress(addr, msr);
|
||||||
|
if (block_num < 0)
|
||||||
|
{
|
||||||
|
Jit(addr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
FastLookupEntryForAddress(addr) = block_num;
|
||||||
|
LinkBlock(block_num);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const u8* JitBaseBlockCache::Dispatch()
|
||||||
|
{
|
||||||
|
int block_num = FastLookupEntryForAddress(PC);
|
||||||
|
|
||||||
|
while (blocks[block_num].effectiveAddress != PC ||
|
||||||
|
blocks[block_num].msrBits != (MSR & JitBlock::JIT_CACHE_MSR_MASK))
|
||||||
|
{
|
||||||
|
MoveBlockIntoFastCache(PC, MSR & JitBlock::JIT_CACHE_MSR_MASK);
|
||||||
|
block_num = FastLookupEntryForAddress(PC);
|
||||||
|
}
|
||||||
|
|
||||||
|
return blocks[block_num].normalEntry;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Block linker
|
// Block linker
|
||||||
|
@ -195,7 +213,7 @@ void JitBaseBlockCache::LinkBlockExits(int i)
|
||||||
{
|
{
|
||||||
if (!e.linkStatus)
|
if (!e.linkStatus)
|
||||||
{
|
{
|
||||||
int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress);
|
int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress, b.msrBits);
|
||||||
if (destinationBlock != -1)
|
if (destinationBlock != -1)
|
||||||
{
|
{
|
||||||
WriteLinkBlock(e.exitPtrs, blocks[destinationBlock]);
|
WriteLinkBlock(e.exitPtrs, blocks[destinationBlock]);
|
||||||
|
@ -208,17 +226,13 @@ void JitBaseBlockCache::LinkBlockExits(int i)
|
||||||
void JitBaseBlockCache::LinkBlock(int i)
|
void JitBaseBlockCache::LinkBlock(int i)
|
||||||
{
|
{
|
||||||
LinkBlockExits(i);
|
LinkBlockExits(i);
|
||||||
JitBlock& b = blocks[i];
|
const JitBlock& b = blocks[i];
|
||||||
// equal_range(b) returns pair<iterator,iterator> representing the range
|
auto ppp = links_to.equal_range(b.effectiveAddress);
|
||||||
// of element with key b
|
|
||||||
auto ppp = links_to.equal_range(b.originalAddress);
|
|
||||||
|
|
||||||
if (ppp.first == ppp.second)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (auto iter = ppp.first; iter != ppp.second; ++iter)
|
for (auto iter = ppp.first; iter != ppp.second; ++iter)
|
||||||
{
|
{
|
||||||
// PanicAlert("Linking block %i to block %i", iter->second, i);
|
const JitBlock& b2 = blocks[iter->second];
|
||||||
|
if (b.msrBits == b2.msrBits)
|
||||||
LinkBlockExits(iter->second);
|
LinkBlockExits(iter->second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -226,21 +240,21 @@ void JitBaseBlockCache::LinkBlock(int i)
|
||||||
void JitBaseBlockCache::UnlinkBlock(int i)
|
void JitBaseBlockCache::UnlinkBlock(int i)
|
||||||
{
|
{
|
||||||
JitBlock& b = blocks[i];
|
JitBlock& b = blocks[i];
|
||||||
auto ppp = links_to.equal_range(b.originalAddress);
|
auto ppp = links_to.equal_range(b.effectiveAddress);
|
||||||
|
|
||||||
if (ppp.first == ppp.second)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (auto iter = ppp.first; iter != ppp.second; ++iter)
|
for (auto iter = ppp.first; iter != ppp.second; ++iter)
|
||||||
{
|
{
|
||||||
JitBlock& sourceBlock = blocks[iter->second];
|
JitBlock& sourceBlock = blocks[iter->second];
|
||||||
|
if (sourceBlock.msrBits != b.msrBits)
|
||||||
|
continue;
|
||||||
|
|
||||||
for (auto& e : sourceBlock.linkData)
|
for (auto& e : sourceBlock.linkData)
|
||||||
{
|
{
|
||||||
if (e.exitAddress == b.originalAddress)
|
if (e.exitAddress == b.effectiveAddress)
|
||||||
e.linkStatus = false;
|
e.linkStatus = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
links_to.erase(b.originalAddress);
|
links_to.erase(b.effectiveAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate)
|
void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate)
|
||||||
|
@ -258,20 +272,23 @@ void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
b.invalid = true;
|
b.invalid = true;
|
||||||
std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32));
|
start_block_map.erase(b.physicalAddress);
|
||||||
|
FastLookupEntryForAddress(b.effectiveAddress) = 0;
|
||||||
|
|
||||||
UnlinkBlock(block_num);
|
UnlinkBlock(block_num);
|
||||||
|
|
||||||
// Send anyone who tries to run this block back to the dispatcher.
|
// Send anyone who tries to run this block back to the dispatcher.
|
||||||
// Not entirely ideal, but .. pretty good.
|
// Not entirely ideal, but .. pretty good.
|
||||||
// Spurious entrances from previously linked blocks can only come through checkedEntry
|
// Spurious entrances from previously linked blocks can only come through checkedEntry
|
||||||
WriteDestroyBlock(b.checkedEntry, b.originalAddress);
|
WriteDestroyBlock(b.checkedEntry, b.effectiveAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced)
|
void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced)
|
||||||
{
|
{
|
||||||
// Convert the logical address to a physical address for the block map
|
auto translated = PowerPC::JitCache_TranslateAddress(address);
|
||||||
u32 pAddr = address & 0x1FFFFFFF;
|
if (!translated.valid)
|
||||||
|
return;
|
||||||
|
u32 pAddr = translated.address;
|
||||||
|
|
||||||
// Optimize the common case of length == 32 which is used by Interpreter::dcb*
|
// Optimize the common case of length == 32 which is used by Interpreter::dcb*
|
||||||
bool destroy_block = true;
|
bool destroy_block = true;
|
||||||
|
@ -288,20 +305,11 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for
|
||||||
// address
|
// address
|
||||||
if (destroy_block)
|
if (destroy_block)
|
||||||
{
|
{
|
||||||
std::map<std::pair<u32, u32>, u32>::iterator it1 = block_map.lower_bound(
|
auto it = block_map.lower_bound(std::make_pair(pAddr, 0));
|
||||||
std::make_pair(pAddr, 0)),
|
while (it != block_map.end() && it->first.second < pAddr + length)
|
||||||
it2 = it1;
|
|
||||||
while (it2 != block_map.end() && it2->first.second < pAddr + length)
|
|
||||||
{
|
{
|
||||||
JitBlock& b = blocks[it2->second];
|
DestroyBlock(it->second, true);
|
||||||
std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32));
|
it = block_map.erase(it);
|
||||||
|
|
||||||
DestroyBlock(it2->second, true);
|
|
||||||
++it2;
|
|
||||||
}
|
|
||||||
if (it1 != it2)
|
|
||||||
{
|
|
||||||
block_map.erase(it1, it2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the code was actually modified, we need to clear the relevant entries from the
|
// If the code was actually modified, we need to clear the relevant entries from the
|
||||||
|
|
|
@ -12,32 +12,55 @@
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
|
|
||||||
static const u32 JIT_ICACHE_SIZE = 0x2000000;
|
// A JitBlock is a block of compiled code which corresponds to the PowerPC
|
||||||
static const u32 JIT_ICACHE_MASK = 0x1ffffff;
|
// code at a given address.
|
||||||
static const u32 JIT_ICACHEEX_SIZE = 0x4000000;
|
//
|
||||||
static const u32 JIT_ICACHEEX_MASK = 0x3ffffff;
|
// The notion of the address of a block is a bit complicated because of the
|
||||||
static const u32 JIT_ICACHE_EXRAM_BIT = 0x10000000;
|
// way address translation works, but basically it's the combination of an
|
||||||
static const u32 JIT_ICACHE_VMEM_BIT = 0x20000000;
|
// effective address, the address translation bits in MSR, and the physical
|
||||||
|
// address.
|
||||||
// This corresponds to opcode 5 which is invalid in PowerPC
|
|
||||||
static const u32 JIT_ICACHE_INVALID_BYTE = 0x80;
|
|
||||||
static const u32 JIT_ICACHE_INVALID_WORD = 0x80808080;
|
|
||||||
|
|
||||||
struct JitBlock
|
struct JitBlock
|
||||||
{
|
{
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
// Mask for the MSR bits which determine whether a compiled block
|
||||||
|
// is valid (MSR.IR and MSR.DR, the address translation bits).
|
||||||
|
JIT_CACHE_MSR_MASK = 0x30,
|
||||||
|
};
|
||||||
|
|
||||||
|
// A special entry point for block linking; usually used to check the
|
||||||
|
// downcount.
|
||||||
const u8* checkedEntry;
|
const u8* checkedEntry;
|
||||||
|
// The normal entry point for the block, returned by Dispatch().
|
||||||
const u8* normalEntry;
|
const u8* normalEntry;
|
||||||
|
|
||||||
u32 originalAddress;
|
// The effective address (PC) for the beginning of the block.
|
||||||
|
u32 effectiveAddress;
|
||||||
|
// The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK.
|
||||||
|
u32 msrBits;
|
||||||
|
// The physical address of the code represented by this block.
|
||||||
|
// Various maps in the cache are indexed by this (start_block_map,
|
||||||
|
// block_map, and valid_block in particular). This is useful because of
|
||||||
|
// the way the instruction cache works on PowerPC.
|
||||||
|
u32 physicalAddress;
|
||||||
|
// The number of bytes of JIT'ed code contained in this block. Mostly
|
||||||
|
// useful for logging.
|
||||||
u32 codeSize;
|
u32 codeSize;
|
||||||
|
// The number of PPC instructions represented by this block. Mostly
|
||||||
|
// useful for logging.
|
||||||
u32 originalSize;
|
u32 originalSize;
|
||||||
int runCount; // for profiling.
|
int runCount; // for profiling.
|
||||||
|
|
||||||
|
// Whether this struct refers to a valid block. This is mostly useful as
|
||||||
|
// a debugging aid.
|
||||||
|
// FIXME: Change current users of invalid bit to assertions?
|
||||||
bool invalid;
|
bool invalid;
|
||||||
|
|
||||||
|
// Information about exits to a known address from this block.
|
||||||
|
// This is used to implement block linking.
|
||||||
struct LinkData
|
struct LinkData
|
||||||
{
|
{
|
||||||
u8* exitPtrs; // to be able to rewrite the exit jum
|
u8* exitPtrs; // to be able to rewrite the exit jump
|
||||||
u32 exitAddress;
|
u32 exitAddress;
|
||||||
bool linkStatus; // is it already linked?
|
bool linkStatus; // is it already linked?
|
||||||
};
|
};
|
||||||
|
@ -59,7 +82,12 @@ class ValidBlockBitSet final
|
||||||
public:
|
public:
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
VALID_BLOCK_MASK_SIZE = 0x20000000 / 32,
|
// ValidBlockBitSet covers the whole 32-bit address-space in 32-byte
|
||||||
|
// chunks.
|
||||||
|
// FIXME: Maybe we can get away with less? There isn't any actual
|
||||||
|
// RAM in most of this space.
|
||||||
|
VALID_BLOCK_MASK_SIZE = (1ULL << 32) / 32,
|
||||||
|
// The number of elements in the allocated array. Each u32 contains 32 bits.
|
||||||
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
|
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
|
||||||
};
|
};
|
||||||
// Directly accessed by Jit64.
|
// Directly accessed by Jit64.
|
||||||
|
@ -79,33 +107,53 @@ public:
|
||||||
|
|
||||||
class JitBaseBlockCache
|
class JitBaseBlockCache
|
||||||
{
|
{
|
||||||
enum
|
public:
|
||||||
{
|
static constexpr int MAX_NUM_BLOCKS = 65536 * 2;
|
||||||
MAX_NUM_BLOCKS = 65536 * 2,
|
static constexpr u32 iCache_Num_Elements = 0x10000;
|
||||||
};
|
static constexpr u32 iCache_Mask = iCache_Num_Elements - 1;
|
||||||
|
|
||||||
std::array<const u8*, MAX_NUM_BLOCKS> blockCodePointers;
|
private:
|
||||||
std::array<JitBlock, MAX_NUM_BLOCKS> blocks;
|
// We store the metadata of all blocks in a linear way within this array.
|
||||||
|
std::array<JitBlock, MAX_NUM_BLOCKS> blocks; // number -> JitBlock
|
||||||
int num_blocks;
|
int num_blocks;
|
||||||
std::multimap<u32, int> links_to;
|
|
||||||
|
// links_to holds all exit points of all valid blocks in reverse.
|
||||||
|
// It is used to query all blocks which link to an address.
|
||||||
|
std::multimap<u32, int> links_to; // destination_PC -> number
|
||||||
|
|
||||||
|
// Map indexed by the physical memory location.
|
||||||
|
// It is used to invalidate blocks based on memory location.
|
||||||
std::map<std::pair<u32, u32>, u32> block_map; // (end_addr, start_addr) -> number
|
std::map<std::pair<u32, u32>, u32> block_map; // (end_addr, start_addr) -> number
|
||||||
|
|
||||||
|
// Map indexed by the physical address of the entry point.
|
||||||
|
// This is used to query the block based on the current PC in a slow way.
|
||||||
|
// TODO: This is redundant with block_map, and both should be a multimap.
|
||||||
|
std::map<u32, u32> start_block_map; // start_addr -> number
|
||||||
|
|
||||||
|
// This bitset shows which cachelines overlap with any blocks.
|
||||||
|
// It is used to provide a fast way to query if no icache invalidation is needed.
|
||||||
ValidBlockBitSet valid_block;
|
ValidBlockBitSet valid_block;
|
||||||
|
|
||||||
bool m_initialized;
|
// This array is indexed with the masked PC and likely holds the correct block id.
|
||||||
|
// This is used as a fast cache of start_block_map used in the assembly dispatcher.
|
||||||
|
std::array<int, iCache_Num_Elements> iCache; // start_addr & mask -> number
|
||||||
|
|
||||||
void LinkBlockExits(int i);
|
void LinkBlockExits(int i);
|
||||||
void LinkBlock(int i);
|
void LinkBlock(int i);
|
||||||
void UnlinkBlock(int i);
|
void UnlinkBlock(int i);
|
||||||
|
|
||||||
u8* GetICachePtr(u32 addr);
|
|
||||||
void DestroyBlock(int block_num, bool invalidate);
|
void DestroyBlock(int block_num, bool invalidate);
|
||||||
|
|
||||||
|
void MoveBlockIntoFastCache(u32 em_address, u32 msr);
|
||||||
|
|
||||||
|
// Fast but risky block lookup based on iCache.
|
||||||
|
int& FastLookupEntryForAddress(u32 address) { return iCache[(address >> 2) & iCache_Mask]; }
|
||||||
// Virtual for overloaded
|
// Virtual for overloaded
|
||||||
virtual void WriteLinkBlock(u8* location, const JitBlock& block) = 0;
|
virtual void WriteLinkBlock(u8* location, const JitBlock& block) = 0;
|
||||||
virtual void WriteDestroyBlock(const u8* location, u32 address) = 0;
|
virtual void WriteDestroyBlock(const u8* location, u32 address) = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
JitBaseBlockCache() : num_blocks(0), m_initialized(false) {}
|
JitBaseBlockCache() : num_blocks(0) {}
|
||||||
virtual ~JitBaseBlockCache() {}
|
virtual ~JitBaseBlockCache() {}
|
||||||
int AllocateBlock(u32 em_address);
|
int AllocateBlock(u32 em_address);
|
||||||
void FinalizeBlock(int block_num, bool block_link, const u8* code_ptr);
|
void FinalizeBlock(int block_num, bool block_link, const u8* code_ptr);
|
||||||
|
@ -119,18 +167,20 @@ public:
|
||||||
|
|
||||||
// Code Cache
|
// Code Cache
|
||||||
JitBlock* GetBlock(int block_num);
|
JitBlock* GetBlock(int block_num);
|
||||||
|
JitBlock* GetBlocks() { return blocks.data(); }
|
||||||
|
int* GetICache() { return iCache.data(); }
|
||||||
int GetNumBlocks() const;
|
int GetNumBlocks() const;
|
||||||
const u8** GetCodePointers();
|
|
||||||
std::array<u8, JIT_ICACHE_SIZE> iCache;
|
|
||||||
std::array<u8, JIT_ICACHEEX_SIZE> iCacheEx;
|
|
||||||
std::array<u8, JIT_ICACHE_SIZE> iCacheVMEM;
|
|
||||||
|
|
||||||
// Fast way to get a block. Only works on the first ppc instruction of a block.
|
// Look for the block in the slow but accurate way.
|
||||||
int GetBlockNumberFromStartAddress(u32 em_address);
|
// This function shall be used if FastLookupEntryForAddress() failed.
|
||||||
|
int GetBlockNumberFromStartAddress(u32 em_address, u32 msr);
|
||||||
|
|
||||||
CompiledCode GetCompiledCodeFromBlock(int block_num);
|
// Get the normal entry for the block associated with the current program
|
||||||
|
// counter. This will JIT code if necessary. (This is the reference
|
||||||
|
// implementation; high-performance JITs will want to use a custom
|
||||||
|
// assembly version.)
|
||||||
|
const u8* Dispatch();
|
||||||
|
|
||||||
// DOES NOT WORK CORRECTLY WITH INLINING
|
|
||||||
void InvalidateICache(u32 address, const u32 length, bool forced);
|
void InvalidateICache(u32 address, const u32 length, bool forced);
|
||||||
|
|
||||||
u32* GetBlockBitSet() const { return valid_block.m_valid_block.get(); }
|
u32* GetBlockBitSet() const { return valid_block.m_valid_block.get(); }
|
||||||
|
|
|
@ -150,7 +150,7 @@ void GetProfileResults(ProfileStats* prof_stats)
|
||||||
u64 timecost = block->ticCounter;
|
u64 timecost = block->ticCounter;
|
||||||
// Todo: tweak.
|
// Todo: tweak.
|
||||||
if (block->runCount >= 1)
|
if (block->runCount >= 1)
|
||||||
prof_stats->block_stats.emplace_back(i, block->originalAddress, cost, timecost,
|
prof_stats->block_stats.emplace_back(i, block->effectiveAddress, cost, timecost,
|
||||||
block->runCount, block->codeSize);
|
block->runCount, block->codeSize);
|
||||||
prof_stats->cost_sum += cost;
|
prof_stats->cost_sum += cost;
|
||||||
prof_stats->timecost_sum += timecost;
|
prof_stats->timecost_sum += timecost;
|
||||||
|
@ -169,12 +169,12 @@ int GetHostCode(u32* address, const u8** code, u32* code_size)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address);
|
int block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address, MSR);
|
||||||
if (block_num < 0)
|
if (block_num < 0)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 500; i++)
|
for (int i = 0; i < 500; i++)
|
||||||
{
|
{
|
||||||
block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address - 4 * i);
|
block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address - 4 * i, MSR);
|
||||||
if (block_num >= 0)
|
if (block_num >= 0)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -182,8 +182,8 @@ int GetHostCode(u32* address, const u8** code, u32* code_size)
|
||||||
if (block_num >= 0)
|
if (block_num >= 0)
|
||||||
{
|
{
|
||||||
JitBlock* block = jit->GetBlockCache()->GetBlock(block_num);
|
JitBlock* block = jit->GetBlockCache()->GetBlock(block_num);
|
||||||
if (!(block->originalAddress <= *address &&
|
if (!(block->effectiveAddress <= *address &&
|
||||||
block->originalSize + block->originalAddress >= *address))
|
block->originalSize + block->effectiveAddress >= *address))
|
||||||
block_num = -1;
|
block_num = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -199,7 +199,7 @@ int GetHostCode(u32* address, const u8** code, u32* code_size)
|
||||||
|
|
||||||
*code = block->checkedEntry;
|
*code = block->checkedEntry;
|
||||||
*code_size = block->codeSize;
|
*code_size = block->codeSize;
|
||||||
*address = block->originalAddress;
|
*address = block->effectiveAddress;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -76,7 +76,19 @@ enum XCheckTLBFlag
|
||||||
FLAG_READ,
|
FLAG_READ,
|
||||||
FLAG_WRITE,
|
FLAG_WRITE,
|
||||||
FLAG_OPCODE,
|
FLAG_OPCODE,
|
||||||
|
FLAG_OPCODE_NO_EXCEPTION
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static bool IsOpcodeFlag(XCheckTLBFlag flag)
|
||||||
|
{
|
||||||
|
return flag == FLAG_OPCODE || flag == FLAG_OPCODE_NO_EXCEPTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool IsNoExceptionFlag(XCheckTLBFlag flag)
|
||||||
|
{
|
||||||
|
return flag == FLAG_NO_EXCEPTION || flag == FLAG_OPCODE_NO_EXCEPTION;
|
||||||
|
}
|
||||||
|
|
||||||
template <const XCheckTLBFlag flag>
|
template <const XCheckTLBFlag flag>
|
||||||
static u32 TranslateAddress(const u32 address);
|
static u32 TranslateAddress(const u32 address);
|
||||||
|
|
||||||
|
@ -836,6 +848,43 @@ bool IsOptimizableGatherPipeWrite(u32 address)
|
||||||
return address == 0xCC008000;
|
return address == 0xCC008000;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TranslateResult JitCache_TranslateAddress(u32 address)
|
||||||
|
{
|
||||||
|
if (!UReg_MSR(MSR).IR)
|
||||||
|
return TranslateResult{true, true, address};
|
||||||
|
|
||||||
|
bool from_bat = true;
|
||||||
|
|
||||||
|
int segment = address >> 28;
|
||||||
|
|
||||||
|
if (SConfig::GetInstance().bMMU && (address & Memory::ADDR_MASK_MEM1))
|
||||||
|
{
|
||||||
|
u32 tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
|
||||||
|
if (tlb_addr == 0)
|
||||||
|
{
|
||||||
|
return TranslateResult{false, false, 0};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
address = tlb_addr;
|
||||||
|
from_bat = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((segment == 0x8 || segment == 0x0) && (address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
|
||||||
|
address = address & 0x3FFFFFFF;
|
||||||
|
else if (Memory::m_pEXRAM && segment == 0x9 && (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
|
||||||
|
address = address & 0x3FFFFFFF;
|
||||||
|
else if (Memory::bFakeVMEM && (segment == 0x7 || segment == 0x4))
|
||||||
|
address = 0x7E000000 | (address & Memory::FAKEVMEM_MASK);
|
||||||
|
else
|
||||||
|
return TranslateResult{false, false, 0};
|
||||||
|
}
|
||||||
|
|
||||||
|
return TranslateResult{true, from_bat, address};
|
||||||
|
}
|
||||||
|
|
||||||
// *********************************************************************************
|
// *********************************************************************************
|
||||||
// Warning: Test Area
|
// Warning: Test Area
|
||||||
//
|
//
|
||||||
|
@ -990,7 +1039,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl
|
||||||
u32* paddr)
|
u32* paddr)
|
||||||
{
|
{
|
||||||
u32 tag = vpa >> HW_PAGE_INDEX_SHIFT;
|
u32 tag = vpa >> HW_PAGE_INDEX_SHIFT;
|
||||||
PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK];
|
PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[IsOpcodeFlag(flag)][tag & HW_PAGE_INDEX_MASK];
|
||||||
if (tlbe->tag[0] == tag)
|
if (tlbe->tag[0] == tag)
|
||||||
{
|
{
|
||||||
// Check if C bit requires updating
|
// Check if C bit requires updating
|
||||||
|
@ -1006,7 +1055,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flag != FLAG_NO_EXCEPTION)
|
if (!IsNoExceptionFlag(flag))
|
||||||
tlbe->recent = 0;
|
tlbe->recent = 0;
|
||||||
|
|
||||||
*paddr = tlbe->paddr[0] | (vpa & 0xfff);
|
*paddr = tlbe->paddr[0] | (vpa & 0xfff);
|
||||||
|
@ -1028,7 +1077,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flag != FLAG_NO_EXCEPTION)
|
if (!IsNoExceptionFlag(flag))
|
||||||
tlbe->recent = 1;
|
tlbe->recent = 1;
|
||||||
|
|
||||||
*paddr = tlbe->paddr[1] | (vpa & 0xfff);
|
*paddr = tlbe->paddr[1] | (vpa & 0xfff);
|
||||||
|
@ -1040,11 +1089,11 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl
|
||||||
|
|
||||||
static __forceinline void UpdateTLBEntry(const XCheckTLBFlag flag, UPTE2 PTE2, const u32 address)
|
static __forceinline void UpdateTLBEntry(const XCheckTLBFlag flag, UPTE2 PTE2, const u32 address)
|
||||||
{
|
{
|
||||||
if (flag == FLAG_NO_EXCEPTION)
|
if (IsNoExceptionFlag(flag))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int tag = address >> HW_PAGE_INDEX_SHIFT;
|
int tag = address >> HW_PAGE_INDEX_SHIFT;
|
||||||
PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK];
|
PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[IsOpcodeFlag(flag)][tag & HW_PAGE_INDEX_MASK];
|
||||||
int index = tlbe->recent == 0 && tlbe->tag[0] != TLB_TAG_INVALID;
|
int index = tlbe->recent == 0 && tlbe->tag[0] != TLB_TAG_INVALID;
|
||||||
tlbe->recent = index;
|
tlbe->recent = index;
|
||||||
tlbe->paddr[index] = PTE2.RPN << HW_PAGE_INDEX_SHIFT;
|
tlbe->paddr[index] = PTE2.RPN << HW_PAGE_INDEX_SHIFT;
|
||||||
|
@ -1110,6 +1159,7 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB
|
||||||
switch (flag)
|
switch (flag)
|
||||||
{
|
{
|
||||||
case FLAG_NO_EXCEPTION:
|
case FLAG_NO_EXCEPTION:
|
||||||
|
case FLAG_OPCODE_NO_EXCEPTION:
|
||||||
break;
|
break;
|
||||||
case FLAG_READ:
|
case FLAG_READ:
|
||||||
PTE2.R = 1;
|
PTE2.R = 1;
|
||||||
|
@ -1123,7 +1173,7 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flag != FLAG_NO_EXCEPTION)
|
if (!IsNoExceptionFlag(flag))
|
||||||
*(u32*)&Memory::physical_base[pteg_addr + 4] = bswap(PTE2.Hex);
|
*(u32*)&Memory::physical_base[pteg_addr + 4] = bswap(PTE2.Hex);
|
||||||
|
|
||||||
// We already updated the TLB entry if this was caused by a C bit.
|
// We already updated the TLB entry if this was caused by a C bit.
|
||||||
|
|
|
@ -272,6 +272,13 @@ bool IsOptimizableRAMAddress(const u32 address);
|
||||||
u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize);
|
u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize);
|
||||||
bool IsOptimizableGatherPipeWrite(u32 address);
|
bool IsOptimizableGatherPipeWrite(u32 address);
|
||||||
|
|
||||||
|
struct TranslateResult
|
||||||
|
{
|
||||||
|
bool valid;
|
||||||
|
bool from_bat;
|
||||||
|
u32 address;
|
||||||
|
};
|
||||||
|
TranslateResult JitCache_TranslateAddress(u32 address);
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
enum CRBits
|
enum CRBits
|
||||||
|
|
Loading…
Reference in New Issue