Merge pull request #4060 from dolphin-emu/revert-4004-dynamic-bat

Revert "JitCache: Support for VMEM + MSR bits"
This commit is contained in:
Scott Mansell 2016-07-27 13:07:32 +12:00 committed by GitHub
commit 2304d76914
19 changed files with 369 additions and 447 deletions

View File

@ -20,7 +20,6 @@ void CachedInterpreter::Init()
jo.enableBlocklink = false; jo.enableBlocklink = false;
JitBaseBlockCache::Init(); JitBaseBlockCache::Init();
UpdateMemoryOptions();
code_block.m_stats = &js.st; code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa; code_block.m_gpa = &js.gpa;
@ -42,29 +41,34 @@ void CachedInterpreter::Run()
void CachedInterpreter::SingleStep() void CachedInterpreter::SingleStep()
{ {
const u8* normalEntry = jit->GetBlockCache()->Dispatch(); int block = GetBlockNumberFromStartAddress(PC);
const Instruction* code = reinterpret_cast<const Instruction*>(normalEntry); if (block >= 0)
while (true)
{ {
switch (code->type) Instruction* code = (Instruction*)GetCompiledCodeFromBlock(block);
while (true)
{ {
case Instruction::INSTRUCTION_ABORT: switch (code->type)
return; {
case Instruction::INSTRUCTION_ABORT:
case Instruction::INSTRUCTION_TYPE_COMMON:
code->common_callback(UGeckoInstruction(code->data));
code++;
break;
case Instruction::INSTRUCTION_TYPE_CONDITIONAL:
bool ret = code->conditional_callback(code->data);
code++;
if (ret)
return; return;
break;
case Instruction::INSTRUCTION_TYPE_COMMON:
code->common_callback(UGeckoInstruction(code->data));
code++;
break;
case Instruction::INSTRUCTION_TYPE_CONDITIONAL:
bool ret = code->conditional_callback(code->data);
code++;
if (ret)
return;
break;
}
} }
} }
Jit(PC);
} }
static void EndBlock(UGeckoInstruction data) static void EndBlock(UGeckoInstruction data)
@ -83,30 +87,14 @@ static void WritePC(UGeckoInstruction data)
NPC = data.hex + 4; NPC = data.hex + 4;
} }
static void WriteBrokenBlockNPC(UGeckoInstruction data)
{
NPC = data.hex;
}
static bool CheckFPU(u32 data) static bool CheckFPU(u32 data)
{ {
UReg_MSR& msr = (UReg_MSR&)MSR; UReg_MSR& msr = (UReg_MSR&)MSR;
if (!msr.FP) if (!msr.FP)
{ {
PC = NPC = data;
PowerPC::ppcState.Exceptions |= EXCEPTION_FPU_UNAVAILABLE; PowerPC::ppcState.Exceptions |= EXCEPTION_FPU_UNAVAILABLE;
PowerPC::CheckExceptions(); PowerPC::CheckExceptions();
PowerPC::ppcState.downcount -= data;
return true;
}
return false;
}
static bool CheckDSI(u32 data)
{
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
PowerPC::CheckExceptions();
PowerPC::ppcState.downcount -= data;
return true; return true;
} }
return false; return false;
@ -173,29 +161,22 @@ void CachedInterpreter::Jit(u32 address)
if (!ops[i].skip) if (!ops[i].skip)
{ {
bool check_fpu = (ops[i].opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound; if ((ops[i].opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
bool endblock = (ops[i].opinfo->flags & FL_ENDBLOCK) != 0;
bool memcheck = (ops[i].opinfo->flags & FL_LOADSTORE) && jo.memcheck;
if (check_fpu)
{ {
m_code.emplace_back(WritePC, ops[i].address); m_code.emplace_back(CheckFPU, ops[i].address);
m_code.emplace_back(CheckFPU, js.downcountAmount);
js.firstFPInstructionFound = true; js.firstFPInstructionFound = true;
} }
if (endblock || memcheck) if (ops[i].opinfo->flags & FL_ENDBLOCK)
m_code.emplace_back(WritePC, ops[i].address); m_code.emplace_back(WritePC, ops[i].address);
m_code.emplace_back(GetInterpreterOp(ops[i].inst), ops[i].inst); m_code.emplace_back(GetInterpreterOp(ops[i].inst), ops[i].inst);
if (memcheck) if (ops[i].opinfo->flags & FL_ENDBLOCK)
m_code.emplace_back(CheckDSI, js.downcountAmount);
if (endblock)
m_code.emplace_back(EndBlock, js.downcountAmount); m_code.emplace_back(EndBlock, js.downcountAmount);
} }
} }
if (code_block.m_broken) if (code_block.m_broken)
{ {
m_code.emplace_back(WriteBrokenBlockNPC, nextPC); m_code.emplace_back(WritePC, nextPC);
m_code.emplace_back(EndBlock, js.downcountAmount); m_code.emplace_back(EndBlock, js.downcountAmount);
} }
m_code.emplace_back(); m_code.emplace_back();
@ -210,5 +191,12 @@ void CachedInterpreter::ClearCache()
{ {
m_code.clear(); m_code.clear();
JitBaseBlockCache::Clear(); JitBaseBlockCache::Clear();
UpdateMemoryOptions(); }
void CachedInterpreter::WriteDestroyBlock(const u8* location, u32 address)
{
}
void CachedInterpreter::WriteLinkBlock(u8* location, const JitBlock& block)
{
} }

View File

@ -28,8 +28,11 @@ public:
JitBaseBlockCache* GetBlockCache() override { return this; } JitBaseBlockCache* GetBlockCache() override { return this; }
const char* GetName() override { return "Cached Interpreter"; } const char* GetName() override { return "Cached Interpreter"; }
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override {} void WriteLinkBlock(u8* location, const JitBlock& block) override;
const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; }
void WriteDestroyBlock(const u8* location, u32 address) override;
const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; };
private: private:
struct Instruction struct Instruction
{ {

View File

@ -396,12 +396,29 @@ void Jit64::JustWriteExit(u32 destination, bool bl, u32 after)
linkData.exitAddress = destination; linkData.exitAddress = destination;
linkData.linkStatus = false; linkData.linkStatus = false;
MOV(32, PPCSTATE(pc), Imm32(destination)); // Link opportunity!
linkData.exitPtrs = GetWritableCodePtr(); int block;
if (bl) if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
CALL(asm_routines.dispatcher); {
// It exists! Joy of joy!
JitBlock* jb = blocks.GetBlock(block);
const u8* addr = jb->checkedEntry;
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(addr);
else
JMP(addr, true);
linkData.linkStatus = true;
}
else else
JMP(asm_routines.dispatcher, true); {
MOV(32, PPCSTATE(pc), Imm32(destination));
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(asm_routines.dispatcher);
else
JMP(asm_routines.dispatcher, true);
}
b->linkData.push_back(linkData); b->linkData.push_back(linkData);

View File

@ -58,13 +58,13 @@ void Jit64AsmRoutineManager::Generate()
AND(32, PPCSTATE(pc), Imm32(0xFFFFFFFC)); AND(32, PPCSTATE(pc), Imm32(0xFFFFFFFC));
#if 0 // debug mispredicts #if 0 // debug mispredicts
MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0); ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0);
CALL(reinterpret_cast<void *>(&ReportMispredict)); CALL(reinterpret_cast<void *>(&ReportMispredict));
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0); ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0);
#endif #endif
ResetStack(*this); ResetStack();
SUB(32, PPCSTATE(downcount), R(RSCRATCH2)); SUB(32, PPCSTATE(downcount), R(RSCRATCH2));
@ -102,15 +102,31 @@ void Jit64AsmRoutineManager::Generate()
MOV(64, R(RMEM), Imm64((u64)Memory::logical_base)); MOV(64, R(RMEM), Imm64((u64)Memory::logical_base));
SetJumpTarget(membaseend); SetJumpTarget(membaseend);
// The following is a translation of JitBaseBlockCache::Dispatch into assembly.
// Fast block number lookup.
MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
u64 icache = reinterpret_cast<u64>(jit->GetBlockCache()->GetICache());
AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::iCache_Mask << 2)); // TODO: We need to handle code which executes the same PC with
// different values of MSR.IR. It probably makes sense to handle
// MSR.DR here too, to allow IsOptimizableRAMAddress-based
// optimizations safe, because IR and DR are usually set/cleared together.
// TODO: Branching based on the 20 most significant bits of instruction
// addresses without translating them is wrong.
u64 icache = (u64)jit->GetBlockCache()->iCache.data();
u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data();
u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data();
u32 mask = 0;
FixupBranch no_mem;
FixupBranch exit_mem;
FixupBranch exit_vmem;
if (SConfig::GetInstance().bWii)
mask = JIT_ICACHE_EXRAM_BIT;
mask |= JIT_ICACHE_VMEM_BIT;
TEST(32, R(RSCRATCH), Imm32(mask));
no_mem = J_CC(CC_NZ);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
if (icache <= INT_MAX) if (icache <= INT_MAX)
{ {
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, static_cast<s32>(icache))); MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache));
} }
else else
{ {
@ -118,46 +134,73 @@ void Jit64AsmRoutineManager::Generate()
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH)); MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
} }
// Check whether the block we found matches the current state. exit_mem = J();
u64 blocks = reinterpret_cast<u64>(jit->GetBlockCache()->GetBlocks()); SetJumpTarget(no_mem);
IMUL(32, RSCRATCH, R(RSCRATCH), Imm32(sizeof(JitBlock))); TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
if (blocks <= INT_MAX) FixupBranch no_vmem = J_CC(CC_Z);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
if (icacheVmem <= INT_MAX)
{ {
ADD(64, R(RSCRATCH), Imm32(static_cast<s32>(blocks))); MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem));
} }
else else
{ {
MOV(64, R(RSCRATCH2), Imm64(blocks)); MOV(64, R(RSCRATCH2), Imm64(icacheVmem));
ADD(64, R(RSCRATCH), R(RSCRATCH2)); MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
} }
// Check both block.effectiveAddress and block.msrBits.
MOV(32, R(RSCRATCH2), PPCSTATE(msr));
AND(32, R(RSCRATCH2), Imm32(JitBlock::JIT_CACHE_MSR_MASK));
SHL(64, R(RSCRATCH2), Imm8(32));
MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, effectiveAddress))));
FixupBranch notfound = J_CC(CC_NE);
// Success; branch to the block we found.
JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlock, normalEntry))));
SetJumpTarget(notfound);
// Failure; call into the block cache to update the state, then try again. if (SConfig::GetInstance().bWii)
// (We need to loop because Jit() might not actually generate a block exit_vmem = J();
// if we hit an ISI.) SetJumpTarget(no_vmem);
if (SConfig::GetInstance().bWii)
{
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));
if (icacheEx <= INT_MAX)
{
MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(icacheEx));
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
}
SetJumpTarget(no_exram);
}
SetJumpTarget(exit_mem);
if (SConfig::GetInstance().bWii)
SetJumpTarget(exit_vmem);
TEST(32, R(RSCRATCH), R(RSCRATCH));
FixupBranch notfound = J_CC(CC_L);
// grab from list and jump to it
u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers();
if (codePointers <= INT_MAX)
{
JMPptr(MScaled(RSCRATCH, SCALE_8, (s32)codePointers));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(codePointers));
JMPptr(MComplex(RSCRATCH2, RSCRATCH, SCALE_8, 0));
}
SetJumpTarget(notfound);
// We reset the stack because Jit might clear the code cache. // We reset the stack because Jit might clear the code cache.
// Also if we are in the middle of disabling BLR optimization on windows // Also if we are in the middle of disabling BLR optimization on windows
// we need to reset the stack before _resetstkoflw() is called in Jit // we need to reset the stack before _resetstkoflw() is called in Jit
// otherwise we will generate a second stack overflow exception during DoJit() // otherwise we will generate a second stack overflow exception during DoJit()
ResetStack(*this); ResetStack();
// Ok, no block, let's call the slow dispatcher // Ok, no block, let's jit
ABI_PushRegistersAndAdjustStack({}, 0); ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunction(reinterpret_cast<void*>(&JitBase::Dispatch)); ABI_CallFunctionA(32, (void*)&Jit, PPCSTATE(pc));
ABI_PopRegistersAndAdjustStack({}, 0); ABI_PopRegistersAndAdjustStack({}, 0);
// JMPptr(R(ABI_RETURN));
JMP(dispatcherNoCheck, true); JMP(dispatcherNoCheck, true); // no point in special casing this
SetJumpTarget(bail); SetJumpTarget(bail);
doTiming = GetCodePtr(); doTiming = GetCodePtr();
@ -174,7 +217,7 @@ void Jit64AsmRoutineManager::Generate()
// Landing pad for drec space // Landing pad for drec space
if (SConfig::GetInstance().bEnableDebugging) if (SConfig::GetInstance().bEnableDebugging)
SetJumpTarget(dbg_exit); SetJumpTarget(dbg_exit);
ResetStack(*this); ResetStack();
if (m_stack_top) if (m_stack_top)
{ {
ADD(64, R(RSP), Imm8(0x18)); ADD(64, R(RSP), Imm8(0x18));
@ -189,12 +232,12 @@ void Jit64AsmRoutineManager::Generate()
GenerateCommon(); GenerateCommon();
} }
void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter) void Jit64AsmRoutineManager::ResetStack()
{ {
if (m_stack_top) if (m_stack_top)
emitter.MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20)); MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
else else
emitter.MOV(64, R(RSP), M(&s_saved_rsp)); MOV(64, R(RSP), M(&s_saved_rsp));
} }
void Jit64AsmRoutineManager::GenerateCommon() void Jit64AsmRoutineManager::GenerateCommon()

View File

@ -25,6 +25,7 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
{ {
private: private:
void Generate(); void Generate();
void ResetStack();
void GenerateCommon(); void GenerateCommon();
u8* m_stack_top; u8* m_stack_top;
@ -40,5 +41,4 @@ public:
} }
void Shutdown() { FreeCodeSpace(); } void Shutdown() { FreeCodeSpace(); }
void ResetStack(X64CodeBlock& emitter);
}; };

View File

@ -310,7 +310,6 @@ void Jit64::dcbx(UGeckoInstruction inst)
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3)); XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
ABI_CallFunction((void*)JitInterface::InvalidateICache); ABI_CallFunction((void*)JitInterface::InvalidateICache);
ABI_PopRegistersAndAdjustStack(registersInUse, 0); ABI_PopRegistersAndAdjustStack(registersInUse, 0);
asm_routines.ResetStack(*this);
c = J(true); c = J(true);
SwitchToNearCode(); SwitchToNearCode();
SetJumpTarget(c); SetJumpTarget(c);

View File

@ -391,10 +391,6 @@ void Jit64::mtmsr(UGeckoInstruction inst)
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
// Our jit cache also stores some MSR bits, as they have changed, we either
// have to validate them in the BLR/RET check, or just flush the stack here.
asm_routines.ResetStack(*this);
// If some exceptions are pending and EE are now enabled, force checking // If some exceptions are pending and EE are now enabled, force checking
// external exceptions when going out of mtmsr in order to execute delayed // external exceptions when going out of mtmsr in order to execute delayed
// interrupts as soon as possible. // interrupts as soon as possible.

View File

@ -373,9 +373,19 @@ void JitIL::WriteExit(u32 destination)
linkData.exitPtrs = GetWritableCodePtr(); linkData.exitPtrs = GetWritableCodePtr();
linkData.linkStatus = false; linkData.linkStatus = false;
MOV(32, PPCSTATE(pc), Imm32(destination)); // Link opportunity!
JMP(asm_routines.dispatcher, true); int block;
if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
{
// It exists! Joy of joy!
JMP(blocks.GetBlock(block)->checkedEntry, true);
linkData.linkStatus = true;
}
else
{
MOV(32, PPCSTATE(pc), Imm32(destination));
JMP(asm_routines.dispatcher, true);
}
b->linkData.push_back(linkData); b->linkData.push_back(linkData);
} }

View File

@ -196,6 +196,7 @@ void JitArm64::WriteExit(u32 destination)
linkData.linkStatus = false; linkData.linkStatus = false;
b->linkData.push_back(linkData); b->linkData.push_back(linkData);
// the code generated in JitArm64BlockCache::WriteDestroyBlock must fit in this block
MOVI2R(DISPATCHER_PC, destination); MOVI2R(DISPATCHER_PC, destination);
B(dispatcher); B(dispatcher);
} }

View File

@ -7,39 +7,32 @@
#include "Core/PowerPC/JitArm64/JitArm64Cache.h" #include "Core/PowerPC/JitArm64/JitArm64Cache.h"
#include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/JitInterface.h"
void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) void JitArm64BlockCache::WriteLinkBlock(u8* location, const JitBlock& block)
{ {
u8* location = source.exitPtrs;
ARM64XEmitter emit(location); ARM64XEmitter emit(location);
if (dest) // Are we able to jump directly to the normal entry?
s64 distance = ((s64)block.normalEntry - (s64)location) >> 2;
if (distance >= -0x40000 && distance <= 0x3FFFF)
{ {
// Are we able to jump directly to the normal entry? emit.B(CC_LE, block.normalEntry);
s64 distance = ((s64)dest->normalEntry - (s64)location) >> 2;
if (distance >= -0x40000 && distance <= 0x3FFFF)
{
emit.B(CC_LE, dest->normalEntry);
}
// Use the checked entry if either downcount is smaller than zero, // We can't write DISPATCHER_PC here, as block linking is only for 8 bytes.
// or if we're not able to inline the downcount check here. // So we'll hit two jumps when calling Advance.
emit.B(dest->checkedEntry); emit.B(block.checkedEntry);
} }
else else
{ {
emit.MOVI2R(DISPATCHER_PC, source.exitAddress); emit.B(block.checkedEntry);
emit.B(jit->GetAsmRoutines()->dispatcher);
} }
emit.FlushIcache(); emit.FlushIcache();
} }
void JitArm64BlockCache::WriteDestroyBlock(const JitBlock& block) void JitArm64BlockCache::WriteDestroyBlock(const u8* location, u32 address)
{ {
// Only clear the entry points as we might still be within this block. // must fit within the code generated in JitArm64::WriteExit
ARM64XEmitter emit((u8*)block.checkedEntry); ARM64XEmitter emit((u8*)location);
emit.MOVI2R(DISPATCHER_PC, address);
while (emit.GetWritableCodePtr() <= block.normalEntry) emit.B(jit->GetAsmRoutines()->dispatcher);
emit.BRK(0x123);
emit.FlushIcache(); emit.FlushIcache();
} }

View File

@ -11,6 +11,6 @@ typedef void (*CompiledCode)();
class JitArm64BlockCache : public JitBaseBlockCache class JitArm64BlockCache : public JitBaseBlockCache
{ {
private: private:
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override; void WriteLinkBlock(u8* location, const JitBlock& block);
void WriteDestroyBlock(const JitBlock& block) override; void WriteDestroyBlock(const u8* location, u32 address);
}; };

View File

@ -791,6 +791,11 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB; int a = inst.RA, b = inst.RB;
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
gpr.Lock(W0); gpr.Lock(W0);
ARM64Reg addr_reg = W0; ARM64Reg addr_reg = W0;

View File

@ -48,54 +48,54 @@ void JitArm64::GenerateAsm()
dispatcherNoCheck = GetCodePtr(); dispatcherNoCheck = GetCodePtr();
bool assembly_dispatcher = true; FixupBranch exram, vmem, not_exram, not_vmem;
ARM64Reg pc_masked = W25;
ARM64Reg cache_base = X27;
if (assembly_dispatcher) // VMEM
not_vmem = TBZ(DISPATCHER_PC, IntLog2(JIT_ICACHE_VMEM_BIT));
ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHE_MASK);
MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCacheVMEM.data());
vmem = B();
SetJumpTarget(not_vmem);
if (SConfig::GetInstance().bWii)
{ {
// iCache[(address >> 2) & iCache_Mask]; // Wii EX-RAM
ARM64Reg pc_masked = W25; not_exram = TBZ(DISPATCHER_PC, IntLog2(JIT_ICACHE_EXRAM_BIT));
ARM64Reg cache_base = X27; ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHEEX_MASK);
ARM64Reg block_num = W27; MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCacheEx.data());
ANDI2R(pc_masked, DISPATCHER_PC, JitBaseBlockCache::iCache_Mask << 2); exram = B();
MOVP2R(cache_base, jit->GetBlockCache()->GetICache()); SetJumpTarget(not_exram);
LDR(block_num, cache_base, EncodeRegTo64(pc_masked));
// blocks[block_num]
ARM64Reg block = X30;
ARM64Reg jit_block_size = W24;
MOVI2R(jit_block_size, sizeof(JitBlock));
MUL(block_num, block_num, jit_block_size);
MOVP2R(block, jit->GetBlockCache()->GetBlocks());
ADD(block, block, EncodeRegTo64(block_num));
// b.effectiveAddress != addr || b.msrBits != msr
ARM64Reg pc_and_msr = W25;
ARM64Reg pc_and_msr2 = W24;
LDR(INDEX_UNSIGNED, pc_and_msr, block, offsetof(JitBlock, effectiveAddress));
CMP(pc_and_msr, DISPATCHER_PC);
FixupBranch pc_missmatch = B(CC_NEQ);
LDR(INDEX_UNSIGNED, pc_and_msr2, PPC_REG, PPCSTATE_OFF(msr));
ANDI2R(pc_and_msr2, pc_and_msr2, JitBlock::JIT_CACHE_MSR_MASK);
LDR(INDEX_UNSIGNED, pc_and_msr, block, offsetof(JitBlock, msrBits));
CMP(pc_and_msr, pc_and_msr2);
FixupBranch msr_missmatch = B(CC_NEQ);
// return blocks[block_num].normalEntry;
LDR(INDEX_UNSIGNED, block, block, offsetof(JitBlock, normalEntry));
BR(block);
SetJumpTarget(pc_missmatch);
SetJumpTarget(msr_missmatch);
} }
// Call C version of Dispatch(). // Common memory
// FIXME: Implement this in inline assembly. ANDI2R(pc_masked, DISPATCHER_PC, JIT_ICACHE_MASK);
MOVI2R(cache_base, (u64)jit->GetBlockCache()->iCache.data());
SetJumpTarget(vmem);
if (SConfig::GetInstance().bWii)
SetJumpTarget(exram);
LDR(W27, cache_base, EncodeRegTo64(pc_masked));
FixupBranch JitBlock = TBNZ(W27, 7); // Test the 7th bit
// Success, it is our Jitblock.
MOVI2R(X30, (u64)jit->GetBlockCache()->GetCodePointers());
UBFM(X27, X27, 61, 60); // Same as X27 << 3
LDR(X30, X30, X27); // Load the block address in to R14
BR(X30);
// No need to jump anywhere after here, the block will go back to dispatcher start
SetJumpTarget(JitBlock);
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVP2R(X30, reinterpret_cast<void*>(&JitBase::Dispatch)); MOVI2R(X30, (u64) & ::Jit);
BLR(X30); BLR(X30);
// Jump to next block. LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
BR(X0);
B(dispatcherNoCheck);
SetJumpTarget(bail); SetJumpTarget(bail);
doTiming = GetCodePtr(); doTiming = GetCodePtr();

View File

@ -55,10 +55,6 @@
#define JITDISABLE(setting) \ #define JITDISABLE(setting) \
FALLBACK_IF(SConfig::GetInstance().bJITOff || SConfig::GetInstance().setting) FALLBACK_IF(SConfig::GetInstance().bJITOff || SConfig::GetInstance().setting)
class JitBase;
extern JitBase* jit;
class JitBase : public CPUCoreBase class JitBase : public CPUCoreBase
{ {
protected: protected:
@ -129,7 +125,6 @@ public:
JitOptions jo; JitOptions jo;
JitState js; JitState js;
static const u8* Dispatch() { return jit->GetBlockCache()->Dispatch(); };
virtual JitBaseBlockCache* GetBlockCache() = 0; virtual JitBaseBlockCache* GetBlockCache() = 0;
virtual void Jit(u32 em_address) = 0; virtual void Jit(u32 em_address) = 0;
@ -152,6 +147,8 @@ public:
bool HandleFault(uintptr_t access_address, SContext* ctx) override; bool HandleFault(uintptr_t access_address, SContext* ctx) override;
}; };
extern JitBase* jit;
void Jit(u32 em_address); void Jit(u32 em_address);
// Merged routines that should be moved somewhere better // Merged routines that should be moved somewhere better

View File

@ -34,15 +34,26 @@ bool JitBaseBlockCache::IsFull() const
void JitBaseBlockCache::Init() void JitBaseBlockCache::Init()
{ {
if (m_initialized)
{
PanicAlert("JitBaseBlockCache::Init() - iCache is already initialized");
return;
}
JitRegister::Init(SConfig::GetInstance().m_perfDir); JitRegister::Init(SConfig::GetInstance().m_perfDir);
iCache.fill(0); iCache.fill(JIT_ICACHE_INVALID_BYTE);
iCacheEx.fill(JIT_ICACHE_INVALID_BYTE);
iCacheVMEM.fill(JIT_ICACHE_INVALID_BYTE);
Clear(); Clear();
m_initialized = true;
} }
void JitBaseBlockCache::Shutdown() void JitBaseBlockCache::Shutdown()
{ {
num_blocks = 0; num_blocks = 0;
m_initialized = false;
JitRegister::Shutdown(); JitRegister::Shutdown();
} }
@ -69,8 +80,7 @@ void JitBaseBlockCache::Clear()
valid_block.ClearAll(); valid_block.ClearAll();
num_blocks = 0; num_blocks = 0;
blocks[0].msrBits = 0xFFFFFFFF; blockCodePointers.fill(nullptr);
blocks[0].invalid = true;
} }
void JitBaseBlockCache::Reset() void JitBaseBlockCache::Reset()
@ -93,9 +103,7 @@ int JitBaseBlockCache::AllocateBlock(u32 em_address)
{ {
JitBlock& b = blocks[num_blocks]; JitBlock& b = blocks[num_blocks];
b.invalid = false; b.invalid = false;
b.effectiveAddress = em_address; b.originalAddress = em_address;
b.physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address;
b.msrBits = MSR & JitBlock::JIT_CACHE_MSR_MASK;
b.linkData.clear(); b.linkData.clear();
num_blocks++; // commit the current block num_blocks++; // commit the current block
return num_blocks - 1; return num_blocks - 1;
@ -103,23 +111,13 @@ int JitBaseBlockCache::AllocateBlock(u32 em_address)
void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8* code_ptr) void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8* code_ptr)
{ {
blockCodePointers[block_num] = code_ptr;
JitBlock& b = blocks[block_num]; JitBlock& b = blocks[block_num];
if (start_block_map.count(b.physicalAddress))
{
// We already have a block at this address; invalidate the old block.
// This should be very rare. This will only happen if the same block
// is called both with DR/IR enabled or disabled.
WARN_LOG(DYNA_REC, "Invalidating compiled block at same address %08x", b.physicalAddress);
int old_block_num = start_block_map[b.physicalAddress];
const JitBlock& old_b = blocks[old_block_num];
block_map.erase(
std::make_pair(old_b.physicalAddress + 4 * old_b.originalSize - 1, old_b.physicalAddress));
DestroyBlock(old_block_num, true);
}
start_block_map[b.physicalAddress] = block_num;
FastLookupEntryForAddress(b.effectiveAddress) = block_num;
u32 pAddr = b.physicalAddress; std::memcpy(GetICachePtr(b.originalAddress), &block_num, sizeof(u32));
// Convert the logical address to a physical address for the block map
u32 pAddr = b.originalAddress & 0x1FFFFFFF;
for (u32 block = pAddr / 32; block <= (pAddr + (b.originalSize - 1) * 4) / 32; ++block) for (u32 block = pAddr / 32; block <= (pAddr + (b.originalSize - 1) * 4) / 32; ++block)
valid_block.Set(block); valid_block.Set(block);
@ -134,64 +132,49 @@ void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8*
} }
LinkBlock(block_num); LinkBlock(block_num);
LinkBlockExits(block_num);
} }
JitRegister::Register(b.checkedEntry, b.codeSize, "JIT_PPC_%08x", b.physicalAddress); JitRegister::Register(blockCodePointers[block_num], b.codeSize, "JIT_PPC_%08x",
b.originalAddress);
} }
int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr, u32 msr) const u8** JitBaseBlockCache::GetCodePointers()
{ {
u32 translated_addr = addr; return blockCodePointers.data();
if (UReg_MSR(msr).IR)
{
auto translated = PowerPC::JitCache_TranslateAddress(addr);
if (!translated.valid)
{
return -1;
}
translated_addr = translated.address;
}
auto map_result = start_block_map.find(translated_addr);
if (map_result == start_block_map.end())
return -1;
int block_num = map_result->second;
const JitBlock& b = blocks[block_num];
if (b.invalid)
return -1;
if (b.effectiveAddress != addr)
return -1;
if (b.msrBits != (msr & JitBlock::JIT_CACHE_MSR_MASK))
return -1;
return block_num;
} }
void JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) u8* JitBaseBlockCache::GetICachePtr(u32 addr)
{ {
int block_num = GetBlockNumberFromStartAddress(addr, msr); if (addr & JIT_ICACHE_VMEM_BIT)
if (block_num < 0) return &jit->GetBlockCache()->iCacheVMEM[addr & JIT_ICACHE_MASK];
{
Jit(addr); if (addr & JIT_ICACHE_EXRAM_BIT)
} return &jit->GetBlockCache()->iCacheEx[addr & JIT_ICACHEEX_MASK];
else
{ return &jit->GetBlockCache()->iCache[addr & JIT_ICACHE_MASK];
FastLookupEntryForAddress(addr) = block_num;
LinkBlock(block_num);
}
} }
const u8* JitBaseBlockCache::Dispatch() int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr)
{ {
int block_num = FastLookupEntryForAddress(PC); u32 inst;
std::memcpy(&inst, GetICachePtr(addr), sizeof(u32));
while (blocks[block_num].effectiveAddress != PC || if (inst & 0xfc000000) // definitely not a JIT block
blocks[block_num].msrBits != (MSR & JitBlock::JIT_CACHE_MSR_MASK)) return -1;
{
MoveBlockIntoFastCache(PC, MSR & JitBlock::JIT_CACHE_MSR_MASK);
block_num = FastLookupEntryForAddress(PC);
}
return blocks[block_num].normalEntry; if ((int)inst >= num_blocks)
return -1;
if (blocks[inst].originalAddress != addr)
return -1;
return inst;
}
CompiledCode JitBaseBlockCache::GetCompiledCodeFromBlock(int block_num)
{
return (CompiledCode)blockCodePointers[block_num];
} }
// Block linker // Block linker
@ -212,14 +195,11 @@ void JitBaseBlockCache::LinkBlockExits(int i)
{ {
if (!e.linkStatus) if (!e.linkStatus)
{ {
int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress, b.msrBits); int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress);
if (destinationBlock != -1) if (destinationBlock != -1)
{ {
if (!blocks[destinationBlock].invalid) WriteLinkBlock(e.exitPtrs, blocks[destinationBlock]);
{ e.linkStatus = true;
WriteLinkBlock(e, &blocks[destinationBlock]);
e.linkStatus = true;
}
} }
} }
} }
@ -228,37 +208,39 @@ void JitBaseBlockCache::LinkBlockExits(int i)
void JitBaseBlockCache::LinkBlock(int i) void JitBaseBlockCache::LinkBlock(int i)
{ {
LinkBlockExits(i); LinkBlockExits(i);
const JitBlock& b = blocks[i]; JitBlock& b = blocks[i];
auto ppp = links_to.equal_range(b.effectiveAddress); // equal_range(b) returns pair<iterator,iterator> representing the range
// of element with key b
auto ppp = links_to.equal_range(b.originalAddress);
if (ppp.first == ppp.second)
return;
for (auto iter = ppp.first; iter != ppp.second; ++iter) for (auto iter = ppp.first; iter != ppp.second; ++iter)
{ {
const JitBlock& b2 = blocks[iter->second]; // PanicAlert("Linking block %i to block %i", iter->second, i);
if (b.msrBits == b2.msrBits) LinkBlockExits(iter->second);
LinkBlockExits(iter->second);
} }
} }
void JitBaseBlockCache::UnlinkBlock(int i) void JitBaseBlockCache::UnlinkBlock(int i)
{ {
JitBlock& b = blocks[i]; JitBlock& b = blocks[i];
auto ppp = links_to.equal_range(b.effectiveAddress); auto ppp = links_to.equal_range(b.originalAddress);
if (ppp.first == ppp.second)
return;
for (auto iter = ppp.first; iter != ppp.second; ++iter) for (auto iter = ppp.first; iter != ppp.second; ++iter)
{ {
JitBlock& sourceBlock = blocks[iter->second]; JitBlock& sourceBlock = blocks[iter->second];
if (sourceBlock.msrBits != b.msrBits)
continue;
for (auto& e : sourceBlock.linkData) for (auto& e : sourceBlock.linkData)
{ {
if (e.exitAddress == b.effectiveAddress) if (e.exitAddress == b.originalAddress)
{
WriteLinkBlock(e, nullptr);
e.linkStatus = false; e.linkStatus = false;
}
} }
} }
links_to.erase(b.originalAddress);
} }
void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate) void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate)
@ -276,31 +258,20 @@ void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate)
return; return;
} }
b.invalid = true; b.invalid = true;
start_block_map.erase(b.physicalAddress); std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32));
FastLookupEntryForAddress(b.effectiveAddress) = 0;
UnlinkBlock(block_num); UnlinkBlock(block_num);
// Delete linking addresses // Send anyone who tries to run this block back to the dispatcher.
auto it = links_to.equal_range(b.effectiveAddress); // Not entirely ideal, but .. pretty good.
while (it.first != it.second) // Spurious entrances from previously linked blocks can only come through checkedEntry
{ WriteDestroyBlock(b.checkedEntry, b.originalAddress);
if (it.first->second == block_num)
it.first = links_to.erase(it.first);
else
it.first++;
}
// Raise a signal if we are going to call this block again
WriteDestroyBlock(b);
} }
void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced) void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced)
{ {
auto translated = PowerPC::JitCache_TranslateAddress(address); // Convert the logical address to a physical address for the block map
if (!translated.valid) u32 pAddr = address & 0x1FFFFFFF;
return;
u32 pAddr = translated.address;
// Optimize the common case of length == 32 which is used by Interpreter::dcb* // Optimize the common case of length == 32 which is used by Interpreter::dcb*
bool destroy_block = true; bool destroy_block = true;
@ -317,11 +288,20 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for
// address // address
if (destroy_block) if (destroy_block)
{ {
auto it = block_map.lower_bound(std::make_pair(pAddr, 0)); std::map<std::pair<u32, u32>, u32>::iterator it1 = block_map.lower_bound(
while (it != block_map.end() && it->first.second < pAddr + length) std::make_pair(pAddr, 0)),
it2 = it1;
while (it2 != block_map.end() && it2->first.second < pAddr + length)
{ {
DestroyBlock(it->second, true); JitBlock& b = blocks[it2->second];
it = block_map.erase(it); std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32));
DestroyBlock(it2->second, true);
++it2;
}
if (it1 != it2)
{
block_map.erase(it1, it2);
} }
// If the code was actually modified, we need to clear the relevant entries from the // If the code was actually modified, we need to clear the relevant entries from the
@ -339,10 +319,9 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for
} }
} }
void JitBlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) void JitBlockCache::WriteLinkBlock(u8* location, const JitBlock& block)
{ {
u8* location = source.exitPtrs; const u8* address = block.checkedEntry;
const u8* address = dest ? dest->checkedEntry : jit->GetAsmRoutines()->dispatcher;
XEmitter emit(location); XEmitter emit(location);
if (*location == 0xE8) if (*location == 0xE8)
{ {
@ -361,11 +340,9 @@ void JitBlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBl
} }
} }
void JitBlockCache::WriteDestroyBlock(const JitBlock& block) void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address)
{ {
// Only clear the entry points as we might still be within this block. XEmitter emit((u8*)location);
XEmitter emit((u8*)block.checkedEntry); emit.MOV(32, PPCSTATE(pc), Imm32(address));
emit.INT3(); emit.JMP(jit->GetAsmRoutines()->dispatcher, true);
XEmitter emit2((u8*)block.normalEntry);
emit2.INT3();
} }

View File

@ -12,55 +12,32 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
// A JitBlock is block of compiled code which corresponds to the PowerPC static const u32 JIT_ICACHE_SIZE = 0x2000000;
// code at a given address. static const u32 JIT_ICACHE_MASK = 0x1ffffff;
// static const u32 JIT_ICACHEEX_SIZE = 0x4000000;
// The notion of the address of a block is a bit complicated because of the static const u32 JIT_ICACHEEX_MASK = 0x3ffffff;
// way address translation works, but basically it's the combination of an static const u32 JIT_ICACHE_EXRAM_BIT = 0x10000000;
// effective address, the address translation bits in MSR, and the physical static const u32 JIT_ICACHE_VMEM_BIT = 0x20000000;
// address.
// This corresponds to opcode 5 which is invalid in PowerPC
static const u32 JIT_ICACHE_INVALID_BYTE = 0x80;
static const u32 JIT_ICACHE_INVALID_WORD = 0x80808080;
struct JitBlock struct JitBlock
{ {
enum
{
// Mask for the MSR bits which determine whether a compiled block
// is valid (MSR.IR and MSR.DR, the address translation bits).
JIT_CACHE_MSR_MASK = 0x30,
};
// A special entry point for block linking; usually used to check the
// downcount.
const u8* checkedEntry; const u8* checkedEntry;
// The normal entry point for the block, returned by Dispatch().
const u8* normalEntry; const u8* normalEntry;
// The effective address (PC) for the beginning of the block. u32 originalAddress;
u32 effectiveAddress;
// The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK.
u32 msrBits;
// The physical address of the code represented by this block.
// Various maps in the cache are indexed by this (start_block_map,
// block_map, and valid_block in particular). This is useful because of
// the way the instruction cache works on PowerPC.
u32 physicalAddress;
// The number of bytes of JIT'ed code contained in this block. Mostly
// useful for logging.
u32 codeSize; u32 codeSize;
// The number of PPC instructions represented by this block. Mostly
// useful for logging.
u32 originalSize; u32 originalSize;
int runCount; // for profiling. int runCount; // for profiling.
// Whether this struct refers to a valid block. This is mostly useful as
// a debugging aid.
// FIXME: Change current users of invalid bit to assertions?
bool invalid; bool invalid;
// Information about exits to a known address from this block.
// This is used to implement block linking.
struct LinkData struct LinkData
{ {
u8* exitPtrs; // to be able to rewrite the exit jump u8* exitPtrs; // to be able to rewrite the exit jum
u32 exitAddress; u32 exitAddress;
bool linkStatus; // is it already linked? bool linkStatus; // is it already linked?
}; };
@ -82,12 +59,7 @@ class ValidBlockBitSet final
public: public:
enum enum
{ {
// ValidBlockBitSet covers the whole 32-bit address-space in 32-byte VALID_BLOCK_MASK_SIZE = 0x20000000 / 32,
// chunks.
// FIXME: Maybe we can get away with less? There isn't any actual
// RAM in most of this space.
VALID_BLOCK_MASK_SIZE = (1ULL << 32) / 32,
// The number of elements in the allocated array. Each u32 contains 32 bits.
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32 VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
}; };
// Directly accessed by Jit64. // Directly accessed by Jit64.
@ -107,52 +79,33 @@ public:
class JitBaseBlockCache class JitBaseBlockCache
{ {
public: enum
static constexpr int MAX_NUM_BLOCKS = 65536 * 2; {
static constexpr u32 iCache_Num_Elements = 0x10000; MAX_NUM_BLOCKS = 65536 * 2,
static constexpr u32 iCache_Mask = iCache_Num_Elements - 1; };
private: std::array<const u8*, MAX_NUM_BLOCKS> blockCodePointers;
// We store the metadata of all blocks in a linear way within this array. std::array<JitBlock, MAX_NUM_BLOCKS> blocks;
std::array<JitBlock, MAX_NUM_BLOCKS> blocks; // number -> JitBlock
int num_blocks; int num_blocks;
std::multimap<u32, int> links_to;
// links_to holds all exit points of all valid blocks in a reverse way.
// It is used to query all blocks which links to an address.
std::multimap<u32, int> links_to; // destination_PC -> number
// Map indexed by the physical memory location.
// It is used to invalidate blocks based on memory location.
std::map<std::pair<u32, u32>, u32> block_map; // (end_addr, start_addr) -> number std::map<std::pair<u32, u32>, u32> block_map; // (end_addr, start_addr) -> number
// Map indexed by the physical address of the entry point.
// This is used to query the block based on the current PC in a slow way.
// TODO: This is redundant with block_map, and both should be a multimap.
std::map<u32, u32> start_block_map; // start_addr -> number
// This bitset shows which cachelines overlap with any blocks.
// It is used to provide a fast way to query if no icache invalidation is needed.
ValidBlockBitSet valid_block; ValidBlockBitSet valid_block;
// This array is indexed with the masked PC and likely holds the correct block id. bool m_initialized;
// This is used as a fast cache of start_block_map used in the assembly dispatcher.
std::array<int, iCache_Num_Elements> iCache; // start_addr & mask -> number
void LinkBlockExits(int i); void LinkBlockExits(int i);
void LinkBlock(int i); void LinkBlock(int i);
void UnlinkBlock(int i); void UnlinkBlock(int i);
u8* GetICachePtr(u32 addr);
void DestroyBlock(int block_num, bool invalidate); void DestroyBlock(int block_num, bool invalidate);
void MoveBlockIntoFastCache(u32 em_address, u32 msr);
// Fast but risky block lookup based on iCache.
int& FastLookupEntryForAddress(u32 address) { return iCache[(address >> 2) & iCache_Mask]; }
// Virtual for overloaded // Virtual for overloaded
virtual void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) = 0; virtual void WriteLinkBlock(u8* location, const JitBlock& block) = 0;
virtual void WriteDestroyBlock(const JitBlock& block) {} virtual void WriteDestroyBlock(const u8* location, u32 address) = 0;
public: public:
JitBaseBlockCache() : num_blocks(0) {} JitBaseBlockCache() : num_blocks(0), m_initialized(false) {}
virtual ~JitBaseBlockCache() {} virtual ~JitBaseBlockCache() {}
int AllocateBlock(u32 em_address); int AllocateBlock(u32 em_address);
void FinalizeBlock(int block_num, bool block_link, const u8* code_ptr); void FinalizeBlock(int block_num, bool block_link, const u8* code_ptr);
@ -166,20 +119,18 @@ public:
// Code Cache // Code Cache
JitBlock* GetBlock(int block_num); JitBlock* GetBlock(int block_num);
JitBlock* GetBlocks() { return blocks.data(); }
int* GetICache() { return iCache.data(); }
int GetNumBlocks() const; int GetNumBlocks() const;
const u8** GetCodePointers();
std::array<u8, JIT_ICACHE_SIZE> iCache;
std::array<u8, JIT_ICACHEEX_SIZE> iCacheEx;
std::array<u8, JIT_ICACHE_SIZE> iCacheVMEM;
// Look for the block in the slow but accurate way. // Fast way to get a block. Only works on the first ppc instruction of a block.
// This function shall be used if FastLookupEntryForAddress() failed. int GetBlockNumberFromStartAddress(u32 em_address);
int GetBlockNumberFromStartAddress(u32 em_address, u32 msr);
// Get the normal entry for the block associated with the current program CompiledCode GetCompiledCodeFromBlock(int block_num);
// counter. This will JIT code if necessary. (This is the reference
// implementation; high-performance JITs will want to use a custom
// assembly version.)
const u8* Dispatch();
// DOES NOT WORK CORRECTLY WITH INLINING
void InvalidateICache(u32 address, const u32 length, bool forced); void InvalidateICache(u32 address, const u32 length, bool forced);
u32* GetBlockBitSet() const { return valid_block.m_valid_block.get(); } u32* GetBlockBitSet() const { return valid_block.m_valid_block.get(); }
@ -189,6 +140,6 @@ public:
class JitBlockCache : public JitBaseBlockCache class JitBlockCache : public JitBaseBlockCache
{ {
private: private:
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override; void WriteLinkBlock(u8* location, const JitBlock& block) override;
void WriteDestroyBlock(const JitBlock& block) override; void WriteDestroyBlock(const u8* location, u32 address) override;
}; };

View File

@ -150,7 +150,7 @@ void GetProfileResults(ProfileStats* prof_stats)
u64 timecost = block->ticCounter; u64 timecost = block->ticCounter;
// Todo: tweak. // Todo: tweak.
if (block->runCount >= 1) if (block->runCount >= 1)
prof_stats->block_stats.emplace_back(i, block->effectiveAddress, cost, timecost, prof_stats->block_stats.emplace_back(i, block->originalAddress, cost, timecost,
block->runCount, block->codeSize); block->runCount, block->codeSize);
prof_stats->cost_sum += cost; prof_stats->cost_sum += cost;
prof_stats->timecost_sum += timecost; prof_stats->timecost_sum += timecost;
@ -169,12 +169,12 @@ int GetHostCode(u32* address, const u8** code, u32* code_size)
return 1; return 1;
} }
int block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address, MSR); int block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address);
if (block_num < 0) if (block_num < 0)
{ {
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
{ {
block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address - 4 * i, MSR); block_num = jit->GetBlockCache()->GetBlockNumberFromStartAddress(*address - 4 * i);
if (block_num >= 0) if (block_num >= 0)
break; break;
} }
@ -182,8 +182,8 @@ int GetHostCode(u32* address, const u8** code, u32* code_size)
if (block_num >= 0) if (block_num >= 0)
{ {
JitBlock* block = jit->GetBlockCache()->GetBlock(block_num); JitBlock* block = jit->GetBlockCache()->GetBlock(block_num);
if (!(block->effectiveAddress <= *address && if (!(block->originalAddress <= *address &&
block->originalSize + block->effectiveAddress >= *address)) block->originalSize + block->originalAddress >= *address))
block_num = -1; block_num = -1;
} }
@ -199,7 +199,7 @@ int GetHostCode(u32* address, const u8** code, u32* code_size)
*code = block->checkedEntry; *code = block->checkedEntry;
*code_size = block->codeSize; *code_size = block->codeSize;
*address = block->effectiveAddress; *address = block->originalAddress;
return 0; return 0;
} }

View File

@ -76,19 +76,7 @@ enum XCheckTLBFlag
FLAG_READ, FLAG_READ,
FLAG_WRITE, FLAG_WRITE,
FLAG_OPCODE, FLAG_OPCODE,
FLAG_OPCODE_NO_EXCEPTION
}; };
// True when `flag` is one of the two opcode variants of XCheckTLBFlag
// (FLAG_OPCODE or FLAG_OPCODE_NO_EXCEPTION); false for every other value.
static bool IsOpcodeFlag(XCheckTLBFlag flag)
{
  switch (flag)
  {
  case FLAG_OPCODE:
  case FLAG_OPCODE_NO_EXCEPTION:
    return true;
  default:
    return false;
  }
}
// True when `flag` is one of the two no-exception variants of XCheckTLBFlag
// (FLAG_NO_EXCEPTION or FLAG_OPCODE_NO_EXCEPTION); false for every other value.
static bool IsNoExceptionFlag(XCheckTLBFlag flag)
{
  switch (flag)
  {
  case FLAG_NO_EXCEPTION:
  case FLAG_OPCODE_NO_EXCEPTION:
    return true;
  default:
    return false;
  }
}
template <const XCheckTLBFlag flag> template <const XCheckTLBFlag flag>
static u32 TranslateAddress(const u32 address); static u32 TranslateAddress(const u32 address);
@ -848,43 +836,6 @@ bool IsOptimizableGatherPipeWrite(u32 address)
return address == 0xCC008000; return address == 0xCC008000;
} }
// Maps an effective instruction address to the address used to index the JIT
// block cache.
//
// Returns {valid, from_bat, address}:
//  - MSR.IR clear: the input address is returned unchanged, {true, true, address}.
//  - bMMU enabled and (address & Memory::ADDR_MASK_MEM1) non-zero: the address is
//    run through TranslateAddress<FLAG_OPCODE>; a zero result yields
//    {false, false, 0}, otherwise {true, false, translated}.
//  - Otherwise the top nibble of the address selects one of the fixed windows
//    below; anything outside them yields {false, false, 0}.
TranslateResult JitCache_TranslateAddress(u32 address)
{
  if (!UReg_MSR(MSR).IR)
    return TranslateResult{true, true, address};

  if (SConfig::GetInstance().bMMU && (address & Memory::ADDR_MASK_MEM1))
  {
    const u32 tlb_result = TranslateAddress<FLAG_OPCODE>(address);
    if (tlb_result == 0)
      return TranslateResult{false, false, 0};
    return TranslateResult{true, false, tlb_result};
  }

  const int top_nibble = address >> 28;
  const u32 low_bits = address & 0x0FFFFFFF;

  const bool in_real_ram =
      (top_nibble == 0x8 || top_nibble == 0x0) && low_bits < Memory::REALRAM_SIZE;
  const bool in_exram = Memory::m_pEXRAM && top_nibble == 0x9 && low_bits < Memory::EXRAM_SIZE;

  u32 mapped = 0;
  if (in_real_ram || in_exram)
    mapped = address & 0x3FFFFFFF;
  else if (Memory::bFakeVMEM && (top_nibble == 0x7 || top_nibble == 0x4))
    mapped = 0x7E000000 | (address & Memory::FAKEVMEM_MASK);
  else
    return TranslateResult{false, false, 0};

  return TranslateResult{true, true, mapped};
}
// ********************************************************************************* // *********************************************************************************
// Warning: Test Area // Warning: Test Area
// //
@ -1001,7 +952,6 @@ static void GenerateISIException(u32 _EffectiveAddress)
NPC = _EffectiveAddress; NPC = _EffectiveAddress;
PowerPC::ppcState.Exceptions |= EXCEPTION_ISI; PowerPC::ppcState.Exceptions |= EXCEPTION_ISI;
WARN_LOG(POWERPC, "ISI exception at 0x%08x", PC);
} }
void SDRUpdated() void SDRUpdated()
@ -1040,7 +990,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl
u32* paddr) u32* paddr)
{ {
u32 tag = vpa >> HW_PAGE_INDEX_SHIFT; u32 tag = vpa >> HW_PAGE_INDEX_SHIFT;
PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[IsOpcodeFlag(flag)][tag & HW_PAGE_INDEX_MASK]; PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK];
if (tlbe->tag[0] == tag) if (tlbe->tag[0] == tag)
{ {
// Check if C bit requires updating // Check if C bit requires updating
@ -1056,7 +1006,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl
} }
} }
if (!IsNoExceptionFlag(flag)) if (flag != FLAG_NO_EXCEPTION)
tlbe->recent = 0; tlbe->recent = 0;
*paddr = tlbe->paddr[0] | (vpa & 0xfff); *paddr = tlbe->paddr[0] | (vpa & 0xfff);
@ -1078,7 +1028,7 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl
} }
} }
if (!IsNoExceptionFlag(flag)) if (flag != FLAG_NO_EXCEPTION)
tlbe->recent = 1; tlbe->recent = 1;
*paddr = tlbe->paddr[1] | (vpa & 0xfff); *paddr = tlbe->paddr[1] | (vpa & 0xfff);
@ -1090,11 +1040,11 @@ static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag fl
static __forceinline void UpdateTLBEntry(const XCheckTLBFlag flag, UPTE2 PTE2, const u32 address) static __forceinline void UpdateTLBEntry(const XCheckTLBFlag flag, UPTE2 PTE2, const u32 address)
{ {
if (IsNoExceptionFlag(flag)) if (flag == FLAG_NO_EXCEPTION)
return; return;
int tag = address >> HW_PAGE_INDEX_SHIFT; int tag = address >> HW_PAGE_INDEX_SHIFT;
PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[IsOpcodeFlag(flag)][tag & HW_PAGE_INDEX_MASK]; PowerPC::tlb_entry* tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK];
int index = tlbe->recent == 0 && tlbe->tag[0] != TLB_TAG_INVALID; int index = tlbe->recent == 0 && tlbe->tag[0] != TLB_TAG_INVALID;
tlbe->recent = index; tlbe->recent = index;
tlbe->paddr[index] = PTE2.RPN << HW_PAGE_INDEX_SHIFT; tlbe->paddr[index] = PTE2.RPN << HW_PAGE_INDEX_SHIFT;
@ -1160,7 +1110,6 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB
switch (flag) switch (flag)
{ {
case FLAG_NO_EXCEPTION: case FLAG_NO_EXCEPTION:
case FLAG_OPCODE_NO_EXCEPTION:
break; break;
case FLAG_READ: case FLAG_READ:
PTE2.R = 1; PTE2.R = 1;
@ -1174,7 +1123,7 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB
break; break;
} }
if (!IsNoExceptionFlag(flag)) if (flag != FLAG_NO_EXCEPTION)
*(u32*)&Memory::physical_base[pteg_addr + 4] = bswap(PTE2.Hex); *(u32*)&Memory::physical_base[pteg_addr + 4] = bswap(PTE2.Hex);
// We already updated the TLB entry if this was caused by a C bit. // We already updated the TLB entry if this was caused by a C bit.

View File

@ -272,13 +272,6 @@ bool IsOptimizableRAMAddress(const u32 address);
u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize); u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize);
bool IsOptimizableGatherPipeWrite(u32 address); bool IsOptimizableGatherPipeWrite(u32 address);
// Value returned by JitCache_TranslateAddress(). It is brace-initialised
// positionally as {valid, from_bat, address}, so the member order matters.
struct TranslateResult
{
// False when no mapping was found; the other two members are then false / 0.
bool valid;
// False only when the address came from TranslateAddress<FLAG_OPCODE>;
// true for the untranslated (MSR.IR clear) and fixed-window cases.
bool from_bat;
// The resulting address; 0 when `valid` is false.
u32 address;
};
TranslateResult JitCache_TranslateAddress(u32 address);
} // namespace } // namespace
enum CRBits enum CRBits