JIT: add "far code emitter" to reduce L1I cache pressure
This commit is contained in:
parent
09a62505c5
commit
c5381bae66
|
@ -132,8 +132,6 @@ ps_adds1
|
|||
|
||||
*/
|
||||
|
||||
static int CODE_SIZE = 1024*1024*32;
|
||||
|
||||
void Jit64::Init()
|
||||
{
|
||||
jo.optimizeStack = true;
|
||||
|
@ -169,6 +167,7 @@ void Jit64::Init()
|
|||
|
||||
trampolines.Init();
|
||||
AllocCodeSpace(CODE_SIZE);
|
||||
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
|
||||
|
||||
blocks.Init();
|
||||
asm_routines.Init();
|
||||
|
@ -183,6 +182,7 @@ void Jit64::ClearCache()
|
|||
{
|
||||
blocks.Clear();
|
||||
trampolines.ClearCodeSpace();
|
||||
farcode.ClearCodeSpace();
|
||||
ClearCodeSpace();
|
||||
}
|
||||
|
||||
|
@ -193,6 +193,7 @@ void Jit64::Shutdown()
|
|||
blocks.Shutdown();
|
||||
trampolines.Shutdown();
|
||||
asm_routines.Shutdown();
|
||||
farcode.Shutdown();
|
||||
}
|
||||
|
||||
// This is only called by FallBackToInterpreter() in this file. It will execute an instruction with the interpreter functions.
|
||||
|
@ -372,7 +373,8 @@ void Jit64::Trace()
|
|||
|
||||
void STACKALIGN Jit64::Jit(u32 em_address)
|
||||
{
|
||||
if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache)
|
||||
if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() ||
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache)
|
||||
{
|
||||
ClearCache();
|
||||
}
|
||||
|
@ -525,12 +527,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
{
|
||||
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
|
||||
{
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
// This instruction uses the FPU, so an FP exception bailout needs to be added.
|
||||
TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit
|
||||
FixupBranch b1 = J_CC(CC_NZ, true);
|
||||
FixupBranch b1 = J_CC(CC_Z, true);
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(b1);
|
||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
|
||||
// If a FPU exception occurs, the exception handler will read
|
||||
// from PC. Update PC with the latest value in case that happens.
|
||||
|
@ -538,32 +541,34 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
|
||||
WriteExceptionExit();
|
||||
|
||||
SetJumpTarget(b1);
|
||||
|
||||
SwitchToNearCode();
|
||||
js.firstFPInstructionFound = true;
|
||||
}
|
||||
|
||||
// Add an external exception check if the instruction writes to the FIFO.
|
||||
if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end())
|
||||
{
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
|
||||
FixupBranch clearInt = J_CC(CC_NZ, true);
|
||||
FixupBranch clearInt = J_CC(CC_NZ);
|
||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
|
||||
FixupBranch noExtException = J_CC(CC_Z, true);
|
||||
FixupBranch extException = J_CC(CC_NZ, true);
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(extException);
|
||||
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
|
||||
FixupBranch noExtIntEnable = J_CC(CC_Z, true);
|
||||
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
|
||||
FixupBranch noCPInt = J_CC(CC_Z, true);
|
||||
|
||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
|
||||
MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
|
||||
WriteExternalExceptionExit();
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
SetJumpTarget(noCPInt);
|
||||
SetJumpTarget(noExtIntEnable);
|
||||
SetJumpTarget(noExtException);
|
||||
SetJumpTarget(clearInt);
|
||||
}
|
||||
|
||||
|
@ -585,9 +590,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
|
||||
if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
|
||||
{
|
||||
// In case we are about to jump to the dispatcher, flush regs
|
||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
||||
FixupBranch noMemException = J_CC(CC_Z, true);
|
||||
FixupBranch memException = J_CC(CC_NZ, true);
|
||||
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(memException);
|
||||
|
||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
|
@ -596,7 +603,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
// from PC. Update PC with the latest value in case that happens.
|
||||
MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
|
||||
WriteExceptionExit();
|
||||
SetJumpTarget(noMemException);
|
||||
SwitchToNearCode();
|
||||
}
|
||||
|
||||
if (opinfo->flags & FL_LOADSTORE)
|
||||
|
|
|
@ -241,8 +241,6 @@ namespace JitILProfiler
|
|||
}
|
||||
};
|
||||
|
||||
static int CODE_SIZE = 1024*1024*32;
|
||||
|
||||
void JitIL::Init()
|
||||
{
|
||||
jo.optimizeStack = true;
|
||||
|
@ -273,6 +271,7 @@ void JitIL::Init()
|
|||
|
||||
trampolines.Init();
|
||||
AllocCodeSpace(CODE_SIZE);
|
||||
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
|
||||
|
||||
blocks.Init();
|
||||
asm_routines.Init();
|
||||
|
@ -306,6 +305,7 @@ void JitIL::Shutdown()
|
|||
blocks.Shutdown();
|
||||
trampolines.Shutdown();
|
||||
asm_routines.Shutdown();
|
||||
farcode.Shutdown();
|
||||
}
|
||||
|
||||
|
||||
|
@ -504,7 +504,8 @@ void JitIL::Trace()
|
|||
|
||||
void STACKALIGN JitIL::Jit(u32 em_address)
|
||||
{
|
||||
if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache)
|
||||
if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() ||
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache)
|
||||
{
|
||||
ClearCache();
|
||||
}
|
||||
|
|
|
@ -32,10 +32,42 @@ namespace MMIO { class Mapping; }
|
|||
#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0])
|
||||
#define PPCSTATE_SRR1 PPCSTATE(spr[SPR_SRR1])
|
||||
|
||||
// A place to throw blocks of code we don't want polluting the cache, e.g. rarely taken
|
||||
// exception branches.
|
||||
class FarCodeCache : public Gen::X64CodeBlock
|
||||
{
|
||||
public:
|
||||
void Init(int size) { AllocCodeSpace(size); }
|
||||
void Shutdown() { FreeCodeSpace(); }
|
||||
};
|
||||
|
||||
// Like Gen::X64CodeBlock, but with some utilities for memory access.
|
||||
class EmuCodeBlock : public Gen::X64CodeBlock
|
||||
{
|
||||
public:
|
||||
static const int CODE_SIZE = 1024 * 1024 * 32;
|
||||
|
||||
// A bit of a hack: with the MMU on, vastly more code ends up in the far cache
// (mostly exception handling), so give it a whole lot more space when the MMU is on.
|
||||
static const int FARCODE_SIZE = 1024 * 1024 * 8;
|
||||
static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
|
||||
|
||||
FarCodeCache farcode;
|
||||
u8* nearcode; // Backed up when we switch to far code.
|
||||
|
||||
// Simple functions to switch between near and far code emitting
|
||||
void SwitchToFarCode()
|
||||
{
|
||||
nearcode = GetWritableCodePtr();
|
||||
SetCodePtr(farcode.GetWritableCodePtr());
|
||||
}
|
||||
|
||||
void SwitchToNearCode()
|
||||
{
|
||||
farcode.SetCodePtr(GetWritableCodePtr());
|
||||
SetCodePtr(nearcode);
|
||||
}
|
||||
|
||||
void LoadAndSwap(int size, Gen::X64Reg dst, const Gen::OpArg& src);
|
||||
void SwapAndStore(int size, const Gen::OpArg& dst, Gen::X64Reg src);
|
||||
|
||||
|
|
Loading…
Reference in New Issue