Changed the JIT code to make the FPU exception timing more accurate. The exception is now triggered at the first FP instruction instead of the start of the block.

Rearranged the JIT exception code for a tiny speed-up.  Only external exceptions are checked at the end of the block.  All other exceptions are checked at the time they occur.
This commit is contained in:
skidau 2012-04-15 21:34:15 +10:00
parent e09dd77914
commit 13b66df125
9 changed files with 138 additions and 79 deletions

View File

@ -333,16 +333,56 @@ void Jit64::WriteExitDestInEAX()
void Jit64::WriteRfiExitDestInEAX()
{
MOV(32, M(&PC), R(EAX));
MOV(32, M(&NPC), R(EAX));
Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.testExceptions, true);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
MOV(32, R(EAX), M(&NPC));
MOV(32, M(&PC), R(EAX));
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
J_CC(CC_Z, asm_routines.outerLoop, true);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
}
void Jit64::WriteExceptionExit()
{
Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.testExceptions, true);
MOV(32, R(EAX), M(&PC));
MOV(32, M(&NPC), R(EAX));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
MOV(32, R(EAX), M(&NPC));
MOV(32, M(&PC), R(EAX));
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
J_CC(CC_Z, asm_routines.outerLoop, true);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
}
void Jit64::WriteExternalExceptionExit()
{
Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
MOV(32, R(EAX), M(&PC));
MOV(32, M(&NPC), R(EAX));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
MOV(32, R(EAX), M(&NPC));
MOV(32, M(&PC), R(EAX));
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
J_CC(CC_Z, asm_routines.outerLoop, true);
//Landing pad for drec space
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
}
void STACKALIGN Jit64::Run()
@ -421,8 +461,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (em_address == 0)
{
Core::SetState(Core::CORE_PAUSE);
PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR);
// Memory exception occurred during instruction fetch
memory_exception = true;
}
if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT))
@ -435,6 +475,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
}
int size = 0;
js.firstFPInstructionFound = false;
js.isLastInstruction = false;
js.blockStart = em_address;
js.fifoBytesThisBlock = 0;
@ -472,16 +513,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (ImHereDebug)
ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
if (js.fpa.any)
{
// This block uses FPU - needs to add FP exception bailout
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit
FixupBranch b1 = J_CC(CC_NZ);
MOV(32, M(&PC), Imm32(js.blockStart));
JMP(asm_routines.fpException, true);
SetJumpTarget(b1);
}
// Conditionally add profiling code.
if (Profiler::g_ProfileBlocks) {
ADD(32, M(&b->runCount), Imm8(1));
@ -557,8 +588,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (!ops[i].skip)
{
if (js.memcheck && (opinfo->flags & FL_USE_FPU))
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
{
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
//This instruction uses FPU - needs to add FP exception bailout
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit
FixupBranch b1 = J_CC(CC_NZ);
@ -567,8 +601,14 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
// from PC. Update PC with the latest value in case that happens.
MOV(32, M(&PC), Imm32(ops[i].address));
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.fpException, true);
LOCK();
OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
WriteExceptionExit();
SetJumpTarget(b1);
js.firstFPInstructionFound = true;
}
// Add an external exception check if the instruction writes to the FIFO.
@ -577,7 +617,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT | EXCEPTION_DECREMENTER));
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
FixupBranch clearInt = J_CC(CC_NZ);
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch noExtException = J_CC(CC_Z);
@ -587,7 +627,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
FixupBranch noCPInt = J_CC(CC_Z);
MOV(32, M(&PC), Imm32(ops[i].address));
WriteExceptionExit();
WriteExternalExceptionExit();
SetJumpTarget(noCPInt);
SetJumpTarget(noExtIntEnable);
@ -597,14 +637,14 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (Core::g_CoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i].address) && GetState() != CPU_STEPPING)
{
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
MOV(32, M(&PC), Imm32(ops[i].address));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z);
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
WriteExit(ops[i].address, 0);
SetJumpTarget(noBreakpoint);
}
@ -636,7 +676,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
NOTICE_LOG(DYNA_REC, "Unflushed reg: %s", ppcInst);
}
#endif
if (js.skipnext) {
js.skipnext = false;
i++; // Skip next instruction
@ -650,6 +689,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
{
// Address of instruction could not be translated
MOV(32, M(&NPC), Imm32(js.compilerPC));
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
LOCK();
OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI));
// Remove the invalid instruction from the icache, forcing a recompile

View File

@ -65,32 +65,6 @@
class Jit64 : public JitBase
{
private:
struct JitState
{
u32 compilerPC;
u32 next_compilerPC;
u32 blockStart;
bool cancel;
bool skipnext;
UGeckoInstruction next_inst; // for easy peephole opt.
int blockSize;
int instructionNumber;
int downcountAmount;
int block_flags;
bool isLastInstruction;
bool memcheck;
int fifoBytesThisBlock;
PPCAnalyst::BlockStats st;
PPCAnalyst::BlockRegStats gpa;
PPCAnalyst::BlockRegStats fpa;
PPCAnalyst::CodeOp *op;
JitBlock *curBlock;
};
GPRRegCache gpr;
FPURegCache fpr;
@ -103,8 +77,6 @@ public:
Jit64() : code_buffer(32000) {}
~Jit64() {}
JitState js;
void Init();
void Shutdown();
@ -143,6 +115,7 @@ public:
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInEAX();
void WriteExceptionExit();
void WriteExternalExceptionExit();
void WriteRfiExitDestInEAX();
void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup();

View File

@ -70,7 +70,7 @@ void Jit64AsmRoutineManager::Generate()
MOV(64, R(R15), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough
#endif
const u8 *outerLoop = GetCodePtr();
outerLoop = GetCodePtr();
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
FixupBranch skipToRealDispatch = J(); //skip the sync and compare first time
@ -194,24 +194,13 @@ void Jit64AsmRoutineManager::Generate()
#endif
JMP(dispatcherNoCheck); // no point in special casing this
//FP blocks test for FPU available, jump here if false
fpException = AlignCode4();
LOCK();
OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
MOV(32, R(EAX), M(&NPC));
MOV(32, M(&PC), R(EAX));
JMP(dispatcher, true);
SetJumpTarget(bail);
doTiming = GetCodePtr();
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
testExceptions = GetCodePtr();
testExternalExceptions = GetCodePtr();
MOV(32, R(EAX), M(&PC));
MOV(32, M(&NPC), R(EAX));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
MOV(32, R(EAX), M(&NPC));
MOV(32, M(&PC), R(EAX));

View File

@ -151,7 +151,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
//MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC));
JMP(asm_routines.testExceptions, true);
WriteExceptionExit();
SetJumpTarget(noIdle);

View File

@ -119,6 +119,7 @@ void Jit64::mtmsr(UGeckoInstruction inst)
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
WriteExit(js.compilerPC + 4, 0);
js.firstFPInstructionFound = false;
}
void Jit64::mfmsr(UGeckoInstruction inst)

View File

@ -40,12 +40,14 @@ public:
const u8 *enterCode;
const u8 *outerLoop;
const u8 *dispatcher;
const u8 *dispatcherNoCheck;
const u8 *dispatcherPcInEAX;
const u8 *fpException;
const u8 *testExceptions;
const u8 *testExternalExceptions;
const u8 *dispatchPcInEAX;
const u8 *doTiming;

View File

@ -58,10 +58,13 @@ protected:
int instructionNumber;
int downcountAmount;
bool firstFPInstructionFound;
bool isLastInstruction;
bool forceUnsafeLoad;
bool memcheck;
bool skipnext;
bool broken_block;
int block_flags;
int fifoBytesThisBlock;

View File

@ -298,9 +298,6 @@ void CheckExceptions()
// Read volatile data once
u32 exceptions = ppcState.Exceptions;
if (!exceptions)
return;
// Example procedure:
// set SRR0 to either PC or NPC
//SRR0 = NPC;
@ -320,7 +317,7 @@ void CheckExceptions()
SRR1 = (MSR & 0x87C0FFFF) | (1 << 30);
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000400;
NPC = 0x00000400;
INFO_LOG(POWERPC, "EXCEPTION_ISI");
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_ISI);
@ -332,7 +329,7 @@ void CheckExceptions()
SRR1 = (MSR & 0x87C0FFFF) | 0x20000;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000700;
NPC = 0x00000700;
INFO_LOG(POWERPC, "EXCEPTION_PROGRAM");
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_PROGRAM);
@ -343,7 +340,7 @@ void CheckExceptions()
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000C00;
NPC = 0x00000C00;
INFO_LOG(POWERPC, "EXCEPTION_SYSCALL (PC=%08x)", PC);
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_SYSCALL);
@ -355,7 +352,7 @@ void CheckExceptions()
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000800;
NPC = 0x00000800;
INFO_LOG(POWERPC, "EXCEPTION_FPU_UNAVAILABLE");
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_FPU_UNAVAILABLE);
@ -366,7 +363,7 @@ void CheckExceptions()
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000300;
NPC = 0x00000300;
//DSISR and DAR regs are changed in GenerateDSIException()
INFO_LOG(POWERPC, "EXCEPTION_DSI");
@ -380,7 +377,7 @@ void CheckExceptions()
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000600;
NPC = 0x00000600;
//TODO crazy amount of DSISR options to check out
@ -398,7 +395,7 @@ void CheckExceptions()
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000500;
NPC = 0x00000500;
INFO_LOG(POWERPC, "EXCEPTION_EXTERNAL_INT");
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_EXTERNAL_INT);
@ -411,7 +408,58 @@ void CheckExceptions()
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000900;
NPC = 0x00000900;
INFO_LOG(POWERPC, "EXCEPTION_DECREMENTER");
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_DECREMENTER);
}
}
}
void CheckExternalExceptions()
{
// Read volatile data once
u32 exceptions = ppcState.Exceptions;
if (!exceptions)
return;
// Example procedure:
// set SRR0 to either PC or NPC
//SRR0 = NPC;
// save specified MSR bits
//SRR1 = MSR & 0x87C0FFFF;
// copy ILE bit to LE
//MSR |= (MSR >> 16) & 1;
// clear MSR as specified
//MSR &= ~0x04EF36; // 0x04FF36 also clears ME (only for machine check exception)
// set to exception type entry point
//NPC = 0x80000x00;
// EXTERNAL INTERRUPT
if (MSR & 0x0008000) //hacky...the exception shouldn't be generated if EE isn't set...
{
if (exceptions & EXCEPTION_EXTERNAL_INT)
{
// Pokemon gets this "too early", it hasn't a handler yet
SRR0 = NPC;
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x00000500;
INFO_LOG(POWERPC, "EXCEPTION_EXTERNAL_INT");
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_EXTERNAL_INT);
_dbg_assert_msg_(POWERPC, (SRR1 & 0x02) != 0, "EXTERNAL_INT unrecoverable???");
}
else if (exceptions & EXCEPTION_DECREMENTER)
{
SRR0 = NPC;
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x00000900;
INFO_LOG(POWERPC, "EXCEPTION_DECREMENTER");
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_DECREMENTER);

View File

@ -102,6 +102,7 @@ void SetMode(CoreMode _coreType);
void SingleStep();
void CheckExceptions();
void CheckExternalExceptions();
void CheckBreakPoints();
void RunLoop();
void Start();