From 13b66df125356d7d6a1be4c359d8afda6a5ebe17 Mon Sep 17 00:00:00 2001 From: skidau Date: Sun, 15 Apr 2012 21:34:15 +1000 Subject: [PATCH] Changed the JIT code to make the FPU exception timing more accurate. The exception is now triggered at the first FP instruction instead of the start of the block. Rearranged the JIT exception code for a tiny speed-up. Only external exceptions are checked at the end of the block. All other exceptions are checked at the time they occur. --- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 90 ++++++++++++++----- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 29 +----- Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp | 19 +--- .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 2 +- .../Src/PowerPC/Jit64/Jit_SystemRegisters.cpp | 1 + .../Core/Src/PowerPC/JitCommon/JitAsmCommon.h | 2 + .../Core/Core/Src/PowerPC/JitCommon/JitBase.h | 3 + Source/Core/Core/Src/PowerPC/PowerPC.cpp | 70 ++++++++++++--- Source/Core/Core/Src/PowerPC/PowerPC.h | 1 + 9 files changed, 138 insertions(+), 79 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index bd72d1e79c..0e231951df 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -333,16 +333,56 @@ void Jit64::WriteExitDestInEAX() void Jit64::WriteRfiExitDestInEAX() { MOV(32, M(&PC), R(EAX)); + MOV(32, M(&NPC), R(EAX)); + Cleanup(); SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - JMP(asm_routines.testExceptions, true); + + ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); + MOV(32, R(EAX), M(&NPC)); + MOV(32, M(&PC), R(EAX)); + + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); + J_CC(CC_Z, asm_routines.outerLoop, true); + + ABI_PopAllCalleeSavedRegsAndAdjustStack(); + RET(); } void Jit64::WriteExceptionExit() { Cleanup(); - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - JMP(asm_routines.testExceptions, true); + SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + + MOV(32, R(EAX), M(&PC)); + MOV(32, M(&NPC), R(EAX)); + ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); + MOV(32, R(EAX), M(&NPC)); + MOV(32, M(&PC), R(EAX)); + + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); + J_CC(CC_Z, asm_routines.outerLoop, true); + + ABI_PopAllCalleeSavedRegsAndAdjustStack(); + RET(); +} + +void Jit64::WriteExternalExceptionExit() +{ + Cleanup(); + SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + MOV(32, R(EAX), M(&PC)); + MOV(32, M(&NPC), R(EAX)); + ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); + MOV(32, R(EAX), M(&NPC)); + MOV(32, M(&PC), R(EAX)); + + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); + J_CC(CC_Z, asm_routines.outerLoop, true); + + //Landing pad for drec space + ABI_PopAllCalleeSavedRegsAndAdjustStack(); + RET(); } void STACKALIGN Jit64::Run() @@ -421,8 +461,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (em_address == 0) { - Core::SetState(Core::CORE_PAUSE); - PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR); + // Memory exception occurred during instruction fetch + memory_exception = true; } if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT)) @@ -435,6 +475,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc } int size = 0; + js.firstFPInstructionFound = false; js.isLastInstruction = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; @@ -472,16 +513,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (ImHereDebug) ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful - if (js.fpa.any) - { - // This block uses FPU - needs to add FP exception bailout - TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit - FixupBranch b1 = J_CC(CC_NZ); - MOV(32, M(&PC), Imm32(js.blockStart)); - JMP(asm_routines.fpException, true); - SetJumpTarget(b1); - } - // Conditionally add profiling code. if (Profiler::g_ProfileBlocks) { ADD(32, M(&b->runCount), Imm8(1)); @@ -557,8 +588,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (!ops[i].skip) { - if (js.memcheck && (opinfo->flags & FL_USE_FPU)) + if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound) { + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + //This instruction uses FPU - needs to add FP exception bailout TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit FixupBranch b1 = J_CC(CC_NZ); @@ -566,9 +600,15 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. MOV(32, M(&PC), Imm32(ops[i].address)); - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - JMP(asm_routines.fpException, true); + SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + + LOCK(); + OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + WriteExceptionExit(); + SetJumpTarget(b1); + + js.firstFPInstructionFound = true; } // Add an external exception check if the instruction writes to the FIFO. @@ -577,7 +617,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT | EXCEPTION_DECREMENTER)); + TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); FixupBranch clearInt = J_CC(CC_NZ); TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); FixupBranch noExtException = J_CC(CC_Z); @@ -587,7 +627,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc FixupBranch noCPInt = J_CC(CC_Z); MOV(32, M(&PC), Imm32(ops[i].address)); - WriteExceptionExit(); + WriteExternalExceptionExit(); SetJumpTarget(noCPInt); SetJumpTarget(noExtIntEnable); @@ -597,14 +637,14 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (Core::g_CoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i].address) && GetState() != CPU_STEPPING) { + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + MOV(32, M(&PC), Imm32(ops[i].address)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - WriteExit(ops[i].address, 0); SetJumpTarget(noBreakpoint); } @@ -636,7 +676,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc NOTICE_LOG(DYNA_REC, "Unflushed reg: %s", ppcInst); } #endif - if (js.skipnext) { js.skipnext = false; i++; // Skip next instruction @@ -650,6 +689,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc { // Address of instruction could not be translated MOV(32, M(&NPC), Imm32(js.compilerPC)); + + SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + LOCK(); OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI)); // Remove the invalid instruction from the icache, forcing a recompile diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index c01c1b017b..791934bd18 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -65,32 +65,6 @@ class Jit64 : public JitBase { private: - struct JitState - { - u32 compilerPC; - u32 next_compilerPC; - u32 blockStart; - bool cancel; - bool skipnext; - UGeckoInstruction next_inst; // for easy peephole opt. - int blockSize; - int instructionNumber; - int downcountAmount; - int block_flags; - - bool isLastInstruction; - bool memcheck; - - int fifoBytesThisBlock; - - PPCAnalyst::BlockStats st; - PPCAnalyst::BlockRegStats gpa; - PPCAnalyst::BlockRegStats fpa; - PPCAnalyst::CodeOp *op; - - JitBlock *curBlock; - }; - GPRRegCache gpr; FPURegCache fpr; @@ -103,8 +77,6 @@ public: Jit64() : code_buffer(32000) {} ~Jit64() {} - JitState js; - void Init(); void Shutdown(); @@ -143,6 +115,7 @@ public: void WriteExit(u32 destination, int exit_num); void WriteExitDestInEAX(); void WriteExceptionExit(); + void WriteExternalExceptionExit(); void WriteRfiExitDestInEAX(); void WriteCallInterpreter(UGeckoInstruction _inst); void Cleanup(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index 04462872e6..120ee8e48f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -70,7 +70,7 @@ void Jit64AsmRoutineManager::Generate() MOV(64, R(R15), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough #endif - const u8 *outerLoop = GetCodePtr(); + outerLoop = GetCodePtr(); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); FixupBranch skipToRealDispatch = J(); //skip the sync and compare first time @@ -194,27 +194,16 @@ void Jit64AsmRoutineManager::Generate() #endif JMP(dispatcherNoCheck); // no point in special casing this - //FP blocks test for FPU available, jump here if false - fpException = AlignCode4(); - LOCK(); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); - ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - MOV(32, R(EAX), M(&NPC)); - MOV(32, M(&PC), R(EAX)); - JMP(dispatcher, true); - SetJumpTarget(bail); doTiming = GetCodePtr(); - ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); - - testExceptions = GetCodePtr(); + testExternalExceptions = GetCodePtr(); MOV(32, R(EAX), M(&PC)); MOV(32, M(&NPC), R(EAX)); - ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); + ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); MOV(32, R(EAX), M(&NPC)); MOV(32, M(&PC), R(EAX)); - + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); J_CC(CC_Z, outerLoop, true); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index 9a246008c6..4de1dc715a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -151,7 +151,7 @@ void Jit64::lXXx(UGeckoInstruction inst) // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 //MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC)); - JMP(asm_routines.testExceptions, true); + WriteExceptionExit(); SetJumpTarget(noIdle); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index 52c2ef3eee..d6bc03e778 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -119,6 +119,7 @@ void Jit64::mtmsr(UGeckoInstruction inst) gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); WriteExit(js.compilerPC + 4, 0); + js.firstFPInstructionFound = false; } void Jit64::mfmsr(UGeckoInstruction inst) diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h index 7385949ac0..cbec27d2aa 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h @@ -40,12 +40,14 @@ public: const u8 *enterCode; + const u8 *outerLoop; const u8 *dispatcher; const u8 *dispatcherNoCheck; const u8 *dispatcherPcInEAX; const u8 *fpException; const u8 *testExceptions; + const u8 *testExternalExceptions; const u8 *dispatchPcInEAX; const u8 *doTiming; diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h index a953a820be..fc5c632898 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h @@ -58,10 +58,13 @@ protected: int instructionNumber; int downcountAmount; + bool firstFPInstructionFound; bool isLastInstruction; bool forceUnsafeLoad; bool memcheck; + bool skipnext; bool broken_block; + int block_flags; int fifoBytesThisBlock; diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.cpp b/Source/Core/Core/Src/PowerPC/PowerPC.cpp index 2ae520faab..542ada6013 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp @@ -298,9 +298,6 @@ void CheckExceptions() // Read volatile data once u32 exceptions = ppcState.Exceptions; - if (!exceptions) - return; - // Example procedure: // set SRR0 to either PC or NPC //SRR0 = NPC; @@ -320,7 +317,7 @@ void CheckExceptions() SRR1 = (MSR & 0x87C0FFFF) | (1 << 30); MSR |= (MSR >> 16) & 1; MSR &= ~0x04EF36; - NPC = 0x80000400; + NPC = 0x00000400; INFO_LOG(POWERPC, "EXCEPTION_ISI"); Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_ISI); @@ -332,7 +329,7 @@ void CheckExceptions() SRR1 = (MSR & 0x87C0FFFF) | 0x20000; MSR |= (MSR >> 16) & 1; MSR &= ~0x04EF36; - NPC = 0x80000700; + NPC = 0x00000700; INFO_LOG(POWERPC, "EXCEPTION_PROGRAM"); Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_PROGRAM); @@ -343,7 +340,7 @@ void CheckExceptions() SRR1 = MSR & 0x87C0FFFF; MSR |= (MSR >> 16) & 1; MSR &= ~0x04EF36; - NPC = 0x80000C00; + NPC = 0x00000C00; INFO_LOG(POWERPC, "EXCEPTION_SYSCALL (PC=%08x)", PC); Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_SYSCALL); @@ -355,7 +352,7 @@ void CheckExceptions() SRR1 = MSR & 0x87C0FFFF; MSR |= (MSR >> 16) & 1; MSR &= ~0x04EF36; - NPC = 0x80000800; + NPC = 0x00000800; INFO_LOG(POWERPC, "EXCEPTION_FPU_UNAVAILABLE"); Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_FPU_UNAVAILABLE); @@ -366,7 +363,7 @@ void CheckExceptions() SRR1 = MSR & 0x87C0FFFF; MSR |= (MSR >> 16) & 1; MSR &= ~0x04EF36; - NPC = 0x80000300; + NPC = 0x00000300; //DSISR and DAR regs are changed in GenerateDSIException() INFO_LOG(POWERPC, "EXCEPTION_DSI"); @@ -380,7 +377,7 @@ void CheckExceptions() SRR1 = MSR & 0x87C0FFFF; MSR |= (MSR >> 16) & 1; MSR &= ~0x04EF36; - NPC = 0x80000600; + NPC = 0x00000600; //TODO crazy amount of DSISR options to check out @@ -398,7 +395,7 @@ void CheckExceptions() SRR1 = MSR & 0x87C0FFFF; MSR |= (MSR >> 16) & 1; MSR &= ~0x04EF36; - NPC = 0x80000500; + NPC = 0x00000500; INFO_LOG(POWERPC, "EXCEPTION_EXTERNAL_INT"); Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_EXTERNAL_INT); @@ -411,7 +408,58 @@ void CheckExceptions() SRR1 = MSR & 0x87C0FFFF; MSR |= (MSR >> 16) & 1; MSR &= ~0x04EF36; - NPC = 0x80000900; + NPC = 0x00000900; + + INFO_LOG(POWERPC, "EXCEPTION_DECREMENTER"); + Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_DECREMENTER); + } + } +} + +void CheckExternalExceptions() +{ + // Read volatile data once + u32 exceptions = ppcState.Exceptions; + + if (!exceptions) + return; + + // Example procedure: + // set SRR0 to either PC or NPC + //SRR0 = NPC; + // save specified MSR bits + //SRR1 = MSR & 0x87C0FFFF; + // copy ILE bit to LE + //MSR |= (MSR >> 16) & 1; + // clear MSR as specified + //MSR &= ~0x04EF36; // 0x04FF36 also clears ME (only for machine check exception) + // set to exception type entry point + //NPC = 0x80000x00; + + // EXTERNAL INTERRUPT + if (MSR & 0x0008000) //hacky...the exception shouldn't be generated if EE isn't set... + { + if (exceptions & EXCEPTION_EXTERNAL_INT) + { + // Pokemon gets this "too early", it hasn't a handler yet + SRR0 = NPC; + SRR1 = MSR & 0x87C0FFFF; + MSR |= (MSR >> 16) & 1; + MSR &= ~0x04EF36; + NPC = 0x00000500; + + INFO_LOG(POWERPC, "EXCEPTION_EXTERNAL_INT"); + Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_EXTERNAL_INT); + + _dbg_assert_msg_(POWERPC, (SRR1 & 0x02) != 0, "EXTERNAL_INT unrecoverable???"); + } + else if (exceptions & EXCEPTION_DECREMENTER) + { + SRR0 = NPC; + SRR1 = MSR & 0x87C0FFFF; + MSR |= (MSR >> 16) & 1; + MSR &= ~0x04EF36; + NPC = 0x00000900; INFO_LOG(POWERPC, "EXCEPTION_DECREMENTER"); Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_DECREMENTER); diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.h b/Source/Core/Core/Src/PowerPC/PowerPC.h index 54690ac6b5..738c824cfa 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.h +++ b/Source/Core/Core/Src/PowerPC/PowerPC.h @@ -102,6 +102,7 @@ void SetMode(CoreMode _coreType); void SingleStep(); void CheckExceptions(); +void CheckExternalExceptions(); void CheckBreakPoints(); void RunLoop(); void Start();