From be70b43a2b8decf90da6386f78d2f1d43f60190d Mon Sep 17 00:00:00 2001 From: skidau Date: Sat, 10 Apr 2010 15:41:44 +0000 Subject: [PATCH] JIT single stepping Enable it by uncommenting the defines in JitBase.h. For breakpoints to work in JIT mode, the block cache must be disabled. The PPC instruction trace, regs, fpu regs, flags, generated x86 binary and generated x86 disasm are logged in the Dynamic Recompiler log. Fixes issue 1052. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5314 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/LogManager.cpp | 4 +- Source/Core/Common/Src/LogManager.h | 2 +- Source/Core/Core/Src/CoreTiming.cpp | 8 +- Source/Core/Core/Src/CoreTiming.h | 1 + Source/Core/Core/Src/PowerPC/Gekko.h | 12 +-- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 98 +++++++++++++++---- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 1 + Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp | 8 ++ .../Core/Src/PowerPC/Jit64/JitRegCache.cpp | 4 +- .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp | 3 +- .../Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp | 83 +++++++++++++--- Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h | 1 + .../Core/Src/PowerPC/Jit64IL/JitILAsm.cpp | 13 ++- .../Core/Src/PowerPC/JitCommon/JitBase.cpp | 44 +++++++++ .../Core/Core/Src/PowerPC/JitCommon/JitBase.h | 11 ++- Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp | 55 ++++++----- Source/Core/Core/Src/PowerPC/PPCAnalyst.h | 2 +- Source/Core/Core/Src/PowerPC/PowerPC.cpp | 1 + Source/Core/Core/Src/PowerPC/PowerPC.h | 1 - Source/Core/DebuggerWX/Src/CodeWindow.cpp | 2 +- Source/Core/DebuggerWX/Src/JitWindow.cpp | 2 +- 21 files changed, 282 insertions(+), 74 deletions(-) diff --git a/Source/Core/Common/Src/LogManager.cpp b/Source/Core/Common/Src/LogManager.cpp index 2abf20b2f3..5303dab13d 100644 --- a/Source/Core/Common/Src/LogManager.cpp +++ b/Source/Core/Common/Src/LogManager.cpp @@ -78,7 +78,7 @@ LogManager::LogManager() { m_Log[LogTypes::WII_IPC_NET] = new LogContainer("WII_IPC_NET", "WII IPC NET"); m_Log[LogTypes::WII_IPC_WIIMOTE] = new LogContainer("WII_IPC_WIIMOTE","WII IPC WIIMOTE"); m_Log[LogTypes::ACTIONREPLAY] = new LogContainer("ActionReplay", "ActionReplay"); - m_Log[LogTypes::MEMCARD_MANAGER] = new LogContainer("MemCard Manger", "MemCard Manger"); + m_Log[LogTypes::MEMCARD_MANAGER] = new LogContainer("MemCard Manager", "MemCard Manager"); m_Log[LogTypes::NETPLAY] = new LogContainer("NETPLAY", "Netplay"); m_fileLog = new FileLogListener(File::GetUserPath(F_MAINLOG_IDX)); @@ -110,7 +110,7 @@ void LogManager::Log(LogTypes::LOG_LEVELS level, LogTypes::LOG_TYPE type, va_list args) { char temp[MAX_MSGLEN]; - char msg[MAX_MSGLEN + 512]; + char msg[MAX_MSGLEN * 2]; LogContainer *log = m_Log[type]; if (! log->isEnable() || level > log->getLevel()) diff --git a/Source/Core/Common/Src/LogManager.h b/Source/Core/Common/Src/LogManager.h index 9b91e189de..e0157b246d 100644 --- a/Source/Core/Common/Src/LogManager.h +++ b/Source/Core/Common/Src/LogManager.h @@ -26,7 +26,7 @@ #include #define MAX_MESSAGES 8000 -#define MAX_MSGLEN 512 +#define MAX_MSGLEN 1024 // pure virtual interface (well, except the destructor which we just leave empty). diff --git a/Source/Core/Core/Src/CoreTiming.cpp b/Source/Core/Core/Src/CoreTiming.cpp index a752798761..3e1ae9eeb6 100644 --- a/Source/Core/Core/Src/CoreTiming.cpp +++ b/Source/Core/Core/Src/CoreTiming.cpp @@ -22,6 +22,8 @@ #include "CoreTiming.h" #include "StringUtil.h" +#define MAX_SLICE_LENGTH 20000 + namespace CoreTiming { @@ -53,7 +55,7 @@ Event *eventTsPool = 0; int allocatedTsEvents = 0; int downcount, slicelength; -int maxSliceLength = 20000; +int maxSliceLength = MAX_SLICE_LENGTH; s64 globalTimer; s64 idledCycles; @@ -332,6 +334,10 @@ void SetMaximumSlice(int maximumSliceLength) maxSliceLength = maximumSliceLength; } +void ResetSliceLength() +{ + maxSliceLength = MAX_SLICE_LENGTH; +} void Advance() { diff --git a/Source/Core/Core/Src/CoreTiming.h b/Source/Core/Core/Src/CoreTiming.h index e456fb1f32..d8176daab7 100644 --- a/Source/Core/Core/Src/CoreTiming.h +++ b/Source/Core/Core/Src/CoreTiming.h @@ -72,6 +72,7 @@ void ClearPendingEvents(); void LogPendingEvents(); void SetMaximumSlice(int maximumSliceLength); +void ResetSliceLength(); void RegisterAdvanceCallback(void (*callback)(int cyclesExecuted)); diff --git a/Source/Core/Core/Src/PowerPC/Gekko.h b/Source/Core/Core/Src/PowerPC/Gekko.h index 835a8f7d63..0c9422b336 100644 --- a/Source/Core/Core/Src/PowerPC/Gekko.h +++ b/Source/Core/Core/Src/PowerPC/Gekko.h @@ -522,7 +522,7 @@ union UReg_BAT_Up { unsigned VP : 1; unsigned VS : 1; - unsigned BL : 11; + unsigned BL : 11; // Block length (aka block size mask) unsigned : 4; unsigned BEPI : 15; }; @@ -540,7 +540,7 @@ union UReg_BAT_Lo unsigned : 1; unsigned WIMG : 4; unsigned : 10; - unsigned BRPN : 15; + unsigned BRPN : 15; // Physical Block Number }; u32 Hex; @@ -623,16 +623,16 @@ enum SPR_IBAT1L = 531, SPR_IBAT2U = 532, SPR_IBAT2L = 533, - SPR_IBAT3L = 534, - SPR_IBAT3U = 535, + SPR_IBAT3U = 534, + SPR_IBAT3L = 535, SPR_DBAT0U = 536, SPR_DBAT0L = 537, SPR_DBAT1U = 538, SPR_DBAT1L = 539, SPR_DBAT2U = 540, SPR_DBAT2L = 541, - SPR_DBAT3L = 542, - SPR_DBAT3U = 543, + SPR_DBAT3U = 542, + SPR_DBAT3L = 543, SPR_GQR0 = 912, SPR_HID0 = 1008, SPR_HID1 = 1009, diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 613c59171d..bd5858a220 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -175,7 +175,12 @@ void Jit64::Init() where this cause problems, so I'm enabling this by default, since I seem to get perhaps as much as 20% more fps with this option enabled. If you suspect that this option cause problems you can also disable it from the debugging window. */ +#ifdef JIT_SINGLESTEP + jo.enableBlocklink = false; + SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle = false; +#else jo.enableBlocklink = true; +#endif #ifdef _M_X64 jo.enableFastMem = SConfig::GetInstance().m_LocalCoreStartupParameter.bUseFastMem; #else @@ -369,14 +374,44 @@ void STACKALIGN Jit64::Run() void Jit64::SingleStep() { - // NOT USED, NOT TESTED, PROBABLY NOT WORKING YET - // PanicAlert("Single"); - /* - JitBlock temp_block; - PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step! - const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block); - CompiledCode pExecAddr = (CompiledCode)code; - pExecAddr();*/ +#ifndef JIT_NO_CACHE + CoreTiming::SetMaximumSlice(1); +#endif + + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); + +#ifndef JIT_NO_CACHE + CoreTiming::ResetSliceLength(); +#endif +} + +void Jit64::Trace(PPCAnalyst::CodeBuffer *code_buffer, u32 em_address) +{ + char reg[50] = ""; + char regs[500] = ""; + char fregs[750] = ""; + +#ifdef JIT_LOG_GPR + for (int i = 0; i < 32; i++) + { + sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); + strncat(regs, reg, 500); + } +#endif + +#ifdef JIT_LOG_FPR + for (int i = 0; i < 32; i++) + { + sprintf(reg, "f%02d: %016x ", i, riPS0(i)); + strncat(fregs, reg, 750); + } +#endif + const PPCAnalyst::CodeOp &op = code_buffer->codebuffer[0]; + char ppcInst[256]; + DisassembleGekko(op.inst.hex, em_address, ppcInst, 256); + + NOTICE_LOG(DYNA_REC, "JIT64 PC: %08x Cycles: %04d CR: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s %s", em_address, js.st.numCycles, PowerPC::ppcState.cr, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs, ppcInst); } void STACKALIGN Jit64::Jit(u32 em_address) @@ -391,14 +426,31 @@ void STACKALIGN Jit64::Jit(u32 em_address) } ClearCache(); } +#ifdef JIT_NO_CACHE + ClearCache(); + if (PowerPC::breakpoints.IsAddressBreakPoint(em_address)) + { + PowerPC::Pause(); + if (PowerPC::breakpoints.IsTempBreakPoint(em_address)) + PowerPC::breakpoints.Remove(em_address); + return; + } +#endif int block_num = blocks.AllocateBlock(em_address); JitBlock *b = blocks.GetBlock(block_num); blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b)); } -const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b) +const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b) { + int blockSize = code_buffer->GetSize(); + +#ifdef JIT_SINGLESTEP + blockSize = 1; + Trace(code_buffer, em_address); +#endif + if (em_address == 0) PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); @@ -412,8 +464,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc //Analyze the block, collect all instructions it is made of (including inlining, //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buf); - PPCAnalyst::CodeOp *ops = code_buf->codebuffer; + u32 nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer, blockSize); + + PPCAnalyst::CodeOp *ops = code_buffer->codebuffer; const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr b->checkedEntry = start; @@ -426,6 +479,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc SetJumpTarget(skip); const u8 *normalEntry = GetCodePtr(); + b->normalEntry = normalEntry; if (ImHereDebug) ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful @@ -473,7 +527,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc gpr.Start(js.gpa); fpr.Start(js.fpa); +#ifdef JIT_SINGLESTEP + js.downcountAmount = js.st.numCycles; +#else js.downcountAmount = js.st.numCycles + PatchEngine::GetSpeedhackCycles(em_address); +#endif + js.blockSize = size; // Translate instructions for (int i = 0; i < (int)size; i++) @@ -512,12 +571,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc ABI_CallFunction(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); } - // If starting from the breakpointed instruction, we don't break. - if (em_address != ops[i].address && PowerPC::breakpoints.IsAddressBreakPoint(ops[i].address)) - { - - } - if (!ops[i].skip) Jit64Tables::CompileInstruction(ops[i].inst); @@ -527,8 +580,19 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc break; } +#ifdef JIT_SINGLESTEP + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + WriteExit(nextPC, 0); +#endif + b->flags = js.block_flags; b->codeSize = (u32)(GetCodePtr() - normalEntry); b->originalSize = size; + +#ifdef JIT_LOG_X86 + LogGeneratedX86(size, code_buffer, normalEntry, b); +#endif + return normalEntry; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 7353fe9477..72fafc5cdf 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -106,6 +106,7 @@ public: JitBlockCache *GetBlockCache() { return &blocks; } void NotifyBreakpoint(u32 em_address, bool set); + void Trace(PPCAnalyst::CodeBuffer *code_buffer, u32 em_address); void ClearCache(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index 1fd3c44b39..a69c1db6f7 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -114,6 +114,11 @@ void Jit64AsmRoutineManager::Generate() MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc)); CALL((void *)&Jit); #endif +#ifdef JIT_NO_CACHE + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); + FixupBranch notRunning = J_CC(CC_NZ); +#endif + JMP(dispatcherNoCheck); // no point in special casing this //FP blocks test for FPU available, jump here if false @@ -126,6 +131,9 @@ void Jit64AsmRoutineManager::Generate() MOV(32, M(&PC), R(EAX)); JMP(dispatcher); +#ifdef JIT_NO_CACHE + SetJumpTarget(notRunning); +#endif SetJumpTarget(bail); doTiming = GetCodePtr(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp index 35383dc437..d8100d434d 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp @@ -262,7 +262,7 @@ void RegCache::KillImmediate(int preg) void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty) { if (!regs[i].away && regs[i].location.IsImm()) - PanicAlert("Bad immedaite"); + PanicAlert("Bad immediate"); if (!regs[i].away || (regs[i].away && regs[i].location.IsImm())) { @@ -375,7 +375,7 @@ void RegCache::Flush(FlushMode mode) { for (int i = 0; i < NUMXREGS; i++) { if (xlocks[i]) - PanicAlert("Somone forgot to unlock X64 reg %i.", i); + PanicAlert("Someone forgot to unlock X64 reg %i.", i); } for (int i = 0; i < 32; i++) { diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index c4b87ce49d..db90f44c7b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -1661,7 +1661,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak //if (!RI.MakeProfile && RI.numSpills) // printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills); - + + Jit->WriteExit(jit->js.curBlock->exitAddress[0], 0); Jit->UD2(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index 2806f39be7..ebb47c5ebb 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -167,7 +167,12 @@ void JitIL::Init() CODE_SIZE = 1024*1024*8*8; jo.optimizeStack = true; +#ifdef JIT_SINGLESTEP + jo.enableBlocklink = false; + Core::g_CoreStartupParameter.bSkipIdle = false; +#else jo.enableBlocklink = true; // Speed boost, but not 100% safe +#endif #ifdef _M_X64 jo.enableFastMem = false; #else @@ -346,14 +351,44 @@ void STACKALIGN JitIL::Run() void JitIL::SingleStep() { - // NOT USED, NOT TESTED, PROBABLY NOT WORKING YET - // PanicAlert("Single"); - /* - JitBlock temp_block; - PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step! - const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block); - CompiledCode pExecAddr = (CompiledCode)code; - pExecAddr();*/ +#ifndef JIT_NO_CACHE + CoreTiming::SetMaximumSlice(1); +#endif + + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); + +#ifndef JIT_NO_CACHE + CoreTiming::ResetSliceLength(); +#endif +} + +void JitIL::Trace(PPCAnalyst::CodeBuffer *code_buffer, u32 em_address) +{ + char reg[50] = ""; + char regs[500] = ""; + char fregs[750] = ""; + +#ifdef JIT_LOG_GPR + for (int i = 0; i < 32; i++) + { + sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); + strncat(regs, reg, 500); + } +#endif + +#ifdef JIT_LOG_FPR + for (int i = 0; i < 32; i++) + { + sprintf(reg, "f%02d: %016x ", i, riPS0(i)); + strncat(fregs, reg, 750); + } +#endif + const PPCAnalyst::CodeOp &op = code_buffer->codebuffer[0]; + char ppcInst[256]; + DisassembleGekko(op.inst.hex, em_address, ppcInst, 256); + + NOTICE_LOG(DYNA_REC, "JITIL PC: %08x Cycles: %04d CR: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s %s", em_address, js.st.numCycles, PowerPC::ppcState.cr, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs, ppcInst); } void STACKALIGN JitIL::Jit(u32 em_address) @@ -368,13 +403,30 @@ void STACKALIGN JitIL::Jit(u32 em_address) } ClearCache(); } +#ifdef JIT_NO_CACHE + ClearCache(); + if (PowerPC::breakpoints.IsAddressBreakPoint(em_address)) + { + PowerPC::Pause(); + if (PowerPC::breakpoints.IsTempBreakPoint(em_address)) + PowerPC::breakpoints.Remove(em_address); + return; + } +#endif int block_num = blocks.AllocateBlock(em_address); JitBlock *b = blocks.GetBlock(block_num); blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b)); } -const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *buffer, JitBlock *b) +const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b) { + int blockSize = code_buffer->GetSize(); + +#ifdef JIT_SINGLESTEP + blockSize = 1; + Trace(code_buffer, em_address); +#endif + if (em_address == 0) PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); @@ -387,8 +439,8 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *buffer, JitBlock //Analyze the block, collect all instructions it is made of (including inlining, //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, buffer); - PPCAnalyst::CodeOp *ops = buffer->codebuffer; + b->exitAddress[0] = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer, blockSize); + PPCAnalyst::CodeOp *ops = code_buffer->codebuffer; const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr b->checkedEntry = start; @@ -422,7 +474,11 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *buffer, JitBlock // instruction processed by the JIT routines) ibuild.Reset(); +#ifdef JIT_SINGLESTEP + js.downcountAmount = js.st.numCycles; +#else js.downcountAmount = js.st.numCycles + PatchEngine::GetSpeedhackCycles(em_address); +#endif // Translate instructions for (int i = 0; i < (int)size; i++) { @@ -452,5 +508,10 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *buffer, JitBlock b->codeSize = (u32)(GetCodePtr() - normalEntry); b->originalSize = size; + +#ifdef JIT_LOG_X86 + LogGeneratedX86(size, code_buffer, normalEntry, b); +#endif + return normalEntry; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h index d2b9dd853b..465dc389c4 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h @@ -85,6 +85,7 @@ public: const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b); void NotifyBreakpoint(u32 em_address, bool set); + void Trace(PPCAnalyst::CodeBuffer *code_buffer, u32 em_address); void ClearCache(); const u8 *GetDispatcher() { diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp index da692c3b47..a56502d623 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp @@ -78,8 +78,8 @@ void JitILAsmRoutineManager::Generate() dispatcher = GetCodePtr(); //This is the place for CPUCompare! - //The result of slice decrementation should be in flags if somebody jumped here - FixupBranch bail = J_CC(CC_S); + //The result of slice decrement should be in flags if somebody jumped here + FixupBranch bail = J_CC(CC_BE); SetJumpTarget(skipToRealDispatch); dispatcherNoCheck = GetCodePtr(); @@ -116,6 +116,11 @@ void JitILAsmRoutineManager::Generate() MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc)); CALL((void *)&Jit); #endif +#ifdef JIT_NO_CACHE + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); + FixupBranch notRunning = J_CC(CC_NZ); +#endif + JMP(dispatcherNoCheck); // no point in special casing this //FP blocks test for FPU available, jump here if false @@ -128,6 +133,10 @@ void JitILAsmRoutineManager::Generate() MOV(32, M(&PC), R(EAX)); JMP(dispatcher); +#ifdef JIT_NO_CACHE + SetJumpTarget(notRunning); +#endif + SetJumpTarget(bail); doTiming = GetCodePtr(); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.cpp index a9263c8b86..16149adb6f 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.cpp @@ -29,3 +29,47 @@ u32 Helper_Mask(u8 mb, u8 me) (((u32)-1 >> mb) ^ ((me >= 31) ? 0 : (u32) -1 >> (me + 1)))) ); } + +void LogGeneratedX86(int size, PPCAnalyst::CodeBuffer *code_buffer, const u8 *normalEntry, JitBlock *b) +{ + char pDis[1000] = ""; + + for (int i = 0; i < size; i++) + { + char temp[256] = ""; + const PPCAnalyst::CodeOp &op = code_buffer->codebuffer[i]; + DisassembleGekko(op.inst.hex, op.address, temp, 256); + sprintf(pDis, "%08x %s", op.address, temp); + DEBUG_LOG(DYNA_REC,"IR_X86 PPC: %s\n", pDis); + } + + disassembler x64disasm; + x64disasm.set_syntax_intel(); + + u64 disasmPtr = (u64)normalEntry; + const u8 *end = normalEntry + b->codeSize; + + while ((u8*)disasmPtr < end) + { + char sptr[1000] = ""; +#ifdef _M_X64 + disasmPtr += x64disasm.disasm64(disasmPtr, disasmPtr, (u8*)disasmPtr, sptr); +#else + disasmPtr += x64disasm.disasm32(disasmPtr, disasmPtr, (u8*)disasmPtr, sptr); +#endif + DEBUG_LOG(DYNA_REC,"IR_X86 x86: %s", sptr); + } + + if (b->codeSize <= 250) + { + char x86code[500] = ""; + for (u8 i = 0; i <= b->codeSize; i++) + { + char opcHex[2] = ""; + u8 opc = *(normalEntry + i); + sprintf(opcHex, "%02x", opc); + strncat(x86code, opcHex, 2); + } + DEBUG_LOG(DYNA_REC,"IR_X86 bin: %s\n\n\n", x86code); + } +} diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h index 9c3c201e6c..3488dc4b79 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h @@ -18,11 +18,20 @@ #ifndef _JITBASE_H #define _JITBASE_H +//#define JIT_SINGLESTEP // Enables single stepping +//#define JIT_NO_CACHE // Disables the block cache and enables breakpoints +//#define JIT_LOG_X86 // Enables logging of the generated x86 code +//#define JIT_LOG_GPR // Enables logging of the PPC general purpose regs +//#define JIT_LOG_FPR // Enables logging of the PPC floating point regs + #include "JitCache.h" #include "Jit_Util.h" // for EmuCodeBlock #include "JitBackpatch.h" // for EmuCodeBlock #include "JitAsmCommon.h" +#include "PowerPCDisasm.h" +#include "disasm.h" + #define JIT_OPCODE 0 // TODO: In the future, inherit this from CPUCoreBase and have Interpreter @@ -97,6 +106,6 @@ void Jit(u32 em_address); // Merged routines that should be moved somewhere better u32 Helper_Mask(u8 mb, u8 me); - +void LogGeneratedX86(int size, PPCAnalyst::CodeBuffer *code_buffer, const u8 *normalEntry, JitBlock *b); #endif diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp index 4c27a1820a..cd023efb76 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp @@ -284,7 +284,8 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b) } // Does not yet perform inlining - although there are plans for that. -bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer) +// Returns the exit address of the next PC +u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer, int blockSize) { memset(st, 0, sizeof(st)); UGeckoInstruction previnst = Memory::Read_Opcode_JIT_LC(address - 4); @@ -295,7 +296,8 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo fpa->any = false; u32 blockstart = address; - int maxsize = buffer->GetSize(); + int maxsize = blockSize; + int num_inst = 0; int numFollows = 0; int numCycles = 0; @@ -304,8 +306,9 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo bool foundExit = false; // Flatten! (Currently just copies, following branches is disabled) - for (int i = 0; i < maxsize; i++, num_inst++) + for (int i = 0; i < maxsize; i++) { + num_inst++; memset(&code[i], 0, sizeof(CodeOp)); code[i].address = address; @@ -322,7 +325,7 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo _assert_msg_(POWERPC, opinfo != 0, "Invalid Op - Error flattening %08x op %08x", address + i*4, inst.hex); bool follow = false; u32 destination; - if (inst.OPCD == 18) + if (inst.OPCD == 18 && blockSize > 1) { //Is bx - should we inline? yes! if (inst.AA) @@ -351,10 +354,9 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo code[i].skip = true; address = destination; } - } - - _assert_msg_(POWERPC, foundExit, "Analyzer ERROR - Function %08x too big", blockstart); - num_inst++; // why? + } + if (!foundExit && blockSize > 1) + NOTICE_LOG(POWERPC, "Analyzer ERROR - Function %08x too big, size is 0x%08x", blockstart, address-blockstart); st->numCycles = numCycles; // Do analysis of the code, look for dependencies etc @@ -497,33 +499,34 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo } // Instruction Reordering Pass - - // Bubble down compares towards branches, so that they can be merged. - // -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch. - for (int i = 0; i < num_inst - 2; i++) + if (blockSize > 1) { - CodeOp &a = code[i]; - CodeOp &b = code[i + 1]; - // All integer compares can be reordered. - if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) || - (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32))) + // Bubble down compares towards branches, so that they can be merged. + // -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch. + for (int i = 0; i < num_inst - 2; i++) { - // Got a compare instruction. - if (CanSwapAdjacentOps(a, b)) { - // Alright, let's bubble it down! - CodeOp c = a; - a = b; - b = c; + CodeOp &a = code[i]; + CodeOp &b = code[i + 1]; + // All integer compares can be reordered. + if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) || + (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32))) + { + // Got a compare instruction. + if (CanSwapAdjacentOps(a, b)) { + // Alright, let's bubble it down! + CodeOp c = a; + a = b; + b = c; + } } } } - // Scan for CR0 dependency // assume next block wants CR0 to be safe bool wantsCR0 = true; bool wantsCR1 = true; bool wantsPS1 = true; - for (int i = num_inst - 1; i; i--) + for (int i = num_inst; i; i--) { if (code[i].outputCR0) wantsCR0 = false; @@ -541,7 +544,7 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo *realsize = num_inst; // ... - return true; + return address; } diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.h b/Source/Core/Core/Src/PowerPC/PPCAnalyst.h index 61b3ec1868..da0c2b23a5 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.h @@ -92,7 +92,7 @@ public: }; -bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer); +u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer, int blockSize); void LogFunctionCall(u32 addr); void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db); bool AnalyzeFunction(u32 startAddr, Symbol &func, int max_size = 0); diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.cpp b/Source/Core/Core/Src/PowerPC/PowerPC.cpp index 5d3b8db24b..2b6c934697 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp @@ -158,6 +158,7 @@ void Shutdown() { // Shutdown both execution engines. Doesn't matter which one is active. jit->Shutdown(); + state = CPU_POWERDOWN; delete jit; jit = 0; Interpreter::Shutdown(); diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.h b/Source/Core/Core/Src/PowerPC/PowerPC.h index db6665e177..f4a2880622 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.h +++ b/Source/Core/Core/Src/PowerPC/PowerPC.h @@ -48,7 +48,6 @@ struct GC_ALIGNED64(PowerPCState) u32 pc; // program counter u32 npc; - u32 nextBlock; u32 cr; // flags u8 cr_fast[8]; // Possibly reorder to 0, 2, 4, 8, 1, 3, 5, 7 so that we can make Compact and Expand super fast? diff --git a/Source/Core/DebuggerWX/Src/CodeWindow.cpp b/Source/Core/DebuggerWX/Src/CodeWindow.cpp index c3a2c75790..75cb555ddf 100644 --- a/Source/Core/DebuggerWX/Src/CodeWindow.cpp +++ b/Source/Core/DebuggerWX/Src/CodeWindow.cpp @@ -692,7 +692,7 @@ void CCodeWindow::UpdateButtonStates() } } - ToolBar->EnableTool(IDM_STEP, Initialized && Stepping && UseInterpreter()); + ToolBar->EnableTool(IDM_STEP, Initialized && Stepping); if (ToolBar) ToolBar->Realize(); diff --git a/Source/Core/DebuggerWX/Src/JitWindow.cpp b/Source/Core/DebuggerWX/Src/JitWindow.cpp index a1b922a0d1..fc9cb29527 100644 --- a/Source/Core/DebuggerWX/Src/JitWindow.cpp +++ b/Source/Core/DebuggerWX/Src/JitWindow.cpp @@ -179,7 +179,7 @@ void CJitWindow::Compare(u32 em_address) PPCAnalyst::BlockStats st; PPCAnalyst::BlockRegStats gpa; PPCAnalyst::BlockRegStats fpa; - if (PPCAnalyst::Flatten(ppc_addr, &size, &st, &gpa, &fpa, &code_buffer)) + if (PPCAnalyst::Flatten(ppc_addr, &size, &st, &gpa, &fpa, &code_buffer, size) != 0xffffffff) { sptr = (char*)xDis; for (int i = 0; i < size; i++)