diff --git a/Source/Core/Common/Src/LogManager.cpp b/Source/Core/Common/Src/LogManager.cpp index 2abf20b2f3..5303dab13d 100644 --- a/Source/Core/Common/Src/LogManager.cpp +++ b/Source/Core/Common/Src/LogManager.cpp @@ -78,7 +78,7 @@ LogManager::LogManager() { m_Log[LogTypes::WII_IPC_NET] = new LogContainer("WII_IPC_NET", "WII IPC NET"); m_Log[LogTypes::WII_IPC_WIIMOTE] = new LogContainer("WII_IPC_WIIMOTE","WII IPC WIIMOTE"); m_Log[LogTypes::ACTIONREPLAY] = new LogContainer("ActionReplay", "ActionReplay"); - m_Log[LogTypes::MEMCARD_MANAGER] = new LogContainer("MemCard Manger", "MemCard Manger"); + m_Log[LogTypes::MEMCARD_MANAGER] = new LogContainer("MemCard Manager", "MemCard Manager"); m_Log[LogTypes::NETPLAY] = new LogContainer("NETPLAY", "Netplay"); m_fileLog = new FileLogListener(File::GetUserPath(F_MAINLOG_IDX)); @@ -110,7 +110,7 @@ void LogManager::Log(LogTypes::LOG_LEVELS level, LogTypes::LOG_TYPE type, va_list args) { char temp[MAX_MSGLEN]; - char msg[MAX_MSGLEN + 512]; + char msg[MAX_MSGLEN * 2]; LogContainer *log = m_Log[type]; if (! log->isEnable() || level > log->getLevel()) diff --git a/Source/Core/Common/Src/LogManager.h b/Source/Core/Common/Src/LogManager.h index 9b91e189de..e0157b246d 100644 --- a/Source/Core/Common/Src/LogManager.h +++ b/Source/Core/Common/Src/LogManager.h @@ -26,7 +26,7 @@ #include #define MAX_MESSAGES 8000 -#define MAX_MSGLEN 512 +#define MAX_MSGLEN 1024 // pure virtual interface (well, except the destructor which we just leave empty). 
diff --git a/Source/Core/Core/Src/CoreTiming.cpp b/Source/Core/Core/Src/CoreTiming.cpp index a752798761..3e1ae9eeb6 100644 --- a/Source/Core/Core/Src/CoreTiming.cpp +++ b/Source/Core/Core/Src/CoreTiming.cpp @@ -22,6 +22,8 @@ #include "CoreTiming.h" #include "StringUtil.h" +#define MAX_SLICE_LENGTH 20000 + namespace CoreTiming { @@ -53,7 +55,7 @@ Event *eventTsPool = 0; int allocatedTsEvents = 0; int downcount, slicelength; -int maxSliceLength = 20000; +int maxSliceLength = MAX_SLICE_LENGTH; s64 globalTimer; s64 idledCycles; @@ -332,6 +334,10 @@ void SetMaximumSlice(int maximumSliceLength) maxSliceLength = maximumSliceLength; } +void ResetSliceLength() +{ + maxSliceLength = MAX_SLICE_LENGTH; +} void Advance() { diff --git a/Source/Core/Core/Src/CoreTiming.h b/Source/Core/Core/Src/CoreTiming.h index e456fb1f32..d8176daab7 100644 --- a/Source/Core/Core/Src/CoreTiming.h +++ b/Source/Core/Core/Src/CoreTiming.h @@ -72,6 +72,7 @@ void ClearPendingEvents(); void LogPendingEvents(); void SetMaximumSlice(int maximumSliceLength); +void ResetSliceLength(); void RegisterAdvanceCallback(void (*callback)(int cyclesExecuted)); diff --git a/Source/Core/Core/Src/PowerPC/Gekko.h b/Source/Core/Core/Src/PowerPC/Gekko.h index 835a8f7d63..0c9422b336 100644 --- a/Source/Core/Core/Src/PowerPC/Gekko.h +++ b/Source/Core/Core/Src/PowerPC/Gekko.h @@ -522,7 +522,7 @@ union UReg_BAT_Up { unsigned VP : 1; unsigned VS : 1; - unsigned BL : 11; + unsigned BL : 11; // Block length (aka block size mask) unsigned : 4; unsigned BEPI : 15; }; @@ -540,7 +540,7 @@ union UReg_BAT_Lo unsigned : 1; unsigned WIMG : 4; unsigned : 10; - unsigned BRPN : 15; + unsigned BRPN : 15; // Physical Block Number }; u32 Hex; @@ -623,16 +623,16 @@ enum SPR_IBAT1L = 531, SPR_IBAT2U = 532, SPR_IBAT2L = 533, - SPR_IBAT3L = 534, - SPR_IBAT3U = 535, + SPR_IBAT3U = 534, + SPR_IBAT3L = 535, SPR_DBAT0U = 536, SPR_DBAT0L = 537, SPR_DBAT1U = 538, SPR_DBAT1L = 539, SPR_DBAT2U = 540, SPR_DBAT2L = 541, - SPR_DBAT3L = 542, - SPR_DBAT3U = 
543, + SPR_DBAT3U = 542, + SPR_DBAT3L = 543, SPR_GQR0 = 912, SPR_HID0 = 1008, SPR_HID1 = 1009, diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 613c59171d..bd5858a220 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -175,7 +175,12 @@ void Jit64::Init() where this cause problems, so I'm enabling this by default, since I seem to get perhaps as much as 20% more fps with this option enabled. If you suspect that this option cause problems you can also disable it from the debugging window. */ +#ifdef JIT_SINGLESTEP + jo.enableBlocklink = false; + SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle = false; +#else jo.enableBlocklink = true; +#endif #ifdef _M_X64 jo.enableFastMem = SConfig::GetInstance().m_LocalCoreStartupParameter.bUseFastMem; #else @@ -369,14 +374,44 @@ void STACKALIGN Jit64::Run() void Jit64::SingleStep() { - // NOT USED, NOT TESTED, PROBABLY NOT WORKING YET - // PanicAlert("Single"); - /* - JitBlock temp_block; - PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step! 
- const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block); - CompiledCode pExecAddr = (CompiledCode)code; - pExecAddr();*/ +#ifndef JIT_NO_CACHE + CoreTiming::SetMaximumSlice(1); +#endif + + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); + +#ifndef JIT_NO_CACHE + CoreTiming::ResetSliceLength(); +#endif +} + +void Jit64::Trace(PPCAnalyst::CodeBuffer *code_buffer, u32 em_address) // Logs PC, cycle count, CR, FPSCR, MSR and LR, the GPR/FPR dumps when JIT_LOG_GPR/JIT_LOG_FPR are defined, and the disassembly of code_buffer's first op +{ + char reg[50] = ""; + char regs[500] = ""; + char fregs[750] = ""; + +#ifdef JIT_LOG_GPR + for (int i = 0; i < 32; i++) + { + sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); + strncat(regs, reg, sizeof(regs) - strlen(regs) - 1); // strncat's bound is the space left in regs, not its total size + } +#endif + +#ifdef JIT_LOG_FPR + for (int i = 0; i < 32; i++) + { + sprintf(reg, "f%02d: %016llx ", i, (unsigned long long)riPS0(i)); // 16 hex digits need a 64-bit conversion; plain %x only consumes an unsigned int + strncat(fregs, reg, sizeof(fregs) - strlen(fregs) - 1); // same bound rule as for regs + } +#endif + const PPCAnalyst::CodeOp &op = code_buffer->codebuffer[0]; + char ppcInst[256]; + DisassembleGekko(op.inst.hex, em_address, ppcInst, 256); + + NOTICE_LOG(DYNA_REC, "JIT64 PC: %08x Cycles: %04d CR: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s %s", em_address, js.st.numCycles, PowerPC::ppcState.cr, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs, ppcInst); } void STACKALIGN Jit64::Jit(u32 em_address) @@ -391,14 +426,31 @@ void STACKALIGN Jit64::Jit(u32 em_address) } ClearCache(); } +#ifdef JIT_NO_CACHE + ClearCache(); + if (PowerPC::breakpoints.IsAddressBreakPoint(em_address)) + { + PowerPC::Pause(); + if (PowerPC::breakpoints.IsTempBreakPoint(em_address)) + PowerPC::breakpoints.Remove(em_address); + return; + } +#endif int block_num = blocks.AllocateBlock(em_address); JitBlock *b = blocks.GetBlock(block_num); blocks.FinalizeBlock(block_num, jo.enableBlocklink, 
DoJit(em_address, &code_buffer, b)); } -const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b) +const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b) { + int blockSize = code_buffer->GetSize(); + +#ifdef JIT_SINGLESTEP + blockSize = 1; + Trace(code_buffer, em_address); +#endif + if (em_address == 0) PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); @@ -412,8 +464,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc //Analyze the block, collect all instructions it is made of (including inlining, //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buf); - PPCAnalyst::CodeOp *ops = code_buf->codebuffer; + u32 nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer, blockSize); + + PPCAnalyst::CodeOp *ops = code_buffer->codebuffer; const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr b->checkedEntry = start; @@ -426,6 +479,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc SetJumpTarget(skip); const u8 *normalEntry = GetCodePtr(); + b->normalEntry = normalEntry; if (ImHereDebug) ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful @@ -473,7 +527,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc gpr.Start(js.gpa); fpr.Start(js.fpa); +#ifdef JIT_SINGLESTEP + js.downcountAmount = js.st.numCycles; +#else js.downcountAmount = js.st.numCycles + PatchEngine::GetSpeedhackCycles(em_address); +#endif + js.blockSize = size; // Translate instructions for (int i = 0; i < (int)size; i++) @@ -512,12 +571,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc ABI_CallFunction(thunks.ProtectFunction((void 
*)&GPFifo::CheckGatherPipe, 0)); } - // If starting from the breakpointed instruction, we don't break. - if (em_address != ops[i].address && PowerPC::breakpoints.IsAddressBreakPoint(ops[i].address)) - { - - } - if (!ops[i].skip) Jit64Tables::CompileInstruction(ops[i].inst); @@ -527,8 +580,19 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc break; } +#ifdef JIT_SINGLESTEP + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + WriteExit(nextPC, 0); +#endif + b->flags = js.block_flags; b->codeSize = (u32)(GetCodePtr() - normalEntry); b->originalSize = size; + +#ifdef JIT_LOG_X86 + LogGeneratedX86(size, code_buffer, normalEntry, b); +#endif + return normalEntry; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 7353fe9477..72fafc5cdf 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -106,6 +106,7 @@ public: JitBlockCache *GetBlockCache() { return &blocks; } void NotifyBreakpoint(u32 em_address, bool set); + void Trace(PPCAnalyst::CodeBuffer *code_buffer, u32 em_address); void ClearCache(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index 1fd3c44b39..a69c1db6f7 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -114,6 +114,11 @@ void Jit64AsmRoutineManager::Generate() MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc)); CALL((void *)&Jit); #endif +#ifdef JIT_NO_CACHE + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); + FixupBranch notRunning = J_CC(CC_NZ); +#endif + JMP(dispatcherNoCheck); // no point in special casing this //FP blocks test for FPU available, jump here if false @@ -126,6 +131,9 @@ void Jit64AsmRoutineManager::Generate() MOV(32, M(&PC), R(EAX)); JMP(dispatcher); +#ifdef JIT_NO_CACHE + SetJumpTarget(notRunning); +#endif SetJumpTarget(bail); doTiming = GetCodePtr(); diff --git 
a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp index 35383dc437..d8100d434d 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp @@ -262,7 +262,7 @@ void RegCache::KillImmediate(int preg) void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty) { if (!regs[i].away && regs[i].location.IsImm()) - PanicAlert("Bad immedaite"); + PanicAlert("Bad immediate"); if (!regs[i].away || (regs[i].away && regs[i].location.IsImm())) { @@ -375,7 +375,7 @@ void RegCache::Flush(FlushMode mode) { for (int i = 0; i < NUMXREGS; i++) { if (xlocks[i]) - PanicAlert("Somone forgot to unlock X64 reg %i.", i); + PanicAlert("Someone forgot to unlock X64 reg %i.", i); } for (int i = 0; i < 32; i++) { diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index c4b87ce49d..db90f44c7b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -1661,7 +1661,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak //if (!RI.MakeProfile && RI.numSpills) // printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills); - + + Jit->WriteExit(jit->js.curBlock->exitAddress[0], 0); Jit->UD2(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index 2806f39be7..ebb47c5ebb 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -167,7 +167,12 @@ void JitIL::Init() CODE_SIZE = 1024*1024*8*8; jo.optimizeStack = true; +#ifdef JIT_SINGLESTEP + jo.enableBlocklink = false; + Core::g_CoreStartupParameter.bSkipIdle = false; +#else jo.enableBlocklink = true; // Speed boost, but not 100% safe +#endif #ifdef _M_X64 jo.enableFastMem = false; #else @@ -346,14 +351,44 @@ void STACKALIGN JitIL::Run() void 
JitIL::SingleStep() { - // NOT USED, NOT TESTED, PROBABLY NOT WORKING YET - // PanicAlert("Single"); - /* - JitBlock temp_block; - PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step! - const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block); - CompiledCode pExecAddr = (CompiledCode)code; - pExecAddr();*/ +#ifndef JIT_NO_CACHE + CoreTiming::SetMaximumSlice(1); +#endif + + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); + +#ifndef JIT_NO_CACHE + CoreTiming::ResetSliceLength(); +#endif +} + +void JitIL::Trace(PPCAnalyst::CodeBuffer *code_buffer, u32 em_address) // Logs PC, cycle count, CR, FPSCR, MSR and LR, the GPR/FPR dumps when JIT_LOG_GPR/JIT_LOG_FPR are defined, and the disassembly of code_buffer's first op +{ + char reg[50] = ""; + char regs[500] = ""; + char fregs[750] = ""; + +#ifdef JIT_LOG_GPR + for (int i = 0; i < 32; i++) + { + sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); + strncat(regs, reg, sizeof(regs) - strlen(regs) - 1); // strncat's bound is the space left in regs, not its total size + } +#endif + +#ifdef JIT_LOG_FPR + for (int i = 0; i < 32; i++) + { + sprintf(reg, "f%02d: %016llx ", i, (unsigned long long)riPS0(i)); // 16 hex digits need a 64-bit conversion; plain %x only consumes an unsigned int + strncat(fregs, reg, sizeof(fregs) - strlen(fregs) - 1); // same bound rule as for regs + } +#endif + const PPCAnalyst::CodeOp &op = code_buffer->codebuffer[0]; + char ppcInst[256]; + DisassembleGekko(op.inst.hex, em_address, ppcInst, 256); + + NOTICE_LOG(DYNA_REC, "JITIL PC: %08x Cycles: %04d CR: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s %s", em_address, js.st.numCycles, PowerPC::ppcState.cr, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs, ppcInst); } void STACKALIGN JitIL::Jit(u32 em_address) @@ -368,13 +403,30 @@ void STACKALIGN JitIL::Jit(u32 em_address) } ClearCache(); } +#ifdef JIT_NO_CACHE + ClearCache(); + if (PowerPC::breakpoints.IsAddressBreakPoint(em_address)) + { + PowerPC::Pause(); + if 
(PowerPC::breakpoints.IsTempBreakPoint(em_address)) + PowerPC::breakpoints.Remove(em_address); + return; + } +#endif int block_num = blocks.AllocateBlock(em_address); JitBlock *b = blocks.GetBlock(block_num); blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b)); } -const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *buffer, JitBlock *b) +const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b) { + int blockSize = code_buffer->GetSize(); + +#ifdef JIT_SINGLESTEP + blockSize = 1; + Trace(code_buffer, em_address); +#endif + if (em_address == 0) PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); @@ -387,8 +439,8 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *buffer, JitBlock //Analyze the block, collect all instructions it is made of (including inlining, //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, buffer); - PPCAnalyst::CodeOp *ops = buffer->codebuffer; + b->exitAddress[0] = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer, blockSize); + PPCAnalyst::CodeOp *ops = code_buffer->codebuffer; const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr b->checkedEntry = start; @@ -422,7 +474,11 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *buffer, JitBlock // instruction processed by the JIT routines) ibuild.Reset(); +#ifdef JIT_SINGLESTEP + js.downcountAmount = js.st.numCycles; +#else js.downcountAmount = js.st.numCycles + PatchEngine::GetSpeedhackCycles(em_address); +#endif // Translate instructions for (int i = 0; i < (int)size; i++) { @@ -452,5 +508,10 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *buffer, JitBlock b->codeSize = (u32)(GetCodePtr() - normalEntry); b->originalSize = size; + +#ifdef JIT_LOG_X86 + LogGeneratedX86(size, 
code_buffer, normalEntry, b); +#endif + return normalEntry; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h index d2b9dd853b..465dc389c4 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h @@ -85,6 +85,7 @@ public: const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b); void NotifyBreakpoint(u32 em_address, bool set); + void Trace(PPCAnalyst::CodeBuffer *code_buffer, u32 em_address); void ClearCache(); const u8 *GetDispatcher() { diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp index da692c3b47..a56502d623 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp @@ -78,8 +78,8 @@ void JitILAsmRoutineManager::Generate() dispatcher = GetCodePtr(); //This is the place for CPUCompare! - //The result of slice decrementation should be in flags if somebody jumped here - FixupBranch bail = J_CC(CC_S); + //The result of slice decrement should be in flags if somebody jumped here + FixupBranch bail = J_CC(CC_BE); SetJumpTarget(skipToRealDispatch); dispatcherNoCheck = GetCodePtr(); @@ -116,6 +116,11 @@ void JitILAsmRoutineManager::Generate() MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc)); CALL((void *)&Jit); #endif +#ifdef JIT_NO_CACHE + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); + FixupBranch notRunning = J_CC(CC_NZ); +#endif + JMP(dispatcherNoCheck); // no point in special casing this //FP blocks test for FPU available, jump here if false @@ -128,6 +133,10 @@ void JitILAsmRoutineManager::Generate() MOV(32, M(&PC), R(EAX)); JMP(dispatcher); +#ifdef JIT_NO_CACHE + SetJumpTarget(notRunning); +#endif + SetJumpTarget(bail); doTiming = GetCodePtr(); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.cpp index a9263c8b86..16149adb6f 
100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.cpp @@ -29,3 +29,48 @@ u32 Helper_Mask(u8 mb, u8 me) (((u32)-1 >> mb) ^ ((me >= 31) ? 0 : (u32) -1 >> (me + 1)))) ); } + +// Debug helper: logs the PPC disassembly of the first `size` ops in code_buffer, the disassembly of the x86 code generated at normalEntry and, for blocks of at most 250 bytes, a hex dump of that code. +void LogGeneratedX86(int size, PPCAnalyst::CodeBuffer *code_buffer, const u8 *normalEntry, JitBlock *b) +{ + char pDis[1000] = ""; + + for (int i = 0; i < size; i++) + { + char temp[256] = ""; + const PPCAnalyst::CodeOp &op = code_buffer->codebuffer[i]; + DisassembleGekko(op.inst.hex, op.address, temp, 256); + sprintf(pDis, "%08x %s", op.address, temp); + DEBUG_LOG(DYNA_REC,"IR_X86 PPC: %s\n", pDis); + } + + disassembler x64disasm; + x64disasm.set_syntax_intel(); + + u64 disasmPtr = (u64)normalEntry; + const u8 *end = normalEntry + b->codeSize; + + while ((u8*)disasmPtr < end) + { + char sptr[1000] = ""; +#ifdef _M_X64 + disasmPtr += x64disasm.disasm64(disasmPtr, disasmPtr, (u8*)disasmPtr, sptr); +#else + disasmPtr += x64disasm.disasm32(disasmPtr, disasmPtr, (u8*)disasmPtr, sptr); +#endif + DEBUG_LOG(DYNA_REC,"IR_X86 x86: %s", sptr); + } + + if (b->codeSize <= 250) + { + char x86code[2 * 250 + 1] = ""; // two hex digits per byte for up to 250 bytes, plus the terminating NUL + for (u32 i = 0; i < b->codeSize; i++) // strictly less than: codeSize is a byte count, so the last byte is at codeSize - 1 + { + char opcHex[3] = ""; // "%02x" writes two digits and a NUL + u8 opc = *(normalEntry + i); + sprintf(opcHex, "%02x", opc); + strncat(x86code, opcHex, 2); + } + DEBUG_LOG(DYNA_REC,"IR_X86 bin: %s\n\n\n", x86code); + } +} diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h index 9c3c201e6c..3488dc4b79 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h @@ -18,11 +18,20 @@ #ifndef _JITBASE_H #define _JITBASE_H +//#define JIT_SINGLESTEP // Enables single stepping +//#define JIT_NO_CACHE // Disables the block cache and enables breakpoints +//#define JIT_LOG_X86 // Enables logging of the generated x86 code +//#define JIT_LOG_GPR // Enables logging of the PPC general purpose regs +//#define JIT_LOG_FPR // Enables 
logging of the PPC floating point regs + #include "JitCache.h" #include "Jit_Util.h" // for EmuCodeBlock #include "JitBackpatch.h" // for EmuCodeBlock #include "JitAsmCommon.h" +#include "PowerPCDisasm.h" +#include "disasm.h" + #define JIT_OPCODE 0 // TODO: In the future, inherit this from CPUCoreBase and have Interpreter @@ -97,6 +106,6 @@ void Jit(u32 em_address); // Merged routines that should be moved somewhere better u32 Helper_Mask(u8 mb, u8 me); - +void LogGeneratedX86(int size, PPCAnalyst::CodeBuffer *code_buffer, const u8 *normalEntry, JitBlock *b); #endif diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp index 4c27a1820a..cd023efb76 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp @@ -284,7 +284,8 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b) } // Does not yet perform inlining - although there are plans for that. -bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer) +// Returns the exit address of the next PC +u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer, int blockSize) { memset(st, 0, sizeof(st)); UGeckoInstruction previnst = Memory::Read_Opcode_JIT_LC(address - 4); @@ -295,7 +296,8 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo fpa->any = false; u32 blockstart = address; - int maxsize = buffer->GetSize(); + int maxsize = blockSize; + int num_inst = 0; int numFollows = 0; int numCycles = 0; @@ -304,8 +306,9 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo bool foundExit = false; // Flatten! 
(Currently just copies, following branches is disabled) - for (int i = 0; i < maxsize; i++, num_inst++) + for (int i = 0; i < maxsize; i++) { + num_inst++; memset(&code[i], 0, sizeof(CodeOp)); code[i].address = address; @@ -322,7 +325,7 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo _assert_msg_(POWERPC, opinfo != 0, "Invalid Op - Error flattening %08x op %08x", address + i*4, inst.hex); bool follow = false; u32 destination; - if (inst.OPCD == 18) + if (inst.OPCD == 18 && blockSize > 1) { //Is bx - should we inline? yes! if (inst.AA) @@ -351,10 +354,9 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo code[i].skip = true; address = destination; } - } - - _assert_msg_(POWERPC, foundExit, "Analyzer ERROR - Function %08x too big", blockstart); - num_inst++; // why? + } + if (!foundExit && blockSize > 1) + NOTICE_LOG(POWERPC, "Analyzer ERROR - Function %08x too big, size is 0x%08x", blockstart, address-blockstart); st->numCycles = numCycles; // Do analysis of the code, look for dependencies etc @@ -497,33 +499,34 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo } // Instruction Reordering Pass - - // Bubble down compares towards branches, so that they can be merged. - // -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch. - for (int i = 0; i < num_inst - 2; i++) + if (blockSize > 1) { - CodeOp &a = code[i]; - CodeOp &b = code[i + 1]; - // All integer compares can be reordered. - if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) || - (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32))) + // Bubble down compares towards branches, so that they can be merged. + // -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch. + for (int i = 0; i < num_inst - 2; i++) { - // Got a compare instruction. - if (CanSwapAdjacentOps(a, b)) { - // Alright, let's bubble it down! 
- CodeOp c = a; - a = b; - b = c; + CodeOp &a = code[i]; + CodeOp &b = code[i + 1]; + // All integer compares can be reordered. + if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) || + (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32))) + { + // Got a compare instruction. + if (CanSwapAdjacentOps(a, b)) { + // Alright, let's bubble it down! + CodeOp c = a; + a = b; + b = c; + } } } } - // Scan for CR0 dependency // assume next block wants CR0 to be safe bool wantsCR0 = true; bool wantsCR1 = true; bool wantsPS1 = true; - for (int i = num_inst - 1; i; i--) + for (int i = num_inst - 1; i; i--) // num_inst is the number of ops emitted, so the last valid entry is code[num_inst - 1] { if (code[i].outputCR0) wantsCR0 = false; @@ -541,7 +544,7 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo *realsize = num_inst; // ... - return true; + return address; } diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.h b/Source/Core/Core/Src/PowerPC/PPCAnalyst.h index 61b3ec1868..da0c2b23a5 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.h @@ -92,7 +92,7 @@ public: }; -bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer); +u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer, int blockSize); void LogFunctionCall(u32 addr); void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db); bool AnalyzeFunction(u32 startAddr, Symbol &func, int max_size = 0); diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.cpp b/Source/Core/Core/Src/PowerPC/PowerPC.cpp index 5d3b8db24b..2b6c934697 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp @@ -158,6 +158,7 @@ void Shutdown() { // Shutdown both execution engines. Doesn't matter which one is active. 
jit->Shutdown(); + state = CPU_POWERDOWN; delete jit; jit = 0; Interpreter::Shutdown(); diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.h b/Source/Core/Core/Src/PowerPC/PowerPC.h index db6665e177..f4a2880622 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.h +++ b/Source/Core/Core/Src/PowerPC/PowerPC.h @@ -48,7 +48,6 @@ struct GC_ALIGNED64(PowerPCState) u32 pc; // program counter u32 npc; - u32 nextBlock; u32 cr; // flags u8 cr_fast[8]; // Possibly reorder to 0, 2, 4, 8, 1, 3, 5, 7 so that we can make Compact and Expand super fast? diff --git a/Source/Core/DebuggerWX/Src/CodeWindow.cpp b/Source/Core/DebuggerWX/Src/CodeWindow.cpp index c3a2c75790..75cb555ddf 100644 --- a/Source/Core/DebuggerWX/Src/CodeWindow.cpp +++ b/Source/Core/DebuggerWX/Src/CodeWindow.cpp @@ -692,7 +692,7 @@ void CCodeWindow::UpdateButtonStates() } } - ToolBar->EnableTool(IDM_STEP, Initialized && Stepping && UseInterpreter()); + ToolBar->EnableTool(IDM_STEP, Initialized && Stepping); if (ToolBar) ToolBar->Realize(); diff --git a/Source/Core/DebuggerWX/Src/JitWindow.cpp b/Source/Core/DebuggerWX/Src/JitWindow.cpp index a1b922a0d1..fc9cb29527 100644 --- a/Source/Core/DebuggerWX/Src/JitWindow.cpp +++ b/Source/Core/DebuggerWX/Src/JitWindow.cpp @@ -179,7 +179,7 @@ void CJitWindow::Compare(u32 em_address) PPCAnalyst::BlockStats st; PPCAnalyst::BlockRegStats gpa; PPCAnalyst::BlockRegStats fpa; - if (PPCAnalyst::Flatten(ppc_addr, &size, &st, &gpa, &fpa, &code_buffer)) + if (PPCAnalyst::Flatten(ppc_addr, &size, &st, &gpa, &fpa, &code_buffer, size) != 0xffffffff) { sptr = (char*)xDis; for (int i = 0; i < size; i++)