diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 176527ec63..9c2284e2d4 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -181,6 +181,11 @@ void Jit64::Init() blocks.Init(); asm_routines.Init(); + + code_block.m_stats = &js.st; + code_block.m_gpa = &js.gpa; + code_block.m_fpa = &js.fpa; + analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); } void Jit64::ClearCache() @@ -404,9 +409,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // Memory exception on instruction fetch bool memory_exception = false; - // A broken block is a block that does not end in a branch - bool broken_block = false; - if (Core::g_CoreStartupParameter.bEnableDebugging) { // Comment out the following to disable breakpoints (speed-up) @@ -433,7 +435,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc } } - int size = 0; js.firstFPInstructionFound = false; js.isLastInstruction = false; js.blockStart = em_address; @@ -444,17 +445,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc jit->js.numLoadStoreInst = 0; jit->js.numFloatingPointInst = 0; + u32 nextPC = em_address; // Analyze the block, collect all instructions it is made of (including inlining, // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - u32 nextPC = em_address; - u32 merged_addresses[32]; - const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]); - int size_of_merged_addresses = 0; if (!memory_exception) - { - // If there is a memory exception inside a block (broken_block==true), compile up to that instruction. 
- nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses); - } + nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); + PPCAnalyst::CodeOp *ops = code_buf->codebuffer; @@ -499,19 +495,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc js.downcountAmount = 0; if (!Core::g_CoreStartupParameter.bEnableDebugging) - { - for (int i = 0; i < size_of_merged_addresses; ++i) - { - const u32 address = merged_addresses[i]; - js.downcountAmount += PatchEngine::GetSpeedhackCycles(address); - } - } + js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address); js.skipnext = false; - js.blockSize = size; + js.blockSize = code_block.m_num_instructions; js.compilerPC = nextPC; // Translate instructions - for (int i = 0; i < (int)size; i++) + for (u32 i = 0; i < code_block.m_num_instructions; i++) { js.compilerPC = ops[i].address; js.op = &ops[i]; @@ -519,7 +509,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc const GekkoOPInfo *opinfo = ops[i].opinfo; js.downcountAmount += opinfo->numCycles; - if (i == (int)size - 1) + if (i == (code_block.m_num_instructions - 1)) { // WARNING - cmp->branch merging will screw this up. 
js.isLastInstruction = true; @@ -706,7 +696,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc WriteExceptionExit(); } - if (broken_block) + if (code_block.m_broken) { gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); @@ -715,10 +705,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc b->flags = js.block_flags; b->codeSize = (u32)(GetCodePtr() - normalEntry); - b->originalSize = size; + b->originalSize = code_block.m_num_instructions; #ifdef JIT_LOG_X86 - LogGeneratedX86(size, code_buf, normalEntry, b); + LogGeneratedX86(code_block.m_num_instructions, code_buf, normalEntry, b); #endif return normalEntry; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 2092e0eccd..73d3c34294 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -103,7 +103,6 @@ void Jit64::bcx(UGeckoInstruction inst) JITDISABLE(bJITBranchOff) // USES_CR - _assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block"); gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); @@ -142,7 +141,9 @@ void Jit64::bcx(UGeckoInstruction inst) SetJumpTarget( pConditionDontBranch ); if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) SetJumpTarget( pCTRDontBranch ); - WriteExit(js.compilerPC + 4); + + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + WriteExit(js.compilerPC + 4); } void Jit64::bcctrx(UGeckoInstruction inst) @@ -190,7 +191,9 @@ void Jit64::bcctrx(UGeckoInstruction inst) WriteExitDestInEAX(); // Would really like to continue the block here, but it ends. TODO. 
SetJumpTarget(b); - WriteExit(js.compilerPC + 4); + + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + WriteExit(js.compilerPC + 4); } } @@ -199,13 +202,6 @@ void Jit64::bclrx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITBranchOff) - if (!js.isLastInstruction && - (inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) { - if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); - return; - } - gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); @@ -245,5 +241,7 @@ void Jit64::bclrx(UGeckoInstruction inst) SetJumpTarget( pConditionDontBranch ); if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) SetJumpTarget( pCTRDontBranch ); - WriteExit(js.compilerPC + 4); + + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + WriteExit(js.compilerPC + 4); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 0bb27f1eed..d296693d36 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -425,10 +425,12 @@ void Jit64::cmpXX(UGeckoInstruction inst) } else { - WriteExit(js.next_compilerPC + 4); + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + { + js.skipnext = true; + WriteExit(js.next_compilerPC + 4); + } } - - js.cancel = true; } } else @@ -535,9 +537,11 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); - WriteExit(js.next_compilerPC + 4); - - js.cancel = true; + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + { + js.skipnext = true; + WriteExit(js.next_compilerPC + 4); + } } } @@ -2229,5 +2233,7 @@ void Jit64::twx(UGeckoInstruction inst) SetJumpTarget(exit3); SetJumpTarget(exit4); SetJumpTarget(exit5); - WriteExit(js.compilerPC + 4); + + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + 
WriteExit(js.compilerPC + 4); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index b145162233..b686cbc514 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -268,6 +268,10 @@ void JitIL::Init() blocks.Init(); asm_routines.Init(); + code_block.m_stats = &js.st; + code_block.m_gpa = &js.gpa; + code_block.m_fpa = &js.fpa; + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { JitILProfiler::Init(); } @@ -500,9 +504,6 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // Memory exception on instruction fetch bool memory_exception = false; - // A broken block is a block that does not end in a branch - bool broken_block = false; - if (Core::g_CoreStartupParameter.bEnableDebugging) { // Comment out the following to disable breakpoints (speed-up) @@ -529,7 +530,6 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc } } - int size = 0; js.isLastInstruction = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; @@ -538,19 +538,12 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc jit->js.numLoadStoreInst = 0; jit->js.numFloatingPointInst = 0; + u32 nextPC = em_address; // Analyze the block, collect all instructions it is made of (including inlining, // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - u32 exitAddress = em_address; - - u32 merged_addresses[32]; - const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]); - int size_of_merged_addresses = 0; if (!memory_exception) - { - // If there is a memory exception inside a block (broken_block==true), compile up to that instruction. 
- // TODO - exitAddress = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses); - } + nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); + PPCAnalyst::CodeOp *ops = code_buf->codebuffer; const u8 *start = AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr @@ -586,7 +579,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILOutputIR) { // For profiling and IR Writer - for (int i = 0; i < (int)size; i++) + for (u32 i = 0; i < code_block.m_num_instructions; i++) { const u64 inst = ops[i].inst.hex; // Ported from boost::hash @@ -606,16 +599,10 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc js.downcountAmount = 0; if (!Core::g_CoreStartupParameter.bEnableDebugging) - { - for (int i = 0; i < size_of_merged_addresses; ++i) - { - const u32 address = merged_addresses[i]; - js.downcountAmount += PatchEngine::GetSpeedhackCycles(address); - } - } + js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address); // Translate instructions - for (int i = 0; i < (int)size; i++) + for (u32 i = 0; i < code_block.m_num_instructions; i++) { js.compilerPC = ops[i].address; js.op = &ops[i]; @@ -623,7 +610,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc const GekkoOPInfo *opinfo = GetOpInfo(ops[i].inst); js.downcountAmount += opinfo->numCycles; - if (i == (int)size - 1) + if (i == (code_block.m_num_instructions - 1)) { js.isLastInstruction = true; js.next_inst = 0; @@ -708,13 +695,13 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc } // Perform actual code generation - WriteCode(exitAddress); + WriteCode(nextPC); b->codeSize = (u32)(GetCodePtr() - normalEntry); - b->originalSize = size; + b->originalSize = 
code_block.m_num_instructions; #ifdef JIT_LOG_X86 - LogGeneratedX86(size, code_buf, normalEntry, b); + LogGeneratedX86(code_block.m_num_instructions, code_buf, normalEntry, b); #endif if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILOutputIR) diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp index 80562f5fe8..c08e1809be 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp @@ -40,6 +40,11 @@ void JitArm::Init() fpr.Init(this); jo.enableBlocklink = true; jo.optimizeGatherPipe = true; + + code_block.m_stats = &js.st; + code_block.m_gpa = &js.gpa; + code_block.m_fpa = &js.fpa; + analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); } void JitArm::ClearCache() @@ -250,27 +255,27 @@ void JitArm::Trace() void JitArm::PrintDebug(UGeckoInstruction inst, u32 level) { if (level > 0) - printf("Start: %08x OP '%s' Info\n", (u32)GetCodePtr(), PPCTables::GetInstructionName(inst)); + WARN_LOG(DYNA_REC, "Start: %08x OP '%s' Info", (u32)GetCodePtr(), PPCTables::GetInstructionName(inst)); if (level > 1) { GekkoOPInfo* Info = GetOpInfo(inst.hex); - printf("\tOuts\n"); + WARN_LOG(DYNA_REC, "\tOuts"); if (Info->flags & FL_OUT_A) - printf("\t-OUT_A: %x\n", inst.RA); + WARN_LOG(DYNA_REC, "\t-OUT_A: %x", inst.RA); if (Info->flags & FL_OUT_D) - printf("\t-OUT_D: %x\n", inst.RD); - printf("\tIns\n"); + WARN_LOG(DYNA_REC, "\t-OUT_D: %x", inst.RD); + WARN_LOG(DYNA_REC, "\tIns"); // A, AO, B, C, S if (Info->flags & FL_IN_A) - printf("\t-IN_A: %x\n", inst.RA); + WARN_LOG(DYNA_REC, "\t-IN_A: %x", inst.RA); if (Info->flags & FL_IN_A0) - printf("\t-IN_A0: %x\n", inst.RA); + WARN_LOG(DYNA_REC, "\t-IN_A0: %x", inst.RA); if (Info->flags & FL_IN_B) - printf("\t-IN_B: %x\n", inst.RB); + WARN_LOG(DYNA_REC, "\t-IN_B: %x", inst.RB); if (Info->flags & FL_IN_C) - printf("\t-IN_C: %x\n", inst.RC); + WARN_LOG(DYNA_REC, "\t-IN_C: %x", inst.RC); if (Info->flags & FL_IN_S) - 
printf("\t-IN_S: %x\n", inst.RS); + WARN_LOG(DYNA_REC, "\t-IN_S: %x", inst.RS); } } @@ -298,14 +303,10 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo // Memory exception on instruction fetch bool memory_exception = false; - // A broken block is a block that does not end in a branch - bool broken_block = false; - if (Core::g_CoreStartupParameter.bEnableDebugging) { // Comment out the following to disable breakpoints (speed-up) blockSize = 1; - broken_block = true; Trace(); } @@ -324,8 +325,6 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo } } - - int size = 0; js.isLastInstruction = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; @@ -333,17 +332,12 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo js.block_flags = 0; js.cancel = false; + u32 nextPC = em_address; // Analyze the block, collect all instructions it is made of (including inlining, // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - u32 nextPC = em_address; - u32 merged_addresses[32]; - const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]); - int size_of_merged_addresses = 0; if (!memory_exception) - { - // If there is a memory exception inside a block (broken_block==true), compile up to that instruction. 
- nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses); - } + nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); + PPCAnalyst::CodeOp *ops = code_buf->codebuffer; const u8 *start = GetCodePtr(); @@ -398,20 +392,21 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo gpr.Start(js.gpa); fpr.Start(js.fpa); js.downcountAmount = 0; + if (!Core::g_CoreStartupParameter.bEnableDebugging) - { - for (int i = 0; i < size_of_merged_addresses; ++i) - { - const u32 address = merged_addresses[i]; - js.downcountAmount += PatchEngine::GetSpeedhackCycles(address); - } - } + js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); js.skipnext = false; - js.blockSize = size; + js.blockSize = code_block.m_num_instructions; js.compilerPC = nextPC; + + const int DEBUG_OUTPUT = 0; + + if (DEBUG_OUTPUT) + WARN_LOG(DYNA_REC, "-------0x%08x-------", em_address); + // Translate instructions - for (int i = 0; i < (int)size; i++) + for (u32 i = 0; i < code_block.m_num_instructions; i++) { js.compilerPC = ops[i].address; js.op = &ops[i]; @@ -419,7 +414,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo const GekkoOPInfo *opinfo = ops[i].opinfo; js.downcountAmount += opinfo->numCycles; - if (i == (int)size - 1) + if (i == (code_block.m_num_instructions - 1)) { // WARNING - cmp->branch merging will screw this up. 
js.isLastInstruction = true; @@ -468,7 +463,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo } if (!ops[i].skip) { - PrintDebug(ops[i].inst, 0); + PrintDebug(ops[i].inst, DEBUG_OUTPUT); if (js.memcheck && (opinfo->flags & FL_USE_FPU)) { // Don't do this yet @@ -485,7 +480,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo } if (memory_exception) BKPT(0x500); - if (broken_block) + if (code_block.m_broken) { printf("Broken Block going to 0x%08x\n", nextPC); WriteExit(nextPC); @@ -493,7 +488,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo b->flags = js.block_flags; b->codeSize = (u32)(GetCodePtr() - normalEntry); - b->originalSize = size; + b->originalSize = code_block.m_num_instructions; FlushIcache(); return start; } diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h index 163c1a3fc1..30681d63e5 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h @@ -42,6 +42,7 @@ private: ArmFPRCache fpr; PPCAnalyst::CodeBuffer code_buffer; + void DoDownCount(); void PrintDebug(UGeckoInstruction inst, u32 level); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp index 8a45349066..b1b310b5ad 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp @@ -149,7 +149,6 @@ void JitArm::bcx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITBranchOff) // USES_CR - _assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block"); gpr.Flush(); fpr.Flush(); @@ -203,7 +202,8 @@ void JitArm::bcx(UGeckoInstruction inst) if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) SetJumpTarget( pCTRDontBranch ); - WriteExit(js.compilerPC + 4); + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + WriteExit(js.compilerPC + 4); } 
void JitArm::bcctrx(UGeckoInstruction inst) { @@ -265,25 +265,16 @@ void JitArm::bcctrx(UGeckoInstruction inst) WriteExitDestInR(rA); SetJumpTarget(b); - WriteExit(js.compilerPC + 4); + + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + WriteExit(js.compilerPC + 4); } } void JitArm::bclrx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITBranchOff) - if (!js.isLastInstruction && - (inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) { - if (inst.LK) - { - ARMReg rA = gpr.GetReg(false); - u32 Jumpto = js.compilerPC + 4; - MOVI2R(rA, Jumpto); - STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); - // ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); - } - return; - } + gpr.Flush(); fpr.Flush(); @@ -342,5 +333,7 @@ void JitArm::bclrx(UGeckoInstruction inst) SetJumpTarget( pConditionDontBranch ); if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) SetJumpTarget( pCTRDontBranch ); - WriteExit(js.compilerPC + 4); + + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + WriteExit(js.compilerPC + 4); } diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp index 12c5976d7a..4b690b966c 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp @@ -1014,7 +1014,9 @@ void JitArm::twx(UGeckoInstruction inst) SetJumpTarget(exit3); SetJumpTarget(exit4); SetJumpTarget(exit5); - WriteExit(js.compilerPC + 4); + + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + WriteExit(js.compilerPC + 4); gpr.Unlock(RA, RB); } diff --git a/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp b/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp index 5db50b6399..2c5baf33bb 100644 --- a/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/JitArmIL/JitIL.cpp @@ -34,6 +34,10 @@ void JitArmIL::Init() AllocCodeSpace(CODE_SIZE); blocks.Init(); asm_routines.Init(); + + code_block.m_stats = &js.st; + 
code_block.m_gpa = &js.gpa; + code_block.m_fpa = &js.fpa; } void JitArmIL::ClearCache() @@ -202,14 +206,10 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB // Memory exception on instruction fetch bool memory_exception = false; - // A broken block is a block that does not end in a branch - bool broken_block = false; - if (Core::g_CoreStartupParameter.bEnableDebugging) { // Comment out the following to disable breakpoints (speed-up) blockSize = 1; - broken_block = true; } if (em_address == 0) @@ -228,7 +228,6 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB } - int size = 0; js.isLastInstruction = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; @@ -236,17 +235,12 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB js.block_flags = 0; js.cancel = false; + u32 nextPC = em_address; // Analyze the block, collect all instructions it is made of (including inlining, // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - u32 nextPC = em_address; - u32 merged_addresses[32]; - const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]); - int size_of_merged_addresses = 0; if (!memory_exception) - { - // If there is a memory exception inside a block (broken_block==true), compile up to that instruction. 
- nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses); - } + nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); + PPCAnalyst::CodeOp *ops = code_buf->codebuffer; const u8 *start = GetCodePtr(); @@ -271,7 +265,7 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB u64 codeHash = -1; { // For profiling and IR Writer - for (int i = 0; i < (int)size; i++) + for (u32 i = 0; i < code_block.m_num_instructions; i++) { const u64 inst = ops[i].inst.hex; // Ported from boost::hash @@ -289,19 +283,13 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB js.downcountAmount = 0; if (!Core::g_CoreStartupParameter.bEnableDebugging) - { - for (int i = 0; i < size_of_merged_addresses; ++i) - { - const u32 address = merged_addresses[i]; - js.downcountAmount += PatchEngine::GetSpeedhackCycles(address); - } - } + js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); js.skipnext = false; - js.blockSize = size; + js.blockSize = code_block.m_num_instructions; js.compilerPC = nextPC; // Translate instructions - for (int i = 0; i < (int)size; i++) + for (u32 i = 0; i < code_block.m_num_instructions; i++) { js.compilerPC = ops[i].address; js.op = &ops[i]; @@ -309,7 +297,7 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB const GekkoOPInfo *opinfo = ops[i].opinfo; js.downcountAmount += opinfo->numCycles; - if (i == (int)size - 1) + if (i == (code_block.m_num_instructions - 1)) { // WARNING - cmp->branch merging will screw this up. 
js.isLastInstruction = true; @@ -348,21 +336,19 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB } if (memory_exception) BKPT(0x500); - if (broken_block) + + if (code_block.m_broken) { printf("Broken Block going to 0x%08x\n", nextPC); WriteExit(nextPC); } // Perform actual code generation - WriteCode(nextPC); b->flags = js.block_flags; b->codeSize = (u32)(GetCodePtr() - normalEntry); - b->originalSize = size; + b->originalSize = code_block.m_num_instructions;; - { - } FlushIcache(); return start; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 9bebdf1411..e2cb99514f 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -83,6 +83,9 @@ protected: std::unordered_set fifoWriteAddresses; }; + PPCAnalyst::CodeBlock code_block; + PPCAnalyst::PPCAnalyzer analyzer; + public: // This should probably be removed from public: JitOptions jo; diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index 26edf823e6..edf1425299 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -269,302 +269,6 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b) return true; } -// Does not yet perform inlining - although there are plans for that. 
-// Returns the exit address of the next PC -u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, - BlockRegStats *fpa, bool &broken_block, CodeBuffer *buffer, - int blockSize, u32* merged_addresses, - int capacity_of_merged_addresses, int& size_of_merged_addresses) -{ - if (capacity_of_merged_addresses < FUNCTION_FOLLOWING_THRESHOLD) { - PanicAlert("Capacity of merged_addresses is too small!"); - } - std::fill_n(merged_addresses, capacity_of_merged_addresses, 0); - merged_addresses[0] = address; - size_of_merged_addresses = 1; - - memset(st, 0, sizeof(*st)); - - // Disabled the following optimization in preference of FAST_ICACHE - //UGeckoInstruction previnst = Memory::Read_Opcode_JIT_LC(address - 4); - //if (previnst.hex == 0x4e800020) - // st->isFirstBlockOfFunction = true; - - gpa->any = true; - fpa->any = false; - - for (int i = 0; i < 32; i++) - { - gpa->firstRead[i] = -1; - gpa->firstWrite[i] = -1; - gpa->numReads[i] = 0; - gpa->numWrites[i] = 0; - } - - u32 blockstart = address; - int maxsize = blockSize; - - int num_inst = 0; - int numFollows = 0; - int numCycles = 0; - - CodeOp *code = buffer->codebuffer; - bool foundExit = false; - - u32 returnAddress = 0; - - // Do analysis of the code, look for dependencies etc - int numSystemInstructions = 0; - for (int i = 0; i < maxsize; i++) - { - UGeckoInstruction inst = JitInterface::Read_Opcode_JIT(address); - - if (inst.hex != 0) - { - num_inst++; - memset(&code[i], 0, sizeof(CodeOp)); - GekkoOPInfo *opinfo = GetOpInfo(inst); - code[i].opinfo = opinfo; - // FIXME: code[i].address may not be correct due to CST1 code. 
- code[i].address = address; - code[i].inst = inst; - code[i].branchTo = -1; - code[i].branchToIndex = -1; - code[i].skip = false; - numCycles += opinfo->numCycles; - - code[i].wantsCR0 = false; - code[i].wantsCR1 = false; - code[i].wantsPS1 = false; - - int flags = opinfo->flags; - - if (flags & FL_USE_FPU) - fpa->any = true; - - if (flags & FL_TIMER) - gpa->anyTimer = true; - - // Does the instruction output CR0? - if (flags & FL_RC_BIT) - code[i].outputCR0 = inst.hex & 1; //todo fix - else if ((flags & FL_SET_CRn) && inst.CRFD == 0) - code[i].outputCR0 = true; - else - code[i].outputCR0 = (flags & FL_SET_CR0) ? true : false; - - // Does the instruction output CR1? - if (flags & FL_RC_BIT_F) - code[i].outputCR1 = inst.hex & 1; //todo fix - else if ((flags & FL_SET_CRn) && inst.CRFD == 1) - code[i].outputCR1 = true; - else - code[i].outputCR1 = (flags & FL_SET_CR1) ? true : false; - - int numOut = 0; - int numIn = 0; - if (flags & FL_OUT_A) - { - code[i].regsOut[numOut++] = inst.RA; - gpa->SetOutputRegister(inst.RA, i); - } - if (flags & FL_OUT_D) - { - code[i].regsOut[numOut++] = inst.RD; - gpa->SetOutputRegister(inst.RD, i); - } - if (flags & FL_OUT_S) - { - code[i].regsOut[numOut++] = inst.RS; - gpa->SetOutputRegister(inst.RS, i); - } - if ((flags & FL_IN_A) || ((flags & FL_IN_A0) && inst.RA != 0)) - { - code[i].regsIn[numIn++] = inst.RA; - gpa->SetInputRegister(inst.RA, i); - } - if (flags & FL_IN_B) - { - code[i].regsIn[numIn++] = inst.RB; - gpa->SetInputRegister(inst.RB, i); - } - if (flags & FL_IN_C) - { - code[i].regsIn[numIn++] = inst.RC; - gpa->SetInputRegister(inst.RC, i); - } - if (flags & FL_IN_S) - { - code[i].regsIn[numIn++] = inst.RS; - gpa->SetInputRegister(inst.RS, i); - } - - // Set remaining register slots as unused (-1) - for (int j = numIn; j < 3; j++) - code[i].regsIn[j] = -1; - for (int j = numOut; j < 2; j++) - code[i].regsOut[j] = -1; - for (int j = 0; j < 3; j++) - code[i].fregsIn[j] = -1; - code[i].fregOut = -1; - - switch 
(opinfo->type) - { - case OPTYPE_INTEGER: - case OPTYPE_LOAD: - case OPTYPE_STORE: - case OPTYPE_LOADFP: - case OPTYPE_STOREFP: - break; - case OPTYPE_FPU: - break; - case OPTYPE_BRANCH: - if (code[i].inst.hex == 0x4e800020) - { - // For analysis purposes, we can assume that blr eats flags. - code[i].outputCR0 = true; - code[i].outputCR1 = true; - } - break; - case OPTYPE_SYSTEM: - case OPTYPE_SYSTEMFP: - numSystemInstructions++; - break; - } - - bool follow = false; - u32 destination = 0; - if (inst.OPCD == 18 && blockSize > 1) - { - //Is bx - should we inline? yes! - if (inst.AA) - destination = SignExt26(inst.LI << 2); - else - destination = address + SignExt26(inst.LI << 2); - if (destination != blockstart) - follow = true; - } - else if (inst.OPCD == 19 && inst.SUBOP10 == 16 && - (inst.BO & (1 << 4)) && (inst.BO & (1 << 2)) && - returnAddress != 0) - { - // bclrx with unconditional branch = return - follow = true; - destination = returnAddress; - returnAddress = 0; - - if (inst.LK) - returnAddress = address + 4; - } - else if (inst.OPCD == 31 && inst.SUBOP10 == 467) - { - // mtspr - const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - if (index == SPR_LR) { - // We give up to follow the return address - // because we have to check the register usage. - returnAddress = 0; - } - } - - if (follow) - numFollows++; - // TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD. - // If it is small, the performance will be down. - // If it is big, the size of generated code will be big and - // cache clearning will happen many times. - // TODO: Investivate the reason why - // "0" is fastest in some games, MP2 for example. 
- if (numFollows > FUNCTION_FOLLOWING_THRESHOLD) - follow = false; - - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMergeBlocks) { - follow = false; - } - - if (!follow) - { - if (opinfo->flags & FL_ENDBLOCK) //right now we stop early - { - foundExit = true; - break; - } - address += 4; - } - else - { - // We don't "code[i].skip = true" here - // because bx may store a certain value to the link register. - // Instead, we skip a part of bx in Jit**::bx(). - address = destination; - merged_addresses[size_of_merged_addresses++] = address; - } - } - else - { - // ISI exception or other critical memory exception occurred (game over) - break; - } - } - st->numCycles = numCycles; - - // Instruction Reordering Pass - if (num_inst > 1) - { - // Bubble down compares towards branches, so that they can be merged. - // -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch. - for (int i = 0; i < num_inst - 2; i++) - { - CodeOp &a = code[i]; - CodeOp &b = code[i + 1]; - // All integer compares can be reordered. - if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) || - (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32))) - { - // Got a compare instruction. - if (CanSwapAdjacentOps(a, b)) { - // Alright, let's bubble it down! 
- CodeOp c = a; - a = b; - b = c; - } - } - } - } - - if (!foundExit && num_inst > 0) - { - // A broken block is a block that does not end in a branch - broken_block = true; - } - - // Scan for CR0 dependency - // assume next block wants CR0 to be safe - bool wantsCR0 = true; - bool wantsCR1 = true; - bool wantsPS1 = true; - for (int i = num_inst - 1; i >= 0; i--) - { - if (code[i].outputCR0) - wantsCR0 = false; - if (code[i].outputCR1) - wantsCR1 = false; - if (code[i].outputPS1) - wantsPS1 = false; - wantsCR0 |= code[i].wantsCR0; - wantsCR1 |= code[i].wantsCR1; - wantsPS1 |= code[i].wantsPS1; - code[i].wantsCR0 = wantsCR0; - code[i].wantsCR1 = wantsCR1; - code[i].wantsPS1 = wantsPS1; - } - - *realsize = num_inst; - // ... - return address; -} - - // Most functions that are relevant to analyze should be // called by another function. Therefore, let's scan the // entire space for bl operations and find what functions @@ -699,4 +403,317 @@ void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db) leafSize, niceSize, unniceSize); } +void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code) +{ + // Instruction Reordering Pass + // Bubble down compares towards branches, so that they can be merged. + // -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch. + for (u32 i = 0; i < (instructions - 2); ++i) + { + CodeOp &a = code[i]; + CodeOp &b = code[i + 1]; + // All integer compares can be reordered. + if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) || + (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32))) + { + // Got a compare instruction. + if (CanSwapAdjacentOps(a, b)) { + // Alright, let's bubble it down! 
+ CodeOp c = a; + a = b; + b = c; + } + } + } +} + +void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index) +{ + code->wantsCR0 = false; + code->wantsCR1 = false; + code->wantsPS1 = false; + + if (opinfo->flags & FL_USE_FPU) + block->m_fpa->any = true; + + if (opinfo->flags & FL_TIMER) + block->m_gpa->anyTimer = true; + + // Does the instruction output CR0? + if (opinfo->flags & FL_RC_BIT) + code->outputCR0 = code->inst.hex & 1; //todo fix + else if ((opinfo->flags & FL_SET_CRn) && code->inst.CRFD == 0) + code->outputCR0 = true; + else + code->outputCR0 = (opinfo->flags & FL_SET_CR0) ? true : false; + + // Does the instruction output CR1? + if (opinfo->flags & FL_RC_BIT_F) + code->outputCR1 = code->inst.hex & 1; //todo fix + else if ((opinfo->flags & FL_SET_CRn) && code->inst.CRFD == 1) + code->outputCR1 = true; + else + code->outputCR1 = (opinfo->flags & FL_SET_CR1) ? true : false; + + int numOut = 0; + int numIn = 0; + if (opinfo->flags & FL_OUT_A) + { + code->regsOut[numOut++] = code->inst.RA; + block->m_gpa->SetOutputRegister(code->inst.RA, index); + } + if (opinfo->flags & FL_OUT_D) + { + code->regsOut[numOut++] = code->inst.RD; + block->m_gpa->SetOutputRegister(code->inst.RD, index); + } + if (opinfo->flags & FL_OUT_S) + { + code->regsOut[numOut++] = code->inst.RS; + block->m_gpa->SetOutputRegister(code->inst.RS, index); + } + if ((opinfo->flags & FL_IN_A) || ((opinfo->flags & FL_IN_A0) && code->inst.RA != 0)) + { + code->regsIn[numIn++] = code->inst.RA; + block->m_gpa->SetInputRegister(code->inst.RA, index); + } + if (opinfo->flags & FL_IN_B) + { + code->regsIn[numIn++] = code->inst.RB; + block->m_gpa->SetInputRegister(code->inst.RB, index); + } + if (opinfo->flags & FL_IN_C) + { + code->regsIn[numIn++] = code->inst.RC; + block->m_gpa->SetInputRegister(code->inst.RC, index); + } + if (opinfo->flags & FL_IN_S) + { + code->regsIn[numIn++] = code->inst.RS; + block->m_gpa->SetInputRegister(code->inst.RS, index); + } 
+ + // Set remaining register slots as unused (-1) + for (int j = numIn; j < 3; j++) + code->regsIn[j] = -1; + for (int j = numOut; j < 2; j++) + code->regsOut[j] = -1; + for (int j = 0; j < 3; j++) + code->fregsIn[j] = -1; + code->fregOut = -1; + + switch (opinfo->type) + { + case OPTYPE_INTEGER: + case OPTYPE_LOAD: + case OPTYPE_STORE: + case OPTYPE_LOADFP: + case OPTYPE_STOREFP: + break; + case OPTYPE_FPU: + break; + case OPTYPE_BRANCH: + if (code->inst.hex == 0x4e800020) + { + // For analysis purposes, we can assume that blr eats opinfo->flags. + code->outputCR0 = true; + code->outputCR1 = true; + } + break; + case OPTYPE_SYSTEM: + case OPTYPE_SYSTEMFP: + break; + } +} + +u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 blockSize) +{ + // Clear block stats + memset(block->m_stats, 0, sizeof(BlockStats)); + + // Clear register stats + block->m_gpa->any = true; + block->m_fpa->any = false; + + block->m_gpa->Clear(); + block->m_fpa->Clear(); + + // Set the blocks start address + block->m_address = address; + + // Reset our block state + block->m_broken = false; + block->m_num_instructions = 0; + + CodeOp *code = buffer->codebuffer; + + bool found_exit = false; + u32 return_address = 0; + u32 numFollows = 0; + u32 num_inst = 0; + + for (u32 i = 0; i < blockSize; ++i) + { + UGeckoInstruction inst = JitInterface::Read_Opcode_JIT(address); + + if (inst.hex != 0) + { + num_inst++; + memset(&code[i], 0, sizeof(CodeOp)); + GekkoOPInfo *opinfo = GetOpInfo(inst); + + code[i].opinfo = opinfo; + code[i].address = address; + code[i].inst = inst; + code[i].branchTo = -1; + code[i].branchToIndex = -1; + code[i].skip = false; + block->m_stats->numCycles += opinfo->numCycles; + + SetInstructionStats(block, &code[i], opinfo, i); + + bool follow = false; + u32 destination = 0; + + bool conditional_continue = false; + + // Do we inline leaf functions? 
+			if (HasOption(OPTION_LEAF_INLINE))
+			{
+				if (inst.OPCD == 18 && blockSize > 1)
+				{
+					//Is bx - should we inline? yes!
+					if (inst.AA)
+						destination = SignExt26(inst.LI << 2);
+					else
+						destination = address + SignExt26(inst.LI << 2);
+					if (destination != block->m_address)
+						follow = true;
+				}
+				else if (inst.OPCD == 19 && inst.SUBOP10 == 16 &&
+						(inst.BO & (1 << 4)) && (inst.BO & (1 << 2)) &&
+						return_address != 0)
+				{
+					// bclrx with unconditional branch = return
+					follow = true;
+					destination = return_address;
+					return_address = 0;
+
+					if (inst.LK)
+						return_address = address + 4;
+				}
+				else if (inst.OPCD == 31 && inst.SUBOP10 == 467)
+				{
+					// mtspr
+					const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
+					if (index == SPR_LR) {
+						// We give up following the return address
+						// because we have to check the register usage.
+						return_address = 0;
+					}
+				}
+
+				// TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD.
+				// If it is small, performance will suffer.
+				// If it is big, the size of generated code will be big and
+				// cache clearing will happen many times.
+				// TODO: Investigate the reason why
+				// "0" is fastest in some games, MP2 for example. 
+ if (numFollows > FUNCTION_FOLLOWING_THRESHOLD) + follow = false; + } + + if (HasOption(OPTION_CONDITIONAL_CONTINUE)) + { + if (inst.OPCD == 16 && + ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0)) + { + // bcx with conditional branch + conditional_continue = true; + } + else if (inst.OPCD == 19 && inst.SUBOP10 == 16 && + ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0)) + { + // bclrx with conditional branch + conditional_continue = true; + } + else if (inst.OPCD == 3 || + (inst.OPCD == 31 && inst.SUBOP10 == 4)) + { + // tw/twi tests and raises an exception + conditional_continue = true; + } + else if (inst.OPCD == 19 && inst.SUBOP10 == 528 && + (inst.BO_2 & BO_DONT_CHECK_CONDITION) == 0) + { + // Rare bcctrx with conditional branch + // Seen in NES games + conditional_continue = true; + } + } + + if (!follow) + { + if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) //right now we stop early + { + found_exit = true; + break; + } + address += 4; + } + // XXX: We don't support inlining yet. +#if 0 + else + { + numFollows++; + // We don't "code[i].skip = true" here + // because bx may store a certain value to the link register. + // Instead, we skip a part of bx in Jit**::bx(). 
+				address = destination;
+				merged_addresses[size_of_merged_addresses++] = address;
+			}
+#endif
+		}
+		else
+		{
+			// ISI exception or other critical memory exception occurred (game over)
+			ERROR_LOG(DYNA_REC, "Instruction hex was 0!");
+			break;
+		}
+	}
+
+	if (block->m_num_instructions > 1)
+		ReorderInstructions(block->m_num_instructions, code);
+
+	if ((!found_exit && num_inst > 0) || blockSize == 1)
+	{
+		// We couldn't find an exit
+		block->m_broken = true;
+	}
+
+	// Scan for CR0 dependency
+	// assume next block wants CR0 to be safe
+	bool wantsCR0 = true;
+	bool wantsCR1 = true;
+	bool wantsPS1 = true;
+	for (int i = block->m_num_instructions - 1; i >= 0; i--)
+	{
+		if (code[i].outputCR0)
+			wantsCR0 = false;
+		if (code[i].outputCR1)
+			wantsCR1 = false;
+		if (code[i].outputPS1)
+			wantsPS1 = false;
+		wantsCR0 |= code[i].wantsCR0;
+		wantsCR1 |= code[i].wantsCR1;
+		wantsPS1 |= code[i].wantsPS1;
+		code[i].wantsCR0 = wantsCR0;
+		code[i].wantsCR1 = wantsCR1;
+		code[i].wantsPS1 = wantsPS1;
+	}
+	block->m_num_instructions = num_inst;
+	return address;
+}
+
+
 } // namespace
diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h
index 9f898f57ca..16c4f2cd14 100644
--- a/Source/Core/Core/PowerPC/PPCAnalyst.h
+++ b/Source/Core/Core/PowerPC/PPCAnalyst.h
@@ -64,19 +64,32 @@ struct BlockRegStats
 		return std::max(lastRead[reg], lastWrite[reg]) - std::min(firstRead[reg], firstWrite[reg]);}
 
-	inline void SetInputRegister(int reg, short opindex) {
+	inline void SetInputRegister(int reg, short opindex)
+	{
 		if (firstRead[reg] == -1)
 			firstRead[reg] = (short)(opindex);
 		lastRead[reg] = (short)(opindex);
 		numReads[reg]++;
 	}
 
-	inline void SetOutputRegister(int reg, short opindex) {
+	inline void SetOutputRegister(int reg, short opindex)
+	{
 		if (firstWrite[reg] == -1)
 			firstWrite[reg] = (short)(opindex);
 		lastWrite[reg] = (short)(opindex);
 		numWrites[reg]++;
 	}
+
+	inline void Clear()
+	{
+		for (int i = 0; i < 32; ++i)
+		{
+			firstRead[i] = -1;
+			firstWrite[i] 
= -1;
+			numReads[i] = 0;
+			numWrites[i] = 0;
+		}
+	}
 };
 
 
@@ -94,10 +107,74 @@ public:
 };
 
 
-u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa,
-	BlockRegStats *fpa, bool &broken_block, CodeBuffer *buffer,
-	int blockSize, u32* merged_addresses,
-	int capacity_of_merged_addresses, int& size_of_merged_addresses);
+struct CodeBlock
+{
+	// Beginning PPC address.
+	u32 m_address;
+
+	// Number of instructions
+	// Gives us the size of the block.
+	u32 m_num_instructions;
+
+	// Some basic statistics about the block.
+	BlockStats *m_stats;
+
+	// Register statistics about the block.
+	BlockRegStats *m_gpa, *m_fpa;
+
+	// Are we a broken block?
+	bool m_broken;
+};
+
+class PPCAnalyzer
+{
+private:
+
+	void ReorderInstructions(u32 instructions, CodeOp *code);
+	void SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index);
+
+	// Options
+	u32 m_options;
+public:
+
+	enum AnalystOption
+	{
+		// Conditional branch continuing
+		// If the JIT core supports conditional branches within the blocks
+		// Block will end on unconditional branch or other ENDBLOCK flagged instruction.
+		// Requires JIT support to be enabled.
+		OPTION_CONDITIONAL_CONTINUE = (1 << 0),
+
+		// If there is an unconditional branch that jumps to a leaf function then inline it.
+		// Might require JIT intervention to support it correctly.
+		// Requires JitBlock support for inlined code
+		// XXX: NOT COMPLETE
+		OPTION_LEAF_INLINE = (1 << 1),
+
+		// Complex blocks support jumping backwards onto themselves.
+		// Happens commonly in loops, pretty complex to support.
+		// May require register caches to use register usage metrics.
+		// XXX: NOT COMPLETE
+		OPTION_COMPLEX_BLOCK = (1 << 2),
+
+		// Similar to complex blocks.
+		// Instead of jumping backwards, this jumps forwards within the block.
+		// Requires JIT support to work. 
+ // XXX: NOT COMPLETE + OPTION_FORWARD_JUMP = (1 << 3), + }; + + + PPCAnalyzer() : m_options(0) {} + + // Option setting/getting + void SetOption(AnalystOption option) { m_options |= option; } + void ClearOption(AnalystOption option) { m_options &= ~(option); } + bool HasOption(AnalystOption option) { return !!(m_options & option); } + + u32 Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 blockSize); +}; + void LogFunctionCall(u32 addr); void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db); bool AnalyzeFunction(u32 startAddr, Symbol &func, int max_size = 0); diff --git a/Source/Core/DolphinWX/Debugger/JitWindow.cpp b/Source/Core/DolphinWX/Debugger/JitWindow.cpp index 8aa3bcdfc0..5b13419f0d 100644 --- a/Source/Core/DolphinWX/Debugger/JitWindow.cpp +++ b/Source/Core/DolphinWX/Debugger/JitWindow.cpp @@ -129,8 +129,7 @@ void CJitWindow::Compare(u32 em_address) const u8 *code = (const u8 *)jit->GetBlockCache()->GetCompiledCodeFromBlock(block_num); u64 disasmPtr = (u64)code; - int size = block->codeSize; - const u8 *end = code + size; + const u8 *end = code + block->codeSize; char *sptr = (char*)xDis; int num_x86_instructions = 0; @@ -154,14 +153,17 @@ void CJitWindow::Compare(u32 em_address) PPCAnalyst::BlockStats st; PPCAnalyst::BlockRegStats gpa; PPCAnalyst::BlockRegStats fpa; - bool broken_block = false; - u32 merged_addresses[32]; - const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]); - int size_of_merged_addresses; - if (PPCAnalyst::Flatten(ppc_addr, &size, &st, &gpa, &fpa, broken_block, &code_buffer, size, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses) != 0xffffffff) + PPCAnalyst::CodeBlock code_block; + PPCAnalyst::PPCAnalyzer analyzer; + + code_block.m_stats = &st; + code_block.m_gpa = &gpa; + code_block.m_fpa = &fpa; + + if (analyzer.Analyze(ppc_addr, &code_block, &code_buffer, block->codeSize) != 0xFFFFFFFF) { sptr = (char*)xDis; - for (int i = 0; i < size; 
i++) + for (u32 i = 0; i < code_block.m_num_instructions; i++) { const PPCAnalyst::CodeOp &op = code_buffer.codebuffer[i]; char temp[256]; @@ -181,9 +183,9 @@ void CJitWindow::Compare(u32 em_address) sptr += sprintf(sptr, "%i estimated cycles\n", st.numCycles); sptr += sprintf(sptr, "Num instr: PPC: %i x86: %i (blowup: %i%%)\n", - size, num_x86_instructions, 100 * (num_x86_instructions / size - 1)); + code_block.m_num_instructions, num_x86_instructions, 100 * (num_x86_instructions / code_block.m_num_instructions - 1)); sptr += sprintf(sptr, "Num bytes: PPC: %i x86: %i (blowup: %i%%)\n", - size * 4, block->codeSize, 100 * (block->codeSize / (4 * size) - 1)); + code_block.m_num_instructions * 4, block->codeSize, 100 * (block->codeSize / (4 * code_block.m_num_instructions) - 1)); ppc_box->SetValue(StrToWxStr((char*)xDis)); }