Merge pull request #323 from Sonicadvance1/newer-ppcanalyst
[RFC] New PPCAnalyst class.
This commit is contained in:
commit
c4221e8f68
|
@ -181,6 +181,11 @@ void Jit64::Init()
|
||||||
|
|
||||||
blocks.Init();
|
blocks.Init();
|
||||||
asm_routines.Init();
|
asm_routines.Init();
|
||||||
|
|
||||||
|
code_block.m_stats = &js.st;
|
||||||
|
code_block.m_gpa = &js.gpa;
|
||||||
|
code_block.m_fpa = &js.fpa;
|
||||||
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::ClearCache()
|
void Jit64::ClearCache()
|
||||||
|
@ -404,9 +409,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
// Memory exception on instruction fetch
|
// Memory exception on instruction fetch
|
||||||
bool memory_exception = false;
|
bool memory_exception = false;
|
||||||
|
|
||||||
// A broken block is a block that does not end in a branch
|
|
||||||
bool broken_block = false;
|
|
||||||
|
|
||||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
||||||
{
|
{
|
||||||
// Comment out the following to disable breakpoints (speed-up)
|
// Comment out the following to disable breakpoints (speed-up)
|
||||||
|
@ -433,7 +435,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int size = 0;
|
|
||||||
js.firstFPInstructionFound = false;
|
js.firstFPInstructionFound = false;
|
||||||
js.isLastInstruction = false;
|
js.isLastInstruction = false;
|
||||||
js.blockStart = em_address;
|
js.blockStart = em_address;
|
||||||
|
@ -444,17 +445,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
jit->js.numLoadStoreInst = 0;
|
jit->js.numLoadStoreInst = 0;
|
||||||
jit->js.numFloatingPointInst = 0;
|
jit->js.numFloatingPointInst = 0;
|
||||||
|
|
||||||
|
u32 nextPC = em_address;
|
||||||
// Analyze the block, collect all instructions it is made of (including inlining,
|
// Analyze the block, collect all instructions it is made of (including inlining,
|
||||||
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
||||||
u32 nextPC = em_address;
|
|
||||||
u32 merged_addresses[32];
|
|
||||||
const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]);
|
|
||||||
int size_of_merged_addresses = 0;
|
|
||||||
if (!memory_exception)
|
if (!memory_exception)
|
||||||
{
|
nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize);
|
||||||
// If there is a memory exception inside a block (broken_block==true), compile up to that instruction.
|
|
||||||
nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses);
|
|
||||||
}
|
|
||||||
|
|
||||||
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
|
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
|
||||||
|
|
||||||
|
@ -499,19 +495,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
|
|
||||||
js.downcountAmount = 0;
|
js.downcountAmount = 0;
|
||||||
if (!Core::g_CoreStartupParameter.bEnableDebugging)
|
if (!Core::g_CoreStartupParameter.bEnableDebugging)
|
||||||
{
|
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
|
||||||
for (int i = 0; i < size_of_merged_addresses; ++i)
|
|
||||||
{
|
|
||||||
const u32 address = merged_addresses[i];
|
|
||||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(address);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
js.skipnext = false;
|
js.skipnext = false;
|
||||||
js.blockSize = size;
|
js.blockSize = code_block.m_num_instructions;
|
||||||
js.compilerPC = nextPC;
|
js.compilerPC = nextPC;
|
||||||
// Translate instructions
|
// Translate instructions
|
||||||
for (int i = 0; i < (int)size; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
js.compilerPC = ops[i].address;
|
js.compilerPC = ops[i].address;
|
||||||
js.op = &ops[i];
|
js.op = &ops[i];
|
||||||
|
@ -519,7 +509,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
const GekkoOPInfo *opinfo = ops[i].opinfo;
|
const GekkoOPInfo *opinfo = ops[i].opinfo;
|
||||||
js.downcountAmount += opinfo->numCycles;
|
js.downcountAmount += opinfo->numCycles;
|
||||||
|
|
||||||
if (i == (int)size - 1)
|
if (i == (code_block.m_num_instructions - 1))
|
||||||
{
|
{
|
||||||
// WARNING - cmp->branch merging will screw this up.
|
// WARNING - cmp->branch merging will screw this up.
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
|
@ -706,7 +696,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
WriteExceptionExit();
|
WriteExceptionExit();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (broken_block)
|
if (code_block.m_broken)
|
||||||
{
|
{
|
||||||
gpr.Flush(FLUSH_ALL);
|
gpr.Flush(FLUSH_ALL);
|
||||||
fpr.Flush(FLUSH_ALL);
|
fpr.Flush(FLUSH_ALL);
|
||||||
|
@ -715,10 +705,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
|
|
||||||
b->flags = js.block_flags;
|
b->flags = js.block_flags;
|
||||||
b->codeSize = (u32)(GetCodePtr() - normalEntry);
|
b->codeSize = (u32)(GetCodePtr() - normalEntry);
|
||||||
b->originalSize = size;
|
b->originalSize = code_block.m_num_instructions;
|
||||||
|
|
||||||
#ifdef JIT_LOG_X86
|
#ifdef JIT_LOG_X86
|
||||||
LogGeneratedX86(size, code_buf, normalEntry, b);
|
LogGeneratedX86(code_block.m_num_instructions, code_buf, normalEntry, b);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return normalEntry;
|
return normalEntry;
|
||||||
|
|
|
@ -103,7 +103,6 @@ void Jit64::bcx(UGeckoInstruction inst)
|
||||||
JITDISABLE(bJITBranchOff)
|
JITDISABLE(bJITBranchOff)
|
||||||
|
|
||||||
// USES_CR
|
// USES_CR
|
||||||
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
|
|
||||||
|
|
||||||
gpr.Flush(FLUSH_ALL);
|
gpr.Flush(FLUSH_ALL);
|
||||||
fpr.Flush(FLUSH_ALL);
|
fpr.Flush(FLUSH_ALL);
|
||||||
|
@ -142,7 +141,9 @@ void Jit64::bcx(UGeckoInstruction inst)
|
||||||
SetJumpTarget( pConditionDontBranch );
|
SetJumpTarget( pConditionDontBranch );
|
||||||
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
|
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
|
||||||
SetJumpTarget( pCTRDontBranch );
|
SetJumpTarget( pCTRDontBranch );
|
||||||
WriteExit(js.compilerPC + 4);
|
|
||||||
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
WriteExit(js.compilerPC + 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::bcctrx(UGeckoInstruction inst)
|
void Jit64::bcctrx(UGeckoInstruction inst)
|
||||||
|
@ -190,7 +191,9 @@ void Jit64::bcctrx(UGeckoInstruction inst)
|
||||||
WriteExitDestInEAX();
|
WriteExitDestInEAX();
|
||||||
// Would really like to continue the block here, but it ends. TODO.
|
// Would really like to continue the block here, but it ends. TODO.
|
||||||
SetJumpTarget(b);
|
SetJumpTarget(b);
|
||||||
WriteExit(js.compilerPC + 4);
|
|
||||||
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
WriteExit(js.compilerPC + 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -199,13 +202,6 @@ void Jit64::bclrx(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITBranchOff)
|
JITDISABLE(bJITBranchOff)
|
||||||
|
|
||||||
if (!js.isLastInstruction &&
|
|
||||||
(inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) {
|
|
||||||
if (inst.LK)
|
|
||||||
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
gpr.Flush(FLUSH_ALL);
|
gpr.Flush(FLUSH_ALL);
|
||||||
fpr.Flush(FLUSH_ALL);
|
fpr.Flush(FLUSH_ALL);
|
||||||
|
|
||||||
|
@ -245,5 +241,7 @@ void Jit64::bclrx(UGeckoInstruction inst)
|
||||||
SetJumpTarget( pConditionDontBranch );
|
SetJumpTarget( pConditionDontBranch );
|
||||||
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
|
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
|
||||||
SetJumpTarget( pCTRDontBranch );
|
SetJumpTarget( pCTRDontBranch );
|
||||||
WriteExit(js.compilerPC + 4);
|
|
||||||
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
WriteExit(js.compilerPC + 4);
|
||||||
}
|
}
|
||||||
|
|
|
@ -425,10 +425,12 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
WriteExit(js.next_compilerPC + 4);
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
{
|
||||||
|
js.skipnext = true;
|
||||||
|
WriteExit(js.next_compilerPC + 4);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
js.cancel = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -535,9 +537,11 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
||||||
if (!!(4 & test_bit) == condition) SetJumpTarget(continue2);
|
if (!!(4 & test_bit) == condition) SetJumpTarget(continue2);
|
||||||
if (!!(2 & test_bit) == condition) SetJumpTarget(continue1);
|
if (!!(2 & test_bit) == condition) SetJumpTarget(continue1);
|
||||||
|
|
||||||
WriteExit(js.next_compilerPC + 4);
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
{
|
||||||
js.cancel = true;
|
js.skipnext = true;
|
||||||
|
WriteExit(js.next_compilerPC + 4);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2229,5 +2233,7 @@ void Jit64::twx(UGeckoInstruction inst)
|
||||||
SetJumpTarget(exit3);
|
SetJumpTarget(exit3);
|
||||||
SetJumpTarget(exit4);
|
SetJumpTarget(exit4);
|
||||||
SetJumpTarget(exit5);
|
SetJumpTarget(exit5);
|
||||||
WriteExit(js.compilerPC + 4);
|
|
||||||
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
WriteExit(js.compilerPC + 4);
|
||||||
}
|
}
|
||||||
|
|
|
@ -268,6 +268,10 @@ void JitIL::Init()
|
||||||
blocks.Init();
|
blocks.Init();
|
||||||
asm_routines.Init();
|
asm_routines.Init();
|
||||||
|
|
||||||
|
code_block.m_stats = &js.st;
|
||||||
|
code_block.m_gpa = &js.gpa;
|
||||||
|
code_block.m_fpa = &js.fpa;
|
||||||
|
|
||||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) {
|
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) {
|
||||||
JitILProfiler::Init();
|
JitILProfiler::Init();
|
||||||
}
|
}
|
||||||
|
@ -500,9 +504,6 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
// Memory exception on instruction fetch
|
// Memory exception on instruction fetch
|
||||||
bool memory_exception = false;
|
bool memory_exception = false;
|
||||||
|
|
||||||
// A broken block is a block that does not end in a branch
|
|
||||||
bool broken_block = false;
|
|
||||||
|
|
||||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
||||||
{
|
{
|
||||||
// Comment out the following to disable breakpoints (speed-up)
|
// Comment out the following to disable breakpoints (speed-up)
|
||||||
|
@ -529,7 +530,6 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int size = 0;
|
|
||||||
js.isLastInstruction = false;
|
js.isLastInstruction = false;
|
||||||
js.blockStart = em_address;
|
js.blockStart = em_address;
|
||||||
js.fifoBytesThisBlock = 0;
|
js.fifoBytesThisBlock = 0;
|
||||||
|
@ -538,19 +538,12 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
jit->js.numLoadStoreInst = 0;
|
jit->js.numLoadStoreInst = 0;
|
||||||
jit->js.numFloatingPointInst = 0;
|
jit->js.numFloatingPointInst = 0;
|
||||||
|
|
||||||
|
u32 nextPC = em_address;
|
||||||
// Analyze the block, collect all instructions it is made of (including inlining,
|
// Analyze the block, collect all instructions it is made of (including inlining,
|
||||||
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
||||||
u32 exitAddress = em_address;
|
|
||||||
|
|
||||||
u32 merged_addresses[32];
|
|
||||||
const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]);
|
|
||||||
int size_of_merged_addresses = 0;
|
|
||||||
if (!memory_exception)
|
if (!memory_exception)
|
||||||
{
|
nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize);
|
||||||
// If there is a memory exception inside a block (broken_block==true), compile up to that instruction.
|
|
||||||
// TODO
|
|
||||||
exitAddress = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses);
|
|
||||||
}
|
|
||||||
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
|
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
|
||||||
|
|
||||||
const u8 *start = AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr
|
const u8 *start = AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr
|
||||||
|
@ -586,7 +579,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILOutputIR)
|
SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILOutputIR)
|
||||||
{
|
{
|
||||||
// For profiling and IR Writer
|
// For profiling and IR Writer
|
||||||
for (int i = 0; i < (int)size; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
const u64 inst = ops[i].inst.hex;
|
const u64 inst = ops[i].inst.hex;
|
||||||
// Ported from boost::hash
|
// Ported from boost::hash
|
||||||
|
@ -606,16 +599,10 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
|
|
||||||
js.downcountAmount = 0;
|
js.downcountAmount = 0;
|
||||||
if (!Core::g_CoreStartupParameter.bEnableDebugging)
|
if (!Core::g_CoreStartupParameter.bEnableDebugging)
|
||||||
{
|
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
|
||||||
for (int i = 0; i < size_of_merged_addresses; ++i)
|
|
||||||
{
|
|
||||||
const u32 address = merged_addresses[i];
|
|
||||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(address);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Translate instructions
|
// Translate instructions
|
||||||
for (int i = 0; i < (int)size; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
js.compilerPC = ops[i].address;
|
js.compilerPC = ops[i].address;
|
||||||
js.op = &ops[i];
|
js.op = &ops[i];
|
||||||
|
@ -623,7 +610,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
const GekkoOPInfo *opinfo = GetOpInfo(ops[i].inst);
|
const GekkoOPInfo *opinfo = GetOpInfo(ops[i].inst);
|
||||||
js.downcountAmount += opinfo->numCycles;
|
js.downcountAmount += opinfo->numCycles;
|
||||||
|
|
||||||
if (i == (int)size - 1)
|
if (i == (code_block.m_num_instructions - 1))
|
||||||
{
|
{
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
js.next_inst = 0;
|
js.next_inst = 0;
|
||||||
|
@ -708,13 +695,13 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform actual code generation
|
// Perform actual code generation
|
||||||
WriteCode(exitAddress);
|
WriteCode(nextPC);
|
||||||
|
|
||||||
b->codeSize = (u32)(GetCodePtr() - normalEntry);
|
b->codeSize = (u32)(GetCodePtr() - normalEntry);
|
||||||
b->originalSize = size;
|
b->originalSize = code_block.m_num_instructions;
|
||||||
|
|
||||||
#ifdef JIT_LOG_X86
|
#ifdef JIT_LOG_X86
|
||||||
LogGeneratedX86(size, code_buf, normalEntry, b);
|
LogGeneratedX86(code_block.m_num_instructions, code_buf, normalEntry, b);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILOutputIR)
|
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILOutputIR)
|
||||||
|
|
|
@ -40,6 +40,11 @@ void JitArm::Init()
|
||||||
fpr.Init(this);
|
fpr.Init(this);
|
||||||
jo.enableBlocklink = true;
|
jo.enableBlocklink = true;
|
||||||
jo.optimizeGatherPipe = true;
|
jo.optimizeGatherPipe = true;
|
||||||
|
|
||||||
|
code_block.m_stats = &js.st;
|
||||||
|
code_block.m_gpa = &js.gpa;
|
||||||
|
code_block.m_fpa = &js.fpa;
|
||||||
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm::ClearCache()
|
void JitArm::ClearCache()
|
||||||
|
@ -250,27 +255,27 @@ void JitArm::Trace()
|
||||||
void JitArm::PrintDebug(UGeckoInstruction inst, u32 level)
|
void JitArm::PrintDebug(UGeckoInstruction inst, u32 level)
|
||||||
{
|
{
|
||||||
if (level > 0)
|
if (level > 0)
|
||||||
printf("Start: %08x OP '%s' Info\n", (u32)GetCodePtr(), PPCTables::GetInstructionName(inst));
|
WARN_LOG(DYNA_REC, "Start: %08x OP '%s' Info", (u32)GetCodePtr(), PPCTables::GetInstructionName(inst));
|
||||||
if (level > 1)
|
if (level > 1)
|
||||||
{
|
{
|
||||||
GekkoOPInfo* Info = GetOpInfo(inst.hex);
|
GekkoOPInfo* Info = GetOpInfo(inst.hex);
|
||||||
printf("\tOuts\n");
|
WARN_LOG(DYNA_REC, "\tOuts");
|
||||||
if (Info->flags & FL_OUT_A)
|
if (Info->flags & FL_OUT_A)
|
||||||
printf("\t-OUT_A: %x\n", inst.RA);
|
WARN_LOG(DYNA_REC, "\t-OUT_A: %x", inst.RA);
|
||||||
if (Info->flags & FL_OUT_D)
|
if (Info->flags & FL_OUT_D)
|
||||||
printf("\t-OUT_D: %x\n", inst.RD);
|
WARN_LOG(DYNA_REC, "\t-OUT_D: %x", inst.RD);
|
||||||
printf("\tIns\n");
|
WARN_LOG(DYNA_REC, "\tIns");
|
||||||
// A, AO, B, C, S
|
// A, AO, B, C, S
|
||||||
if (Info->flags & FL_IN_A)
|
if (Info->flags & FL_IN_A)
|
||||||
printf("\t-IN_A: %x\n", inst.RA);
|
WARN_LOG(DYNA_REC, "\t-IN_A: %x", inst.RA);
|
||||||
if (Info->flags & FL_IN_A0)
|
if (Info->flags & FL_IN_A0)
|
||||||
printf("\t-IN_A0: %x\n", inst.RA);
|
WARN_LOG(DYNA_REC, "\t-IN_A0: %x", inst.RA);
|
||||||
if (Info->flags & FL_IN_B)
|
if (Info->flags & FL_IN_B)
|
||||||
printf("\t-IN_B: %x\n", inst.RB);
|
WARN_LOG(DYNA_REC, "\t-IN_B: %x", inst.RB);
|
||||||
if (Info->flags & FL_IN_C)
|
if (Info->flags & FL_IN_C)
|
||||||
printf("\t-IN_C: %x\n", inst.RC);
|
WARN_LOG(DYNA_REC, "\t-IN_C: %x", inst.RC);
|
||||||
if (Info->flags & FL_IN_S)
|
if (Info->flags & FL_IN_S)
|
||||||
printf("\t-IN_S: %x\n", inst.RS);
|
WARN_LOG(DYNA_REC, "\t-IN_S: %x", inst.RS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -298,14 +303,10 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
// Memory exception on instruction fetch
|
// Memory exception on instruction fetch
|
||||||
bool memory_exception = false;
|
bool memory_exception = false;
|
||||||
|
|
||||||
// A broken block is a block that does not end in a branch
|
|
||||||
bool broken_block = false;
|
|
||||||
|
|
||||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
||||||
{
|
{
|
||||||
// Comment out the following to disable breakpoints (speed-up)
|
// Comment out the following to disable breakpoints (speed-up)
|
||||||
blockSize = 1;
|
blockSize = 1;
|
||||||
broken_block = true;
|
|
||||||
Trace();
|
Trace();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -324,8 +325,6 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int size = 0;
|
|
||||||
js.isLastInstruction = false;
|
js.isLastInstruction = false;
|
||||||
js.blockStart = em_address;
|
js.blockStart = em_address;
|
||||||
js.fifoBytesThisBlock = 0;
|
js.fifoBytesThisBlock = 0;
|
||||||
|
@ -333,17 +332,12 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
js.block_flags = 0;
|
js.block_flags = 0;
|
||||||
js.cancel = false;
|
js.cancel = false;
|
||||||
|
|
||||||
|
u32 nextPC = em_address;
|
||||||
// Analyze the block, collect all instructions it is made of (including inlining,
|
// Analyze the block, collect all instructions it is made of (including inlining,
|
||||||
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
||||||
u32 nextPC = em_address;
|
|
||||||
u32 merged_addresses[32];
|
|
||||||
const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]);
|
|
||||||
int size_of_merged_addresses = 0;
|
|
||||||
if (!memory_exception)
|
if (!memory_exception)
|
||||||
{
|
nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize);
|
||||||
// If there is a memory exception inside a block (broken_block==true), compile up to that instruction.
|
|
||||||
nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses);
|
|
||||||
}
|
|
||||||
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
|
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
|
||||||
|
|
||||||
const u8 *start = GetCodePtr();
|
const u8 *start = GetCodePtr();
|
||||||
|
@ -398,20 +392,21 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
gpr.Start(js.gpa);
|
gpr.Start(js.gpa);
|
||||||
fpr.Start(js.fpa);
|
fpr.Start(js.fpa);
|
||||||
js.downcountAmount = 0;
|
js.downcountAmount = 0;
|
||||||
|
|
||||||
if (!Core::g_CoreStartupParameter.bEnableDebugging)
|
if (!Core::g_CoreStartupParameter.bEnableDebugging)
|
||||||
{
|
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
|
||||||
for (int i = 0; i < size_of_merged_addresses; ++i)
|
|
||||||
{
|
|
||||||
const u32 address = merged_addresses[i];
|
|
||||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(address);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
js.skipnext = false;
|
js.skipnext = false;
|
||||||
js.blockSize = size;
|
js.blockSize = code_block.m_num_instructions;
|
||||||
js.compilerPC = nextPC;
|
js.compilerPC = nextPC;
|
||||||
|
|
||||||
|
const int DEBUG_OUTPUT = 0;
|
||||||
|
|
||||||
|
if (DEBUG_OUTPUT)
|
||||||
|
WARN_LOG(DYNA_REC, "-------0x%08x-------", em_address);
|
||||||
|
|
||||||
// Translate instructions
|
// Translate instructions
|
||||||
for (int i = 0; i < (int)size; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
js.compilerPC = ops[i].address;
|
js.compilerPC = ops[i].address;
|
||||||
js.op = &ops[i];
|
js.op = &ops[i];
|
||||||
|
@ -419,7 +414,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
const GekkoOPInfo *opinfo = ops[i].opinfo;
|
const GekkoOPInfo *opinfo = ops[i].opinfo;
|
||||||
js.downcountAmount += opinfo->numCycles;
|
js.downcountAmount += opinfo->numCycles;
|
||||||
|
|
||||||
if (i == (int)size - 1)
|
if (i == (code_block.m_num_instructions - 1))
|
||||||
{
|
{
|
||||||
// WARNING - cmp->branch merging will screw this up.
|
// WARNING - cmp->branch merging will screw this up.
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
|
@ -468,7 +463,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
}
|
}
|
||||||
if (!ops[i].skip)
|
if (!ops[i].skip)
|
||||||
{
|
{
|
||||||
PrintDebug(ops[i].inst, 0);
|
PrintDebug(ops[i].inst, DEBUG_OUTPUT);
|
||||||
if (js.memcheck && (opinfo->flags & FL_USE_FPU))
|
if (js.memcheck && (opinfo->flags & FL_USE_FPU))
|
||||||
{
|
{
|
||||||
// Don't do this yet
|
// Don't do this yet
|
||||||
|
@ -485,7 +480,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
}
|
}
|
||||||
if (memory_exception)
|
if (memory_exception)
|
||||||
BKPT(0x500);
|
BKPT(0x500);
|
||||||
if (broken_block)
|
if (code_block.m_broken)
|
||||||
{
|
{
|
||||||
printf("Broken Block going to 0x%08x\n", nextPC);
|
printf("Broken Block going to 0x%08x\n", nextPC);
|
||||||
WriteExit(nextPC);
|
WriteExit(nextPC);
|
||||||
|
@ -493,7 +488,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
|
|
||||||
b->flags = js.block_flags;
|
b->flags = js.block_flags;
|
||||||
b->codeSize = (u32)(GetCodePtr() - normalEntry);
|
b->codeSize = (u32)(GetCodePtr() - normalEntry);
|
||||||
b->originalSize = size;
|
b->originalSize = code_block.m_num_instructions;
|
||||||
FlushIcache();
|
FlushIcache();
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@ private:
|
||||||
ArmFPRCache fpr;
|
ArmFPRCache fpr;
|
||||||
|
|
||||||
PPCAnalyst::CodeBuffer code_buffer;
|
PPCAnalyst::CodeBuffer code_buffer;
|
||||||
|
|
||||||
void DoDownCount();
|
void DoDownCount();
|
||||||
|
|
||||||
void PrintDebug(UGeckoInstruction inst, u32 level);
|
void PrintDebug(UGeckoInstruction inst, u32 level);
|
||||||
|
|
|
@ -149,7 +149,6 @@ void JitArm::bcx(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITBranchOff)
|
JITDISABLE(bJITBranchOff)
|
||||||
// USES_CR
|
// USES_CR
|
||||||
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
|
|
||||||
|
|
||||||
gpr.Flush();
|
gpr.Flush();
|
||||||
fpr.Flush();
|
fpr.Flush();
|
||||||
|
@ -203,7 +202,8 @@ void JitArm::bcx(UGeckoInstruction inst)
|
||||||
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
|
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
|
||||||
SetJumpTarget( pCTRDontBranch );
|
SetJumpTarget( pCTRDontBranch );
|
||||||
|
|
||||||
WriteExit(js.compilerPC + 4);
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
WriteExit(js.compilerPC + 4);
|
||||||
}
|
}
|
||||||
void JitArm::bcctrx(UGeckoInstruction inst)
|
void JitArm::bcctrx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
|
@ -265,25 +265,16 @@ void JitArm::bcctrx(UGeckoInstruction inst)
|
||||||
WriteExitDestInR(rA);
|
WriteExitDestInR(rA);
|
||||||
|
|
||||||
SetJumpTarget(b);
|
SetJumpTarget(b);
|
||||||
WriteExit(js.compilerPC + 4);
|
|
||||||
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
WriteExit(js.compilerPC + 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void JitArm::bclrx(UGeckoInstruction inst)
|
void JitArm::bclrx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITBranchOff)
|
JITDISABLE(bJITBranchOff)
|
||||||
if (!js.isLastInstruction &&
|
|
||||||
(inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) {
|
|
||||||
if (inst.LK)
|
|
||||||
{
|
|
||||||
ARMReg rA = gpr.GetReg(false);
|
|
||||||
u32 Jumpto = js.compilerPC + 4;
|
|
||||||
MOVI2R(rA, Jumpto);
|
|
||||||
STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR]));
|
|
||||||
// ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
gpr.Flush();
|
gpr.Flush();
|
||||||
fpr.Flush();
|
fpr.Flush();
|
||||||
|
|
||||||
|
@ -342,5 +333,7 @@ void JitArm::bclrx(UGeckoInstruction inst)
|
||||||
SetJumpTarget( pConditionDontBranch );
|
SetJumpTarget( pConditionDontBranch );
|
||||||
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
|
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
|
||||||
SetJumpTarget( pCTRDontBranch );
|
SetJumpTarget( pCTRDontBranch );
|
||||||
WriteExit(js.compilerPC + 4);
|
|
||||||
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
WriteExit(js.compilerPC + 4);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1014,7 +1014,9 @@ void JitArm::twx(UGeckoInstruction inst)
|
||||||
SetJumpTarget(exit3);
|
SetJumpTarget(exit3);
|
||||||
SetJumpTarget(exit4);
|
SetJumpTarget(exit4);
|
||||||
SetJumpTarget(exit5);
|
SetJumpTarget(exit5);
|
||||||
WriteExit(js.compilerPC + 4);
|
|
||||||
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
WriteExit(js.compilerPC + 4);
|
||||||
|
|
||||||
gpr.Unlock(RA, RB);
|
gpr.Unlock(RA, RB);
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,10 @@ void JitArmIL::Init()
|
||||||
AllocCodeSpace(CODE_SIZE);
|
AllocCodeSpace(CODE_SIZE);
|
||||||
blocks.Init();
|
blocks.Init();
|
||||||
asm_routines.Init();
|
asm_routines.Init();
|
||||||
|
|
||||||
|
code_block.m_stats = &js.st;
|
||||||
|
code_block.m_gpa = &js.gpa;
|
||||||
|
code_block.m_fpa = &js.fpa;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArmIL::ClearCache()
|
void JitArmIL::ClearCache()
|
||||||
|
@ -202,14 +206,10 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
// Memory exception on instruction fetch
|
// Memory exception on instruction fetch
|
||||||
bool memory_exception = false;
|
bool memory_exception = false;
|
||||||
|
|
||||||
// A broken block is a block that does not end in a branch
|
|
||||||
bool broken_block = false;
|
|
||||||
|
|
||||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
||||||
{
|
{
|
||||||
// Comment out the following to disable breakpoints (speed-up)
|
// Comment out the following to disable breakpoints (speed-up)
|
||||||
blockSize = 1;
|
blockSize = 1;
|
||||||
broken_block = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (em_address == 0)
|
if (em_address == 0)
|
||||||
|
@ -228,7 +228,6 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int size = 0;
|
|
||||||
js.isLastInstruction = false;
|
js.isLastInstruction = false;
|
||||||
js.blockStart = em_address;
|
js.blockStart = em_address;
|
||||||
js.fifoBytesThisBlock = 0;
|
js.fifoBytesThisBlock = 0;
|
||||||
|
@ -236,17 +235,12 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
js.block_flags = 0;
|
js.block_flags = 0;
|
||||||
js.cancel = false;
|
js.cancel = false;
|
||||||
|
|
||||||
|
u32 nextPC = em_address;
|
||||||
// Analyze the block, collect all instructions it is made of (including inlining,
|
// Analyze the block, collect all instructions it is made of (including inlining,
|
||||||
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
||||||
u32 nextPC = em_address;
|
|
||||||
u32 merged_addresses[32];
|
|
||||||
const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]);
|
|
||||||
int size_of_merged_addresses = 0;
|
|
||||||
if (!memory_exception)
|
if (!memory_exception)
|
||||||
{
|
nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize);
|
||||||
// If there is a memory exception inside a block (broken_block==true), compile up to that instruction.
|
|
||||||
nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses);
|
|
||||||
}
|
|
||||||
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
|
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
|
||||||
|
|
||||||
const u8 *start = GetCodePtr();
|
const u8 *start = GetCodePtr();
|
||||||
|
@ -271,7 +265,7 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
u64 codeHash = -1;
|
u64 codeHash = -1;
|
||||||
{
|
{
|
||||||
// For profiling and IR Writer
|
// For profiling and IR Writer
|
||||||
for (int i = 0; i < (int)size; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
const u64 inst = ops[i].inst.hex;
|
const u64 inst = ops[i].inst.hex;
|
||||||
// Ported from boost::hash
|
// Ported from boost::hash
|
||||||
|
@ -289,19 +283,13 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
|
|
||||||
js.downcountAmount = 0;
|
js.downcountAmount = 0;
|
||||||
if (!Core::g_CoreStartupParameter.bEnableDebugging)
|
if (!Core::g_CoreStartupParameter.bEnableDebugging)
|
||||||
{
|
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
|
||||||
for (int i = 0; i < size_of_merged_addresses; ++i)
|
|
||||||
{
|
|
||||||
const u32 address = merged_addresses[i];
|
|
||||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(address);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
js.skipnext = false;
|
js.skipnext = false;
|
||||||
js.blockSize = size;
|
js.blockSize = code_block.m_num_instructions;
|
||||||
js.compilerPC = nextPC;
|
js.compilerPC = nextPC;
|
||||||
// Translate instructions
|
// Translate instructions
|
||||||
for (int i = 0; i < (int)size; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
js.compilerPC = ops[i].address;
|
js.compilerPC = ops[i].address;
|
||||||
js.op = &ops[i];
|
js.op = &ops[i];
|
||||||
|
@ -309,7 +297,7 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
const GekkoOPInfo *opinfo = ops[i].opinfo;
|
const GekkoOPInfo *opinfo = ops[i].opinfo;
|
||||||
js.downcountAmount += opinfo->numCycles;
|
js.downcountAmount += opinfo->numCycles;
|
||||||
|
|
||||||
if (i == (int)size - 1)
|
if (i == (code_block.m_num_instructions - 1))
|
||||||
{
|
{
|
||||||
// WARNING - cmp->branch merging will screw this up.
|
// WARNING - cmp->branch merging will screw this up.
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
|
@ -348,21 +336,19 @@ const u8* JitArmIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
}
|
}
|
||||||
if (memory_exception)
|
if (memory_exception)
|
||||||
BKPT(0x500);
|
BKPT(0x500);
|
||||||
if (broken_block)
|
|
||||||
|
if (code_block.m_broken)
|
||||||
{
|
{
|
||||||
printf("Broken Block going to 0x%08x\n", nextPC);
|
printf("Broken Block going to 0x%08x\n", nextPC);
|
||||||
WriteExit(nextPC);
|
WriteExit(nextPC);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform actual code generation
|
// Perform actual code generation
|
||||||
|
|
||||||
WriteCode(nextPC);
|
WriteCode(nextPC);
|
||||||
b->flags = js.block_flags;
|
b->flags = js.block_flags;
|
||||||
b->codeSize = (u32)(GetCodePtr() - normalEntry);
|
b->codeSize = (u32)(GetCodePtr() - normalEntry);
|
||||||
b->originalSize = size;
|
b->originalSize = code_block.m_num_instructions;;
|
||||||
|
|
||||||
{
|
|
||||||
}
|
|
||||||
FlushIcache();
|
FlushIcache();
|
||||||
return start;
|
return start;
|
||||||
|
|
||||||
|
|
|
@ -83,6 +83,9 @@ protected:
|
||||||
std::unordered_set<u32> fifoWriteAddresses;
|
std::unordered_set<u32> fifoWriteAddresses;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
PPCAnalyst::CodeBlock code_block;
|
||||||
|
PPCAnalyst::PPCAnalyzer analyzer;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// This should probably be removed from public:
|
// This should probably be removed from public:
|
||||||
JitOptions jo;
|
JitOptions jo;
|
||||||
|
|
|
@ -269,302 +269,6 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Does not yet perform inlining - although there are plans for that.
|
|
||||||
// Returns the exit address of the next PC
|
|
||||||
u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa,
|
|
||||||
BlockRegStats *fpa, bool &broken_block, CodeBuffer *buffer,
|
|
||||||
int blockSize, u32* merged_addresses,
|
|
||||||
int capacity_of_merged_addresses, int& size_of_merged_addresses)
|
|
||||||
{
|
|
||||||
if (capacity_of_merged_addresses < FUNCTION_FOLLOWING_THRESHOLD) {
|
|
||||||
PanicAlert("Capacity of merged_addresses is too small!");
|
|
||||||
}
|
|
||||||
std::fill_n(merged_addresses, capacity_of_merged_addresses, 0);
|
|
||||||
merged_addresses[0] = address;
|
|
||||||
size_of_merged_addresses = 1;
|
|
||||||
|
|
||||||
memset(st, 0, sizeof(*st));
|
|
||||||
|
|
||||||
// Disabled the following optimization in preference of FAST_ICACHE
|
|
||||||
//UGeckoInstruction previnst = Memory::Read_Opcode_JIT_LC(address - 4);
|
|
||||||
//if (previnst.hex == 0x4e800020)
|
|
||||||
// st->isFirstBlockOfFunction = true;
|
|
||||||
|
|
||||||
gpa->any = true;
|
|
||||||
fpa->any = false;
|
|
||||||
|
|
||||||
for (int i = 0; i < 32; i++)
|
|
||||||
{
|
|
||||||
gpa->firstRead[i] = -1;
|
|
||||||
gpa->firstWrite[i] = -1;
|
|
||||||
gpa->numReads[i] = 0;
|
|
||||||
gpa->numWrites[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 blockstart = address;
|
|
||||||
int maxsize = blockSize;
|
|
||||||
|
|
||||||
int num_inst = 0;
|
|
||||||
int numFollows = 0;
|
|
||||||
int numCycles = 0;
|
|
||||||
|
|
||||||
CodeOp *code = buffer->codebuffer;
|
|
||||||
bool foundExit = false;
|
|
||||||
|
|
||||||
u32 returnAddress = 0;
|
|
||||||
|
|
||||||
// Do analysis of the code, look for dependencies etc
|
|
||||||
int numSystemInstructions = 0;
|
|
||||||
for (int i = 0; i < maxsize; i++)
|
|
||||||
{
|
|
||||||
UGeckoInstruction inst = JitInterface::Read_Opcode_JIT(address);
|
|
||||||
|
|
||||||
if (inst.hex != 0)
|
|
||||||
{
|
|
||||||
num_inst++;
|
|
||||||
memset(&code[i], 0, sizeof(CodeOp));
|
|
||||||
GekkoOPInfo *opinfo = GetOpInfo(inst);
|
|
||||||
code[i].opinfo = opinfo;
|
|
||||||
// FIXME: code[i].address may not be correct due to CST1 code.
|
|
||||||
code[i].address = address;
|
|
||||||
code[i].inst = inst;
|
|
||||||
code[i].branchTo = -1;
|
|
||||||
code[i].branchToIndex = -1;
|
|
||||||
code[i].skip = false;
|
|
||||||
numCycles += opinfo->numCycles;
|
|
||||||
|
|
||||||
code[i].wantsCR0 = false;
|
|
||||||
code[i].wantsCR1 = false;
|
|
||||||
code[i].wantsPS1 = false;
|
|
||||||
|
|
||||||
int flags = opinfo->flags;
|
|
||||||
|
|
||||||
if (flags & FL_USE_FPU)
|
|
||||||
fpa->any = true;
|
|
||||||
|
|
||||||
if (flags & FL_TIMER)
|
|
||||||
gpa->anyTimer = true;
|
|
||||||
|
|
||||||
// Does the instruction output CR0?
|
|
||||||
if (flags & FL_RC_BIT)
|
|
||||||
code[i].outputCR0 = inst.hex & 1; //todo fix
|
|
||||||
else if ((flags & FL_SET_CRn) && inst.CRFD == 0)
|
|
||||||
code[i].outputCR0 = true;
|
|
||||||
else
|
|
||||||
code[i].outputCR0 = (flags & FL_SET_CR0) ? true : false;
|
|
||||||
|
|
||||||
// Does the instruction output CR1?
|
|
||||||
if (flags & FL_RC_BIT_F)
|
|
||||||
code[i].outputCR1 = inst.hex & 1; //todo fix
|
|
||||||
else if ((flags & FL_SET_CRn) && inst.CRFD == 1)
|
|
||||||
code[i].outputCR1 = true;
|
|
||||||
else
|
|
||||||
code[i].outputCR1 = (flags & FL_SET_CR1) ? true : false;
|
|
||||||
|
|
||||||
int numOut = 0;
|
|
||||||
int numIn = 0;
|
|
||||||
if (flags & FL_OUT_A)
|
|
||||||
{
|
|
||||||
code[i].regsOut[numOut++] = inst.RA;
|
|
||||||
gpa->SetOutputRegister(inst.RA, i);
|
|
||||||
}
|
|
||||||
if (flags & FL_OUT_D)
|
|
||||||
{
|
|
||||||
code[i].regsOut[numOut++] = inst.RD;
|
|
||||||
gpa->SetOutputRegister(inst.RD, i);
|
|
||||||
}
|
|
||||||
if (flags & FL_OUT_S)
|
|
||||||
{
|
|
||||||
code[i].regsOut[numOut++] = inst.RS;
|
|
||||||
gpa->SetOutputRegister(inst.RS, i);
|
|
||||||
}
|
|
||||||
if ((flags & FL_IN_A) || ((flags & FL_IN_A0) && inst.RA != 0))
|
|
||||||
{
|
|
||||||
code[i].regsIn[numIn++] = inst.RA;
|
|
||||||
gpa->SetInputRegister(inst.RA, i);
|
|
||||||
}
|
|
||||||
if (flags & FL_IN_B)
|
|
||||||
{
|
|
||||||
code[i].regsIn[numIn++] = inst.RB;
|
|
||||||
gpa->SetInputRegister(inst.RB, i);
|
|
||||||
}
|
|
||||||
if (flags & FL_IN_C)
|
|
||||||
{
|
|
||||||
code[i].regsIn[numIn++] = inst.RC;
|
|
||||||
gpa->SetInputRegister(inst.RC, i);
|
|
||||||
}
|
|
||||||
if (flags & FL_IN_S)
|
|
||||||
{
|
|
||||||
code[i].regsIn[numIn++] = inst.RS;
|
|
||||||
gpa->SetInputRegister(inst.RS, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set remaining register slots as unused (-1)
|
|
||||||
for (int j = numIn; j < 3; j++)
|
|
||||||
code[i].regsIn[j] = -1;
|
|
||||||
for (int j = numOut; j < 2; j++)
|
|
||||||
code[i].regsOut[j] = -1;
|
|
||||||
for (int j = 0; j < 3; j++)
|
|
||||||
code[i].fregsIn[j] = -1;
|
|
||||||
code[i].fregOut = -1;
|
|
||||||
|
|
||||||
switch (opinfo->type)
|
|
||||||
{
|
|
||||||
case OPTYPE_INTEGER:
|
|
||||||
case OPTYPE_LOAD:
|
|
||||||
case OPTYPE_STORE:
|
|
||||||
case OPTYPE_LOADFP:
|
|
||||||
case OPTYPE_STOREFP:
|
|
||||||
break;
|
|
||||||
case OPTYPE_FPU:
|
|
||||||
break;
|
|
||||||
case OPTYPE_BRANCH:
|
|
||||||
if (code[i].inst.hex == 0x4e800020)
|
|
||||||
{
|
|
||||||
// For analysis purposes, we can assume that blr eats flags.
|
|
||||||
code[i].outputCR0 = true;
|
|
||||||
code[i].outputCR1 = true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case OPTYPE_SYSTEM:
|
|
||||||
case OPTYPE_SYSTEMFP:
|
|
||||||
numSystemInstructions++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool follow = false;
|
|
||||||
u32 destination = 0;
|
|
||||||
if (inst.OPCD == 18 && blockSize > 1)
|
|
||||||
{
|
|
||||||
//Is bx - should we inline? yes!
|
|
||||||
if (inst.AA)
|
|
||||||
destination = SignExt26(inst.LI << 2);
|
|
||||||
else
|
|
||||||
destination = address + SignExt26(inst.LI << 2);
|
|
||||||
if (destination != blockstart)
|
|
||||||
follow = true;
|
|
||||||
}
|
|
||||||
else if (inst.OPCD == 19 && inst.SUBOP10 == 16 &&
|
|
||||||
(inst.BO & (1 << 4)) && (inst.BO & (1 << 2)) &&
|
|
||||||
returnAddress != 0)
|
|
||||||
{
|
|
||||||
// bclrx with unconditional branch = return
|
|
||||||
follow = true;
|
|
||||||
destination = returnAddress;
|
|
||||||
returnAddress = 0;
|
|
||||||
|
|
||||||
if (inst.LK)
|
|
||||||
returnAddress = address + 4;
|
|
||||||
}
|
|
||||||
else if (inst.OPCD == 31 && inst.SUBOP10 == 467)
|
|
||||||
{
|
|
||||||
// mtspr
|
|
||||||
const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
|
||||||
if (index == SPR_LR) {
|
|
||||||
// We give up to follow the return address
|
|
||||||
// because we have to check the register usage.
|
|
||||||
returnAddress = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (follow)
|
|
||||||
numFollows++;
|
|
||||||
// TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD.
|
|
||||||
// If it is small, the performance will be down.
|
|
||||||
// If it is big, the size of generated code will be big and
|
|
||||||
// cache clearning will happen many times.
|
|
||||||
// TODO: Investivate the reason why
|
|
||||||
// "0" is fastest in some games, MP2 for example.
|
|
||||||
if (numFollows > FUNCTION_FOLLOWING_THRESHOLD)
|
|
||||||
follow = false;
|
|
||||||
|
|
||||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMergeBlocks) {
|
|
||||||
follow = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!follow)
|
|
||||||
{
|
|
||||||
if (opinfo->flags & FL_ENDBLOCK) //right now we stop early
|
|
||||||
{
|
|
||||||
foundExit = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
address += 4;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// We don't "code[i].skip = true" here
|
|
||||||
// because bx may store a certain value to the link register.
|
|
||||||
// Instead, we skip a part of bx in Jit**::bx().
|
|
||||||
address = destination;
|
|
||||||
merged_addresses[size_of_merged_addresses++] = address;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// ISI exception or other critical memory exception occurred (game over)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
st->numCycles = numCycles;
|
|
||||||
|
|
||||||
// Instruction Reordering Pass
|
|
||||||
if (num_inst > 1)
|
|
||||||
{
|
|
||||||
// Bubble down compares towards branches, so that they can be merged.
|
|
||||||
// -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch.
|
|
||||||
for (int i = 0; i < num_inst - 2; i++)
|
|
||||||
{
|
|
||||||
CodeOp &a = code[i];
|
|
||||||
CodeOp &b = code[i + 1];
|
|
||||||
// All integer compares can be reordered.
|
|
||||||
if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) ||
|
|
||||||
(a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32)))
|
|
||||||
{
|
|
||||||
// Got a compare instruction.
|
|
||||||
if (CanSwapAdjacentOps(a, b)) {
|
|
||||||
// Alright, let's bubble it down!
|
|
||||||
CodeOp c = a;
|
|
||||||
a = b;
|
|
||||||
b = c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!foundExit && num_inst > 0)
|
|
||||||
{
|
|
||||||
// A broken block is a block that does not end in a branch
|
|
||||||
broken_block = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scan for CR0 dependency
|
|
||||||
// assume next block wants CR0 to be safe
|
|
||||||
bool wantsCR0 = true;
|
|
||||||
bool wantsCR1 = true;
|
|
||||||
bool wantsPS1 = true;
|
|
||||||
for (int i = num_inst - 1; i >= 0; i--)
|
|
||||||
{
|
|
||||||
if (code[i].outputCR0)
|
|
||||||
wantsCR0 = false;
|
|
||||||
if (code[i].outputCR1)
|
|
||||||
wantsCR1 = false;
|
|
||||||
if (code[i].outputPS1)
|
|
||||||
wantsPS1 = false;
|
|
||||||
wantsCR0 |= code[i].wantsCR0;
|
|
||||||
wantsCR1 |= code[i].wantsCR1;
|
|
||||||
wantsPS1 |= code[i].wantsPS1;
|
|
||||||
code[i].wantsCR0 = wantsCR0;
|
|
||||||
code[i].wantsCR1 = wantsCR1;
|
|
||||||
code[i].wantsPS1 = wantsPS1;
|
|
||||||
}
|
|
||||||
|
|
||||||
*realsize = num_inst;
|
|
||||||
// ...
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Most functions that are relevant to analyze should be
|
// Most functions that are relevant to analyze should be
|
||||||
// called by another function. Therefore, let's scan the
|
// called by another function. Therefore, let's scan the
|
||||||
// entire space for bl operations and find what functions
|
// entire space for bl operations and find what functions
|
||||||
|
@ -699,4 +403,317 @@ void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db)
|
||||||
leafSize, niceSize, unniceSize);
|
leafSize, niceSize, unniceSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Instruction reordering pass.
//
// Walks the block once and swaps an integer compare with the instruction that
// follows it whenever CanSwapAdjacentOps() allows it. This bubbles compares down
// towards the branch at the end of the block so that they can be merged.
//
// instructions: number of valid entries in `code`.
// code:         the block's instruction array; it is reordered in place.
void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
{
	// With fewer than three instructions there is no pair that excludes the final
	// instruction, so there is nothing to do. This guard also keeps the unsigned
	// expression `instructions - 2` below from wrapping around for 0 or 1.
	if (instructions < 3)
		return;

	// -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch.
	const u32 last_pair_start = instructions - 2;
	for (u32 i = 0; i < last_pair_start; ++i)
	{
		CodeOp &a = code[i];
		CodeOp &b = code[i + 1];

		// All integer compares can be reordered.
		// OPCD 10/11 and OPCD 31 with SUBOP10 0/32 are the compare encodings checked here.
		const bool is_compare =
			(a.inst.OPCD == 10 || a.inst.OPCD == 11) ||
			(a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32));

		if (is_compare && CanSwapAdjacentOps(a, b))
		{
			// Alright, let's bubble it down!
			CodeOp c = a;
			a = b;
			b = c;
		}
	}
}
|
||||||
|
|
||||||
|
// Fills in the per-instruction analysis fields of `code` and records the
// instruction's FPU/timer/register usage in the statistics attached to `block`.
//
// block:  block being analyzed; its m_gpa / m_fpa statistics are updated.
// code:   the instruction record to fill in (code->inst must already be set).
// opinfo: opcode table entry for code->inst; supplies the flags and the type.
// index:  position of the instruction in the block, recorded as the read/write
//         position in the register statistics.
void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index)
{
	const int flags = opinfo->flags;
	const auto &inst = code->inst;

	// Dependency flags start out cleared; they are filled in by a later pass.
	code->wantsCR0 = false;
	code->wantsCR1 = false;
	code->wantsPS1 = false;

	if (flags & FL_USE_FPU)
		block->m_fpa->any = true;

	if (flags & FL_TIMER)
		block->m_gpa->anyTimer = true;

	// Does the instruction output CR0?
	if (flags & FL_RC_BIT)
		code->outputCR0 = inst.hex & 1; //todo fix
	else if ((flags & FL_SET_CRn) && inst.CRFD == 0)
		code->outputCR0 = true;
	else
		code->outputCR0 = (flags & FL_SET_CR0) != 0;

	// Does the instruction output CR1?
	if (flags & FL_RC_BIT_F)
		code->outputCR1 = inst.hex & 1; //todo fix
	else if ((flags & FL_SET_CRn) && inst.CRFD == 1)
		code->outputCR1 = true;
	else
		code->outputCR1 = (flags & FL_SET_CR1) != 0;

	int numOut = 0;
	int numIn = 0;

	// Record a register as written by this instruction.
	auto add_output = [&](int reg)
	{
		code->regsOut[numOut++] = reg;
		block->m_gpa->SetOutputRegister(reg, index);
	};

	// Record a register as read by this instruction.
	auto add_input = [&](int reg)
	{
		code->regsIn[numIn++] = reg;
		block->m_gpa->SetInputRegister(reg, index);
	};

	if (flags & FL_OUT_A)
		add_output(inst.RA);
	if (flags & FL_OUT_D)
		add_output(inst.RD);
	if (flags & FL_OUT_S)
		add_output(inst.RS);

	// FL_IN_A0 only counts RA as an input when the RA field is non-zero.
	if ((flags & FL_IN_A) || ((flags & FL_IN_A0) && inst.RA != 0))
		add_input(inst.RA);
	if (flags & FL_IN_B)
		add_input(inst.RB);
	if (flags & FL_IN_C)
		add_input(inst.RC);
	if (flags & FL_IN_S)
		add_input(inst.RS);

	// Set remaining register slots as unused (-1)
	while (numIn < 3)
		code->regsIn[numIn++] = -1;
	while (numOut < 2)
		code->regsOut[numOut++] = -1;

	// Floating point register usage is not tracked here; mark every slot unused.
	code->fregsIn[0] = -1;
	code->fregsIn[1] = -1;
	code->fregsIn[2] = -1;
	code->fregOut = -1;

	// Only branches need extra handling; every other opcode type is left as is.
	if (opinfo->type == OPTYPE_BRANCH && inst.hex == 0x4e800020)
	{
		// For analysis purposes, we can assume that blr eats flags.
		code->outputCR0 = true;
		code->outputCR1 = true;
	}
}
|
||||||
|
|
||||||
|
// Analyzes the PPC code starting at `address` and fills `buffer` and `block`.
//
// Reads up to `blockSize` instructions, writing one CodeOp per instruction into
// buffer->codebuffer and accumulating statistics into block->m_stats, m_gpa and
// m_fpa. Decoding stops early at an instruction flagged FL_ENDBLOCK (unless it
// is treated as a conditional continue, see OPTION_CONDITIONAL_CONTINUE) or when
// an opcode reads back as 0.
//
// On return:
//   block->m_num_instructions  number of CodeOps written.
//   block->m_broken            true if no block-ending instruction was found
//                              (and at least one instruction was read), or if
//                              blockSize == 1.
//
// Returns the address the analysis stopped at: the address of the block-ending
// instruction if one was found, the address of the opcode that read as 0 if the
// fetch failed, otherwise the address following the last instruction read.
u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 blockSize)
{
	// Clear block stats
	memset(block->m_stats, 0, sizeof(BlockStats));

	// Clear register stats
	block->m_gpa->any = true;
	block->m_fpa->any = false;

	block->m_gpa->Clear();
	block->m_fpa->Clear();

	// Set the block's start address
	block->m_address = address;

	// Reset our block state
	block->m_broken = false;
	block->m_num_instructions = 0;

	CodeOp *code = buffer->codebuffer;

	bool found_exit = false;
	u32 return_address = 0;
	u32 numFollows = 0;
	u32 num_inst = 0;

	for (u32 i = 0; i < blockSize; ++i)
	{
		UGeckoInstruction inst = JitInterface::Read_Opcode_JIT(address);

		if (inst.hex != 0)
		{
			num_inst++;
			memset(&code[i], 0, sizeof(CodeOp));
			GekkoOPInfo *opinfo = GetOpInfo(inst);

			code[i].opinfo = opinfo;
			code[i].address = address;
			code[i].inst = inst;
			code[i].branchTo = -1;
			code[i].branchToIndex = -1;
			code[i].skip = false;
			block->m_stats->numCycles += opinfo->numCycles;

			SetInstructionStats(block, &code[i], opinfo, i);

			bool follow = false;
			u32 destination = 0;

			bool conditional_continue = false;

			// Do we inline leaf functions?
			if (HasOption(OPTION_LEAF_INLINE))
			{
				if (inst.OPCD == 18 && blockSize > 1)
				{
					//Is bx - should we inline? yes!
					if (inst.AA)
						destination = SignExt26(inst.LI << 2);
					else
						destination = address + SignExt26(inst.LI << 2);
					// Never follow a branch back to the start of this block.
					if (destination != block->m_address)
						follow = true;
				}
				else if (inst.OPCD == 19 && inst.SUBOP10 == 16 &&
					(inst.BO & (1 << 4)) && (inst.BO & (1 << 2)) &&
					return_address != 0)
				{
					// bclrx with unconditional branch = return
					follow = true;
					destination = return_address;
					return_address = 0;

					if (inst.LK)
						return_address = address + 4;
				}
				else if (inst.OPCD == 31 && inst.SUBOP10 == 467)
				{
					// mtspr
					const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
					if (index == SPR_LR) {
						// We give up to follow the return address
						// because we have to check the register usage.
						return_address = 0;
					}
				}

				// TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD.
				// If it is small, the performance will be down.
				// If it is big, the size of generated code will be big and
				// cache clearing will happen many times.
				// TODO: Investigate the reason why
				// "0" is fastest in some games, MP2 for example.
				if (numFollows > FUNCTION_FOLLOWING_THRESHOLD)
					follow = false;
			}

			if (HasOption(OPTION_CONDITIONAL_CONTINUE))
			{
				if (inst.OPCD == 16 &&
					((inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0))
				{
					// bcx with conditional branch
					conditional_continue = true;
				}
				else if (inst.OPCD == 19 && inst.SUBOP10 == 16 &&
					((inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0))
				{
					// bclrx with conditional branch
					conditional_continue = true;
				}
				else if (inst.OPCD == 3 ||
					(inst.OPCD == 31 && inst.SUBOP10 == 4))
				{
					// tw/twi tests and raises an exception
					conditional_continue = true;
				}
				else if (inst.OPCD == 19 && inst.SUBOP10 == 528 &&
					(inst.BO_2 & BO_DONT_CHECK_CONDITION) == 0)
				{
					// Rare bcctrx with conditional branch
					// Seen in NES games
					conditional_continue = true;
				}
			}

			if (!follow)
			{
				if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) //right now we stop early
				{
					// `address` is deliberately left pointing at the ending instruction.
					found_exit = true;
					break;
				}
				address += 4;
			}
			// XXX: We don't support inlining yet.
#if 0
			else
			{
				numFollows++;
				// We don't "code[i].skip = true" here
				// because bx may store a certain value to the link register.
				// Instead, we skip a part of bx in Jit**::bx().
				address = destination;
				merged_addresses[size_of_merged_addresses++] = address;
			}
#endif
		}
		else
		{
			// ISI exception or other critical memory exception occurred (game over)
			ERROR_LOG(DYNA_REC, "Instruction hex was 0!");
			break;
		}
	}

	// Publish the instruction count before the passes below. They key off
	// block->m_num_instructions, which was reset to 0 at the top of this function;
	// assigning it only at the very end would make both passes silently do nothing.
	block->m_num_instructions = num_inst;

	if (block->m_num_instructions > 1)
		ReorderInstructions(block->m_num_instructions, code);

	if ((!found_exit && num_inst > 0) || blockSize == 1)
	{
		// We couldn't find an exit
		block->m_broken = true;
	}

	// Scan for CR0 dependency
	// assume next block wants CR0 to be safe
	bool wantsCR0 = true;
	bool wantsCR1 = true;
	bool wantsPS1 = true;
	// Walk backwards: an instruction that outputs a flag ends the demand for it
	// for everything earlier, until an instruction that wants it is seen again.
	for (int i = block->m_num_instructions - 1; i >= 0; i--)
	{
		if (code[i].outputCR0)
			wantsCR0 = false;
		if (code[i].outputCR1)
			wantsCR1 = false;
		if (code[i].outputPS1)
			wantsPS1 = false;
		wantsCR0 |= code[i].wantsCR0;
		wantsCR1 |= code[i].wantsCR1;
		wantsPS1 |= code[i].wantsPS1;
		code[i].wantsCR0 = wantsCR0;
		code[i].wantsCR1 = wantsCR1;
		code[i].wantsPS1 = wantsPS1;
	}

	return address;
}
|
||||||
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -64,19 +64,32 @@ struct BlockRegStats
|
||||||
return std::max(lastRead[reg], lastWrite[reg]) -
|
return std::max(lastRead[reg], lastWrite[reg]) -
|
||||||
std::min(firstRead[reg], firstWrite[reg]);}
|
std::min(firstRead[reg], firstWrite[reg]);}
|
||||||
|
|
||||||
inline void SetInputRegister(int reg, short opindex) {
|
inline void SetInputRegister(int reg, short opindex)
|
||||||
|
{
|
||||||
if (firstRead[reg] == -1)
|
if (firstRead[reg] == -1)
|
||||||
firstRead[reg] = (short)(opindex);
|
firstRead[reg] = (short)(opindex);
|
||||||
lastRead[reg] = (short)(opindex);
|
lastRead[reg] = (short)(opindex);
|
||||||
numReads[reg]++;
|
numReads[reg]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void SetOutputRegister(int reg, short opindex) {
|
inline void SetOutputRegister(int reg, short opindex)
|
||||||
|
{
|
||||||
if (firstWrite[reg] == -1)
|
if (firstWrite[reg] == -1)
|
||||||
firstWrite[reg] = (short)(opindex);
|
firstWrite[reg] = (short)(opindex);
|
||||||
lastWrite[reg] = (short)(opindex);
|
lastWrite[reg] = (short)(opindex);
|
||||||
numWrites[reg]++;
|
numWrites[reg]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void Clear()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 32; ++i)
|
||||||
|
{
|
||||||
|
firstRead[i] = -1;
|
||||||
|
firstWrite[i] = -1;
|
||||||
|
numReads[i] = 0;
|
||||||
|
numWrites[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -94,10 +107,74 @@ public:
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa,
|
struct CodeBlock
|
||||||
BlockRegStats *fpa, bool &broken_block, CodeBuffer *buffer,
|
{
|
||||||
int blockSize, u32* merged_addresses,
|
// Beginning PPC address.
|
||||||
int capacity_of_merged_addresses, int& size_of_merged_addresses);
|
u32 m_address;
|
||||||
|
|
||||||
|
// Number of instructions
|
||||||
|
// Gives us the size of the block.
|
||||||
|
u32 m_num_instructions;
|
||||||
|
|
||||||
|
// Some basic statistics about the block.
|
||||||
|
BlockStats *m_stats;
|
||||||
|
|
||||||
|
// Register statistics about the block.
|
||||||
|
BlockRegStats *m_gpa, *m_fpa;
|
||||||
|
|
||||||
|
// Are we a broken block?
|
||||||
|
bool m_broken;
|
||||||
|
};
|
||||||
|
|
||||||
|
class PPCAnalyzer
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
void ReorderInstructions(u32 instructions, CodeOp *code);
|
||||||
|
void SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index);
|
||||||
|
|
||||||
|
// Options
|
||||||
|
u32 m_options;
|
||||||
|
public:
|
||||||
|
|
||||||
|
enum AnalystOption
|
||||||
|
{
|
||||||
|
// Conditional branch continuing
|
||||||
|
// If the JIT core supports conditional branches within the blocks
|
||||||
|
// Block will end on unconditional branch or other ENDBLOCK flagged instruction.
|
||||||
|
// Requires JIT support to be enabled.
|
||||||
|
OPTION_CONDITIONAL_CONTINUE = (1 << 0),
|
||||||
|
|
||||||
|
// If there is a unconditional branch that jumps to a leaf function then inline it.
|
||||||
|
// Might require JIT intervention to support it correctly.
|
||||||
|
// Requires JITBLock support for inlined code
|
||||||
|
// XXX: NOT COMPLETE
|
||||||
|
OPTION_LEAF_INLINE = (1 << 1),
|
||||||
|
|
||||||
|
// Complex blocks support jumping backwards on to themselves.
|
||||||
|
// Happens commonly in loops, pretty complex to support.
|
||||||
|
// May require register caches to use register usage metrics.
|
||||||
|
// XXX: NOT COMPLETE
|
||||||
|
OPTION_COMPLEX_BLOCK = (1 << 2),
|
||||||
|
|
||||||
|
// Similar to complex blocks.
|
||||||
|
// Instead of jumping backwards, this jumps forwards within the block.
|
||||||
|
// Requires JIT support to work.
|
||||||
|
// XXX: NOT COMPLETE
|
||||||
|
OPTION_FORWARD_JUMP = (1 << 3),
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
PPCAnalyzer() : m_options(0) {}
|
||||||
|
|
||||||
|
// Option setting/getting
|
||||||
|
void SetOption(AnalystOption option) { m_options |= option; }
|
||||||
|
void ClearOption(AnalystOption option) { m_options &= ~(option); }
|
||||||
|
bool HasOption(AnalystOption option) { return !!(m_options & option); }
|
||||||
|
|
||||||
|
u32 Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 blockSize);
|
||||||
|
};
|
||||||
|
|
||||||
void LogFunctionCall(u32 addr);
|
void LogFunctionCall(u32 addr);
|
||||||
void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db);
|
void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db);
|
||||||
bool AnalyzeFunction(u32 startAddr, Symbol &func, int max_size = 0);
|
bool AnalyzeFunction(u32 startAddr, Symbol &func, int max_size = 0);
|
||||||
|
|
|
@ -129,8 +129,7 @@ void CJitWindow::Compare(u32 em_address)
|
||||||
|
|
||||||
const u8 *code = (const u8 *)jit->GetBlockCache()->GetCompiledCodeFromBlock(block_num);
|
const u8 *code = (const u8 *)jit->GetBlockCache()->GetCompiledCodeFromBlock(block_num);
|
||||||
u64 disasmPtr = (u64)code;
|
u64 disasmPtr = (u64)code;
|
||||||
int size = block->codeSize;
|
const u8 *end = code + block->codeSize;
|
||||||
const u8 *end = code + size;
|
|
||||||
char *sptr = (char*)xDis;
|
char *sptr = (char*)xDis;
|
||||||
|
|
||||||
int num_x86_instructions = 0;
|
int num_x86_instructions = 0;
|
||||||
|
@ -154,14 +153,17 @@ void CJitWindow::Compare(u32 em_address)
|
||||||
PPCAnalyst::BlockStats st;
|
PPCAnalyst::BlockStats st;
|
||||||
PPCAnalyst::BlockRegStats gpa;
|
PPCAnalyst::BlockRegStats gpa;
|
||||||
PPCAnalyst::BlockRegStats fpa;
|
PPCAnalyst::BlockRegStats fpa;
|
||||||
bool broken_block = false;
|
PPCAnalyst::CodeBlock code_block;
|
||||||
u32 merged_addresses[32];
|
PPCAnalyst::PPCAnalyzer analyzer;
|
||||||
const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]);
|
|
||||||
int size_of_merged_addresses;
|
code_block.m_stats = &st;
|
||||||
if (PPCAnalyst::Flatten(ppc_addr, &size, &st, &gpa, &fpa, broken_block, &code_buffer, size, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses) != 0xffffffff)
|
code_block.m_gpa = &gpa;
|
||||||
|
code_block.m_fpa = &fpa;
|
||||||
|
|
||||||
|
if (analyzer.Analyze(ppc_addr, &code_block, &code_buffer, block->codeSize) != 0xFFFFFFFF)
|
||||||
{
|
{
|
||||||
sptr = (char*)xDis;
|
sptr = (char*)xDis;
|
||||||
for (int i = 0; i < size; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
const PPCAnalyst::CodeOp &op = code_buffer.codebuffer[i];
|
const PPCAnalyst::CodeOp &op = code_buffer.codebuffer[i];
|
||||||
char temp[256];
|
char temp[256];
|
||||||
|
@ -181,9 +183,9 @@ void CJitWindow::Compare(u32 em_address)
|
||||||
sptr += sprintf(sptr, "%i estimated cycles\n", st.numCycles);
|
sptr += sprintf(sptr, "%i estimated cycles\n", st.numCycles);
|
||||||
|
|
||||||
sptr += sprintf(sptr, "Num instr: PPC: %i x86: %i (blowup: %i%%)\n",
|
sptr += sprintf(sptr, "Num instr: PPC: %i x86: %i (blowup: %i%%)\n",
|
||||||
size, num_x86_instructions, 100 * (num_x86_instructions / size - 1));
|
code_block.m_num_instructions, num_x86_instructions, 100 * (num_x86_instructions / code_block.m_num_instructions - 1));
|
||||||
sptr += sprintf(sptr, "Num bytes: PPC: %i x86: %i (blowup: %i%%)\n",
|
sptr += sprintf(sptr, "Num bytes: PPC: %i x86: %i (blowup: %i%%)\n",
|
||||||
size * 4, block->codeSize, 100 * (block->codeSize / (4 * size) - 1));
|
code_block.m_num_instructions * 4, block->codeSize, 100 * (block->codeSize / (4 * code_block.m_num_instructions) - 1));
|
||||||
|
|
||||||
ppc_box->SetValue(StrToWxStr((char*)xDis));
|
ppc_box->SetValue(StrToWxStr((char*)xDis));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue