JIT: load registers if they're going to be used later in the block
This commit is contained in:
parent
6bff99fa75
commit
8fe730194b
|
@ -727,6 +727,26 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
SetJumpTarget(noBreakpoint);
|
SetJumpTarget(noBreakpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we have an input register that is going to be used again, load it pre-emptively,
|
||||||
|
// even if the instruction doesn't strictly need it in a register, to avoid redundant
|
||||||
|
// loads later. Of course, don't do this if we're already out of registers.
|
||||||
|
// As a bit of a heuristic, make sure we have at least one register left over for the
|
||||||
|
// output, which needs to be bound in the actual instruction compilation.
|
||||||
|
// TODO: make this smarter in the case that we're actually register-starved, i.e.
|
||||||
|
// prioritize the more important registers.
|
||||||
|
for (int k = 0; k < 3 && gpr.NumFreeRegisters() >= 2; k++)
|
||||||
|
{
|
||||||
|
int reg = ops[i].regsIn[k];
|
||||||
|
if (reg >= 0 && (ops[i].gprInUse & (1 << reg)) && !gpr.R(reg).IsImm())
|
||||||
|
gpr.BindToRegister(reg, true, false);
|
||||||
|
}
|
||||||
|
for (int k = 0; k < 4 && fpr.NumFreeRegisters() >= 2; k++)
|
||||||
|
{
|
||||||
|
int reg = ops[i].fregsIn[k];
|
||||||
|
if (reg >= 0 && (ops[i].fprInXmm & (1 << reg)))
|
||||||
|
fpr.BindToRegister(reg, true, false);
|
||||||
|
}
|
||||||
|
|
||||||
Jit64Tables::CompileInstruction(ops[i]);
|
Jit64Tables::CompileInstruction(ops[i]);
|
||||||
|
|
||||||
// If we have a register that will never be used again, flush it.
|
// If we have a register that will never be used again, flush it.
|
||||||
|
|
|
@ -108,7 +108,22 @@ X64Reg RegCache::GetFreeXReg()
|
||||||
return (X64Reg)xr;
|
return (X64Reg)xr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//Okay, not found :( Force grab one
|
// Okay, not found :( Force grab one!
|
||||||
|
|
||||||
|
// First, see if we have any registers that are only going to be used for a float store.
|
||||||
|
// These go through GPRs, so the cost of tossing them back into memory is lower than anything else.
|
||||||
|
for (size_t i = 0; i < aCount; i++)
|
||||||
|
{
|
||||||
|
X64Reg xr = (X64Reg)aOrder[i];
|
||||||
|
if (xregs[xr].locked)
|
||||||
|
continue;
|
||||||
|
size_t preg = xregs[xr].ppcReg;
|
||||||
|
if (!regs[preg].locked && !(jit->js.op->fprInXmm & (1 << preg)))
|
||||||
|
{
|
||||||
|
StoreFromRegister(preg);
|
||||||
|
return xr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//TODO - add a pass to grab xregs whose ppcreg is not used in the next 3 instructions
|
//TODO - add a pass to grab xregs whose ppcreg is not used in the next 3 instructions
|
||||||
u32 last_used = 0xFFFFFFFF;
|
u32 last_used = 0xFFFFFFFF;
|
||||||
|
@ -366,3 +381,14 @@ void RegCache::Flush(FlushMode mode)
|
||||||
|
|
||||||
cur_use_quantum = 0;
|
cur_use_quantum = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns how many registers in this cache's allocation order are currently
// available, i.e. whose xregs entry is both unlocked and flagged free.
// Registers that are not part of the allocation order are never counted.
int RegCache::NumFreeRegisters()
|
||||||
|
{
|
||||||
|
int count = 0;
|
||||||
|
size_t aCount;
|
||||||
|
// aCount is passed in uninitialized and then used as the loop bound, so
// GetAllocationOrder presumably sets it to the length of the returned array
// (NOTE(review): confirm against GetAllocationOrder's signature).
const int* aOrder = GetAllocationOrder(aCount);
|
||||||
|
for (size_t i = 0; i < aCount; i++)
|
||||||
|
// A locked register is skipped even when it is flagged free.
if (!xregs[aOrder[i]].locked && xregs[aOrder[i]].free)
|
||||||
|
count++;
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
|
@ -123,6 +123,7 @@ public:
|
||||||
|
|
||||||
|
|
||||||
Gen::X64Reg GetFreeXReg();
|
Gen::X64Reg GetFreeXReg();
|
||||||
|
int NumFreeRegisters();
|
||||||
};
|
};
|
||||||
|
|
||||||
class GPRRegCache : public RegCache
|
class GPRRegCache : public RegCache
|
||||||
|
|
|
@ -802,6 +802,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
bool wantsCA = true;
|
bool wantsCA = true;
|
||||||
u32 fregInUse = 0;
|
u32 fregInUse = 0;
|
||||||
u32 regInUse = 0;
|
u32 regInUse = 0;
|
||||||
|
u32 fregInXmm = 0;
|
||||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||||
{
|
{
|
||||||
bool opWantsCR0 = code[i].wantsCR0;
|
bool opWantsCR0 = code[i].wantsCR0;
|
||||||
|
@ -822,6 +823,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
wantsCA &= !code[i].outputCA || opWantsCA;
|
wantsCA &= !code[i].outputCA || opWantsCA;
|
||||||
code[i].gprInUse = regInUse;
|
code[i].gprInUse = regInUse;
|
||||||
code[i].fprInUse = fregInUse;
|
code[i].fprInUse = fregInUse;
|
||||||
|
code[i].fprInXmm = fregInXmm;
|
||||||
// TODO: if there's no possible endblocks or exceptions in between, tell the regcache
|
// TODO: if there's no possible endblocks or exceptions in between, tell the regcache
|
||||||
// we can throw away a register if it's going to be overwritten later.
|
// we can throw away a register if it's going to be overwritten later.
|
||||||
for (int j = 0; j < 3; j++)
|
for (int j = 0; j < 3; j++)
|
||||||
|
@ -829,7 +831,11 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
regInUse |= 1 << code[i].regsIn[j];
|
regInUse |= 1 << code[i].regsIn[j];
|
||||||
for (int j = 0; j < 4; j++)
|
for (int j = 0; j < 4; j++)
|
||||||
if (code[i].fregsIn[j] >= 0)
|
if (code[i].fregsIn[j] >= 0)
|
||||||
|
{
|
||||||
fregInUse |= 1 << code[i].fregsIn[j];
|
fregInUse |= 1 << code[i].fregsIn[j];
|
||||||
|
if (strncmp(code[i].opinfo->opname, "stfd", 4))
|
||||||
|
fregInXmm |= 1 << code[i].fregsIn[j];
|
||||||
|
}
|
||||||
// For now, we need to count output registers as "used" though; otherwise the flush
|
// For now, we need to count output registers as "used" though; otherwise the flush
|
||||||
// will result in a redundant store (e.g. store to regcache, then store again to
|
// will result in a redundant store (e.g. store to regcache, then store again to
|
||||||
// the same location later).
|
// the same location later).
|
||||||
|
@ -837,7 +843,11 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
if (code[i].regsOut[j] >= 0)
|
if (code[i].regsOut[j] >= 0)
|
||||||
regInUse |= 1 << code[i].regsOut[j];
|
regInUse |= 1 << code[i].regsOut[j];
|
||||||
if (code[i].fregOut >= 0)
|
if (code[i].fregOut >= 0)
|
||||||
|
{
|
||||||
fregInUse |= 1 << code[i].fregOut;
|
fregInUse |= 1 << code[i].fregOut;
|
||||||
|
if (strncmp(code[i].opinfo->opname, "stfd", 4))
|
||||||
|
fregInXmm |= 1 << code[i].fregOut;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return address;
|
return address;
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,9 @@ struct CodeOp //16B
|
||||||
// which registers are still needed after this instruction in this block
|
// which registers are still needed after this instruction in this block
|
||||||
u32 gprInUse;
|
u32 gprInUse;
|
||||||
u32 fprInUse;
|
u32 fprInUse;
|
||||||
|
// we do double stores from GPRs, so we don't want to load a PowerPC floating point register into
|
||||||
|
// an XMM only to move it again to a GPR afterwards.
|
||||||
|
u32 fprInXmm;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BlockStats
|
struct BlockStats
|
||||||
|
|
Loading…
Reference in New Issue