Merge pull request #4123 from hthh/improve-const-stores
Jit: FIFO optimization improvements
This commit is contained in:
commit
cf3513f7fc
|
@ -144,7 +144,7 @@ void CachedInterpreter::Jit(u32 address)
|
|||
|
||||
js.blockStart = PC;
|
||||
js.firstFPInstructionFound = false;
|
||||
js.fifoBytesThisBlock = 0;
|
||||
js.fifoBytesSinceCheck = 0;
|
||||
js.downcountAmount = 0;
|
||||
js.curBlock = b;
|
||||
|
||||
|
|
|
@ -349,7 +349,7 @@ bool Jit64::Cleanup()
|
|||
{
|
||||
bool did_something = false;
|
||||
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
|
||||
{
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(GPFifo::FastCheckGatherPipe);
|
||||
|
@ -597,7 +597,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
|||
js.firstFPInstructionFound = false;
|
||||
js.isLastInstruction = false;
|
||||
js.blockStart = em_address;
|
||||
js.fifoBytesThisBlock = 0;
|
||||
js.fifoBytesSinceCheck = 0;
|
||||
js.mustCheckFifo = false;
|
||||
js.curBlock = b;
|
||||
js.numLoadStoreInst = 0;
|
||||
|
@ -690,6 +690,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
|||
}
|
||||
}
|
||||
|
||||
if (js.noSpeculativeConstantsAddresses.find(js.blockStart) ==
|
||||
js.noSpeculativeConstantsAddresses.end())
|
||||
{
|
||||
IntializeSpeculativeConstants();
|
||||
}
|
||||
|
||||
// Translate instructions
|
||||
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||
{
|
||||
|
@ -724,10 +730,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
|||
js.fifoWriteAddresses.find(ops[i].address) != js.fifoWriteAddresses.end();
|
||||
|
||||
// Gather pipe writes using an immediate address are explicitly tracked.
|
||||
if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo))
|
||||
if (jo.optimizeGatherPipe && (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo))
|
||||
{
|
||||
if (js.fifoBytesThisBlock >= 32)
|
||||
js.fifoBytesThisBlock -= 32;
|
||||
js.fifoBytesSinceCheck = 0;
|
||||
js.mustCheckFifo = false;
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
@ -967,3 +972,39 @@ void Jit64::EnableOptimization()
|
|||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
|
||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
|
||||
}
|
||||
|
||||
void Jit64::IntializeSpeculativeConstants()
|
||||
{
|
||||
// If the block depends on an input register which looks like a gather pipe or MMIO related
|
||||
// constant, guess that it is actually a constant input, and specialize the block based on this
|
||||
// assumption. This happens when there are branches in code writing to the gather pipe, but only
|
||||
// the first block loads the constant.
|
||||
// Insert a check at the start of the block to verify that the value is actually constant.
|
||||
// This can save a lot of backpatching and optimize gather pipe writes in more places.
|
||||
const u8* target = nullptr;  // shared mismatch stub in far code; emitted lazily on first match
|
||||
for (auto i : code_block.m_gpr_inputs)  // GPRs this block reads before defining them
|
||||
{
|
||||
u32 compileTimeValue = PowerPC::ppcState.gpr[i];  // value the register holds at JIT time
|
||||
if (PowerPC::IsOptimizableGatherPipeWrite(compileTimeValue) ||
|
||||
PowerPC::IsOptimizableGatherPipeWrite(compileTimeValue - 0x8000) ||  // NOTE(review): presumably a base pointer biased by 0x8000 - confirm
|
||||
compileTimeValue == 0xCC000000)  // NOTE(review): presumably the MMIO base address - confirm
|
||||
{
|
||||
if (!target)  // emit the stub only once, however many registers are specialized
|
||||
{
|
||||
SwitchToFarCode();
|
||||
target = GetCodePtr();
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.blockStart));  // store this block's start address into PC
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunctionC(  // CompileExceptionCheck maps this type to js.noSpeculativeConstantsAddresses
|
||||
reinterpret_cast<void*>(&JitInterface::CompileExceptionCheck),
|
||||
static_cast<u32>(JitInterface::ExceptionType::EXCEPTIONS_SPECULATIVE_CONSTANTS));
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
JMP(asm_routines.dispatcher, true);  // leave the block through the dispatcher
|
||||
SwitchToNearCode();
|
||||
}
|
||||
CMP(32, PPCSTATE(gpr[i]), Imm32(compileTimeValue));  // runtime value vs. value assumed at compile time
|
||||
J_CC(CC_NZ, target);  // not equal: the guess was wrong, take the stub
|
||||
gpr.SetImmediate32(i, compileTimeValue, false);  // dirty=false: PPCSTATE already holds this value, no flush needed
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,6 +68,8 @@ public:
|
|||
BitSet32 CallerSavedRegistersInUse() const;
|
||||
BitSet8 ComputeStaticGQRs(const PPCAnalyst::CodeBlock&) const;
|
||||
|
||||
void IntializeSpeculativeConstants();
|
||||
|
||||
JitBlockCache* GetBlockCache() override { return &blocks; }
|
||||
void Trace();
|
||||
|
||||
|
|
|
@ -227,10 +227,12 @@ void RegCache::DiscardRegContentsIfCached(size_t preg)
|
|||
}
|
||||
}
|
||||
|
||||
void GPRRegCache::SetImmediate32(size_t preg, u32 immValue)
|
||||
void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty)
|
||||
{
|
||||
// "dirty" can be false to avoid redundantly flushing an immediate when
|
||||
// processing speculative constants.
|
||||
DiscardRegContentsIfCached(preg);
|
||||
regs[preg].away = true;
|
||||
regs[preg].away |= dirty;
|
||||
regs[preg].location = Imm32(immValue);
|
||||
}
|
||||
|
||||
|
@ -282,10 +284,7 @@ void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
|
|||
|
||||
void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
|
||||
{
|
||||
if (!regs[i].away && regs[i].location.IsImm())
|
||||
PanicAlert("Bad immediate");
|
||||
|
||||
if (!regs[i].away || (regs[i].away && regs[i].location.IsImm()))
|
||||
if (!regs[i].away || regs[i].location.IsImm())
|
||||
{
|
||||
X64Reg xr = GetFreeXReg();
|
||||
if (xregs[xr].dirty)
|
||||
|
@ -294,7 +293,7 @@ void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
|
|||
PanicAlert("GetFreeXReg returned locked register");
|
||||
xregs[xr].free = false;
|
||||
xregs[xr].ppcReg = i;
|
||||
xregs[xr].dirty = makeDirty || regs[i].location.IsImm();
|
||||
xregs[xr].dirty = makeDirty || regs[i].away;
|
||||
if (doLoad)
|
||||
LoadRegister(i, xr);
|
||||
for (size_t j = 0; j < regs.size(); j++)
|
||||
|
|
|
@ -161,7 +161,7 @@ public:
|
|||
void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
|
||||
Gen::OpArg GetDefaultLocation(size_t reg) const override;
|
||||
const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
|
||||
void SetImmediate32(size_t preg, u32 immValue);
|
||||
void SetImmediate32(size_t preg, u32 immValue, bool dirty = true);
|
||||
BitSet32 GetRegUtilization() override;
|
||||
BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
|
||||
};
|
||||
|
|
|
@ -592,6 +592,6 @@ void Jit64::eieio(UGeckoInstruction inst)
|
|||
// optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block,
|
||||
// which is generally safe. However postponing FIFO writes across eieio instructions
|
||||
// is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection).
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
|
||||
js.mustCheckFifo = true;
|
||||
}
|
||||
|
|
|
@ -515,7 +515,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
|||
{
|
||||
js.isLastInstruction = false;
|
||||
js.blockStart = em_address;
|
||||
js.fifoBytesThisBlock = 0;
|
||||
js.fifoBytesSinceCheck = 0;
|
||||
js.curBlock = b;
|
||||
jit->js.numLoadStoreInst = 0;
|
||||
jit->js.numFloatingPointInst = 0;
|
||||
|
|
|
@ -170,7 +170,7 @@ void JitArm64::Break(UGeckoInstruction inst)
|
|||
|
||||
void JitArm64::Cleanup()
|
||||
{
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
|
||||
{
|
||||
gpr.Lock(W0);
|
||||
MOVP2R(X0, &GPFifo::FastCheckGatherPipe);
|
||||
|
@ -424,7 +424,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
|
|||
js.firstFPInstructionFound = false;
|
||||
js.assumeNoPairedQuantize = false;
|
||||
js.blockStart = em_address;
|
||||
js.fifoBytesThisBlock = 0;
|
||||
js.fifoBytesSinceCheck = 0;
|
||||
js.mustCheckFifo = false;
|
||||
js.downcountAmount = 0;
|
||||
js.skipInstructions = 0;
|
||||
|
@ -512,10 +512,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
|
|||
bool gatherPipeIntCheck =
|
||||
jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end();
|
||||
|
||||
if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo))
|
||||
if (jo.optimizeGatherPipe && (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo))
|
||||
{
|
||||
if (js.fifoBytesThisBlock >= 32)
|
||||
js.fifoBytesThisBlock -= 32;
|
||||
js.fifoBytesSinceCheck = 0;
|
||||
js.mustCheckFifo = false;
|
||||
|
||||
gpr.Lock(W30);
|
||||
|
|
|
@ -333,7 +333,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
|||
}
|
||||
ADD(W0, W0, accessSize >> 3);
|
||||
STR(INDEX_UNSIGNED, W0, X30, count_off);
|
||||
js.fifoBytesThisBlock += accessSize >> 3;
|
||||
js.fifoBytesSinceCheck += accessSize >> 3;
|
||||
|
||||
if (accessSize != 8)
|
||||
gpr.Unlock(WA);
|
||||
|
@ -833,6 +833,6 @@ void JitArm64::eieio(UGeckoInstruction inst)
|
|||
// optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block,
|
||||
// which is generally safe. However postponing FIFO writes across eieio instructions
|
||||
// is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection).
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
|
||||
js.mustCheckFifo = true;
|
||||
}
|
||||
|
|
|
@ -442,7 +442,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
|||
|
||||
ADD(W0, W0, accessSize >> 3);
|
||||
STR(INDEX_UNSIGNED, W0, X30, count_off);
|
||||
js.fifoBytesThisBlock += accessSize >> 3;
|
||||
js.fifoBytesSinceCheck += accessSize >> 3;
|
||||
|
||||
if (update)
|
||||
{
|
||||
|
|
|
@ -104,7 +104,7 @@ protected:
|
|||
u8* trampolineExceptionHandler;
|
||||
|
||||
bool mustCheckFifo;
|
||||
int fifoBytesThisBlock;
|
||||
int fifoBytesSinceCheck;
|
||||
|
||||
PPCAnalyst::BlockStats st;
|
||||
PPCAnalyst::BlockRegStats gpa;
|
||||
|
@ -116,6 +116,7 @@ protected:
|
|||
|
||||
std::unordered_set<u32> fifoWriteAddresses;
|
||||
std::unordered_set<u32> pairedQuantizeAddresses;
|
||||
std::unordered_set<u32> noSpeculativeConstantsAddresses;
|
||||
};
|
||||
|
||||
PPCAnalyst::CodeBlock code_block;
|
||||
|
|
|
@ -462,7 +462,7 @@ void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize)
|
|||
CALL(jit->GetAsmRoutines()->fifoDirectWrite64);
|
||||
break;
|
||||
}
|
||||
jit->js.fifoBytesThisBlock += accessSize >> 3;
|
||||
jit->js.fifoBytesSinceCheck += accessSize >> 3;
|
||||
}
|
||||
|
||||
bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
|
||||
|
|
|
@ -260,6 +260,9 @@ void CompileExceptionCheck(ExceptionType type)
|
|||
case ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE:
|
||||
exception_addresses = &jit->js.pairedQuantizeAddresses;
|
||||
break;
|
||||
case ExceptionType::EXCEPTIONS_SPECULATIVE_CONSTANTS:
|
||||
exception_addresses = &jit->js.noSpeculativeConstantsAddresses;
|
||||
break;
|
||||
}
|
||||
|
||||
if (PC != 0 && (exception_addresses->find(PC)) == (exception_addresses->end()))
|
||||
|
|
|
@ -15,7 +15,8 @@ namespace JitInterface
|
|||
enum class ExceptionType
|
||||
{
|
||||
EXCEPTIONS_FIFO_WRITE,
|
||||
EXCEPTIONS_PAIRED_QUANTIZE
|
||||
EXCEPTIONS_PAIRED_QUANTIZE,
|
||||
EXCEPTIONS_SPECULATIVE_CONSTANTS
|
||||
};
|
||||
|
||||
void DoState(PointerWrap& p);
|
||||
|
|
|
@ -843,10 +843,13 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
|
|||
}
|
||||
|
||||
// Forward scan, for flags that need the other direction for calculation.
|
||||
BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe;
|
||||
BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe, gprDefined, gprBlockInputs;
|
||||
BitSet8 gqrUsed, gqrModified;
|
||||
for (u32 i = 0; i < block->m_num_instructions; i++)
|
||||
{
|
||||
gprBlockInputs |= code[i].regsIn & ~gprDefined;
|
||||
gprDefined |= code[i].regsOut;
|
||||
|
||||
code[i].fprIsSingle = fprIsSingle;
|
||||
code[i].fprIsDuplicated = fprIsDuplicated;
|
||||
code[i].fprIsStoreSafe = fprIsStoreSafe;
|
||||
|
@ -899,6 +902,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
|
|||
}
|
||||
block->m_gqr_used = gqrUsed;
|
||||
block->m_gqr_modified = gqrModified;
|
||||
block->m_gpr_inputs = gprBlockInputs;
|
||||
return address;
|
||||
}
|
||||
|
||||
|
|
|
@ -154,6 +154,9 @@ struct CodeBlock
|
|||
|
||||
// Which GQRs this block modifies, if any.
|
||||
BitSet8 m_gqr_modified;
|
||||
|
||||
// Which GPRs this block reads from before defining, if any.
|
||||
BitSet32 m_gpr_inputs;
|
||||
};
|
||||
|
||||
class PPCAnalyzer
|
||||
|
|
Loading…
Reference in New Issue