Merge pull request #4123 from hthh/improve-const-stores
Jit: FIFO optimization improvements
commit cf3513f7fc
@@ -144,7 +144,7 @@ void CachedInterpreter::Jit(u32 address)
   js.blockStart = PC;
   js.firstFPInstructionFound = false;
-  js.fifoBytesThisBlock = 0;
+  js.fifoBytesSinceCheck = 0;
   js.downcountAmount = 0;
   js.curBlock = b;
@@ -349,7 +349,7 @@ bool Jit64::Cleanup()
 {
   bool did_something = false;

-  if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
+  if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
   {
     ABI_PushRegistersAndAdjustStack({}, 0);
     ABI_CallFunction(GPFifo::FastCheckGatherPipe);
@@ -597,7 +597,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
   js.firstFPInstructionFound = false;
   js.isLastInstruction = false;
   js.blockStart = em_address;
-  js.fifoBytesThisBlock = 0;
+  js.fifoBytesSinceCheck = 0;
   js.mustCheckFifo = false;
   js.curBlock = b;
   js.numLoadStoreInst = 0;
@@ -690,6 +690,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
     }
   }

+  if (js.noSpeculativeConstantsAddresses.find(js.blockStart) ==
+      js.noSpeculativeConstantsAddresses.end())
+  {
+    IntializeSpeculativeConstants();
+  }
+
   // Translate instructions
   for (u32 i = 0; i < code_block.m_num_instructions; i++)
   {
@@ -724,10 +730,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
         js.fifoWriteAddresses.find(ops[i].address) != js.fifoWriteAddresses.end();

     // Gather pipe writes using an immediate address are explicitly tracked.
-    if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo))
+    if (jo.optimizeGatherPipe && (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo))
     {
-      if (js.fifoBytesThisBlock >= 32)
-        js.fifoBytesThisBlock -= 32;
+      js.fifoBytesSinceCheck = 0;
       js.mustCheckFifo = false;
       BitSet32 registersInUse = CallerSavedRegistersInUse();
       ABI_PushRegistersAndAdjustStack(registersInUse, 0);
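The hunk above is the core of the rename: when a mid-block check is emitted, the counter now resets to zero instead of subtracting one 32-byte burst, so it genuinely counts bytes written since the last emitted check. A minimal standalone model of this bookkeeping, under the assumption that the emitted check flushes everything pending (FifoState, TrackWrite, and EmitGatherPipeCheck are hypothetical names, not Dolphin's API):

```cpp
#include <cstdio>

struct FifoState
{
  int fifoBytesSinceCheck = 0;
  bool mustCheckFifo = false;
};

static void EmitGatherPipeCheck(FifoState& js)
{
  // ... here the JIT emits a call to GPFifo::FastCheckGatherPipe ...
  js.fifoBytesSinceCheck = 0;  // the check flushes all pending bytes
  js.mustCheckFifo = false;
}

static void TrackWrite(FifoState& js, int accessSize)
{
  js.fifoBytesSinceCheck += accessSize >> 3;  // accessSize is in bits
  if (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo)
    EmitGatherPipeCheck(js);
}

int main()
{
  FifoState js;
  TrackWrite(js, 64);  // 8 bytes
  TrackWrite(js, 64);  // 16 bytes
  TrackWrite(js, 64);  // 24 bytes
  TrackWrite(js, 64);  // 32 bytes reached -> check emitted, counter reset
  std::printf("%d\n", js.fifoBytesSinceCheck);  // prints 0
  return 0;
}
```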
@@ -967,3 +972,39 @@ void Jit64::EnableOptimization()
   analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
   analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
 }
+
+void Jit64::IntializeSpeculativeConstants()
+{
+  // If the block depends on an input register which looks like a gather pipe or MMIO related
+  // constant, guess that it is actually a constant input, and specialize the block based on this
+  // assumption. This happens when there are branches in code writing to the gather pipe, but only
+  // the first block loads the constant.
+  // Insert a check at the start of the block to verify that the value is actually constant.
+  // This can save a lot of backpatching and optimize gather pipe writes in more places.
+  const u8* target = nullptr;
+  for (auto i : code_block.m_gpr_inputs)
+  {
+    u32 compileTimeValue = PowerPC::ppcState.gpr[i];
+    if (PowerPC::IsOptimizableGatherPipeWrite(compileTimeValue) ||
+        PowerPC::IsOptimizableGatherPipeWrite(compileTimeValue - 0x8000) ||
+        compileTimeValue == 0xCC000000)
+    {
+      if (!target)
+      {
+        SwitchToFarCode();
+        target = GetCodePtr();
+        MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
+        ABI_PushRegistersAndAdjustStack({}, 0);
+        ABI_CallFunctionC(
+            reinterpret_cast<void*>(&JitInterface::CompileExceptionCheck),
+            static_cast<u32>(JitInterface::ExceptionType::EXCEPTIONS_SPECULATIVE_CONSTANTS));
+        ABI_PopRegistersAndAdjustStack({}, 0);
+        JMP(asm_routines.dispatcher, true);
+        SwitchToNearCode();
+      }
+      CMP(32, PPCSTATE(gpr[i]), Imm32(compileTimeValue));
+      J_CC(CC_NZ, target);
+      gpr.SetImmediate32(i, compileTimeValue, false);
+    }
+  }
+}
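The comment block in IntializeSpeculativeConstants describes guarded constant specialization: guess that a register holding a gather-pipe-looking address is constant, bake that value into the block, and protect the guess with a cheap compare that bails out to CompileExceptionCheck when it fails. A toy interpreter-style sketch of that control flow, with hypothetical types (the real code emits x86 via CMP/J_CC rather than branching in C++):

```cpp
#include <cstdint>
#include <cstdio>

enum class BlockResult { Ran, GuessFailed };

struct SpecializedBlock
{
  int guardedReg;         // which GPR the guess applies to
  uint32_t guessedValue;  // compile-time value baked into the block
};

static BlockResult RunBlock(const SpecializedBlock& b, const uint32_t gpr[32])
{
  // Equivalent of the emitted CMP + J_CC(CC_NZ, target): if the register no
  // longer holds the guessed value, bail out so the block can be recompiled
  // without the speculative constant.
  if (gpr[b.guardedReg] != b.guessedValue)
    return BlockResult::GuessFailed;
  // ... run code specialized on gpr[guardedReg] == guessedValue ...
  return BlockResult::Ran;
}

int main()
{
  uint32_t gpr[32] = {};
  gpr[3] = 0xCC008000;  // looks like a gather pipe pointer
  SpecializedBlock block{3, gpr[3]};
  std::printf("%d\n", RunBlock(block, gpr) == BlockResult::Ran);          // 1
  gpr[3] = 0x80001234;  // guess invalidated
  std::printf("%d\n", RunBlock(block, gpr) == BlockResult::GuessFailed);  // 1
  return 0;
}
```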
@@ -68,6 +68,8 @@ public:
   BitSet32 CallerSavedRegistersInUse() const;
   BitSet8 ComputeStaticGQRs(const PPCAnalyst::CodeBlock&) const;

+  void IntializeSpeculativeConstants();
+
   JitBlockCache* GetBlockCache() override { return &blocks; }
   void Trace();
@@ -227,10 +227,12 @@ void RegCache::DiscardRegContentsIfCached(size_t preg)
   }
 }

-void GPRRegCache::SetImmediate32(size_t preg, u32 immValue)
+void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty)
 {
+  // "dirty" can be false to avoid redundantly flushing an immediate when
+  // processing speculative constants.
   DiscardRegContentsIfCached(preg);
-  regs[preg].away = true;
+  regs[preg].away |= dirty;
   regs[preg].location = Imm32(immValue);
 }
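The new dirty parameter exists because a speculative constant is already known to match the value stored in PPCSTATE (the emitted guard verified it), so flushing the immediate at block exit would be a redundant store. A minimal model of that distinction, with hypothetical names:

```cpp
#include <cstdint>
#include <cstdio>

struct CachedReg
{
  bool away = false;  // does the cached value differ from its PPCSTATE home?
  uint32_t imm = 0;
};

static void SetImmediate32(CachedReg& r, uint32_t value, bool dirty = true)
{
  r.away |= dirty;  // dirty == false: memory already holds 'value'
  r.imm = value;
}

static int FlushCost(const CachedReg& r)
{
  return r.away ? 1 : 0;  // only dirty immediates cost a store at block exit
}

int main()
{
  CachedReg normal, speculative;
  SetImmediate32(normal, 42);                      // guest wrote a new value
  SetImmediate32(speculative, 0xCC008000, false);  // guard verified the value
  std::printf("%d %d\n", FlushCost(normal), FlushCost(speculative));  // 1 0
  return 0;
}
```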
@@ -282,10 +284,7 @@ void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)

 void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
 {
-  if (!regs[i].away && regs[i].location.IsImm())
-    PanicAlert("Bad immediate");
-
-  if (!regs[i].away || (regs[i].away && regs[i].location.IsImm()))
+  if (!regs[i].away || regs[i].location.IsImm())
   {
     X64Reg xr = GetFreeXReg();
     if (xregs[xr].dirty)
@@ -294,7 +293,7 @@ void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
       PanicAlert("GetFreeXReg returned locked register");
     xregs[xr].free = false;
     xregs[xr].ppcReg = i;
-    xregs[xr].dirty = makeDirty || regs[i].location.IsImm();
+    xregs[xr].dirty = makeDirty || regs[i].away;
     if (doLoad)
       LoadRegister(i, xr);
     for (size_t j = 0; j < regs.size(); j++)
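The dirty computation in BindToRegister changes for the same reason: a register may now hold an immediate while not being "away" (its PPCSTATE home already matches), and binding it to a host register must not force a write-back. A small sketch contrasting the old and new expressions (hypothetical helper names):

```cpp
#include <cstdio>

// Hedged sketch: under the old rule every bound immediate became dirty; under
// the new rule only values that actually differ from memory (away) do.
static bool OldDirty(bool makeDirty, bool isImm) { return makeDirty || isImm; }
static bool NewDirty(bool makeDirty, bool away) { return makeDirty || away; }

int main()
{
  // A clean speculative immediate: isImm = true, away = false, makeDirty = false.
  std::printf("old=%d new=%d\n", OldDirty(false, true), NewDirty(false, false));
  // old=1 would have flushed a value that never changed; new=0 avoids it.
  return 0;
}
```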
@@ -161,7 +161,7 @@ public:
   void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
   Gen::OpArg GetDefaultLocation(size_t reg) const override;
   const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
-  void SetImmediate32(size_t preg, u32 immValue);
+  void SetImmediate32(size_t preg, u32 immValue, bool dirty = true);
   BitSet32 GetRegUtilization() override;
   BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
 };
@@ -592,6 +592,6 @@ void Jit64::eieio(UGeckoInstruction inst)
   // optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block,
   // which is generally safe. However postponing FIFO writes across eieio instructions
   // is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection).
-  if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
+  if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
     js.mustCheckFifo = true;
 }
@@ -515,7 +515,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
 {
   js.isLastInstruction = false;
   js.blockStart = em_address;
-  js.fifoBytesThisBlock = 0;
+  js.fifoBytesSinceCheck = 0;
   js.curBlock = b;
   jit->js.numLoadStoreInst = 0;
   jit->js.numFloatingPointInst = 0;
@@ -170,7 +170,7 @@ void JitArm64::Break(UGeckoInstruction inst)

 void JitArm64::Cleanup()
 {
-  if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
+  if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
   {
     gpr.Lock(W0);
     MOVP2R(X0, &GPFifo::FastCheckGatherPipe);
@@ -424,7 +424,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
   js.firstFPInstructionFound = false;
   js.assumeNoPairedQuantize = false;
   js.blockStart = em_address;
-  js.fifoBytesThisBlock = 0;
+  js.fifoBytesSinceCheck = 0;
   js.mustCheckFifo = false;
   js.downcountAmount = 0;
   js.skipInstructions = 0;
@@ -512,10 +512,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
     bool gatherPipeIntCheck =
         jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end();

-    if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo))
+    if (jo.optimizeGatherPipe && (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo))
     {
-      if (js.fifoBytesThisBlock >= 32)
-        js.fifoBytesThisBlock -= 32;
+      js.fifoBytesSinceCheck = 0;
       js.mustCheckFifo = false;

       gpr.Lock(W30);
@@ -333,7 +333,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   }
   ADD(W0, W0, accessSize >> 3);
   STR(INDEX_UNSIGNED, W0, X30, count_off);
-  js.fifoBytesThisBlock += accessSize >> 3;
+  js.fifoBytesSinceCheck += accessSize >> 3;

   if (accessSize != 8)
     gpr.Unlock(WA);
@@ -833,6 +833,6 @@ void JitArm64::eieio(UGeckoInstruction inst)
   // optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block,
   // which is generally safe. However postponing FIFO writes across eieio instructions
   // is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection).
-  if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
+  if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
     js.mustCheckFifo = true;
 }
@@ -442,7 +442,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)

   ADD(W0, W0, accessSize >> 3);
   STR(INDEX_UNSIGNED, W0, X30, count_off);
-  js.fifoBytesThisBlock += accessSize >> 3;
+  js.fifoBytesSinceCheck += accessSize >> 3;

   if (update)
   {
@@ -104,7 +104,7 @@ protected:
   u8* trampolineExceptionHandler;

   bool mustCheckFifo;
-  int fifoBytesThisBlock;
+  int fifoBytesSinceCheck;

   PPCAnalyst::BlockStats st;
   PPCAnalyst::BlockRegStats gpa;
@@ -116,6 +116,7 @@ protected:

   std::unordered_set<u32> fifoWriteAddresses;
   std::unordered_set<u32> pairedQuantizeAddresses;
+  std::unordered_set<u32> noSpeculativeConstantsAddresses;
 };

 PPCAnalyst::CodeBlock code_block;
@@ -462,7 +462,7 @@ void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize)
     CALL(jit->GetAsmRoutines()->fifoDirectWrite64);
     break;
   }
-  jit->js.fifoBytesThisBlock += accessSize >> 3;
+  jit->js.fifoBytesSinceCheck += accessSize >> 3;
 }

 bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
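A small aside on the accessSize >> 3 idiom that recurs in these hunks: accessSize is in bits, so the shift converts it to the byte count added to fifoBytesSinceCheck. A trivial demonstration:

```cpp
#include <cstdio>

int main()
{
  const int sizes[] = {8, 16, 32, 64};  // access sizes in bits
  for (int accessSize : sizes)
    std::printf("%d bits -> %d bytes\n", accessSize, accessSize >> 3);
  return 0;
}
```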
@@ -260,6 +260,9 @@ void CompileExceptionCheck(ExceptionType type)
   case ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE:
     exception_addresses = &jit->js.pairedQuantizeAddresses;
     break;
+  case ExceptionType::EXCEPTIONS_SPECULATIVE_CONSTANTS:
+    exception_addresses = &jit->js.noSpeculativeConstantsAddresses;
+    break;
   }

   if (PC != 0 && (exception_addresses->find(PC)) == (exception_addresses->end()))
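This is the same per-optimization blacklist pattern already used for FIFO writes and paired quantize: when the speculative guard fails at runtime, CompileExceptionCheck records the block's start address so that the recompiled block skips the speculation. A standalone sketch of the round trip (hypothetical free functions; the real logic lives in CompileExceptionCheck and Jit64::DoJit):

```cpp
#include <cstdint>
#include <cstdio>
#include <unordered_set>

static std::unordered_set<uint32_t> noSpeculativeConstantsAddresses;

static void OnSpeculationFailed(uint32_t blockStart)
{
  // Mirrors CompileExceptionCheck: record the address; the block would then be
  // invalidated and recompiled.
  noSpeculativeConstantsAddresses.insert(blockStart);
}

static bool ShouldSpeculate(uint32_t blockStart)
{
  // Mirrors the check added at the top of Jit64::DoJit.
  return noSpeculativeConstantsAddresses.find(blockStart) ==
         noSpeculativeConstantsAddresses.end();
}

int main()
{
  std::printf("%d\n", ShouldSpeculate(0x80003100));  // 1: first compile speculates
  OnSpeculationFailed(0x80003100);
  std::printf("%d\n", ShouldSpeculate(0x80003100));  // 0: recompile plays it safe
  return 0;
}
```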
@@ -15,7 +15,8 @@ namespace JitInterface
 enum class ExceptionType
 {
   EXCEPTIONS_FIFO_WRITE,
-  EXCEPTIONS_PAIRED_QUANTIZE
+  EXCEPTIONS_PAIRED_QUANTIZE,
+  EXCEPTIONS_SPECULATIVE_CONSTANTS
 };

 void DoState(PointerWrap& p);
@@ -843,10 +843,13 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
   }

   // Forward scan, for flags that need the other direction for calculation.
-  BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe;
+  BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe, gprDefined, gprBlockInputs;
   BitSet8 gqrUsed, gqrModified;
   for (u32 i = 0; i < block->m_num_instructions; i++)
   {
+    gprBlockInputs |= code[i].regsIn & ~gprDefined;
+    gprDefined |= code[i].regsOut;
+
     code[i].fprIsSingle = fprIsSingle;
     code[i].fprIsDuplicated = fprIsDuplicated;
     code[i].fprIsStoreSafe = fprIsStoreSafe;
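gprBlockInputs implements a classic read-before-write (live-in) scan: a GPR is a block input iff some instruction reads it before any earlier instruction in the block defines it. A standalone illustration with hypothetical mini-types (Dolphin uses BitSet32 and CodeOp):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

struct Op
{
  uint32_t regsIn;   // bitmask of GPRs read
  uint32_t regsOut;  // bitmask of GPRs written
};

static uint32_t BlockInputs(const std::vector<Op>& code)
{
  uint32_t gprDefined = 0, gprBlockInputs = 0;
  for (const Op& op : code)
  {
    gprBlockInputs |= op.regsIn & ~gprDefined;  // reads not yet defined locally
    gprDefined |= op.regsOut;
  }
  return gprBlockInputs;
}

int main()
{
  // r4 = r3 + r3; r3 = r4;  ->  only r3 is an input; r4 is defined first.
  std::vector<Op> code = {{1u << 3, 1u << 4}, {1u << 4, 1u << 3}};
  std::printf("%08x\n", BlockInputs(code));  // 00000008 (just r3)
  return 0;
}
```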
@@ -899,6 +902,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
   }
   block->m_gqr_used = gqrUsed;
   block->m_gqr_modified = gqrModified;
+  block->m_gpr_inputs = gprBlockInputs;
   return address;
 }
@@ -154,6 +154,9 @@ struct CodeBlock

   // Which GQRs this block modifies, if any.
   BitSet8 m_gqr_modified;
+
+  // Which GPRs this block reads from before defining, if any.
+  BitSet32 m_gpr_inputs;
 };

 class PPCAnalyzer