Merge pull request #4123 from hthh/improve-const-stores

Jit: FIFO optimization improvements
This commit is contained in:
Markus Wick 2016-10-04 08:32:48 +02:00 committed by GitHub
commit cf3513f7fc
16 changed files with 81 additions and 28 deletions

View File

@ -144,7 +144,7 @@ void CachedInterpreter::Jit(u32 address)
js.blockStart = PC;
js.firstFPInstructionFound = false;
js.fifoBytesThisBlock = 0;
js.fifoBytesSinceCheck = 0;
js.downcountAmount = 0;
js.curBlock = b;

View File

@ -349,7 +349,7 @@ bool Jit64::Cleanup()
{
bool did_something = false;
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
{
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunction(GPFifo::FastCheckGatherPipe);
@ -597,7 +597,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
js.firstFPInstructionFound = false;
js.isLastInstruction = false;
js.blockStart = em_address;
js.fifoBytesThisBlock = 0;
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
js.curBlock = b;
js.numLoadStoreInst = 0;
@ -690,6 +690,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
}
}
if (js.noSpeculativeConstantsAddresses.find(js.blockStart) ==
js.noSpeculativeConstantsAddresses.end())
{
IntializeSpeculativeConstants();
}
// Translate instructions
for (u32 i = 0; i < code_block.m_num_instructions; i++)
{
@ -724,10 +730,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
js.fifoWriteAddresses.find(ops[i].address) != js.fifoWriteAddresses.end();
// Gather pipe writes using an immediate address are explicitly tracked.
if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo))
if (jo.optimizeGatherPipe && (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo))
{
if (js.fifoBytesThisBlock >= 32)
js.fifoBytesThisBlock -= 32;
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
@ -967,3 +972,39 @@ void Jit64::EnableOptimization()
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
}
void Jit64::IntializeSpeculativeConstants()
{
// If the block depends on an input register which looks like a gather pipe or MMIO related
// constant, guess that it is actually a constant input, and specialize the block based on this
// assumption. This happens when there are branches in code writing to the gather pipe, but only
// the first block loads the constant.
// Insert a check at the start of the block to verify that the value is actually constant.
// This can save a lot of backpatching and optimize gather pipe writes in more places.
// Far-code bail-out stub, emitted lazily on the first speculated register and
// shared by every subsequent guard in this block.
const u8* target = nullptr;
for (auto i : code_block.m_gpr_inputs)
{
u32 compileTimeValue = PowerPC::ppcState.gpr[i];
// Only speculate on values that look like gather-pipe or MMIO addresses
// (0xCC000000 is presumably the MMIO base referred to above — confirm against
// PowerPC::IsOptimizableGatherPipeWrite; the -0x8000 variant covers the common
// "base + signed-16-bit displacement" addressing idiom).
if (PowerPC::IsOptimizableGatherPipeWrite(compileTimeValue) ||
PowerPC::IsOptimizableGatherPipeWrite(compileTimeValue - 0x8000) ||
compileTimeValue == 0xCC000000)
{
if (!target)
{
// Emit the failure path once, out of line: store this block's start address
// in PC, call JitInterface::CompileExceptionCheck with
// EXCEPTIONS_SPECULATIVE_CONSTANTS (which blacklists the address so the
// block is rebuilt without speculation), then return to the dispatcher.
SwitchToFarCode();
target = GetCodePtr();
MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionC(
reinterpret_cast<void*>(&JitInterface::CompileExceptionCheck),
static_cast<u32>(JitInterface::ExceptionType::EXCEPTIONS_SPECULATIVE_CONSTANTS));
ABI_PopRegistersAndAdjustStack({}, 0);
JMP(asm_routines.dispatcher, true);
SwitchToNearCode();
}
// Runtime guard: if the register no longer holds the value observed at
// compile time, jump to the shared bail-out stub.
CMP(32, PPCSTATE(gpr[i]), Imm32(compileTimeValue));
J_CC(CC_NZ, target);
// dirty = false: ppcState already holds this exact value (we just compared
// against it), so the cached immediate never needs to be written back.
gpr.SetImmediate32(i, compileTimeValue, false);
}
}
}

View File

@ -68,6 +68,8 @@ public:
BitSet32 CallerSavedRegistersInUse() const;
BitSet8 ComputeStaticGQRs(const PPCAnalyst::CodeBlock&) const;
void IntializeSpeculativeConstants();
JitBlockCache* GetBlockCache() override { return &blocks; }
void Trace();

View File

@ -227,10 +227,12 @@ void RegCache::DiscardRegContentsIfCached(size_t preg)
}
}
void GPRRegCache::SetImmediate32(size_t preg, u32 immValue)
void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty)
{
// "dirty" can be false to avoid redundantly flushing an immediate when
// processing speculative constants.
DiscardRegContentsIfCached(preg);
regs[preg].away = true;
regs[preg].away |= dirty;
regs[preg].location = Imm32(immValue);
}
@ -282,10 +284,7 @@ void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
{
if (!regs[i].away && regs[i].location.IsImm())
PanicAlert("Bad immediate");
if (!regs[i].away || (regs[i].away && regs[i].location.IsImm()))
if (!regs[i].away || regs[i].location.IsImm())
{
X64Reg xr = GetFreeXReg();
if (xregs[xr].dirty)
@ -294,7 +293,7 @@ void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
PanicAlert("GetFreeXReg returned locked register");
xregs[xr].free = false;
xregs[xr].ppcReg = i;
xregs[xr].dirty = makeDirty || regs[i].location.IsImm();
xregs[xr].dirty = makeDirty || regs[i].away;
if (doLoad)
LoadRegister(i, xr);
for (size_t j = 0; j < regs.size(); j++)

View File

@ -161,7 +161,7 @@ public:
void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
Gen::OpArg GetDefaultLocation(size_t reg) const override;
const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
void SetImmediate32(size_t preg, u32 immValue);
void SetImmediate32(size_t preg, u32 immValue, bool dirty = true);
BitSet32 GetRegUtilization() override;
BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
};

View File

@ -592,6 +592,6 @@ void Jit64::eieio(UGeckoInstruction inst)
// optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block,
// which is generally safe. However postponing FIFO writes across eieio instructions
// is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection).
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
js.mustCheckFifo = true;
}

View File

@ -515,7 +515,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
{
js.isLastInstruction = false;
js.blockStart = em_address;
js.fifoBytesThisBlock = 0;
js.fifoBytesSinceCheck = 0;
js.curBlock = b;
jit->js.numLoadStoreInst = 0;
jit->js.numFloatingPointInst = 0;

View File

@ -170,7 +170,7 @@ void JitArm64::Break(UGeckoInstruction inst)
void JitArm64::Cleanup()
{
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
{
gpr.Lock(W0);
MOVP2R(X0, &GPFifo::FastCheckGatherPipe);
@ -424,7 +424,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
js.firstFPInstructionFound = false;
js.assumeNoPairedQuantize = false;
js.blockStart = em_address;
js.fifoBytesThisBlock = 0;
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
js.downcountAmount = 0;
js.skipInstructions = 0;
@ -512,10 +512,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
bool gatherPipeIntCheck =
jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end();
if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo))
if (jo.optimizeGatherPipe && (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo))
{
if (js.fifoBytesThisBlock >= 32)
js.fifoBytesThisBlock -= 32;
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
gpr.Lock(W30);

View File

@ -333,7 +333,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
ADD(W0, W0, accessSize >> 3);
STR(INDEX_UNSIGNED, W0, X30, count_off);
js.fifoBytesThisBlock += accessSize >> 3;
js.fifoBytesSinceCheck += accessSize >> 3;
if (accessSize != 8)
gpr.Unlock(WA);
@ -833,6 +833,6 @@ void JitArm64::eieio(UGeckoInstruction inst)
// optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block,
// which is generally safe. However postponing FIFO writes across eieio instructions
// is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection).
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
js.mustCheckFifo = true;
}

View File

@ -442,7 +442,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
ADD(W0, W0, accessSize >> 3);
STR(INDEX_UNSIGNED, W0, X30, count_off);
js.fifoBytesThisBlock += accessSize >> 3;
js.fifoBytesSinceCheck += accessSize >> 3;
if (update)
{

View File

@ -104,7 +104,7 @@ protected:
u8* trampolineExceptionHandler;
bool mustCheckFifo;
int fifoBytesThisBlock;
int fifoBytesSinceCheck;
PPCAnalyst::BlockStats st;
PPCAnalyst::BlockRegStats gpa;
@ -116,6 +116,7 @@ protected:
std::unordered_set<u32> fifoWriteAddresses;
std::unordered_set<u32> pairedQuantizeAddresses;
std::unordered_set<u32> noSpeculativeConstantsAddresses;
};
PPCAnalyst::CodeBlock code_block;

View File

@ -462,7 +462,7 @@ void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize)
CALL(jit->GetAsmRoutines()->fifoDirectWrite64);
break;
}
jit->js.fifoBytesThisBlock += accessSize >> 3;
jit->js.fifoBytesSinceCheck += accessSize >> 3;
}
bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,

View File

@ -260,6 +260,9 @@ void CompileExceptionCheck(ExceptionType type)
case ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE:
exception_addresses = &jit->js.pairedQuantizeAddresses;
break;
case ExceptionType::EXCEPTIONS_SPECULATIVE_CONSTANTS:
exception_addresses = &jit->js.noSpeculativeConstantsAddresses;
break;
}
if (PC != 0 && (exception_addresses->find(PC)) == (exception_addresses->end()))

View File

@ -15,7 +15,8 @@ namespace JitInterface
enum class ExceptionType
{
EXCEPTIONS_FIFO_WRITE,
EXCEPTIONS_PAIRED_QUANTIZE
EXCEPTIONS_PAIRED_QUANTIZE,
EXCEPTIONS_SPECULATIVE_CONSTANTS
};
void DoState(PointerWrap& p);

View File

@ -843,10 +843,13 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
}
// Forward scan, for flags that need the other direction for calculation.
BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe;
BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe, gprDefined, gprBlockInputs;
BitSet8 gqrUsed, gqrModified;
for (u32 i = 0; i < block->m_num_instructions; i++)
{
gprBlockInputs |= code[i].regsIn & ~gprDefined;
gprDefined |= code[i].regsOut;
code[i].fprIsSingle = fprIsSingle;
code[i].fprIsDuplicated = fprIsDuplicated;
code[i].fprIsStoreSafe = fprIsStoreSafe;
@ -899,6 +902,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32
}
block->m_gqr_used = gqrUsed;
block->m_gqr_modified = gqrModified;
block->m_gpr_inputs = gprBlockInputs;
return address;
}

View File

@ -154,6 +154,9 @@ struct CodeBlock
// Which GQRs this block modifies, if any.
BitSet8 m_gqr_modified;
// Which GPRs this block reads from before defining, if any.
BitSet32 m_gpr_inputs;
};
class PPCAnalyzer