Merge pull request #9314 from smurf3tte/dcbx

Jit: Fix correctness issue in dcbf/dcbi/dcbst
This commit is contained in:
Markus Wick 2021-01-27 20:12:21 +01:00 committed by GitHub
commit b1fdd14ed1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 19 additions and 64 deletions

View File

@ -233,37 +233,21 @@ void Jit64::dcbx(UGeckoInstruction inst)
JITDISABLE(bJITLoadStoreOff);
X64Reg addr = RSCRATCH;
X64Reg value = RSCRATCH2;
RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0);
RCOpArg Rb = gpr.Use(inst.RB, RCMode::Read);
RCX64Reg tmp = gpr.Scratch();
RegCache::Realize(Ra, Rb, tmp);
RegCache::Realize(Ra, Rb);
MOV_sum(32, addr, Ra, Rb);
AND(32, R(addr), Imm8(~31));
// Check whether a JIT cache line needs to be invalidated.
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
SHR(32, R(value), Imm8(3 + 5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset
MOV(64, R(tmp), ImmPtr(GetBlockCache()->GetBlockBitSet()));
MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0));
SHR(32, R(addr), Imm8(5));
BT(32, R(value), R(addr));
FixupBranch c = J_CC(CC_C, true);
SwitchToFarCode();
SetJumpTarget(c);
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
MOV(32, R(ABI_PARAM1), R(addr));
SHL(32, R(ABI_PARAM1), Imm8(5));
MOV(32, R(ABI_PARAM2), Imm32(32));
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
ABI_CallFunction(JitInterface::InvalidateICache);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
asm_routines.ResetStack(*this);
c = J(true);
SwitchToNearCode();
SetJumpTarget(c);
}
void Jit64::dcbt(UGeckoInstruction inst)

View File

@ -538,11 +538,9 @@ void JitArm64::dcbx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
gpr.Lock(W30);
gpr.Lock(W0);
ARM64Reg addr = gpr.GetReg();
ARM64Reg value = gpr.GetReg();
ARM64Reg WA = W30;
ARM64Reg addr = W0;
u32 a = inst.RA, b = inst.RB;
@ -551,21 +549,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
else
MOV(addr, gpr.R(b));
// Check whether a JIT cache line needs to be invalidated.
AND(value, addr, 32 - 10, 28 - 10); // upper three bits and last 10 bit are masked for the bitset
// of cachelines, 0x1ffffc00
LSR(value, value, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
MOVP2R(EncodeRegTo64(WA), GetBlockCache()->GetBlockBitSet());
LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true));
LSR(addr, addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
LSR(value, value, addr); // move current bit to bit 0
FixupBranch bit_not_set = TBZ(value, 0);
FixupBranch far_addr = B();
SwitchToFarCode();
SetJumpTarget(far_addr);
ANDI2R(addr, addr, ~31); // mask sizeof cacheline
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
@ -573,7 +557,6 @@ void JitArm64::dcbx(UGeckoInstruction inst)
ABI_PushRegisters(gprs_to_push);
m_float_emit.ABI_PushRegisters(fprs_to_push, X30);
LSL(W0, addr, 5);
MOVI2R(X1, 32);
MOVI2R(X2, 0);
MOVP2R(X3, &JitInterface::InvalidateICache);
@ -582,12 +565,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
ABI_PopRegisters(gprs_to_push);
FixupBranch near_addr = B();
SwitchToNearCode();
SetJumpTarget(bit_not_set);
SetJumpTarget(near_addr);
gpr.Unlock(addr, value, W30);
gpr.Unlock(W0);
}
void JitArm64::dcbt(UGeckoInstruction inst)

View File

@ -270,11 +270,6 @@ void JitBaseBlockCache::ErasePhysicalRange(u32 address, u32 length)
}
}
u32* JitBaseBlockCache::GetBlockBitSet() const
{
return valid_block.m_valid_block.get();
}
void JitBaseBlockCache::WriteDestroyBlock(const JitBlock& block)
{
}

View File

@ -98,19 +98,6 @@ typedef void (*CompiledCode)();
class ValidBlockBitSet final
{
public:
enum
{
// ValidBlockBitSet covers the whole 32-bit address-space in 32-byte
// chunks.
// FIXME: Maybe we can get away with less? There isn't any actual
// RAM in most of this space.
VALID_BLOCK_MASK_SIZE = (1ULL << 32) / 32,
// The number of elements in the allocated array. Each u32 contains 32 bits.
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
};
// Directly accessed by Jit64.
std::unique_ptr<u32[]> m_valid_block;
ValidBlockBitSet()
{
m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]);
@ -121,6 +108,19 @@ public:
void Clear(u32 bit) { m_valid_block[bit / 32] &= ~(1u << (bit % 32)); }
void ClearAll() { memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS); }
bool Test(u32 bit) { return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0; }
private:
enum
{
// ValidBlockBitSet covers the whole 32-bit address-space in 32-byte
// chunks.
// FIXME: Maybe we can get away with less? There isn't any actual
// RAM in most of this space.
VALID_BLOCK_MASK_SIZE = (1ULL << 32) / 32,
// The number of elements in the allocated array. Each u32 contains 32 bits.
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
};
std::unique_ptr<u32[]> m_valid_block;
};
class JitBaseBlockCache
@ -162,8 +162,6 @@ public:
void InvalidateICache(u32 address, u32 length, bool forced);
void ErasePhysicalRange(u32 address, u32 length);
u32* GetBlockBitSet() const;
protected:
virtual void DestroyBlock(JitBlock& block);