Revert "Jit: Fix correctness issue in dcbf/dcbi/dcbst"
This reverts commit 66b992cfe4.
A new (additional) correctness issue was revealed in the old
AArch64 code when applying it on top of modern JitArm64:
LSR was being used when LSRV was intended. This commit uses LSRV.
This commit is contained in:
parent
55a465c6e6
commit
b84a0704cd
|
@ -234,21 +234,37 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
||||||
JITDISABLE(bJITLoadStoreOff);
|
JITDISABLE(bJITLoadStoreOff);
|
||||||
|
|
||||||
X64Reg addr = RSCRATCH;
|
X64Reg addr = RSCRATCH;
|
||||||
|
X64Reg value = RSCRATCH2;
|
||||||
RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0);
|
RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0);
|
||||||
RCOpArg Rb = gpr.Use(inst.RB, RCMode::Read);
|
RCOpArg Rb = gpr.Use(inst.RB, RCMode::Read);
|
||||||
RegCache::Realize(Ra, Rb);
|
RCX64Reg tmp = gpr.Scratch();
|
||||||
|
RegCache::Realize(Ra, Rb, tmp);
|
||||||
|
|
||||||
MOV_sum(32, addr, Ra, Rb);
|
MOV_sum(32, addr, Ra, Rb);
|
||||||
AND(32, R(addr), Imm8(~31));
|
|
||||||
|
|
||||||
|
// Check whether a JIT cache line needs to be invalidated.
|
||||||
|
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
|
||||||
|
SHR(32, R(value), Imm8(3 + 5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset
|
||||||
|
MOV(64, R(tmp), ImmPtr(GetBlockCache()->GetBlockBitSet()));
|
||||||
|
MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0));
|
||||||
|
SHR(32, R(addr), Imm8(5));
|
||||||
|
BT(32, R(value), R(addr));
|
||||||
|
|
||||||
|
FixupBranch c = J_CC(CC_C, true);
|
||||||
|
SwitchToFarCode();
|
||||||
|
SetJumpTarget(c);
|
||||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||||
MOV(32, R(ABI_PARAM1), R(addr));
|
MOV(32, R(ABI_PARAM1), R(addr));
|
||||||
|
SHL(32, R(ABI_PARAM1), Imm8(5));
|
||||||
MOV(32, R(ABI_PARAM2), Imm32(32));
|
MOV(32, R(ABI_PARAM2), Imm32(32));
|
||||||
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
||||||
ABI_CallFunction(JitInterface::InvalidateICache);
|
ABI_CallFunction(JitInterface::InvalidateICache);
|
||||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||||
asm_routines.ResetStack(*this);
|
asm_routines.ResetStack(*this);
|
||||||
|
c = J(true);
|
||||||
|
SwitchToNearCode();
|
||||||
|
SetJumpTarget(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::dcbt(UGeckoInstruction inst)
|
void Jit64::dcbt(UGeckoInstruction inst)
|
||||||
|
|
|
@ -539,9 +539,11 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStoreOff);
|
JITDISABLE(bJITLoadStoreOff);
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W0);
|
gpr.Lock(ARM64Reg::W30);
|
||||||
|
|
||||||
ARM64Reg addr = ARM64Reg::W0;
|
ARM64Reg addr = gpr.GetReg();
|
||||||
|
ARM64Reg value = gpr.GetReg();
|
||||||
|
ARM64Reg WA = ARM64Reg::W30;
|
||||||
|
|
||||||
u32 a = inst.RA, b = inst.RB;
|
u32 a = inst.RA, b = inst.RB;
|
||||||
|
|
||||||
|
@ -550,7 +552,21 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
else
|
else
|
||||||
MOV(addr, gpr.R(b));
|
MOV(addr, gpr.R(b));
|
||||||
|
|
||||||
AND(addr, addr, LogicalImm(~31, 32)); // mask sizeof cacheline
|
// Check whether a JIT cache line needs to be invalidated.
|
||||||
|
AND(value, addr, LogicalImm(0x1ffffc00, 32)); // upper three bits and last 10 bit are masked for
|
||||||
|
// the bitset of cachelines, 0x1ffffc00
|
||||||
|
LSR(value, value, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
|
||||||
|
MOVP2R(EncodeRegTo64(WA), GetBlockCache()->GetBlockBitSet());
|
||||||
|
LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true));
|
||||||
|
|
||||||
|
LSR(addr, addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
|
||||||
|
|
||||||
|
LSRV(value, value, addr); // move current bit to bit 0
|
||||||
|
|
||||||
|
FixupBranch bit_not_set = TBZ(value, 0);
|
||||||
|
FixupBranch far_addr = B();
|
||||||
|
SwitchToFarCode();
|
||||||
|
SetJumpTarget(far_addr);
|
||||||
|
|
||||||
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
||||||
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
||||||
|
@ -558,6 +574,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
ABI_PushRegisters(gprs_to_push);
|
ABI_PushRegisters(gprs_to_push);
|
||||||
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
|
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
|
||||||
|
|
||||||
|
LSL(ARM64Reg::W0, addr, 5);
|
||||||
MOVI2R(ARM64Reg::X1, 32);
|
MOVI2R(ARM64Reg::X1, 32);
|
||||||
MOVI2R(ARM64Reg::X2, 0);
|
MOVI2R(ARM64Reg::X2, 0);
|
||||||
MOVP2R(ARM64Reg::X3, &JitInterface::InvalidateICache);
|
MOVP2R(ARM64Reg::X3, &JitInterface::InvalidateICache);
|
||||||
|
@ -566,7 +583,12 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
|
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
|
||||||
ABI_PopRegisters(gprs_to_push);
|
ABI_PopRegisters(gprs_to_push);
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W0);
|
FixupBranch near_addr = B();
|
||||||
|
SwitchToNearCode();
|
||||||
|
SetJumpTarget(bit_not_set);
|
||||||
|
SetJumpTarget(near_addr);
|
||||||
|
|
||||||
|
gpr.Unlock(addr, value, ARM64Reg::W30);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::dcbt(UGeckoInstruction inst)
|
void JitArm64::dcbt(UGeckoInstruction inst)
|
||||||
|
|
|
@ -269,6 +269,11 @@ void JitBaseBlockCache::ErasePhysicalRange(u32 address, u32 length)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the raw u32 array pointer held by valid_block.m_valid_block, i.e. the
// storage behind the valid-block bitset, without transferring ownership.
u32* JitBaseBlockCache::GetBlockBitSet() const
|
||||||
|
{
|
||||||
|
return valid_block.m_valid_block.get();
|
||||||
|
}
|
||||||
|
|
||||||
void JitBaseBlockCache::WriteDestroyBlock(const JitBlock& block)
|
void JitBaseBlockCache::WriteDestroyBlock(const JitBlock& block)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,18 +99,6 @@ typedef void (*CompiledCode)();
|
||||||
class ValidBlockBitSet final
|
class ValidBlockBitSet final
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ValidBlockBitSet()
|
|
||||||
{
|
|
||||||
m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]);
|
|
||||||
ClearAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Set(u32 bit) { m_valid_block[bit / 32] |= 1u << (bit % 32); }
|
|
||||||
void Clear(u32 bit) { m_valid_block[bit / 32] &= ~(1u << (bit % 32)); }
|
|
||||||
void ClearAll() { memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS); }
|
|
||||||
bool Test(u32 bit) { return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
// ValidBlockBitSet covers the whole 32-bit address-space in 32-byte
|
// ValidBlockBitSet covers the whole 32-bit address-space in 32-byte
|
||||||
|
@ -121,7 +109,19 @@ private:
|
||||||
// The number of elements in the allocated array. Each u32 contains 32 bits.
|
// The number of elements in the allocated array. Each u32 contains 32 bits.
|
||||||
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
|
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
|
||||||
};
|
};
|
||||||
|
// Directly accessed by Jit64.
|
||||||
std::unique_ptr<u32[]> m_valid_block;
|
std::unique_ptr<u32[]> m_valid_block;
|
||||||
|
|
||||||
|
ValidBlockBitSet()
|
||||||
|
{
|
||||||
|
m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]);
|
||||||
|
ClearAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Set(u32 bit) { m_valid_block[bit / 32] |= 1u << (bit % 32); }
|
||||||
|
void Clear(u32 bit) { m_valid_block[bit / 32] &= ~(1u << (bit % 32)); }
|
||||||
|
void ClearAll() { memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS); }
|
||||||
|
bool Test(u32 bit) { return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
class JitBaseBlockCache
|
class JitBaseBlockCache
|
||||||
|
@ -163,6 +163,8 @@ public:
|
||||||
void InvalidateICache(u32 address, u32 length, bool forced);
|
void InvalidateICache(u32 address, u32 length, bool forced);
|
||||||
void ErasePhysicalRange(u32 address, u32 length);
|
void ErasePhysicalRange(u32 address, u32 length);
|
||||||
|
|
||||||
|
u32* GetBlockBitSet() const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void DestroyBlock(JitBlock& block);
|
virtual void DestroyBlock(JitBlock& block);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue