CPU/NewRec: Fix register corruption in swl/swr

This commit is contained in:
Stenzek 2024-03-31 13:17:05 +10:00
parent a3013efbca
commit 8ebda3cdc8
No known key found for this signature in database
4 changed files with 75 additions and 64 deletions

View File

@ -1785,7 +1785,13 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
{ {
DebugAssert(size == MemoryAccessSize::Word && !sign); DebugAssert(size == MemoryAccessSize::Word && !sign);
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the alloc
const Register addr = Register(AllocateTempHostReg(HR_CALLEE_SAVED)); const Register addr = Register(AllocateTempHostReg(HR_CALLEE_SAVED));
const Register value = g_settings.gpu_pgxp_enable ? Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
if (g_settings.gpu_pgxp_enable)
MoveMIPSRegToReg(value, inst->r.rt);
FlushForLoadStore(address, true, use_fastmem); FlushForLoadStore(address, true, use_fastmem);
// TODO: if address is constant, this can be simplified.. // TODO: if address is constant, this can be simplified..
@ -1794,20 +1800,13 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u)); armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
const Reg rt = inst->r.rt;
const Register value = g_settings.gpu_pgxp_enable ? Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
MoveMIPSRegToReg(value, rt);
armAsm->and_(RSCRATCH, addr, 3); armAsm->and_(RSCRATCH, addr, 3);
armAsm->lsl(RSCRATCH, RSCRATCH, 3); // *8 armAsm->lsl(RSCRATCH, RSCRATCH, 3); // *8
armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u));
// Don't need the original address anymore. // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
if (!g_settings.gpu_pgxp_enable) if (!g_settings.gpu_pgxp_enable)
FreeHostReg(addr.GetCode()); MoveMIPSRegToReg(value, inst->r.rt);
else
armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u));
if (inst->op == InstructionOp::swl) if (inst->op == InstructionOp::swl)
{ {
@ -1836,10 +1835,15 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
armAsm->orr(value, value, RRET); armAsm->orr(value, value, RRET);
} }
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); if (!g_settings.gpu_pgxp_enable)
if (g_settings.gpu_pgxp_enable)
{ {
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
FreeHostReg(addr.GetCode());
}
else
{
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
Flush(FLUSH_FOR_C_CALL); Flush(FLUSH_FOR_C_CALL);
armAsm->mov(RARG3, value); armAsm->mov(RARG3, value);
FreeHostReg(value.GetCode()); FreeHostReg(value.GetCode());

View File

@ -1764,7 +1764,13 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
{ {
DebugAssert(size == MemoryAccessSize::Word && !sign); DebugAssert(size == MemoryAccessSize::Word && !sign);
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the alloc
const WRegister addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)); const WRegister addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
const WRegister value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
if (g_settings.gpu_pgxp_enable)
MoveMIPSRegToReg(value, inst->r.rt);
FlushForLoadStore(address, true, use_fastmem); FlushForLoadStore(address, true, use_fastmem);
// TODO: if address is constant, this can be simplified.. // TODO: if address is constant, this can be simplified..
@ -1773,20 +1779,13 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u)); armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; }); GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
const Reg rt = inst->r.rt;
const WRegister value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
MoveMIPSRegToReg(value, rt);
armAsm->and_(RWSCRATCH, addr, 3); armAsm->and_(RWSCRATCH, addr, 3);
armAsm->lsl(RWSCRATCH, RWSCRATCH, 3); // *8 armAsm->lsl(RWSCRATCH, RWSCRATCH, 3); // *8
armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u));
// Don't need the original address anymore. // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
if (!g_settings.gpu_pgxp_enable) if (!g_settings.gpu_pgxp_enable)
FreeHostReg(addr.GetCode()); MoveMIPSRegToReg(value, inst->r.rt);
else
armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u));
if (inst->op == InstructionOp::swl) if (inst->op == InstructionOp::swl)
{ {
@ -1815,12 +1814,15 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
armAsm->orr(value, value, RWRET); armAsm->orr(value, value, RWRET);
} }
FreeHostReg(addr.GetCode()); if (!g_settings.gpu_pgxp_enable)
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
if (g_settings.gpu_pgxp_enable)
{ {
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
FreeHostReg(addr.GetCode());
}
else
{
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
Flush(FLUSH_FOR_C_CALL); Flush(FLUSH_FOR_C_CALL);
armAsm->mov(RWARG3, value); armAsm->mov(RWARG3, value);
FreeHostReg(value.GetCode()); FreeHostReg(value.GetCode());

View File

@ -2071,7 +2071,13 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
{ {
DebugAssert(size == MemoryAccessSize::Word && !sign); DebugAssert(size == MemoryAccessSize::Word && !sign);
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the alloc
const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED)); const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
const GPR value = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
if (g_settings.gpu_pgxp_enable)
MoveMIPSRegToReg(value, inst->r.rt);
FlushForLoadStore(address, true, use_fastmem); FlushForLoadStore(address, true, use_fastmem);
// TODO: if address is constant, this can be simplified.. // TODO: if address is constant, this can be simplified..
@ -2080,20 +2086,13 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
rvAsm->ANDI(RARG1, addr, ~0x3u); rvAsm->ANDI(RARG1, addr, ~0x3u);
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
const Reg rt = inst->r.rt;
const GPR value = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
MoveMIPSRegToReg(value, rt);
rvAsm->ANDI(RSCRATCH, addr, 3); rvAsm->ANDI(RSCRATCH, addr, 3);
rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8 rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8
rvAsm->ANDI(addr, addr, ~0x3u);
// Don't need the original address anymore. // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
if (!g_settings.gpu_pgxp_enable) if (!g_settings.gpu_pgxp_enable)
FreeHostReg(addr.Index()); MoveMIPSRegToReg(value, inst->r.rt);
else
rvAsm->ANDI(addr, addr, ~0x3u);
if (inst->op == InstructionOp::swl) if (inst->op == InstructionOp::swl)
{ {
@ -2122,12 +2121,15 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
rvAsm->OR(value, value, RRET); rvAsm->OR(value, value, RRET);
} }
FreeHostReg(addr.Index()); if (!g_settings.gpu_pgxp_enable)
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
if (g_settings.gpu_pgxp_enable)
{ {
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
FreeHostReg(addr.Index());
}
else
{
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
Flush(FLUSH_FOR_C_CALL); Flush(FLUSH_FOR_C_CALL);
rvAsm->MV(RARG3, value); rvAsm->MV(RARG3, value);
FreeHostReg(value.Index()); FreeHostReg(value.Index());

View File

@ -1577,14 +1577,13 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
cg->mov(RWARG2, 24); cg->mov(RWARG2, 24);
cg->sub(RWARG2, cg->ecx); cg->sub(RWARG2, cg->ecx);
const Reg32& temp = (RWARG3 == cg->ecx) ? RWARG4 : RWARG3;
if (inst->op == InstructionOp::lwl) if (inst->op == InstructionOp::lwl)
{ {
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift; // const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
// new_value = (value & mask) | (RWRET << (24 - shift)); // new_value = (value & mask) | (RWRET << (24 - shift));
cg->mov(temp, 0xFFFFFFu); cg->mov(RWARG3, 0xFFFFFFu);
cg->shr(temp, cg->cl); cg->shr(RWARG3, cg->cl);
cg->and_(value, temp); cg->and_(value, RWARG3);
cg->mov(cg->ecx, RWARG2); cg->mov(cg->ecx, RWARG2);
cg->shl(RWRET, cg->cl); cg->shl(RWRET, cg->cl);
cg->or_(value, RWRET); cg->or_(value, RWRET);
@ -1594,10 +1593,10 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift); // const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
// new_value = (value & mask) | (RWRET >> shift); // new_value = (value & mask) | (RWRET >> shift);
cg->shr(RWRET, cg->cl); cg->shr(RWRET, cg->cl);
cg->mov(temp, 0xFFFFFF00u); cg->mov(RWARG3, 0xFFFFFF00u);
cg->mov(cg->ecx, RWARG2); cg->mov(cg->ecx, RWARG2);
cg->shl(temp, cg->cl); cg->shl(RWARG3, cg->cl);
cg->and_(value, temp); cg->and_(value, RWARG3);
cg->or_(value, RWRET); cg->or_(value, RWRET);
} }
@ -1730,7 +1729,13 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
{ {
DebugAssert(size == MemoryAccessSize::Word && !sign); DebugAssert(size == MemoryAccessSize::Word && !sign);
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the alloc
const Reg32 addr = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED)); const Reg32 addr = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED));
const Reg32 value = g_settings.gpu_pgxp_enable ? Reg32(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
if (g_settings.gpu_pgxp_enable)
MoveMIPSRegToReg(value, inst->r.rt);
FlushForLoadStore(address, true, use_fastmem); FlushForLoadStore(address, true, use_fastmem);
// TODO: if address is constant, this can be simplified.. // TODO: if address is constant, this can be simplified..
@ -1740,22 +1745,15 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
cg->and_(RWARG1, ~0x3u); cg->and_(RWARG1, ~0x3u);
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; }); GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
const Reg rt = inst->r.rt;
const Reg32 value = g_settings.gpu_pgxp_enable ? Reg32(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
DebugAssert(value != cg->ecx); DebugAssert(value != cg->ecx);
MoveMIPSRegToReg(value, rt);
cg->mov(cg->ecx, addr); cg->mov(cg->ecx, addr);
cg->and_(cg->ecx, 3); cg->and_(cg->ecx, 3);
cg->shl(cg->ecx, 3); // *8 cg->shl(cg->ecx, 3); // *8
cg->and_(addr, ~0x3u);
// Don't need the original address anymore. // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
if (g_settings.gpu_pgxp_enable) if (!g_settings.gpu_pgxp_enable)
cg->and_(addr, ~0x3u); MoveMIPSRegToReg(value, inst->r.rt);
else
FreeHostReg(addr.getIdx());
if (inst->op == InstructionOp::swl) if (inst->op == InstructionOp::swl)
{ {
@ -1787,10 +1785,15 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
cg->or_(value, RWRET); cg->or_(value, RWRET);
} }
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); if (!g_settings.gpu_pgxp_enable)
if (g_settings.gpu_pgxp_enable)
{ {
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
FreeHostReg(addr.getIdx());
}
else
{
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
Flush(FLUSH_FOR_C_CALL); Flush(FLUSH_FOR_C_CALL);
cg->mov(RWARG3, value); cg->mov(RWARG3, value);
FreeHostReg(value.getIdx()); FreeHostReg(value.getIdx());