JIT: fix ldm with writeback loading rn

This commit is contained in:
RSDuck 2021-07-24 04:35:33 +02:00
parent 8513900892
commit c2152c8f74
4 changed files with 85 additions and 48 deletions

View File

@ -185,7 +185,7 @@ public:
void T_Comp_BL_LONG_2(); void T_Comp_BL_LONG_2();
void T_Comp_BL_Merged(); void T_Comp_BL_Merged();
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode); s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn);
void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn); void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn);

View File

@ -465,7 +465,7 @@ void Compiler::T_Comp_MemSPRel()
Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store); Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store);
} }
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode) s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn)
{ {
IrregularCycles = true; IrregularCycles = true;
@ -474,7 +474,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (regsCount == 0) if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me return 0; // actually not the right behaviour TODO: fix me
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin())) int firstReg = *regs.begin();
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << firstReg) && !(firstReg == rn && skipLoadingRn))
{ {
int flags = 0; int flags = 0;
if (store) if (store)
@ -483,7 +484,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
flags |= memop_SubtractOffset; flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0); Op2 offset = preinc ? Op2(4) : Op2(0);
Comp_MemAccess(*regs.begin(), rn, offset, 32, flags); Comp_MemAccess(firstReg, rn, offset, 32, flags);
return decrement ? -4 : 4; return decrement ? -4 : 4;
} }
@ -539,12 +540,16 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
loadStoreOffsets[i++] = GetCodeOffset(); loadStoreOffsets[i++] = GetCodeOffset();
if (store) if (store)
{
STR(INDEX_UNSIGNED, first, X1, offset); STR(INDEX_UNSIGNED, first, X1, offset);
else }
else if (!(reg == rn && skipLoadingRn))
{
LDR(INDEX_UNSIGNED, first, X1, offset); LDR(INDEX_UNSIGNED, first, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)) && !store) if (!(RegCache.LoadedRegs & (1 << reg)))
SaveReg(reg, first); SaveReg(reg, first);
}
offset += 4; offset += 4;
} }
@ -558,13 +563,23 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
ARM64Reg first = W3, second = W4; ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg)) if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg); {
if (!(reg == rn && skipLoadingRn))
first = MapReg(reg);
}
else if (store) else if (store)
{
LoadReg(reg, first); LoadReg(reg, first);
}
if (RegCache.LoadedRegs & (1 << nextReg)) if (RegCache.LoadedRegs & (1 << nextReg))
second = MapReg(nextReg); {
if (!(nextReg == rn && skipLoadingRn))
second = MapReg(nextReg);
}
else if (store) else if (store)
{
LoadReg(nextReg, second); LoadReg(nextReg, second);
}
loadStoreOffsets[i++] = GetCodeOffset(); loadStoreOffsets[i++] = GetCodeOffset();
if (store) if (store)
@ -705,20 +720,23 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
LDR(INDEX_UNSIGNED, W3, SP, i * 8); LDR(INDEX_UNSIGNED, W3, SP, i * 8);
MOVI2R(W1, reg - 8); MOVI2R(W1, reg - 8);
BL(WriteBanked); BL(WriteBanked);
FixupBranch alreadyWritten = CBNZ(W4); if (!(reg == rn && skipLoadingRn))
if (RegCache.LoadedRegs & (1 << reg)) {
MOV(MapReg(reg), W3); FixupBranch alreadyWritten = CBNZ(W4);
else if (RegCache.LoadedRegs & (1 << reg))
SaveReg(reg, W3); MOV(MapReg(reg), W3);
SetJumpTarget(alreadyWritten); else
SaveReg(reg, W3);
SetJumpTarget(alreadyWritten);
}
} }
else if (!usermode && nextReg != regs.end()) else if (!usermode && nextReg != regs.end())
{ {
ARM64Reg first = W3, second = W4; ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg)) if (RegCache.LoadedRegs & (1 << reg) && !(reg == rn && skipLoadingRn))
first = MapReg(reg); first = MapReg(reg);
if (RegCache.LoadedRegs & (1 << *nextReg)) if (RegCache.LoadedRegs & (1 << *nextReg) && !(*nextReg == rn && skipLoadingRn))
second = MapReg(*nextReg); second = MapReg(*nextReg);
LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8); LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
@ -733,8 +751,11 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
} }
else if (RegCache.LoadedRegs & (1 << reg)) else if (RegCache.LoadedRegs & (1 << reg))
{ {
ARM64Reg mapped = MapReg(reg); if (!(reg == rn && skipLoadingRn))
LDR(INDEX_UNSIGNED, mapped, SP, i * 8); {
ARM64Reg mapped = MapReg(reg);
LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
}
} }
else else
{ {
@ -778,13 +799,13 @@ void Compiler::A_Comp_LDM_STM()
ARM64Reg rn = MapReg(CurInstr.A_Reg(16)); ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
if (load && writeback && regs[CurInstr.A_Reg(16)]) if (load && writeback && regs[CurInstr.A_Reg(16)])
writeback = Num == 0 writeback = Num == 0
? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1)) && (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1));
: false;
if (writeback) s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode, load && writeback);
if (writeback && offset)
{ {
if (offset > 0) if (offset > 0)
ADD(rn, rn, offset); ADD(rn, rn, offset);
@ -820,10 +841,12 @@ void Compiler::T_Comp_LDMIA_STMIA()
ARM64Reg rb = MapReg(CurInstr.T_Reg(8)); ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
bool load = CurInstr.Instr & (1 << 11); bool load = CurInstr.Instr & (1 << 11);
u32 regsCount = regs.Count(); u32 regsCount = regs.Count();
s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
if (!load || !regs[CurInstr.T_Reg(8)]) bool writeback = !load || !regs[CurInstr.T_Reg(8)];
s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false, load && writeback);
if (writeback && offset)
{ {
if (offset > 0) if (offset > 0)
ADD(rb, rb, offset); ADD(rb, rb, offset);

View File

@ -163,7 +163,7 @@ public:
memop_SubtractOffset = 1 << 4 memop_SubtractOffset = 1 << 4
}; };
void Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags); void Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags);
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode); s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn);
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr); bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),

View File

@ -399,14 +399,15 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
} }
} }
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode) s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn)
{ {
int regsCount = regs.Count(); int regsCount = regs.Count();
if (regsCount == 0) if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me return 0; // actually not the right behaviour TODO: fix me
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin())) int firstReg = *regs.begin();
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << firstReg) && !(firstReg == rn && skipLoadingRn))
{ {
int flags = 0; int flags = 0;
if (store) if (store)
@ -415,7 +416,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
flags |= memop_SubtractOffset; flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0); Op2 offset = preinc ? Op2(4) : Op2(0);
Comp_MemAccess(*regs.begin(), rn, offset, 32, flags); Comp_MemAccess(firstReg, rn, offset, 32, flags);
return decrement ? -4 : 4; return decrement ? -4 : 4;
} }
@ -482,7 +483,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
{ {
if (RegCache.LoadedRegs & (1 << reg)) if (RegCache.LoadedRegs & (1 << reg))
{ {
MOV(32, MapReg(reg), mem); if (!(reg == rn && skipLoadingRn))
MOV(32, MapReg(reg), mem);
else
MOV(32, R(RSCRATCH), mem); // just touch the memory
} }
else else
{ {
@ -548,12 +552,15 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
MOV(32, R(RSCRATCH2), Imm32(reg - 8)); MOV(32, R(RSCRATCH2), Imm32(reg - 8));
POP(RSCRATCH3); POP(RSCRATCH3);
CALL(WriteBanked); CALL(WriteBanked);
FixupBranch sucessfulWritten = J_CC(CC_NC); if (!(reg == rn && skipLoadingRn))
if (RegCache.LoadedRegs & (1 << reg)) {
MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3)); FixupBranch sucessfulWritten = J_CC(CC_NC);
else if (RegCache.LoadedRegs & (1 << reg))
SaveReg(reg, RSCRATCH3); MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
SetJumpTarget(sucessfulWritten); else
SaveReg(reg, RSCRATCH3);
SetJumpTarget(sucessfulWritten);
}
} }
else if (!(RegCache.LoadedRegs & (1 << reg))) else if (!(RegCache.LoadedRegs & (1 << reg)))
{ {
@ -562,6 +569,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
POP(RSCRATCH); POP(RSCRATCH);
SaveReg(reg, RSCRATCH); SaveReg(reg, RSCRATCH);
} }
else if (reg == rn && skipLoadingRn)
{
ADD(64, R(RSP), Imm8(8));
}
else else
{ {
POP(MapReg(reg).GetSimpleReg()); POP(MapReg(reg).GetSimpleReg());
@ -748,14 +759,14 @@ void Compiler::A_Comp_LDM_STM()
OpArg rn = MapReg(CurInstr.A_Reg(16)); OpArg rn = MapReg(CurInstr.A_Reg(16));
s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
if (load && writeback && regs[CurInstr.A_Reg(16)]) if (load && writeback && regs[CurInstr.A_Reg(16)])
writeback = Num == 0 writeback = Num == 0
? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1)) && (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1));
: false;
if (writeback) s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode, load && writeback);
ADD(32, rn, offset >= INT8_MIN && offset < INT8_MAX ? Imm8(offset) : Imm32(offset));
if (writeback && offset)
ADD(32, rn, Imm32(offset));
} }
void Compiler::T_Comp_MemImm() void Compiler::T_Comp_MemImm()
@ -825,9 +836,10 @@ void Compiler::T_Comp_PUSH_POP()
} }
OpArg sp = MapReg(13); OpArg sp = MapReg(13);
s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false); s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false, false);
ADD(32, sp, Imm8(offset)); // offset will always be in range since PUSH accesses 9 regs max if (offset)
ADD(32, sp, Imm8(offset)); // offset will always be in range since PUSH accesses 9 regs max
} }
void Compiler::T_Comp_LDMIA_STMIA() void Compiler::T_Comp_LDMIA_STMIA()
@ -836,9 +848,11 @@ void Compiler::T_Comp_LDMIA_STMIA()
OpArg rb = MapReg(CurInstr.T_Reg(8)); OpArg rb = MapReg(CurInstr.T_Reg(8));
bool load = CurInstr.Instr & (1 << 11); bool load = CurInstr.Instr & (1 << 11);
s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false); bool writeback = !load || !regs[CurInstr.T_Reg(8)];
if (!load || !regs[CurInstr.T_Reg(8)]) s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false, load && writeback);
if (writeback && offset)
ADD(32, rb, Imm8(offset)); ADD(32, rb, Imm8(offset));
} }