Reimplements fastmem for ARMv7 floating point loadstores.
This implements a new system for fastmem backpatching on ARMv7 that is far less of a headache to deal with. It also folds stfs into the default floating point loadstore path; it is not clear why it had its own implementation in the first place. I'll be moving the rest of the loadstore methods over to the new system in the next few days.
commit 181f16c5f0
parent e47bfc2788
@@ -205,7 +205,6 @@ public:
 	// Floating point loadStore
 	void lfXX(UGeckoInstruction _inst);
 	void stfXX(UGeckoInstruction _inst);
-	void stfs(UGeckoInstruction _inst);
 
 	// Paired Singles
 	void ps_add(UGeckoInstruction _inst);
@@ -17,7 +17,7 @@ using namespace ArmGen;
 // 1) It's really necessary. We don't know anything about the context.
 // 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
 // that many of them in a typical program/game.
-static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store)
+static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store, bool *new_system)
 {
 	u8 op = (inst >> 20) & 0xFF;
 	rD = (ARMReg)((inst >> 12) & 0xF);
@@ -61,8 +61,23 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto
 	}
 	break;
 	default:
-		printf("Op is 0x%02x\n", op);
-		return false;
+	{
+		// Could be a floating point loadstore
+		u8 op2 = (inst >> 24) & 0xF;
+		switch (op2)
+		{
+			case 0xD: // VLDR/VSTR
+				*new_system = true;
+			break;
+			case 0x4: // VST1/VLD1
+				*new_system = true;
+			break;
+			default:
+				printf("Op is 0x%02x\n", op);
+				return false;
+			break;
+		}
+	}
 	}
 	return true;
 }
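For reference, the op2 value tested above is bits 27:24 of the faulting ARM instruction word: VLDR and VSTR encode 0b1101 (0xD) there, while the NEON VLD1 and VST1 forms encode 0b0100 (0x4). A minimal standalone sketch of the same classification (the helper name and standalone types are illustrative only; the real code sets *new_system inside DisamLoadStore):

    #include <cstdint>

    // Sketch: true when the faulting instruction is one of the floating point
    // loadstores that the new backpatch system handles.
    static bool IsNewSystemLoadStore(uint32_t inst)
    {
        uint8_t op2 = (inst >> 24) & 0xF;
        return op2 == 0xD    // VLDR/VSTR
            || op2 == 0x4;   // VLD1/VST1
    }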
@@ -70,10 +85,7 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto
 bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx)
 {
 	if (access_address < (uintptr_t)Memory::base)
-	{
-		PanicAlertT("Exception handler - access below memory space. %08llx%08llx",
-			access_address >> 32, access_address);
-	}
+		PanicAlertT("Exception handler - access below memory space. 0x%08x", access_address);
 	return BackPatch(ctx);
 }
 
@@ -87,66 +99,90 @@ bool JitArm::BackPatch(SContext* ctx)
 	ARMReg rD;
 	u8 accessSize;
 	bool Store;
+	bool new_system = false;
 
-	if (!DisamLoadStore(Value, rD, accessSize, Store))
+	if (!DisamLoadStore(Value, rD, accessSize, Store, &new_system))
 	{
 		printf("Invalid backpatch at location 0x%08lx(0x%08x)\n", ctx->CTX_PC, Value);
 		exit(0);
 	}
 
-	if (Store)
+	if (new_system)
 	{
-		const u32 ARMREGOFFSET = 4 * 5;
-		ARMXEmitter emitter(codePtr - ARMREGOFFSET);
-		switch (accessSize)
-		{
-			case 8: // 8bit
-				emitter.MOVI2R(R14, (u32)&Memory::Write_U8, false); // 1-2
-			return 0;
-			break;
-			case 16: // 16bit
-				emitter.MOVI2R(R14, (u32)&Memory::Write_U16, false); // 1-2
-			return 0;
-			break;
-			case 32: // 32bit
-				emitter.MOVI2R(R14, (u32)&Memory::Write_U32, false); // 1-2
-			break;
-		}
-		emitter.PUSH(4, R0, R1, R2, R3); // 3
-		emitter.MOV(R0, rD); // Value - 4
-		emitter.MOV(R1, R10); // Addr- 5
-		emitter.BL(R14); // 6
-		emitter.POP(4, R0, R1, R2, R3); // 7
-		u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4);
-		ctx->CTX_PC = newPC;
+		// The new system is a lot easier to backpatch than the old crap.
+		// Instead of backpatching over code and making sure we NOP pad and other crap
+		// We emit both the slow and fast path and branch over the slow path each time
+		// We search backwards until we find the second branch instruction
+		// Then proceed to replace it with a NOP and set that to the new PC.
+		// This ensures that we run the slow path and then branch over the fast path.
+
+		// Run backwards until we find the branch we want to NOP
+		for (int branches = 2; branches > 0; ctx->CTX_PC -= 4)
+			if ((*(u32*)ctx->CTX_PC & 0x0F000000) == 0x0A000000) // B
+				--branches;
+
+		ctx->CTX_PC += 4;
+		ARMXEmitter emitter((u8*)ctx->CTX_PC);
+		emitter.NOP(1);
 		emitter.FlushIcache();
 		return true;
 	}
 	else
 	{
-		const u32 ARMREGOFFSET = 4 * 4;
-		ARMXEmitter emitter(codePtr - ARMREGOFFSET);
-		switch (accessSize)
+		if (Store)
 		{
-			case 8: // 8bit
-				emitter.MOVI2R(R14, (u32)&Memory::Read_U8, false); // 2
-			break;
-			case 16: // 16bit
-				emitter.MOVI2R(R14, (u32)&Memory::Read_U16, false); // 2
-			break;
-			case 32: // 32bit
-				emitter.MOVI2R(R14, (u32)&Memory::Read_U32, false); // 2
-			break;
+			const u32 ARMREGOFFSET = 4 * 5;
+			ARMXEmitter emitter(codePtr - ARMREGOFFSET);
+			switch (accessSize)
+			{
+				case 8: // 8bit
+					emitter.MOVI2R(R14, (u32)&Memory::Write_U8, false); // 1-2
+				return 0;
+				break;
+				case 16: // 16bit
+					emitter.MOVI2R(R14, (u32)&Memory::Write_U16, false); // 1-2
+				return 0;
+				break;
+				case 32: // 32bit
+					emitter.MOVI2R(R14, (u32)&Memory::Write_U32, false); // 1-2
+				break;
+			}
+			emitter.PUSH(4, R0, R1, R2, R3); // 3
+			emitter.MOV(R0, rD); // Value - 4
+			emitter.MOV(R1, R10); // Addr- 5
+			emitter.BL(R14); // 6
+			emitter.POP(4, R0, R1, R2, R3); // 7
+			u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4);
+			ctx->CTX_PC = newPC;
+			emitter.FlushIcache();
+			return true;
 		}
-		emitter.PUSH(4, R0, R1, R2, R3); // 3
-		emitter.MOV(R0, R10); // 4
-		emitter.BL(R14); // 5
-		emitter.MOV(R14, R0); // 6
-		emitter.POP(4, R0, R1, R2, R3); // 7
-		emitter.MOV(rD, R14); // 8
-		ctx->CTX_PC -= ARMREGOFFSET + (4 * 4);
-		emitter.FlushIcache();
-		return true;
+		else
+		{
+			const u32 ARMREGOFFSET = 4 * 4;
+			ARMXEmitter emitter(codePtr - ARMREGOFFSET);
+			switch (accessSize)
+			{
+				case 8: // 8bit
+					emitter.MOVI2R(R14, (u32)&Memory::Read_U8, false); // 2
+				break;
+				case 16: // 16bit
+					emitter.MOVI2R(R14, (u32)&Memory::Read_U16, false); // 2
+				break;
+				case 32: // 32bit
+					emitter.MOVI2R(R14, (u32)&Memory::Read_U32, false); // 2
+				break;
+			}
+			emitter.PUSH(4, R0, R1, R2, R3); // 3
+			emitter.MOV(R0, R10); // 4
+			emitter.BL(R14); // 5
+			emitter.MOV(R14, R0); // 6
+			emitter.POP(4, R0, R1, R2, R3); // 7
+			emitter.MOV(rD, R14); // 8
+			ctx->CTX_PC -= ARMREGOFFSET + (4 * 4);
+			emitter.FlushIcache();
+			return true;
+		}
 	}
 	return 0;
 }
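To make the new comments concrete: under the new system the JIT emits both paths for every floating point loadstore and lets the fast path run first, so BackPatch() only has to kill one branch when that fast path faults. A sketch of the emitted layout and the patch test (label names are illustrative, not identifiers from the source):

    // Layout emitted per floating point loadstore (see lfXX/stfXX below):
    //
    //       B    fast_path    // the branch that BackPatch() later NOPs
    //       ...slow path...   // PUSH, call Memory::Read_*/Write_*, POP
    //       B    slow_out     // skips the fast path once the slow path ran
    //   fast_path:
    //       ...fast path...   // mask address, add Memory::base, VLD1/VSTR
    //   slow_out:
    //       ...

    #include <cstdint>

    // The test used while walking backwards from the faulting PC: bits 27:24
    // of an ARM instruction word are 0b1010 for a plain B instruction.
    static bool IsBranchInstruction(uint32_t inst)
    {
        return (inst & 0x0F000000) == 0x0A000000;
    }

BackPatch() walks backwards until it has passed two such branches, overwrites the earlier one (the branch into the fast path) with a NOP, and resumes execution there, so the block takes the slow path from then on.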
@@ -77,9 +77,9 @@ void JitArm::lfXX(UGeckoInstruction inst)
 		break;
 	}
 
-	ARMReg v0 = fpr.R0(inst.FD), v1;
+	ARMReg v0 = fpr.R0(inst.FD, false), v1;
 	if (single)
-		v1 = fpr.R1(inst.FD);
+		v1 = fpr.R1(inst.FD, false);
 
 	if (update)
 	{
@@ -134,28 +134,9 @@ void JitArm::lfXX(UGeckoInstruction inst)
 	if (update)
 		MOV(RA, rB);
 
-	if (false)
-	{
-		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
-		BIC(rB, rB, mask); // 1
-		MOVI2R(rA, (u32)Memory::base, false); // 2-3
-		ADD(rB, rB, rA); // 4
-
-		NEONXEmitter nemit(this);
-		if (single)
-		{
-			VLDR(S0, rB, 0);
-			nemit.VREV32(I_8, D0, D0); // Byte swap to result
-			VCVT(v0, S0, 0);
-			VCVT(v1, S0, 0);
-		}
-		else
-		{
-			VLDR(v0, rB, 0);
-			nemit.VREV64(I_8, v0, v0); // Byte swap to result
-		}
-	}
-	else
+	// This branch gets changed to a NOP when the fastpath fails
+	FixupBranch fast_path = B();
+	FixupBranch slow_out;
 	{
 		PUSH(4, R0, R1, R2, R3);
 		MOV(R0, rB);
@@ -163,9 +144,7 @@ void JitArm::lfXX(UGeckoInstruction inst)
 		{
 			MOVI2R(rA, (u32)&Memory::Read_U32);
 			BL(rA);
-
 			VMOV(S0, R0);
-
 			VCVT(v0, S0, 0);
 			VCVT(v1, S0, 0);
 		}
@@ -181,7 +160,34 @@ void JitArm::lfXX(UGeckoInstruction inst)
 #endif
 		}
 		POP(4, R0, R1, R2, R3);
+		slow_out = B();
 	}
+	SetJumpTarget(fast_path);
+	{
+		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
+		ARMReg rC = gpr.GetReg();
+		BIC(rC, rB, mask);
+		MOVI2R(rA, (u32)Memory::base);
+		ADD(rC, rC, rA);
+
+		NEONXEmitter nemit(this);
+		if (single)
+		{
+			nemit.VLD1(F_32, D0, rC);
+			nemit.VREV32(I_8, D0, D0); // Byte swap to result
+			VCVT(v0, S0, 0);
+			VCVT(v1, S0, 0);
+		}
+		else
+		{
+			nemit.VLD1(I_64, v0, rC);
+			nemit.VREV64(I_8, v0, v0); // Byte swap to result
+		}
+		gpr.Unlock(rC);
+	}
+
+	SetJumpTarget(slow_out);
+
 	gpr.Unlock(rA, rB);
 	SetJumpTarget(DoNotLoad);
 }
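In C terms, the fast path emitted above dereferences host memory directly instead of calling Memory::Read_U32. A sketch of the address computation, assuming Memory::base plus the masked guest address yields the backing host pointer (the helper and its parameters are illustrative only):

    #include <cstdint>

    // BIC clears the bits outside Memory::MEMVIEW32_MASK, then MOVI2R + ADD
    // rebase the remaining offset onto the host-side memory view.
    static inline uint8_t* FastmemHostAddress(uint8_t* membase,
                                              uint32_t memview_mask,
                                              uint32_t guest_addr)
    {
        return membase + (guest_addr & memview_mask);
    }

The VLD1 through that pointer either succeeds, with VREV32/VREV64 byte swapping the big endian value, or faults, in which case BackPatch() switches the block over to the slow path permanently.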
@@ -302,36 +308,17 @@ void JitArm::stfXX(UGeckoInstruction inst)
 		SetCC();
 	}
 
-	if (false)
-	{
-		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
-		BIC(rB, rB, mask); // 1
-		MOVI2R(rA, (u32)Memory::base, false); // 2-3
-		ADD(rB, rB, rA); // 4
-
-		NEONXEmitter nemit(this);
-		if (single)
-		{
-			VCVT(S0, v0, 0);
-			nemit.VREV32(I_8, D0, D0);
-			VSTR(S0, rB, 0);
-		}
-		else
-		{
-			nemit.VREV64(I_8, D0, v0);
-			VSTR(D0, rB, 0);
-		}
-	}
-	else
+	// This branch gets changed to a NOP when the fastpath fails
+	FixupBranch fast_path = B();
+	FixupBranch slow_out;
 	{
 		PUSH(4, R0, R1, R2, R3);
 		if (single)
 		{
-			MOVI2R(rA, (u32)&Memory::Write_U32);
+			MOV(R1, rB);
 			VCVT(S0, v0, 0);
 			VMOV(R0, S0);
-			MOV(R1, rB);
-
+			MOVI2R(rA, (u32)&Memory::Write_U32);
 			BL(rA);
 		}
 		else
|
@ -347,43 +334,32 @@ void JitArm::stfXX(UGeckoInstruction inst)
|
||||||
BL(rA);
|
BL(rA);
|
||||||
}
|
}
|
||||||
POP(4, R0, R1, R2, R3);
|
POP(4, R0, R1, R2, R3);
|
||||||
|
slow_out = B();
|
||||||
}
|
}
|
||||||
gpr.Unlock(rA, rB);
|
SetJumpTarget(fast_path);
|
||||||
}
|
|
||||||
|
|
||||||
// Some games use stfs as a way to quickly write to the gatherpipe and other hardware areas.
|
|
||||||
// Keep it as a safe store until this can get optimized.
|
|
||||||
// Look at the JIT64 implementation to see how it is done
|
|
||||||
|
|
||||||
void JitArm::stfs(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
|
||||||
|
|
||||||
ARMReg rA = gpr.GetReg();
|
|
||||||
ARMReg rB = gpr.GetReg();
|
|
||||||
ARMReg v0 = fpr.R0(inst.FS);
|
|
||||||
VCVT(S0, v0, 0);
|
|
||||||
|
|
||||||
if (inst.RA)
|
|
||||||
{
|
{
|
||||||
MOVI2R(rB, inst.SIMM_16);
|
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
|
||||||
ARMReg RA = gpr.R(inst.RA);
|
ARMReg rC = gpr.GetReg();
|
||||||
ADD(rB, rB, RA);
|
BIC(rC, rB, mask);
|
||||||
}
|
MOVI2R(rA, (u32)Memory::base);
|
||||||
else
|
ADD(rC, rC, rA);
|
||||||
{
|
|
||||||
MOVI2R(rB, (u32)inst.SIMM_16);
|
NEONXEmitter nemit(this);
|
||||||
|
if (single)
|
||||||
|
{
|
||||||
|
VCVT(S0, v0, 0);
|
||||||
|
nemit.VREV32(I_8, D0, D0);
|
||||||
|
VSTR(S0, rC, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
nemit.VREV64(I_8, D0, v0);
|
||||||
|
VSTR(D0, rC, 0);
|
||||||
|
}
|
||||||
|
gpr.Unlock(rC);
|
||||||
}
|
}
|
||||||
|
|
||||||
MOVI2R(rA, (u32)&Memory::Write_U32);
|
SetJumpTarget(slow_out);
|
||||||
PUSH(4, R0, R1, R2, R3);
|
|
||||||
VMOV(R0, S0);
|
|
||||||
MOV(R1, rB);
|
|
||||||
|
|
||||||
BL(rA);
|
|
||||||
|
|
||||||
POP(4, R0, R1, R2, R3);
|
|
||||||
|
|
||||||
gpr.Unlock(rA, rB);
|
gpr.Unlock(rA, rB);
|
||||||
}
|
}
|
||||||
|
|
|
@@ -89,7 +89,7 @@ static GekkoOPTemplate primarytable[] =
 	{50, &JitArm::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
 	{51, &JitArm::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
 
-	{52, &JitArm::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
+	{52, &JitArm::stfXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
 	{53, &JitArm::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
 	{54, &JitArm::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
 	{55, &JitArm::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},