[ARM] Change all floating point load/stores to fastmem implementations except stfs, since floating point accesses tend to be to RAM space. stfs tends to be used to write quickly to the gatherpipe and other hardware areas, so it stays a safe store for now; look at the JIT64 implementation to see how to make it quicker.

This commit is contained in:
Ryan Houdek 2013-09-24 05:41:36 +00:00
parent 29dc253fde
commit eb6ed3e42a
4 changed files with 250 additions and 546 deletions

View File

@ -1338,7 +1338,7 @@ void NEONXEmitter::VREVX(u32 size, NEONElementType Size, ARMReg Vd, ARMReg Vm)
void NEONXEmitter::VREV64(NEONElementType Size, ARMReg Vd, ARMReg Vm)
{
VREVX(2, Size, Vd, Vm);
VREVX(0, Size, Vd, Vm);
}
void NEONXEmitter::VREV32(NEONElementType Size, ARMReg Vd, ARMReg Vm)
@ -1348,7 +1348,7 @@ void NEONXEmitter::VREV32(NEONElementType Size, ARMReg Vd, ARMReg Vm)
void NEONXEmitter::VREV16(NEONElementType Size, ARMReg Vd, ARMReg Vm)
{
VREVX(0, Size, Vd, Vm);
VREVX(2, Size, Vd, Vm);
}
void NEONXEmitter::VRSQRTE(NEONElementType Size, ARMReg Vd, ARMReg Vm)

View File

@ -214,22 +214,9 @@ public:
void fcmpu(UGeckoInstruction _inst);
// Floating point loadStore
void lfs(UGeckoInstruction _inst);
void lfsu(UGeckoInstruction _inst);
void lfsux(UGeckoInstruction _inst);
void lfsx(UGeckoInstruction _inst);
void lfd(UGeckoInstruction _inst);
void lfdx(UGeckoInstruction _inst);
void lfdu(UGeckoInstruction _inst);
void lfdux(UGeckoInstruction _inst);
void lfXX(UGeckoInstruction _inst);
void stfXX(UGeckoInstruction _inst);
void stfs(UGeckoInstruction _inst);
void stfsx(UGeckoInstruction _inst);
void stfsu(UGeckoInstruction _inst);
void stfsux(UGeckoInstruction _inst);
void stfd(UGeckoInstruction _inst);
void stfdx(UGeckoInstruction _inst);
void stfdu(UGeckoInstruction _inst);
void stfdux(UGeckoInstruction _inst);
// Paired Singles
void ps_add(UGeckoInstruction _inst);

View File

@ -32,310 +32,273 @@
#include "JitFPRCache.h"
#include "JitAsm.h"
void JitArm::lfs(UGeckoInstruction inst)
void JitArm::lfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RA;
ARMReg v0 = fpr.R0(inst.FD);
ARMReg v1 = fpr.R1(inst.FD);
u32 a = inst.RA, b = inst.RB;
if (inst.RA)
s32 offset = inst.SIMM_16;
bool single = false;
bool update = false;
bool zeroA = false;
s32 offsetReg = -1;
switch (inst.OPCD)
{
MOVI2R(rB, inst.SIMM_16);
ARMReg RA = gpr.R(inst.RA);
ADD(rB, rB, RA);
case 31:
switch(inst.SUBOP10)
{
case 567: // lfsux
single = true;
update = true;
offsetReg = b;
break;
case 535: // lfsx
single = true;
zeroA = true;
offsetReg = b;
break;
case 631: // lfdux
update = true;
offsetReg = b;
break;
case 599: // lfdx
zeroA = true;
offsetReg = b;
break;
}
break;
case 49: // lfsu
update = true;
single = true;
break;
case 48: // lfs
single = true;
zeroA = true;
break;
case 51: // lfdu
update = true;
break;
case 50: // lfd
zeroA = true;
break;
}
ARMReg v0 = fpr.R0(inst.FD), v1;
if (single)
v1 = fpr.R1(inst.FD);
if (update)
{
RA = gpr.R(a);
// Update path /always/ uses RA
if (offsetReg == -1) // uses SIMM_16
{
MOVI2R(rB, offset);
ADD(rB, rB, RA);
}
else
ADD(rB, gpr.R(offsetReg), RA);
}
else
MOVI2R(rB, (u32)inst.SIMM_16);
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
MOVI2R(rA, (u32)&Memory::Read_U32);
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
VMOV(S0, R0);
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
// lfsu: single-precision float load with update, immediate offset.
// Emits a call to Memory::Read_U32 at RA + SIMM_16, converts the 32-bit result
// into both fpr.R0(FD) and fpr.R1(FD), and writes the computed address back to RA.
void JitArm::lfsu(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RA = gpr.R(inst.RA);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FD);
ARMReg v1 = fpr.R1(inst.FD);
// rB = RA + SIMM_16 (effective address)
MOVI2R(rB, inst.SIMM_16);
ADD(rB, rB, RA);
// If the Exceptions field equals EXCEPTION_DSI, branch past the load and the RA update.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
MOVI2R(rA, (u32)&Memory::Read_U32);
// Update form: RA receives the effective address.
MOV(RA, rB);
// Preserve R0-R3 around the call; the address is passed in R0.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
// The result comes back in R0; move it to S0 before R0 is restored by the POP.
VMOV(S0, R0);
// NOTE(review): VCVT here is presumably single -> double; confirm against the emitter.
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
// lfsux: single-precision float load with update, register-indexed.
// Same sequence as the immediate update form, but the address is RA + RB.
void JitArm::lfsux(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RA = gpr.R(inst.RA);
ARMReg RB = gpr.R(inst.RB);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FD);
ARMReg v1 = fpr.R1(inst.FD);
// rB = RB + RA (effective address)
ADD(rB, RB, RA);
// If the Exceptions field equals EXCEPTION_DSI, branch past the load and the RA update.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
MOVI2R(rA, (u32)&Memory::Read_U32);
// Update form: RA receives the effective address.
MOV(RA, rB);
// Preserve R0-R3 around the call; the address is passed in R0.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
// The result comes back in R0; move it to S0 before R0 is restored by the POP.
VMOV(S0, R0);
// NOTE(review): VCVT here is presumably single -> double; confirm against the emitter.
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
// lfsx: single-precision float load, register-indexed, no update.
// The address is RB when the RA field is 0, otherwise RA + RB. The value is read
// through Memory::Read_U32 and converted into both fpr.R0(FD) and fpr.R1(FD).
void JitArm::lfsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RB = gpr.R(inst.RB);
ARMReg v0 = fpr.R0(inst.FD);
ARMReg v1 = fpr.R1(inst.FD);
// An RA field of 0 means "no base register", so only RB contributes.
if (inst.RA)
ADD(rB, RB, gpr.R(inst.RA));
else
MOV(rB, RB);
// If the Exceptions field equals EXCEPTION_DSI, branch past the load.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
MOVI2R(rA, (u32)&Memory::Read_U32);
// Preserve R0-R3 around the call; the address is passed in R0.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
// The result comes back in R0; move it to S0 before R0 is restored by the POP.
VMOV(S0, R0);
// NOTE(review): VCVT here is presumably single -> double; confirm against the emitter.
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
void JitArm::lfd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FD);
if (inst.RA)
{
MOVI2R(rB, inst.SIMM_16);
ARMReg RA = gpr.R(inst.RA);
ADD(rB, rB, RA);
if (zeroA)
{
if (offsetReg == -1)
{
if (a)
{
RA = gpr.R(a);
MOVI2R(rB, offset);
ADD(rB, rB, RA);
}
else
MOVI2R(rB, (u32)offset);
}
else
{
ARMReg RB = gpr.R(offsetReg);
if (a)
{
RA = gpr.R(a);
ADD(rB, RB, RA);
}
else
MOV(rB, RB);
}
}
}
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
if (update)
MOV(RA, rB);
Operand2 mask(3, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(rB, rB, mask); // 1
MOVI2R(rA, (u32)Memory::base, false); // 2-3
ADD(rB, rB, rA); // 4
NEONXEmitter nemit(this);
if (single)
{
VLDR(S0, rB, 0);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
MOVI2R(rB, (u32)inst.SIMM_16);
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
MOVI2R(rA, (u32)&Memory::Read_F64);
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(v0, R0);
#else
VMOV(v0, D0);
#endif
POP(4, R0, R1, R2, R3);
{
VLDR(v0, rB, 0);
nemit.VREV64(I_8, v0, v0); // Byte swap to result
}
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
void JitArm::lfdx(UGeckoInstruction inst)
void JitArm::stfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RB = gpr.R(inst.RB);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RA;
ARMReg v0 = fpr.R0(inst.FD);
u32 a = inst.RA, b = inst.RB;
if (inst.RA)
s32 offset = inst.SIMM_16;
bool single = false;
bool update = false;
bool zeroA = false;
s32 offsetReg = -1;
switch (inst.OPCD)
{
ARMReg RA = gpr.R(inst.RA);
ADD(rB, RB, RA);
case 31:
switch(inst.SUBOP10)
{
case 663: // stfsx
single = true;
zeroA = true;
offsetReg = b;
break;
case 695: // stfsux
single = true;
offsetReg = b;
break;
case 727: // stfdx
zeroA = true;
offsetReg = b;
break;
case 759: // stfdux
update = true;
offsetReg = b;
break;
}
break;
case 53: // stfsu
update = true;
single = true;
break;
case 52: // stfs
single = true;
zeroA = true;
break;
case 55: // stfdu
update = true;
break;
case 54: // stfd
zeroA = true;
break;
}
ARMReg v0 = fpr.R0(inst.FS);
if (update)
{
RA = gpr.R(a);
// Update path /always/ uses RA
if (offsetReg == -1) // uses SIMM_16
{
MOVI2R(rB, offset);
ADD(rB, rB, RA);
}
else
ADD(rB, gpr.R(offsetReg), RA);
}
else
MOV(rB, RB);
{
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
if (zeroA)
{
if (offsetReg == -1)
{
if (a)
{
RA = gpr.R(a);
MOVI2R(rB, offset);
ADD(rB, rB, RA);
}
else
MOVI2R(rB, (u32)offset);
}
else
{
ARMReg RB = gpr.R(offsetReg);
if (a)
{
RA = gpr.R(a);
ADD(rB, RB, RA);
}
else
MOV(rB, RB);
}
}
}
MOVI2R(rA, (u32)&Memory::Read_F64);
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
if (update)
{
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(v0, R0);
#else
VMOV(v0, D0);
#endif
SetCC(CC_NEQ);
MOV(RA, rB);
SetCC();
}
POP(4, R0, R1, R2, R3);
Operand2 mask(3, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(rB, rB, mask); // 1
MOVI2R(rA, (u32)Memory::base, false); // 2-3
ADD(rB, rB, rA); // 4
NEONXEmitter nemit(this);
if (single)
{
VCVT(S0, v0, 0);
nemit.VREV32(I_8, D0, D0);
VSTR(S0, rB, 0);
}
else
{
nemit.VREV64(I_8, D0, v0);
VSTR(D0, rB, 0);
}
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
// lfdu: double-precision float load with update, immediate offset.
// Emits a call to Memory::Read_F64 at RA + SIMM_16, moves the result into
// fpr.R0(FD), and writes the computed address back to RA.
void JitArm::lfdu(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RA = gpr.R(inst.RA);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FD);
// rB = RA + SIMM_16 (effective address)
MOVI2R(rB, inst.SIMM_16);
ADD(rB, rB, RA);
// If the Exceptions field equals EXCEPTION_DSI, branch past the load and the RA update.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
MOVI2R(rA, (u32)&Memory::Read_F64);
// Update form: RA receives the effective address.
MOV(RA, rB);
// Preserve R0-R3 around the call; the address is passed in R0.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
// Where the double result lives depends on the float ABI the emulator was built with.
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(v0, R0);
#else
VMOV(v0, D0);
#endif
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
// lfdux: double-precision float load with update, register-indexed.
// Same sequence as the immediate update form, but the address is RA + RB.
void JitArm::lfdux(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RA = gpr.R(inst.RA);
ARMReg RB = gpr.R(inst.RB);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FD);
// rB = RB + RA (effective address)
ADD(rB, RB, RA);
// If the Exceptions field equals EXCEPTION_DSI, branch past the load and the RA update.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
MOVI2R(rA, (u32)&Memory::Read_F64);
// Update form: RA receives the effective address.
MOV(RA, rB);
// Preserve R0-R3 around the call; the address is passed in R0.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
// Where the double result lives depends on the float ABI the emulator was built with.
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(v0, R0);
#else
VMOV(v0, D0);
#endif
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
// Some games use stfs as a way to quickly write to the gatherpipe and other hardware areas.
// Keep it as a safe store until this can get optimized.
// Look at the JIT64 implementation to see how it is done
void JitArm::stfs(UGeckoInstruction inst)
{
@ -369,249 +332,3 @@ void JitArm::stfs(UGeckoInstruction inst)
gpr.Unlock(rA, rB);
}
// stfsx: single-precision float store, register-indexed, no update.
// Converts fpr.R0(FS) into S0 and emits a call to Memory::Write_U32 with the
// value in R0 and the address (RB, or RA + RB when the RA field is non-zero) in R1.
// Unlike the load handlers, no EXCEPTION_DSI check is emitted here.
void JitArm::stfsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RB = gpr.R(inst.RB);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
// NOTE(review): VCVT here is presumably double -> single; confirm against the emitter.
VCVT(S0, v0, 0);
// An RA field of 0 means "no base register", so only RB contributes.
if (inst.RA)
{
ARMReg RA = gpr.R(inst.RA);
ADD(rB, RB, RA);
}
else
MOV(rB, RB);
MOVI2R(rA, (u32)&Memory::Write_U32);
// Preserve R0-R3 around the call; arguments are value in R0, address in R1.
PUSH(4, R0, R1, R2, R3);
VMOV(R0, S0);
MOV(R1, rB);
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
}
// stfsu: single-precision float store with update, immediate offset.
// Converts fpr.R0(FS) into S0 and emits a call to Memory::Write_U32 at RA + SIMM_16.
// RA is updated only when Exceptions does not equal EXCEPTION_DSI; the store call
// itself is emitted unconditionally.
void JitArm::stfsu(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RA = gpr.R(inst.RA);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
// NOTE(review): VCVT here is presumably double -> single; confirm against the emitter.
VCVT(S0, v0, 0);
// rB = RA + SIMM_16 (effective address)
MOVI2R(rB, inst.SIMM_16);
ADD(rB, rB, RA);
// Conditional update: the MOV between the SetCC calls is emitted with condition NEQ.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
SetCC(CC_NEQ);
MOV(RA, rB);
SetCC();
MOVI2R(rA, (u32)&Memory::Write_U32);
// Preserve R0-R3 around the call; arguments are value in R0, address in R1.
PUSH(4, R0, R1, R2, R3);
VMOV(R0, S0);
MOV(R1, rB);
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
}
// stfsux: single-precision float store with update, register-indexed.
// Converts fpr.R0(FS) into S0 and emits a call to Memory::Write_U32 at RA + RB.
// RA is updated only when Exceptions does not equal EXCEPTION_DSI; the store call
// itself is emitted unconditionally.
void JitArm::stfsux(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RA = gpr.R(inst.RA);
ARMReg RB = gpr.R(inst.RB);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
// NOTE(review): VCVT here is presumably double -> single; confirm against the emitter.
VCVT(S0, v0, 0);
// rB = RB + RA (effective address)
ADD(rB, RB, RA);
// Conditional update: the MOV between the SetCC calls is emitted with condition NEQ.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
SetCC(CC_NEQ);
MOV(RA, rB);
SetCC();
MOVI2R(rA, (u32)&Memory::Write_U32);
// Preserve R0-R3 around the call; arguments are value in R0, address in R1.
PUSH(4, R0, R1, R2, R3);
VMOV(R0, S0);
MOV(R1, rB);
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
}
// stfd: double-precision float store, immediate offset, no update.
// Emits a call to Memory::Write_F64 with fpr.R0(FS) as the value and
// SIMM_16 (plus RA when the RA field is non-zero) as the address.
// No EXCEPTION_DSI check is emitted here.
void JitArm::stfd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
// An RA field of 0 means "no base register", so the address is just the immediate.
if (inst.RA)
{
MOVI2R(rB, inst.SIMM_16);
ARMReg RA = gpr.R(inst.RA);
ADD(rB, rB, RA);
}
else
MOVI2R(rB, (u32)inst.SIMM_16);
MOVI2R(rA, (u32)&Memory::Write_F64);
PUSH(4, R0, R1, R2, R3);
// Argument placement depends on the float ABI: the first branch puts the value in
// core registers starting at R0 and the address in R2; the second uses D0 and R0.
// NOTE(review): the trailing comment below says "returns", but these are call arguments.
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(R0, v0);
MOV(R2, rB);
#else
VMOV(D0, v0);
MOV(R0, rB);
#endif
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
}
// stfdx: double-precision float store, register-indexed, no update.
// Emits a call to Memory::Write_F64 with fpr.R0(FS) as the value and
// RB (plus RA when the RA field is non-zero) as the address.
// No EXCEPTION_DSI check is emitted here.
void JitArm::stfdx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RB = gpr.R(inst.RB);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
// An RA field of 0 means "no base register", so only RB contributes.
if (inst.RA)
{
ARMReg RA = gpr.R(inst.RA);
ADD(rB, RB, RA);
}
else
MOV(rB, RB);
MOVI2R(rA, (u32)&Memory::Write_F64);
PUSH(4, R0, R1, R2, R3);
// Argument placement depends on the float ABI: the first branch puts the value in
// core registers starting at R0 and the address in R2; the second uses D0 and R0.
// NOTE(review): the trailing comment below says "returns", but these are call arguments.
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(R0, v0);
MOV(R2, rB);
#else
VMOV(D0, v0);
MOV(R0, rB);
#endif
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
}
// stfdu: double-precision float store with update, immediate offset.
// Emits a call to Memory::Write_F64 at RA + SIMM_16 with fpr.R0(FS) as the value.
// RA is updated only when Exceptions does not equal EXCEPTION_DSI; the store call
// itself is emitted unconditionally.
void JitArm::stfdu(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RA = gpr.R(inst.RA);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
// rB = RA + SIMM_16 (effective address)
MOVI2R(rB, inst.SIMM_16);
ADD(rB, rB, RA);
// Conditional update: the MOV between the SetCC calls is emitted with condition NEQ.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
SetCC(CC_NEQ);
MOV(RA, rB);
SetCC();
MOVI2R(rA, (u32)&Memory::Write_F64);
PUSH(4, R0, R1, R2, R3);
// Argument placement depends on the float ABI: the first branch puts the value in
// core registers starting at R0 and the address in R2; the second uses D0 and R0.
// NOTE(review): the trailing comment below says "returns", but these are call arguments.
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(R0, v0);
MOV(R2, rB);
#else
VMOV(D0, v0);
MOV(R0, rB);
#endif
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
}
// stfdux: double-precision float store with update, register-indexed.
// Emits a call to Memory::Write_F64 at RA + RB with fpr.R0(FS) as the value.
// RA is updated only when Exceptions does not equal EXCEPTION_DSI; the store call
// itself is emitted unconditionally.
void JitArm::stfdux(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff)
ARMReg RA = gpr.R(inst.RA);
ARMReg RB = gpr.R(inst.RB);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg v0 = fpr.R0(inst.FS);
// rB = RB + RA (effective address)
ADD(rB, RB, RA);
// Conditional update: the MOV between the SetCC calls is emitted with condition NEQ.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
SetCC(CC_NEQ);
MOV(RA, rB);
SetCC();
MOVI2R(rA, (u32)&Memory::Write_F64);
PUSH(4, R0, R1, R2, R3);
// Argument placement depends on the float ABI: the first branch puts the value in
// core registers starting at R0 and the address in R2; the second uses D0 and R0.
// NOTE(review): the trailing comment below says "returns", but these are call arguments.
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(R0, v0);
MOV(R2, rB);
#else
VMOV(D0, v0);
MOV(R0, rB);
#endif
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB);
}

View File

@ -97,15 +97,15 @@ static GekkoOPTemplate primarytable[] =
{46, &JitArm::lmw}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{47, &JitArm::stmw}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{48, &JitArm::lfs}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
{49, &JitArm::lfsu}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{50, &JitArm::lfd}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm::lfdu}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{48, &JitArm::lfXX}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
{49, &JitArm::lfXX}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{50, &JitArm::lfXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
{51, &JitArm::lfXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
{52, &JitArm::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
{53, &JitArm::stfsu}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm::stfd}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm::stfdu}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{53, &JitArm::stfXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{54, &JitArm::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &JitArm::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{56, &JitArm::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &JitArm::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
@ -268,15 +268,15 @@ static GekkoOPTemplate table31[] =
{725, &JitArm::Default}, //"stswi", OPTYPE_STORE, FL_EVIL}},
// fp load/store
{535, &JitArm::lfsx}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{567, &JitArm::lfsux}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{599, &JitArm::lfdx}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{631, &JitArm::lfdux}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{535, &JitArm::lfXX}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{567, &JitArm::lfXX}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{599, &JitArm::lfXX}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
{631, &JitArm::lfXX}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
{663, &JitArm::stfsx}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{695, &JitArm::stfsux}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{727, &JitArm::stfdx}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{759, &JitArm::stfdux}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{663, &JitArm::stfXX}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{695, &JitArm::stfXX}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{727, &JitArm::stfXX}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{759, &JitArm::stfXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
{983, &JitArm::Default}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
{19, &JitArm::mfcr}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},