Merge pull request #876 from FioraAeterna/floatloadstore
JIT64: clean up and unify float load/store code
This commit is contained in:
commit
4a78a8a72a
|
@ -193,11 +193,9 @@ public:
|
|||
|
||||
void cntlzwx(UGeckoInstruction inst);
|
||||
|
||||
void lfs(UGeckoInstruction inst);
|
||||
void lfd(UGeckoInstruction inst);
|
||||
void stfd(UGeckoInstruction inst);
|
||||
void stfs(UGeckoInstruction inst);
|
||||
void stfsx(UGeckoInstruction inst);
|
||||
void lfXXX(UGeckoInstruction inst);
|
||||
void stfXXX(UGeckoInstruction inst);
|
||||
void stfiwx(UGeckoInstruction inst);
|
||||
void psq_l(UGeckoInstruction inst);
|
||||
void psq_st(UGeckoInstruction inst);
|
||||
|
||||
|
@ -212,7 +210,6 @@ public:
|
|||
void srwx(UGeckoInstruction inst);
|
||||
void dcbst(UGeckoInstruction inst);
|
||||
void dcbz(UGeckoInstruction inst);
|
||||
void lfsx(UGeckoInstruction inst);
|
||||
|
||||
void subfic(UGeckoInstruction inst);
|
||||
void subfcx(UGeckoInstruction inst);
|
||||
|
|
|
@ -82,15 +82,15 @@ static GekkoOPTemplate primarytable[] =
|
|||
{46, &Jit64::lmw}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
|
||||
{47, &Jit64::stmw}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
|
||||
|
||||
{48, &Jit64::lfs}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
|
||||
{49, &Jit64::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
|
||||
{50, &Jit64::lfd}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
|
||||
{51, &Jit64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
|
||||
{48, &Jit64::lfXXX}, //"lfs", OPTYPE_LOADFP, FL_IN_A}},
|
||||
{49, &Jit64::lfXXX}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
|
||||
{50, &Jit64::lfXXX}, //"lfd", OPTYPE_LOADFP, FL_IN_A}},
|
||||
{51, &Jit64::lfXXX}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
|
||||
|
||||
{52, &Jit64::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
|
||||
{53, &Jit64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
{54, &Jit64::stfd}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
||||
{55, &Jit64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
{52, &Jit64::stfXXX}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
|
||||
{53, &Jit64::stfXXX}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
{54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
||||
{55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
|
||||
{56, &Jit64::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}},
|
||||
{57, &Jit64::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
|
@ -253,16 +253,16 @@ static GekkoOPTemplate table31[] =
|
|||
{725, &Jit64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}},
|
||||
|
||||
// fp load/store
|
||||
{535, &Jit64::lfsx}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
|
||||
{567, &Jit64::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
|
||||
{599, &Jit64::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
|
||||
{631, &Jit64::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
|
||||
{535, &Jit64::lfXXX}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
|
||||
{567, &Jit64::lfXXX}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
|
||||
{599, &Jit64::lfXXX}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
|
||||
{631, &Jit64::lfXXX}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
|
||||
|
||||
{663, &Jit64::stfsx}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
|
||||
{695, &Jit64::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
|
||||
{727, &Jit64::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
|
||||
{759, &Jit64::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
|
||||
{983, &Jit64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
|
||||
{663, &Jit64::stfXXX}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
|
||||
{695, &Jit64::stfXXX}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
|
||||
{727, &Jit64::stfXXX}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
|
||||
{759, &Jit64::stfXXX}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
|
||||
{983, &Jit64::stfiwx}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
|
||||
|
||||
{19, &Jit64::mfcr}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},
|
||||
{83, &Jit64::mfmsr}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}},
|
||||
|
|
|
@ -14,134 +14,161 @@ using namespace Gen;
|
|||
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
|
||||
// and pshufb could help a lot.
|
||||
|
||||
void Jit64::lfs(UGeckoInstruction inst)
|
||||
void Jit64::lfXXX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
bool indexed = inst.OPCD == 31;
|
||||
bool update = indexed ? !!(inst.SUBOP10 & 0x20) : !!(inst.OPCD & 1);
|
||||
bool single = indexed ? !(inst.SUBOP10 & 0x40) : !(inst.OPCD & 2);
|
||||
update &= indexed || inst.SIMM_16;
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
FALLBACK_IF(!a);
|
||||
int b = inst.RB;
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
FALLBACK_IF(!indexed && !a);
|
||||
|
||||
SafeLoadToReg(EAX, gpr.R(a), 32, offset, CallerSavedRegistersInUse(), false);
|
||||
if (update)
|
||||
gpr.BindToRegister(a, true, true);
|
||||
|
||||
s32 offset = 0;
|
||||
OpArg addr = gpr.R(a);
|
||||
if (indexed)
|
||||
{
|
||||
if (update)
|
||||
{
|
||||
ADD(32, addr, gpr.R(b));
|
||||
}
|
||||
else
|
||||
{
|
||||
addr = R(EAX);
|
||||
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||
LEA(32, EAX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
else
|
||||
{
|
||||
MOV(32, addr, gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, addr, gpr.R(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (update)
|
||||
ADD(32, addr, Imm32((s32)(s16)inst.SIMM_16));
|
||||
else
|
||||
offset = (s32)(s16)inst.SIMM_16;
|
||||
}
|
||||
|
||||
SafeLoadToReg(RAX, addr, single ? 32 : 64, offset, CallerSavedRegistersInUse(), false);
|
||||
fpr.Lock(d);
|
||||
fpr.BindToRegister(d, js.memcheck);
|
||||
fpr.BindToRegister(d, js.memcheck || !single);
|
||||
|
||||
MEMCHECK_START
|
||||
ConvertSingleToDouble(fpr.RX(d), EAX, true);
|
||||
if (single)
|
||||
{
|
||||
ConvertSingleToDouble(fpr.RX(d), EAX, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVQ_xmm(XMM0, R(RAX));
|
||||
MOVSD(fpr.RX(d), R(XMM0));
|
||||
}
|
||||
MEMCHECK_END
|
||||
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
void Jit64::lfd(UGeckoInstruction inst)
|
||||
void Jit64::stfXXX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
FALLBACK_IF(!inst.RA);
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
|
||||
SafeLoadToReg(RAX, gpr.R(a), 64, offset, CallerSavedRegistersInUse(), false);
|
||||
|
||||
fpr.Lock(d);
|
||||
fpr.BindToRegister(d, true);
|
||||
|
||||
MEMCHECK_START
|
||||
MOVQ_xmm(XMM0, R(RAX));
|
||||
MOVSD(fpr.RX(d), R(XMM0));
|
||||
MEMCHECK_END
|
||||
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
void Jit64::stfd(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
FALLBACK_IF(!inst.RA);
|
||||
bool indexed = inst.OPCD == 31;
|
||||
bool update = indexed ? !!(inst.SUBOP10&0x20) : !!(inst.OPCD&1);
|
||||
bool single = indexed ? !(inst.SUBOP10&0x40) : !(inst.OPCD&2);
|
||||
update &= indexed || inst.SIMM_16;
|
||||
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
|
||||
FALLBACK_IF(!indexed && !a);
|
||||
|
||||
s32 offset = 0;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
if (indexed)
|
||||
{
|
||||
if (update)
|
||||
{
|
||||
gpr.BindToRegister(a, true, true);
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||
LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
else
|
||||
{
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (update)
|
||||
{
|
||||
gpr.BindToRegister(a, true, true);
|
||||
ADD(32, gpr.R(a), Imm32((s32)(s16)inst.SIMM_16));
|
||||
}
|
||||
else
|
||||
{
|
||||
offset = (s32)(s16)inst.SIMM_16;
|
||||
}
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
}
|
||||
|
||||
if (single)
|
||||
{
|
||||
fpr.BindToRegister(s, true, false);
|
||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, CallerSavedRegistersInUse());
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVQ_xmm(R(RAX), fpr.RX(s));
|
||||
else
|
||||
MOV(64, R(RAX), fpr.R(s));
|
||||
SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, CallerSavedRegistersInUse());
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
// This one is a little bit weird; it stores the low 32 bits of a double without converting it
|
||||
void Jit64::stfiwx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVQ_xmm(R(RAX), fpr.RX(s));
|
||||
MOVD_xmm(R(EAX), fpr.RX(s));
|
||||
else
|
||||
MOV(64, R(RAX), fpr.R(s));
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, CallerSavedRegistersInUse());
|
||||
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
// Emits host code for the guest "stfs" instruction (listed as opcode 52,
// OPTYPE_STOREFP in the primary table of this file): the value of FPR `RS` is
// run through ConvertDoubleToSingle and written to the address taken from GPR
// `RA` plus the sign-extended 16-bit immediate.
// `inst` is the instruction word being compiled; its RS, RA and SIMM_16 fields are read.
void Jit64::stfs(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
// Bails out of this emitter when bJITLoadStoreFloatingOff is set
// (presumably a debug switch that defers to the interpreter -- confirm in the macro).
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
// The RA == 0 encoding is not handled here; FALLBACK_IF takes the fallback path instead.
FALLBACK_IF(!inst.RA);
|
||||
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
// Displacement: the 16-bit immediate, sign-extended to 32 bits.
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
|
||||
// NOTE(review): no fpr.Lock(s) precedes this bind, unlike stfsx elsewhere in
// this file -- confirm whether that is intentional.
fpr.BindToRegister(s, true, false);
|
||||
// XMM0 receives the converted value of FPR s and is the source operand of the write below.
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
// Reserve ABI_PARAM1 (presumably flushing whatever guest value it holds --
// confirm), then load the 32-bit base address from GPR a into it.
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
// Store XMM0 to [ABI_PARAM1 + offset]; the set of caller-saved registers in
// use is passed along (presumably so the helper can preserve them -- confirm).
SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, CallerSavedRegistersInUse());
|
||||
// Release the register-cache locks taken above.
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
// Emits host code for the guest "stfsx" instruction (listed as sub-opcode 663,
// OPTYPE_STOREFP in table31 of this file): the value of FPR `RS` is run
// through ConvertDoubleToSingle and written to the address GPR[RB], plus
// GPR[RA] when the RA field is non-zero.
// `inst` is the instruction word being compiled; its RS, RA and RB fields are read.
void Jit64::stfsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
// Bails out of this emitter when bJITLoadStoreFloatingOff is set
// (presumably a debug switch that defers to the interpreter -- confirm in the macro).
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
|
||||
// Build the effective address in ABI_PARAM1: start from GPR[RB] ...
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
|
||||
// ... and add GPR[RA] only when the RA field is non-zero (RA == 0 contributes nothing).
if (inst.RA)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
|
||||
|
||||
int s = inst.RS;
|
||||
// Lock FPR s in the register cache before binding it to a host register.
fpr.Lock(s);
|
||||
fpr.BindToRegister(s, true, false);
|
||||
// XMM0 receives the converted value of FPR s and is the source operand of the write below.
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
// Store XMM0 to [ABI_PARAM1 + 0]; the address is already complete, so the offset is zero.
SafeWriteF32ToReg(XMM0, ABI_PARAM1, 0, CallerSavedRegistersInUse());
|
||||
// Release the register-cache locks taken above.
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::lfsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
|
||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
|
||||
SafeLoadToReg(EAX, R(EAX), 32, 0, CallerSavedRegistersInUse(), false);
|
||||
|
||||
fpr.Lock(inst.RS);
|
||||
fpr.BindToRegister(inst.RS, js.memcheck);
|
||||
|
||||
MEMCHECK_START
|
||||
ConvertSingleToDouble(fpr.RX(inst.RS), EAX, true);
|
||||
MEMCHECK_END
|
||||
|
||||
fpr.UnlockAll();
|
||||
MOV(32, R(EAX), fpr.R(s));
|
||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, CallerSavedRegistersInUse());
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue