Jit64: make ForceSinglePrecision more versatile
This commit is contained in:
parent
9792976ee9
commit
6b8ab5993a
|
@ -38,17 +38,7 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (X
|
|||
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), packed, reversible);
|
||||
}
|
||||
if (single)
|
||||
{
|
||||
if (packed)
|
||||
{
|
||||
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
|
||||
}
|
||||
else
|
||||
{
|
||||
ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d));
|
||||
MOVDDUP(fpr.RX(d), fpr.R(d));
|
||||
}
|
||||
}
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
@ -215,21 +205,9 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
fpr.BindToRegister(d, !single);
|
||||
|
||||
if (single)
|
||||
{
|
||||
if (packed)
|
||||
{
|
||||
ForceSinglePrecisionP(fpr.RX(d), XMM0);
|
||||
}
|
||||
ForceSinglePrecision(fpr.RX(d), R(XMM0), packed, true);
|
||||
else
|
||||
{
|
||||
ForceSinglePrecisionS(fpr.RX(d), XMM0);
|
||||
MOVDDUP(fpr.RX(d), fpr.R(d));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVSD(fpr.RX(d), R(XMM0));
|
||||
}
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
@ -492,11 +470,9 @@ void Jit64::frspx(UGeckoInstruction inst)
|
|||
int d = inst.FD;
|
||||
|
||||
fpr.Lock(b, d);
|
||||
fpr.BindToRegister(d, d == b);
|
||||
if (b != d)
|
||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
||||
ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d));
|
||||
MOVDDUP(fpr.RX(d), fpr.R(d));
|
||||
OpArg src = fpr.R(b);
|
||||
fpr.BindToRegister(d, false);
|
||||
ForceSinglePrecision(fpr.RX(d), src, false, true);
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
|
|
@ -113,7 +113,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*avxOp)
|
|||
{
|
||||
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), true, reversible);
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
@ -173,7 +173,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
|
|||
PanicAlert("ps_sum WTF!!!");
|
||||
}
|
||||
fpr.BindToRegister(d, false);
|
||||
ForceSinglePrecisionP(fpr.RX(d), XMM0);
|
||||
ForceSinglePrecision(fpr.RX(d), R(XMM0));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
@ -205,7 +205,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
|
|||
Force25BitPrecision(XMM0, R(XMM0), XMM1);
|
||||
MULPD(XMM0, fpr.R(a));
|
||||
fpr.BindToRegister(d, false);
|
||||
ForceSinglePrecisionP(fpr.RX(d), XMM0);
|
||||
ForceSinglePrecision(fpr.RX(d), R(XMM0));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
@ -264,7 +264,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
|||
CALL((void *)asm_routines.frsqrte);
|
||||
MOVLHPS(fpr.RX(d), XMM0);
|
||||
|
||||
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
@ -291,7 +291,7 @@ void Jit64::ps_res(UGeckoInstruction inst)
|
|||
CALL((void *)asm_routines.fres);
|
||||
MOVLHPS(fpr.RX(d), XMM0);
|
||||
|
||||
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
|
||||
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
@ -386,7 +386,7 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
fpr.BindToRegister(d, false);
|
||||
ForceSinglePrecisionP(fpr.RX(d), XMM0);
|
||||
ForceSinglePrecision(fpr.RX(d), R(XMM0));
|
||||
SetFPRFIfNeeded(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
|
|
@ -640,31 +640,30 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address
|
|||
MOV(accessSize, MRegSum(RMEM, RSCRATCH2), R(reg));
|
||||
}
|
||||
|
||||
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg output, X64Reg input)
|
||||
void EmuCodeBlock::ForceSinglePrecision(X64Reg output, OpArg input, bool packed, bool duplicate)
|
||||
{
|
||||
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
|
||||
if (jit->jo.accurateSinglePrecision)
|
||||
{
|
||||
CVTSD2SS(output, R(input));
|
||||
CVTSS2SD(output, R(output));
|
||||
}
|
||||
else if (output != input)
|
||||
if (packed)
|
||||
{
|
||||
MOVAPD(output, R(input));
|
||||
}
|
||||
}
|
||||
|
||||
void EmuCodeBlock::ForceSinglePrecisionP(X64Reg output, X64Reg input)
|
||||
{
|
||||
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
|
||||
if (jit->jo.accurateSinglePrecision)
|
||||
{
|
||||
CVTPD2PS(output, R(input));
|
||||
CVTPD2PS(output, input);
|
||||
CVTPS2PD(output, R(output));
|
||||
}
|
||||
else if (output != input)
|
||||
else
|
||||
{
|
||||
MOVAPD(output, R(input));
|
||||
CVTSD2SS(output, input);
|
||||
CVTSS2SD(output, R(output));
|
||||
if (duplicate)
|
||||
MOVDDUP(output, R(output));
|
||||
}
|
||||
}
|
||||
else if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
|
||||
{
|
||||
if (duplicate)
|
||||
MOVDDUP(output, input);
|
||||
else
|
||||
MOVAPD(output, input);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -121,8 +121,7 @@ public:
|
|||
void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg, u8), void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg, u8),
|
||||
Gen::X64Reg regOp, Gen::OpArg arg1, Gen::OpArg arg2, u8 imm);
|
||||
|
||||
void ForceSinglePrecisionS(Gen::X64Reg output, Gen::X64Reg input);
|
||||
void ForceSinglePrecisionP(Gen::X64Reg output, Gen::X64Reg input);
|
||||
void ForceSinglePrecision(Gen::X64Reg output, Gen::OpArg input, bool packed = true, bool duplicate = false);
|
||||
void Force25BitPrecision(Gen::X64Reg output, Gen::OpArg input, Gen::X64Reg tmp);
|
||||
|
||||
// RSCRATCH might get trashed
|
||||
|
|
Loading…
Reference in New Issue