Jit64: make ForceSinglePrecision more versatile
This commit is contained in:
parent
9792976ee9
commit
6b8ab5993a
|
@ -38,17 +38,7 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (X
|
||||||
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), packed, reversible);
|
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), packed, reversible);
|
||||||
}
|
}
|
||||||
if (single)
|
if (single)
|
||||||
{
|
ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
|
||||||
if (packed)
|
|
||||||
{
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d));
|
|
||||||
MOVDDUP(fpr.RX(d), fpr.R(d));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -215,21 +205,9 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
fpr.BindToRegister(d, !single);
|
fpr.BindToRegister(d, !single);
|
||||||
|
|
||||||
if (single)
|
if (single)
|
||||||
{
|
ForceSinglePrecision(fpr.RX(d), R(XMM0), packed, true);
|
||||||
if (packed)
|
|
||||||
{
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d), XMM0);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
ForceSinglePrecisionS(fpr.RX(d), XMM0);
|
|
||||||
MOVDDUP(fpr.RX(d), fpr.R(d));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOVSD(fpr.RX(d), R(XMM0));
|
MOVSD(fpr.RX(d), R(XMM0));
|
||||||
}
|
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -492,11 +470,9 @@ void Jit64::frspx(UGeckoInstruction inst)
|
||||||
int d = inst.FD;
|
int d = inst.FD;
|
||||||
|
|
||||||
fpr.Lock(b, d);
|
fpr.Lock(b, d);
|
||||||
fpr.BindToRegister(d, d == b);
|
OpArg src = fpr.R(b);
|
||||||
if (b != d)
|
fpr.BindToRegister(d, false);
|
||||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
ForceSinglePrecision(fpr.RX(d), src, false, true);
|
||||||
ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d));
|
|
||||||
MOVDDUP(fpr.RX(d), fpr.R(d));
|
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,7 +113,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*avxOp)
|
||||||
{
|
{
|
||||||
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), true, reversible);
|
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), true, reversible);
|
||||||
}
|
}
|
||||||
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
|
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -173,7 +173,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
|
||||||
PanicAlert("ps_sum WTF!!!");
|
PanicAlert("ps_sum WTF!!!");
|
||||||
}
|
}
|
||||||
fpr.BindToRegister(d, false);
|
fpr.BindToRegister(d, false);
|
||||||
ForceSinglePrecisionP(fpr.RX(d), XMM0);
|
ForceSinglePrecision(fpr.RX(d), R(XMM0));
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -205,7 +205,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
|
||||||
Force25BitPrecision(XMM0, R(XMM0), XMM1);
|
Force25BitPrecision(XMM0, R(XMM0), XMM1);
|
||||||
MULPD(XMM0, fpr.R(a));
|
MULPD(XMM0, fpr.R(a));
|
||||||
fpr.BindToRegister(d, false);
|
fpr.BindToRegister(d, false);
|
||||||
ForceSinglePrecisionP(fpr.RX(d), XMM0);
|
ForceSinglePrecision(fpr.RX(d), R(XMM0));
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -264,7 +264,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
||||||
CALL((void *)asm_routines.frsqrte);
|
CALL((void *)asm_routines.frsqrte);
|
||||||
MOVLHPS(fpr.RX(d), XMM0);
|
MOVLHPS(fpr.RX(d), XMM0);
|
||||||
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
|
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
|
@ -291,7 +291,7 @@ void Jit64::ps_res(UGeckoInstruction inst)
|
||||||
CALL((void *)asm_routines.fres);
|
CALL((void *)asm_routines.fres);
|
||||||
MOVLHPS(fpr.RX(d), XMM0);
|
MOVLHPS(fpr.RX(d), XMM0);
|
||||||
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
|
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
|
@ -386,7 +386,7 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
fpr.BindToRegister(d, false);
|
fpr.BindToRegister(d, false);
|
||||||
ForceSinglePrecisionP(fpr.RX(d), XMM0);
|
ForceSinglePrecision(fpr.RX(d), R(XMM0));
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
|
@ -640,31 +640,30 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address
|
||||||
MOV(accessSize, MRegSum(RMEM, RSCRATCH2), R(reg));
|
MOV(accessSize, MRegSum(RMEM, RSCRATCH2), R(reg));
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg output, X64Reg input)
|
void EmuCodeBlock::ForceSinglePrecision(X64Reg output, OpArg input, bool packed, bool duplicate)
|
||||||
{
|
{
|
||||||
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
|
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
|
||||||
if (jit->jo.accurateSinglePrecision)
|
if (jit->jo.accurateSinglePrecision)
|
||||||
{
|
{
|
||||||
CVTSD2SS(output, R(input));
|
if (packed)
|
||||||
CVTSS2SD(output, R(output));
|
|
||||||
}
|
|
||||||
else if (output != input)
|
|
||||||
{
|
{
|
||||||
MOVAPD(output, R(input));
|
CVTPD2PS(output, input);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmuCodeBlock::ForceSinglePrecisionP(X64Reg output, X64Reg input)
|
|
||||||
{
|
|
||||||
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
|
|
||||||
if (jit->jo.accurateSinglePrecision)
|
|
||||||
{
|
|
||||||
CVTPD2PS(output, R(input));
|
|
||||||
CVTPS2PD(output, R(output));
|
CVTPS2PD(output, R(output));
|
||||||
}
|
}
|
||||||
else if (output != input)
|
else
|
||||||
{
|
{
|
||||||
MOVAPD(output, R(input));
|
CVTSD2SS(output, input);
|
||||||
|
CVTSS2SD(output, R(output));
|
||||||
|
if (duplicate)
|
||||||
|
MOVDDUP(output, R(output));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
|
||||||
|
{
|
||||||
|
if (duplicate)
|
||||||
|
MOVDDUP(output, input);
|
||||||
|
else
|
||||||
|
MOVAPD(output, input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -121,8 +121,7 @@ public:
|
||||||
void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg, u8), void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg, u8),
|
void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg, u8), void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg, u8),
|
||||||
Gen::X64Reg regOp, Gen::OpArg arg1, Gen::OpArg arg2, u8 imm);
|
Gen::X64Reg regOp, Gen::OpArg arg1, Gen::OpArg arg2, u8 imm);
|
||||||
|
|
||||||
void ForceSinglePrecisionS(Gen::X64Reg output, Gen::X64Reg input);
|
void ForceSinglePrecision(Gen::X64Reg output, Gen::OpArg input, bool packed = true, bool duplicate = false);
|
||||||
void ForceSinglePrecisionP(Gen::X64Reg output, Gen::X64Reg input);
|
|
||||||
void Force25BitPrecision(Gen::X64Reg output, Gen::OpArg input, Gen::X64Reg tmp);
|
void Force25BitPrecision(Gen::X64Reg output, Gen::OpArg input, Gen::X64Reg tmp);
|
||||||
|
|
||||||
// RSCRATCH might get trashed
|
// RSCRATCH might get trashed
|
||||||
|
|
Loading…
Reference in New Issue