Jit64: make ForceSinglePrecision more versatile

This commit is contained in:
Tillmann Karras 2015-05-21 12:33:36 +02:00
parent 9792976ee9
commit 6b8ab5993a
4 changed files with 30 additions and 56 deletions

View File

@ -38,17 +38,7 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (X
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), packed, reversible); avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), packed, reversible);
} }
if (single) if (single)
{ ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
if (packed)
{
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
}
else
{
ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d));
MOVDDUP(fpr.RX(d), fpr.R(d));
}
}
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -215,21 +205,9 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
fpr.BindToRegister(d, !single); fpr.BindToRegister(d, !single);
if (single) if (single)
{ ForceSinglePrecision(fpr.RX(d), R(XMM0), packed, true);
if (packed)
{
ForceSinglePrecisionP(fpr.RX(d), XMM0);
}
else
{
ForceSinglePrecisionS(fpr.RX(d), XMM0);
MOVDDUP(fpr.RX(d), fpr.R(d));
}
}
else else
{
MOVSD(fpr.RX(d), R(XMM0)); MOVSD(fpr.RX(d), R(XMM0));
}
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -492,11 +470,9 @@ void Jit64::frspx(UGeckoInstruction inst)
int d = inst.FD; int d = inst.FD;
fpr.Lock(b, d); fpr.Lock(b, d);
fpr.BindToRegister(d, d == b); OpArg src = fpr.R(b);
if (b != d) fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), fpr.R(b)); ForceSinglePrecision(fpr.RX(d), src, false, true);
ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d));
MOVDDUP(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }

View File

@ -113,7 +113,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*avxOp)
{ {
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), true, reversible); avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), true, reversible);
} }
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d)); ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -173,7 +173,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
PanicAlert("ps_sum WTF!!!"); PanicAlert("ps_sum WTF!!!");
} }
fpr.BindToRegister(d, false); fpr.BindToRegister(d, false);
ForceSinglePrecisionP(fpr.RX(d), XMM0); ForceSinglePrecision(fpr.RX(d), R(XMM0));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -205,7 +205,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
Force25BitPrecision(XMM0, R(XMM0), XMM1); Force25BitPrecision(XMM0, R(XMM0), XMM1);
MULPD(XMM0, fpr.R(a)); MULPD(XMM0, fpr.R(a));
fpr.BindToRegister(d, false); fpr.BindToRegister(d, false);
ForceSinglePrecisionP(fpr.RX(d), XMM0); ForceSinglePrecision(fpr.RX(d), R(XMM0));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -264,7 +264,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
CALL((void *)asm_routines.frsqrte); CALL((void *)asm_routines.frsqrte);
MOVLHPS(fpr.RX(d), XMM0); MOVLHPS(fpr.RX(d), XMM0);
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d)); ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -291,7 +291,7 @@ void Jit64::ps_res(UGeckoInstruction inst)
CALL((void *)asm_routines.fres); CALL((void *)asm_routines.fres);
MOVLHPS(fpr.RX(d), XMM0); MOVLHPS(fpr.RX(d), XMM0);
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d)); ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -386,7 +386,7 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
} }
fpr.BindToRegister(d, false); fpr.BindToRegister(d, false);
ForceSinglePrecisionP(fpr.RX(d), XMM0); ForceSinglePrecision(fpr.RX(d), R(XMM0));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }

View File

@ -640,31 +640,30 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address
MOV(accessSize, MRegSum(RMEM, RSCRATCH2), R(reg)); MOV(accessSize, MRegSum(RMEM, RSCRATCH2), R(reg));
} }
// Diff hunk: every line below fuses the removed code (ForceSinglePrecisionS and
// ForceSinglePrecisionP) with the added code (the single ForceSinglePrecision).
//
// ForceSinglePrecision(output, input, packed, duplicate) emits code that leaves the
// value of `input` in the XMM register `output`:
//  - jit->jo.accurateSinglePrecision set: converts double -> single -> double
//    (CVTPD2PS/CVTPS2PD when `packed`, CVTSD2SS/CVTSS2SD otherwise); in the scalar
//    case `duplicate` additionally emits MOVDDUP(output, R(output)).
//  - otherwise: nothing is emitted when `input` is already the register `output`;
//    else `input` is copied into `output` with MOVDDUP (`duplicate`) or MOVAPD.
// `input` is an OpArg; the IsSimpleReg() test allows for inputs that are not plain
// registers.
//
// NOTE(review): with accurateSinglePrecision off, `packed` is not consulted and
// `duplicate` has no effect when `input` is already `output`. The scalar call sites
// replaced by this commit emitted MOVDDUP unconditionally - confirm this is intended.
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg output, X64Reg input) void EmuCodeBlock::ForceSinglePrecision(X64Reg output, OpArg input, bool packed, bool duplicate)
{ {
// Most games don't need these. Zelda requires it though - some platforms get stuck without them. // Most games don't need these. Zelda requires it though - some platforms get stuck without them.
if (jit->jo.accurateSinglePrecision) if (jit->jo.accurateSinglePrecision)
{ {
CVTSD2SS(output, R(input)); if (packed)
CVTSS2SD(output, R(output)); {
CVTPD2PS(output, input);
CVTPS2PD(output, R(output));
}
else
{
CVTSD2SS(output, input);
CVTSS2SD(output, R(output));
if (duplicate)
MOVDDUP(output, R(output));
}
} }
// Fast path (added side): skip the copy only when `input` is exactly the `output` register.
else if (output != input) else if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
{ {
MOVAPD(output, R(input)); if (duplicate)
} MOVDDUP(output, input);
} else
MOVAPD(output, input);
void EmuCodeBlock::ForceSinglePrecisionP(X64Reg output, X64Reg input)
{
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
if (jit->jo.accurateSinglePrecision)
{
CVTPD2PS(output, R(input));
CVTPS2PD(output, R(output));
}
else if (output != input)
{
MOVAPD(output, R(input));
} }
} }

View File

@ -121,8 +121,7 @@ public:
void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg, u8), void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg, u8), void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg, u8), void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg, u8),
Gen::X64Reg regOp, Gen::OpArg arg1, Gen::OpArg arg2, u8 imm); Gen::X64Reg regOp, Gen::OpArg arg1, Gen::OpArg arg2, u8 imm);
// Diff lines: the removed ForceSinglePrecisionS/ForceSinglePrecisionP declarations fused
// with the added ForceSinglePrecision declaration. The new form takes `input` as a
// Gen::OpArg and defaults to packed = true, duplicate = false, so the two-argument calls
// introduced by this commit (which replace ForceSinglePrecisionP calls) select the
// packed variant.
void ForceSinglePrecisionS(Gen::X64Reg output, Gen::X64Reg input); void ForceSinglePrecision(Gen::X64Reg output, Gen::OpArg input, bool packed = true, bool duplicate = false);
void ForceSinglePrecisionP(Gen::X64Reg output, Gen::X64Reg input);
void Force25BitPrecision(Gen::X64Reg output, Gen::OpArg input, Gen::X64Reg tmp); void Force25BitPrecision(Gen::X64Reg output, Gen::OpArg input, Gen::X64Reg tmp);
// RSCRATCH might get trashed // RSCRATCH might get trashed