Jit64: make ForceSinglePrecision more versatile

This commit is contained in:
Tillmann Karras 2015-05-21 12:33:36 +02:00
parent 9792976ee9
commit 6b8ab5993a
4 changed files with 30 additions and 56 deletions

View File

@ -38,17 +38,7 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (X
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), packed, reversible);
}
if (single)
{
if (packed)
{
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
}
else
{
ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d));
MOVDDUP(fpr.RX(d), fpr.R(d));
}
}
ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true);
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
}
@ -215,21 +205,9 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
fpr.BindToRegister(d, !single);
if (single)
{
if (packed)
{
ForceSinglePrecisionP(fpr.RX(d), XMM0);
}
else
{
ForceSinglePrecisionS(fpr.RX(d), XMM0);
MOVDDUP(fpr.RX(d), fpr.R(d));
}
}
ForceSinglePrecision(fpr.RX(d), R(XMM0), packed, true);
else
{
MOVSD(fpr.RX(d), R(XMM0));
}
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
}
@ -492,11 +470,9 @@ void Jit64::frspx(UGeckoInstruction inst)
int d = inst.FD;
fpr.Lock(b, d);
fpr.BindToRegister(d, d == b);
if (b != d)
MOVAPD(fpr.RX(d), fpr.R(b));
ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d));
MOVDDUP(fpr.RX(d), fpr.R(d));
OpArg src = fpr.R(b);
fpr.BindToRegister(d, false);
ForceSinglePrecision(fpr.RX(d), src, false, true);
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
}

View File

@ -113,7 +113,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*avxOp)
{
avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), true, reversible);
}
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
}
@ -173,7 +173,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
PanicAlert("ps_sum WTF!!!");
}
fpr.BindToRegister(d, false);
ForceSinglePrecisionP(fpr.RX(d), XMM0);
ForceSinglePrecision(fpr.RX(d), R(XMM0));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
}
@ -205,7 +205,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
Force25BitPrecision(XMM0, R(XMM0), XMM1);
MULPD(XMM0, fpr.R(a));
fpr.BindToRegister(d, false);
ForceSinglePrecisionP(fpr.RX(d), XMM0);
ForceSinglePrecision(fpr.RX(d), R(XMM0));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
}
@ -264,7 +264,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
CALL((void *)asm_routines.frsqrte);
MOVLHPS(fpr.RX(d), XMM0);
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
@ -291,7 +291,7 @@ void Jit64::ps_res(UGeckoInstruction inst)
CALL((void *)asm_routines.fres);
MOVLHPS(fpr.RX(d), XMM0);
ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d));
ForceSinglePrecision(fpr.RX(d), fpr.R(d));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
@ -386,7 +386,7 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
}
fpr.BindToRegister(d, false);
ForceSinglePrecisionP(fpr.RX(d), XMM0);
ForceSinglePrecision(fpr.RX(d), R(XMM0));
SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll();
}

View File

@ -640,31 +640,30 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address
MOV(accessSize, MRegSum(RMEM, RSCRATCH2), R(reg));
}
// NOTE(review): this hunk is shown without +/- diff markers. It appears to
// interleave the removed ForceSinglePrecisionS / ForceSinglePrecisionP bodies
// (X64Reg input, wrapped as R(input)) with the new unified ForceSinglePrecision
// body (OpArg input, passed through directly) - confirm against the raw diff.
//
// ForceSinglePrecision(output, input, packed, duplicate), as far as this hunk shows:
//   - when jit->jo.accurateSinglePrecision is set, emits a double->single->double
//     conversion pair into `output` (CVTPD2PS/CVTPS2PD if `packed`, otherwise
//     CVTSD2SS/CVTSS2SD followed by MOVDDUP when `duplicate` is set);
//   - otherwise emits a single move from `input` to `output`, and only when
//     `input` is not already the `output` register.
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg output, X64Reg input)
void EmuCodeBlock::ForceSinglePrecision(X64Reg output, OpArg input, bool packed, bool duplicate)
{
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
if (jit->jo.accurateSinglePrecision)
{
CVTSD2SS(output, R(input));
CVTSS2SD(output, R(output));
if (packed)
{
// Packed form: the first conversion reads `input`, the second reads back from `output`.
CVTPD2PS(output, input);
CVTPS2PD(output, R(output));
}
else
{
// Scalar form: same round trip via the scalar conversions.
CVTSD2SS(output, input);
CVTSS2SD(output, R(output));
// `duplicate` is only consulted on this scalar path inside the accurate branch;
// the packed path above ignores it.
if (duplicate)
MOVDDUP(output, R(output));
}
}
else if (output != input)
// NOTE(review): when `input` is a simple register equal to `output`, this branch
// is skipped entirely, so no MOVDDUP is emitted even if `duplicate` is true.
// The call-site code this commit replaces (fp_tri_op, frspx) issued MOVDDUP
// unconditionally after the scalar helper - confirm the difference is intended
// when accurateSinglePrecision is off.
else if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
{
MOVAPD(output, R(input));
}
}
void EmuCodeBlock::ForceSinglePrecisionP(X64Reg output, X64Reg input)
{
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
if (jit->jo.accurateSinglePrecision)
{
CVTPD2PS(output, R(input));
CVTPS2PD(output, R(output));
}
else if (output != input)
{
MOVAPD(output, R(input));
// Presumably the body of the new fallback branch (the `!input.IsSimpleReg()` test):
// the move instruction is chosen by `duplicate` alone; `packed` is not checked here.
if (duplicate)
MOVDDUP(output, input);
else
MOVAPD(output, input);
}
}

View File

@ -121,8 +121,7 @@ public:
void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg, u8), void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg, u8),
Gen::X64Reg regOp, Gen::OpArg arg1, Gen::OpArg arg2, u8 imm);
void ForceSinglePrecisionS(Gen::X64Reg output, Gen::X64Reg input);
void ForceSinglePrecisionP(Gen::X64Reg output, Gen::X64Reg input);
void ForceSinglePrecision(Gen::X64Reg output, Gen::OpArg input, bool packed = true, bool duplicate = false);
void Force25BitPrecision(Gen::X64Reg output, Gen::OpArg input, Gen::X64Reg tmp);
// RSCRATCH might get trashed