Jit64: implement ps_res
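This is probably more accurate than it is allowed to be: the reciprocal is computed with a full-precision divide (DIVPD) of 1.0 by the operand, whereas the hardware instructions are only supposed to produce "estimates".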
This commit is contained in:
parent
0a2a273a2e
commit
cdc27e46f1
|
@ -178,7 +178,7 @@ public:
|
||||||
void ps_arith(UGeckoInstruction inst); //aggregate
|
void ps_arith(UGeckoInstruction inst); //aggregate
|
||||||
void ps_mergeXX(UGeckoInstruction inst);
|
void ps_mergeXX(UGeckoInstruction inst);
|
||||||
void ps_maddXX(UGeckoInstruction inst);
|
void ps_maddXX(UGeckoInstruction inst);
|
||||||
void ps_rsqrte(UGeckoInstruction inst);
|
void ps_recip(UGeckoInstruction inst);
|
||||||
void ps_sum(UGeckoInstruction inst);
|
void ps_sum(UGeckoInstruction inst);
|
||||||
void ps_muls(UGeckoInstruction inst);
|
void ps_muls(UGeckoInstruction inst);
|
||||||
|
|
||||||
|
|
|
@ -138,9 +138,9 @@ static GekkoOPTemplate table4_2[] =
|
||||||
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
||||||
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
||||||
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
||||||
{24, &Jit64::Default}, //"ps_res", OPTYPE_PS, 0}},
|
{24, &Jit64::ps_recip}, //"ps_res", OPTYPE_PS, 0}},
|
||||||
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
||||||
{26, &Jit64::ps_rsqrte}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
{26, &Jit64::ps_recip}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||||
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
||||||
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
||||||
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
||||||
|
|
|
@ -8,9 +8,10 @@
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
#include "CPUDetect.h"
|
#include "CPUDetect.h"
|
||||||
|
|
||||||
const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
||||||
const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||||
const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
|
static const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
|
||||||
|
static const double one_const = 1.0f;
|
||||||
|
|
||||||
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
|
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
|
||||||
{
|
{
|
||||||
|
@ -59,9 +60,6 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static const double one_const = 1.0f;
|
|
||||||
|
|
||||||
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
|
|
@ -100,20 +100,33 @@ void Jit64::ps_sign(UGeckoInstruction inst)
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
// ps_res and ps_rsqrte
|
||||||
|
void Jit64::ps_recip(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITPairedOff)
|
JITDISABLE(bJITPairedOff)
|
||||||
if (inst.Rc) {
|
if (inst.Rc) {
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
}
|
}
|
||||||
|
OpArg divisor;
|
||||||
int d = inst.FD;
|
int d = inst.FD;
|
||||||
int b = inst.FB;
|
int b = inst.FB;
|
||||||
fpr.Lock(d, b);
|
fpr.Lock(d, b);
|
||||||
fpr.BindToRegister(d, (d == b), true);
|
fpr.BindToRegister(d, (d == b));
|
||||||
SQRTPD(XMM0, fpr.R(b));
|
switch (inst.SUBOP5)
|
||||||
|
{
|
||||||
|
case 24:
|
||||||
|
// ps_res
|
||||||
|
divisor = fpr.R(b);
|
||||||
|
break;
|
||||||
|
case 26:
|
||||||
|
// ps_rsqrte
|
||||||
|
SQRTPD(XMM0, fpr.R(b));
|
||||||
|
divisor = R(XMM0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
MOVAPD(XMM1, M((void*)&psOneOne));
|
MOVAPD(XMM1, M((void*)&psOneOne));
|
||||||
DIVPD(XMM1, R(XMM0));
|
DIVPD(XMM1, divisor);
|
||||||
MOVAPD(fpr.R(d), XMM1);
|
MOVAPD(fpr.R(d), XMM1);
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue