Jit64: use AVX three-operand instructions in fp_tri_op and frsqrtex when available
This commit is contained in:
parent
ae86850a78
commit
ddaf29e039
|
@ -119,7 +119,7 @@ public:
|
||||||
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
|
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
|
||||||
typedef u32 (*Operation)(u32 a, u32 b);
|
typedef u32 (*Operation)(u32 a, u32 b);
|
||||||
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
||||||
void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
|
void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op_2)(Gen::X64Reg, Gen::OpArg), void (XEmitter::*op_3)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg));
|
||||||
|
|
||||||
// OPCODES
|
// OPCODES
|
||||||
void unknown_instruction(UGeckoInstruction _inst);
|
void unknown_instruction(UGeckoInstruction _inst);
|
||||||
|
|
|
@ -13,37 +13,62 @@ static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFF
|
||||||
static const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
|
static const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
|
||||||
static const double one_const = 1.0f;
|
static const double one_const = 1.0f;
|
||||||
|
|
||||||
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
|
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single,
|
||||||
|
void (XEmitter::*op_2)(Gen::X64Reg, Gen::OpArg),
|
||||||
|
void (XEmitter::*op_3)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg))
|
||||||
{
|
{
|
||||||
|
if (!cpu_info.bAVX)
|
||||||
|
{
|
||||||
|
op_3 = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
fpr.Lock(d, a, b);
|
fpr.Lock(d, a, b);
|
||||||
if (d == a)
|
if (d == a)
|
||||||
{
|
{
|
||||||
fpr.BindToRegister(d, true);
|
fpr.BindToRegister(d);
|
||||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
(this->*op_2)(fpr.RX(d), fpr.R(b));
|
||||||
}
|
}
|
||||||
else if (d == b)
|
else if (d == b)
|
||||||
{
|
{
|
||||||
if (reversible)
|
if (reversible)
|
||||||
{
|
{
|
||||||
fpr.BindToRegister(d, true);
|
fpr.BindToRegister(d);
|
||||||
(this->*op)(fpr.RX(d), fpr.R(a));
|
(this->*op_2)(fpr.RX(d), fpr.R(a));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (op_3)
|
||||||
|
{
|
||||||
|
fpr.BindToRegister(d);
|
||||||
|
fpr.BindToRegister(a, true, false);
|
||||||
|
(this->*op_3)(fpr.RX(d), fpr.RX(a), fpr.R(b));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOVSD(XMM0, fpr.R(b));
|
MOVSD(XMM0, fpr.R(b));
|
||||||
fpr.BindToRegister(d, !dupe);
|
fpr.BindToRegister(d, !single);
|
||||||
MOVSD(fpr.RX(d), fpr.R(a));
|
MOVSD(fpr.RX(d), fpr.R(a));
|
||||||
(this->*op)(fpr.RX(d), Gen::R(XMM0));
|
(this->*op_2)(fpr.RX(d), Gen::R(XMM0));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Sources different from d, can use rather quick solution
|
if (op_3)
|
||||||
fpr.BindToRegister(d, !dupe);
|
{
|
||||||
MOVSD(fpr.RX(d), fpr.R(a));
|
fpr.BindToRegister(d, !single);
|
||||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
fpr.BindToRegister(a);
|
||||||
|
(this->*op_3)(fpr.RX(d), fpr.RX(a), fpr.R(b));
|
||||||
}
|
}
|
||||||
if (dupe)
|
else
|
||||||
|
{
|
||||||
|
fpr.BindToRegister(d, !single);
|
||||||
|
MOVSD(fpr.RX(d), fpr.R(a));
|
||||||
|
(this->*op_2)(fpr.RX(d), fpr.R(b));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (single)
|
||||||
{
|
{
|
||||||
ForceSinglePrecisionS(fpr.RX(d));
|
ForceSinglePrecisionS(fpr.RX(d));
|
||||||
if (cpu_info.bSSE3)
|
if (cpu_info.bSSE3)
|
||||||
|
@ -73,13 +98,13 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool dupe = inst.OPCD == 59;
|
bool single = inst.OPCD == 59;
|
||||||
switch (inst.SUBOP5)
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
|
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD, &XEmitter::VDIVSD); break; //div
|
||||||
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
|
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD, &XEmitter::VSUBSD); break; //sub
|
||||||
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
|
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD, &XEmitter::VADDSD); break; //add
|
||||||
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
|
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, &XEmitter::VMULSD); break; //mul
|
||||||
default:
|
default:
|
||||||
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
||||||
}
|
}
|
||||||
|
@ -92,11 +117,18 @@ void Jit64::frsqrtex(UGeckoInstruction inst)
|
||||||
int d = inst.FD;
|
int d = inst.FD;
|
||||||
int b = inst.FB;
|
int b = inst.FB;
|
||||||
fpr.Lock(b, d);
|
fpr.Lock(b, d);
|
||||||
fpr.BindToRegister(d, true, true);
|
fpr.BindToRegister(d, d == b, true);
|
||||||
MOVSD(XMM0, M((void *)&one_const));
|
MOVSD(XMM0, M((void *)&one_const));
|
||||||
SQRTSD(XMM1, fpr.R(b));
|
SQRTSD(XMM1, fpr.R(b));
|
||||||
|
if (cpu_info.bAVX)
|
||||||
|
{
|
||||||
|
VDIVSD(fpr.RX(d), XMM0, R(XMM1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
DIVSD(XMM0, R(XMM1));
|
DIVSD(XMM0, R(XMM1));
|
||||||
MOVSD(fpr.R(d), XMM0);
|
MOVSD(fpr.R(d), XMM0);
|
||||||
|
}
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -198,7 +230,7 @@ void Jit64::fmrx(UGeckoInstruction inst)
|
||||||
int d = inst.FD;
|
int d = inst.FD;
|
||||||
int b = inst.FB;
|
int b = inst.FB;
|
||||||
fpr.Lock(b, d);
|
fpr.Lock(b, d);
|
||||||
fpr.BindToRegister(d, true, true);
|
fpr.BindToRegister(d, d == b, true);
|
||||||
MOVSD(XMM0, fpr.R(b));
|
MOVSD(XMM0, fpr.R(b));
|
||||||
MOVSD(fpr.R(d), XMM0);
|
MOVSD(fpr.R(d), XMM0);
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
|
|
Loading…
Reference in New Issue