Merge branch 'revert-avx'

This commit is contained in:
degasus 2013-12-17 09:18:12 +01:00
commit 0d631c71a0
2 changed files with 45 additions and 77 deletions

View File

@ -119,7 +119,7 @@ public:
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
typedef u32 (*Operation)(u32 a, u32 b);
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op_2)(Gen::X64Reg, Gen::OpArg), void (XEmitter::*op_3)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg));
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
// OPCODES
void unknown_instruction(UGeckoInstruction _inst);

View File

@ -13,61 +13,48 @@ static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFF
static const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
static const double one_const = 1.0f;
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single,
void (XEmitter::*op_2)(Gen::X64Reg, Gen::OpArg),
void (XEmitter::*op_3)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg))
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
{
if (!cpu_info.bAVX)
{
op_3 = nullptr;
}
fpr.Lock(d, a, b);
if (d == a)
{
fpr.BindToRegister(d);
(this->*op_2)(fpr.RX(d), fpr.R(b));
fpr.BindToRegister(d, true);
if(!single)
{
fpr.BindToRegister(b, true, false);
}
(this->*op)(fpr.RX(d), fpr.R(b));
}
else if (d == b)
{
if (reversible)
{
fpr.BindToRegister(d);
(this->*op_2)(fpr.RX(d), fpr.R(a));
fpr.BindToRegister(d, true);
if(!single)
{
fpr.BindToRegister(a, true, false);
}
(this->*op)(fpr.RX(d), fpr.R(a));
}
else
{
if (op_3)
{
fpr.BindToRegister(d);
fpr.BindToRegister(a, true, false);
(this->*op_3)(fpr.RX(d), fpr.RX(a), fpr.R(b));
}
else
{
MOVSD(XMM0, fpr.R(b));
fpr.BindToRegister(d, false);
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op_2)(fpr.RX(d), Gen::R(XMM0));
}
MOVSD(XMM0, fpr.R(b));
fpr.BindToRegister(d, !single);
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0));
}
}
else
{
if (op_3)
// Sources different from d, can use rather quick solution
fpr.BindToRegister(d, !single);
if(!single)
{
fpr.BindToRegister(d, false);
fpr.BindToRegister(a);
(this->*op_3)(fpr.RX(d), fpr.RX(a), fpr.R(b));
}
else
{
fpr.BindToRegister(d, false);
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op_2)(fpr.RX(d), fpr.R(b));
fpr.BindToRegister(b, true, false);
}
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), fpr.R(b));
}
if (single)
{
ForceSinglePrecisionS(fpr.RX(d));
@ -101,10 +88,10 @@ void Jit64::fp_arith(UGeckoInstruction inst)
bool single = inst.OPCD == 59;
switch (inst.SUBOP5)
{
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD, &XEmitter::VDIVSD); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD, &XEmitter::VSUBSD); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD, &XEmitter::VADDSD); break; //add
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, &XEmitter::VMULSD); break; //mul
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD); break; //add
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD); break; //mul
default:
_assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
}
@ -112,24 +99,17 @@ void Jit64::fp_arith(UGeckoInstruction inst)
void Jit64::frsqrtex(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
int d = inst.FD;
int b = inst.FB;
fpr.Lock(b, d);
fpr.BindToRegister(d, d == b, true);
MOVSD(XMM0, M((void *)&one_const));
SQRTSD(XMM1, fpr.R(b));
if (cpu_info.bAVX)
{
VDIVSD(fpr.RX(d), XMM0, R(XMM1));
}
else
{
DIVSD(XMM0, R(XMM1));
MOVSD(fpr.R(d), XMM0);
}
fpr.UnlockAll();
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
int d = inst.FD;
int b = inst.FB;
fpr.Lock(b, d);
fpr.BindToRegister(d, true, true);
MOVSD(XMM0, M((void *)&one_const));
SQRTSD(XMM1, fpr.R(b));
DIVSD(XMM0, R(XMM1));
MOVSD(fpr.R(d), XMM0);
fpr.UnlockAll();
}
void Jit64::fmaddXX(UGeckoInstruction inst)
@ -224,28 +204,16 @@ void Jit64::fmrx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
if (inst.Rc)
{
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int b = inst.FB;
if (d != b)
{
fpr.Lock(b, d);
// we don't need to load d, but if it already is, it must be marked as dirty
if (fpr.IsBound(d))
{
fpr.BindToRegister(d);
}
fpr.BindToRegister(b, true, false);
// caveat: the order of ModRM:r/m, ModRM:reg is deliberate!
// "MOVSD reg, mem" zeros out the upper half of the destination register
MOVSD(fpr.R(d), fpr.RX(b));
fpr.UnlockAll();
}
fpr.Lock(b, d);
fpr.BindToRegister(d, true, true);
MOVSD(XMM0, fpr.R(b));
MOVSD(fpr.R(d), XMM0);
fpr.UnlockAll();
}
void Jit64::fcmpx(UGeckoInstruction inst)