/*
 * Review notes for this patch (Jit64 floating-point emitters, Jit.h and
 * Jit_FloatingPoint.cpp).
 *
 * What the hunks show:
 *  - fp_tri_op now takes two emitter member pointers instead of one: op_2,
 *    the two-operand form (X64Reg, OpArg), and op_3, the three-operand form
 *    (X64Reg, X64Reg, OpArg). op_3 is reset to nullptr when cpu_info.bAVX is
 *    false, so the op_3 branches only run when that flag is set.
 *  - In the non-reversible d == b case, and in the final case where d differs
 *    from both a and b, a non-null op_3 replaces the MOVSD-based sequence
 *    with a single op_3 call on (d, a, b).
 *  - The bool parameter dupe is renamed single in the .cpp definition and in
 *    fp_arith_s. The Jit.h declaration still spells it dupe.
 *  - fp_arith_s passes DIVSD, SUBSD, ADDSD and MULSD together with VDIVSD,
 *    VSUBSD, VADDSD and VMULSD.
 *  - frsqrtex emits VDIVSD into the register of d when cpu_info.bAVX is set
 *    and keeps the DIVSD + MOVSD pair otherwise. Its other removed and added
 *    lines read the same here and appear to be whitespace-only changes.
 *  - frsqrtex and fmrx now pass (d == b) instead of true as the second
 *    argument of fpr.BindToRegister.
 *
 * NOTE(review): the parameter meanings of BindToRegister are not visible in
 * this patch. Inline notes that mention them are assumptions to confirm.
 * NOTE(review): if the V-prefixed emitters map to the VEX scalar encodings,
 * the upper lane of the destination is taken from the first source register
 * rather than kept from the old destination. Confirm that this is acceptable
 * for d when single is false, and for d in frsqrtex.
 */
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 8676b1cb5a..71a556cb5f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -119,7 +119,7 @@ public: void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); typedef u32 (*Operation)(u32 a, u32 b); void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false); - void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); + void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op_2)(Gen::X64Reg, Gen::OpArg), void (XEmitter::*op_3)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg)); /* op_2 is the two-operand emitter, op_3 the three-operand one; the bool is still named dupe here but single in the .cpp definition */ // OPCODES void unknown_instruction(UGeckoInstruction _inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index ec8ddc63d3..066ba69dfd 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -13,37 +13,62 @@ static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFF static const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0}; static const double one_const = 1.0f; -void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)) +void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, + void (XEmitter::*op_2)(Gen::X64Reg, Gen::OpArg), + void (XEmitter::*op_3)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg)) { + if (!cpu_info.bAVX) /* no AVX flag: clear op_3 so the branches below fall back to op_2 */ + { + op_3 = nullptr; + } + fpr.Lock(d, a, b); if (d == a) { - fpr.BindToRegister(d, true); - (this->*op)(fpr.RX(d), fpr.R(b)); + fpr.BindToRegister(d); + (this->*op_2)(fpr.RX(d), fpr.R(b)); } else if (d == b) { if (reversible) { - fpr.BindToRegister(d, 
true); - (this->*op)(fpr.RX(d), fpr.R(a)); + fpr.BindToRegister(d); + (this->*op_2)(fpr.RX(d), fpr.R(a)); } else { - MOVSD(XMM0, fpr.R(b)); - fpr.BindToRegister(d, !dupe); - MOVSD(fpr.RX(d), fpr.R(a)); - (this->*op)(fpr.RX(d), Gen::R(XMM0)); + if (op_3) + { + fpr.BindToRegister(d); + fpr.BindToRegister(a, true, false); /* NOTE(review): presumably loads a without marking it dirty; confirm against the register cache */ + (this->*op_3)(fpr.RX(d), fpr.RX(a), fpr.R(b)); + } + else + { + MOVSD(XMM0, fpr.R(b)); + fpr.BindToRegister(d, !single); + MOVSD(fpr.RX(d), fpr.R(a)); + (this->*op_2)(fpr.RX(d), Gen::R(XMM0)); + } } } else { - // Sources different from d, can use rather quick solution - fpr.BindToRegister(d, !dupe); - MOVSD(fpr.RX(d), fpr.R(a)); - (this->*op)(fpr.RX(d), fpr.R(b)); + if (op_3) + { + fpr.BindToRegister(d, !single); + fpr.BindToRegister(a); /* NOTE(review): the d == b path above passes (true, false) for a; confirm the difference is intended */ + (this->*op_3)(fpr.RX(d), fpr.RX(a), fpr.R(b)); + } + else + { + fpr.BindToRegister(d, !single); + MOVSD(fpr.RX(d), fpr.R(a)); + (this->*op_2)(fpr.RX(d), fpr.R(b)); + } } - if (dupe) + + if (single) { ForceSinglePrecisionS(fpr.RX(d)); if (cpu_info.bSSE3) @@ -73,13 +98,13 @@ void Jit64::fp_arith_s(UGeckoInstruction inst) Default(inst); return; } - bool dupe = inst.OPCD == 59; + bool single = inst.OPCD == 59; switch (inst.SUBOP5) { - case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div - case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub - case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add - case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul + case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD, &XEmitter::VDIVSD); break; //div + case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD, &XEmitter::VSUBSD); break; //sub + case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD, &XEmitter::VADDSD); break; //add + case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, 
&XEmitter::VMULSD); break; //mul default: _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); } @@ -87,17 +112,24 @@ void Jit64::fp_arith_s(UGeckoInstruction inst) void Jit64::frsqrtex(UGeckoInstruction inst) { - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff) - int d = inst.FD; - int b = inst.FB; - fpr.Lock(b, d); - fpr.BindToRegister(d, true, true); - MOVSD(XMM0, M((void *)&one_const)); - SQRTSD(XMM1, fpr.R(b)); - DIVSD(XMM0, R(XMM1)); - MOVSD(fpr.R(d), XMM0); - fpr.UnlockAll(); + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff) + int d = inst.FD; + int b = inst.FB; + fpr.Lock(b, d); + fpr.BindToRegister(d, d == b, true); + MOVSD(XMM0, M((void *)&one_const)); + SQRTSD(XMM1, fpr.R(b)); + if (cpu_info.bAVX) + { + VDIVSD(fpr.RX(d), XMM0, R(XMM1)); /* three-operand form targets the register of d directly; the else branch needs DIVSD into XMM0 plus a MOVSD into d */ + } + else + { + DIVSD(XMM0, R(XMM1)); + MOVSD(fpr.R(d), XMM0); + } + fpr.UnlockAll(); } void Jit64::fmaddXX(UGeckoInstruction inst) @@ -198,7 +230,7 @@ void Jit64::fmrx(UGeckoInstruction inst) int d = inst.FD; int b = inst.FB; fpr.Lock(b, d); - fpr.BindToRegister(d, true, true); + fpr.BindToRegister(d, d == b, true); MOVSD(XMM0, fpr.R(b)); MOVSD(fpr.R(d), XMM0); fpr.UnlockAll();