From f000bb759051c3ef7061b0df0c5d6d20e31d4cbd Mon Sep 17 00:00:00 2001 From: Shawn Hoffman Date: Mon, 3 Oct 2016 07:15:26 -0700 Subject: [PATCH] Jit64: use domain-appropriate instructions Original commit: commit 62d3423ccf090da95b0caae1652e8fbb248c4b99 Author: Tillmann Karras Date: Wed Nov 18 20:32:20 2015 +0100 --- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 30 +++++++++---------- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 4a50a0ca82..65339e162d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -149,18 +149,18 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re SwitchToFarCode(); SetJumpTarget(handle_nan); MOVAPD(tmp, R(clobber)); - PANDN(clobber, R(xmm)); - PAND(tmp, M(psGeneratedQNaN)); - POR(tmp, R(clobber)); + ANDNPD(clobber, R(xmm)); + ANDPD(tmp, M(psGeneratedQNaN)); + ORPD(tmp, R(clobber)); MOVAPD(xmm, R(tmp)); for (u32 x : inputs) { MOVAPD(clobber, fpr.R(x)); CMPPD(clobber, R(clobber), CMP_ORD); MOVAPD(tmp, R(clobber)); - PANDN(clobber, fpr.R(x)); - PAND(xmm, R(tmp)); - POR(xmm, R(clobber)); + ANDNPD(clobber, fpr.R(x)); + ANDPD(xmm, R(tmp)); + ORPD(xmm, R(clobber)); } FixupBranch done = J(true); SwitchToNearCode(); @@ -350,7 +350,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst) ADDSD(XMM1, fpr.R(b)); } if (inst.SUBOP5 == 31) // nmadd - PXOR(XMM1, M(packed ? psSignBits2 : psSignBits)); + XORPD(XMM1, M(packed ? psSignBits2 : psSignBits)); } fpr.BindToRegister(d, !single); if (single) @@ -384,15 +384,15 @@ void Jit64::fsign(UGeckoInstruction inst) switch (inst.SUBOP10) { case 40: // neg - avx_op(&XEmitter::VPXOR, &XEmitter::PXOR, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits), - packed); + avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src, + M(packed ? psSignBits2 : psSignBits), packed); break; case 136: // nabs - avx_op(&XEmitter::VPOR, &XEmitter::POR, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits), + avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits), packed); break; case 264: // abs - avx_op(&XEmitter::VPAND, &XEmitter::PAND, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask), + avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask), packed); break; default: @@ -416,7 +416,7 @@ void Jit64::fselx(UGeckoInstruction inst) bool packed = inst.OPCD == 4; // ps_sel fpr.Lock(a, b, c, d); - PXOR(XMM0, R(XMM0)); + XORPD(XMM0, R(XMM0)); // This condition is very tricky; there's only one right way to handle both the case of // negative/positive zero and NaN properly. // (a >= -0.0 ? c : b) transforms into (0 > a ? b : c), hence the NLE. @@ -433,9 +433,9 @@ void Jit64::fselx(UGeckoInstruction inst) else { MOVAPD(XMM1, R(XMM0)); - PAND(XMM0, fpr.R(b)); - PANDN(XMM1, fpr.R(c)); - POR(XMM1, R(XMM0)); + ANDPD(XMM0, fpr.R(b)); + ANDNPD(XMM1, fpr.R(c)); + ORPD(XMM1, R(XMM0)); } fpr.BindToRegister(d, !packed); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 458b143112..f834282ea9 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -362,7 +362,7 @@ void Jit64::dcbz(UGeckoInstruction inst) FixupBranch slow = J_CC(CC_Z, true); // Fast path: compute full address, then zero out 32 bytes of memory. - PXOR(XMM0, R(XMM0)); + XORPS(XMM0, R(XMM0)); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);