From 57876cbe5ab13ff2f5939ab4ba096f9f946f91cd Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 17 Feb 2025 23:26:39 -0600 Subject: [PATCH] microVU: Accurate CLIP --- pcsx2/x86/microVU_Misc.h | 1 + pcsx2/x86/microVU_Upper.inl | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index c51cb22ba8..101feffd0b 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -21,6 +21,7 @@ struct mVU_Globals u32 signbit [4] = __four(0x80000000); u32 minvals [4] = __four(0xff7fffff); u32 maxvals [4] = __four(0x7f7fffff); + u32 exponent[4] = __four(0x7f800000); u32 one [4] = __four(0x3f800000); u32 Pi4 [4] = __four(0x3f490fdb); u32 T1 [4] = __four(0x3f7ffff5); diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index f2d1573d09..3c1f426313 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -544,36 +544,36 @@ mVUop(mVU_CLIP) const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0x1); const xmm& t1 = mVU.regAlloc->allocReg(); + const xmm& t2 = mVU.regAlloc->allocReg(); mVUunpack_xyzw(Ft, Ft, 0); mVUallocCFLAGa(mVU, gprT1, cFLAG.lastWrite); xSHL(gprT1, 6); - xAND.PS(Ft, ptr128[mVUglob.absclip]); - xMOVAPS(t1, Ft); - xPOR(t1, ptr128[mVUglob.signbit]); + xMOVAPS (t1, ptr128[mVUglob.exponent]); + xPAND (t1, Fs); + xPXOR (t2, t2); + xPCMP.EQD(t1, t2); // Denormal check + xPANDN (t1, Fs); // If denormal, set to zero, which can't be greater than any nonnegative denormal in Ft + xPAND (Ft, ptr128[mVUglob.absclip]); - xCMPNLE.PS(t1, Fs); // -w, -z, -y, -x - xCMPLT.PS(Ft, Fs); // +w, +z, +y, +x + xMOVAPS (Fs, ptr128[mVUglob.signbit]); + xPXOR (Fs, t1); // Negate + xPCMP.GTD(t1, Ft); // +w, +z, +y, +x + xPCMP.GTD(Fs, Ft); // -w, -z, -y, -x - xMOVAPS(Fs, Ft); // Fs = +w, +z, +y, +x - xUNPCK.LPS(Ft, t1); // Ft = -y,+y,-x,+x - xUNPCK.HPS(Fs, t1); // Fs = -w,+w,-z,+z - - xMOVMSKPS(gprT2, Fs); // -w,+w,-z,+z - xAND(gprT2, 0x3); - xSHL(gprT2, 4); - xOR(gprT1, gprT2); - - xMOVMSKPS(gprT2, Ft); // -y,+y,-x,+x - xAND(gprT2, 0xf); - xOR(gprT1, gprT2); - xAND(gprT1, 0xffffff); + xPBLEND.W (Fs, t1, 0x55); // Squish together + xPACK.SSWB(Fs, Fs); // Convert u16 to u8 + xPMOVMSKB (gprT2, Fs); // Get bitmask + xAND (gprT2, 0x3f); // Mask unused stuff + xAND (gprT1, 0xffffff); + xOR (gprT1, gprT2); mVUallocCFLAGb(mVU, gprT1, cFLAG.write); mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(t1); + mVU.regAlloc->clearNeeded(t2); mVU.profiler.EmitOp(opCLIP); } pass3