mirror of https://github.com/PCSX2/pcsx2.git
microVU: Accurate CLIP
This commit is contained in:
parent
c0e69ff9ab
commit
57876cbe5a
|
@ -21,6 +21,7 @@ struct mVU_Globals
|
||||||
u32 signbit [4] = __four(0x80000000);
|
u32 signbit [4] = __four(0x80000000);
|
||||||
u32 minvals [4] = __four(0xff7fffff);
|
u32 minvals [4] = __four(0xff7fffff);
|
||||||
u32 maxvals [4] = __four(0x7f7fffff);
|
u32 maxvals [4] = __four(0x7f7fffff);
|
||||||
|
u32 exponent[4] = __four(0x7f800000);
|
||||||
u32 one [4] = __four(0x3f800000);
|
u32 one [4] = __four(0x3f800000);
|
||||||
u32 Pi4 [4] = __four(0x3f490fdb);
|
u32 Pi4 [4] = __four(0x3f490fdb);
|
||||||
u32 T1 [4] = __four(0x3f7ffff5);
|
u32 T1 [4] = __four(0x3f7ffff5);
|
||||||
|
|
|
@ -544,36 +544,36 @@ mVUop(mVU_CLIP)
|
||||||
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf);
|
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0x1);
|
const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, 0x1);
|
||||||
const xmm& t1 = mVU.regAlloc->allocReg();
|
const xmm& t1 = mVU.regAlloc->allocReg();
|
||||||
|
const xmm& t2 = mVU.regAlloc->allocReg();
|
||||||
|
|
||||||
mVUunpack_xyzw(Ft, Ft, 0);
|
mVUunpack_xyzw(Ft, Ft, 0);
|
||||||
mVUallocCFLAGa(mVU, gprT1, cFLAG.lastWrite);
|
mVUallocCFLAGa(mVU, gprT1, cFLAG.lastWrite);
|
||||||
xSHL(gprT1, 6);
|
xSHL(gprT1, 6);
|
||||||
|
|
||||||
xAND.PS(Ft, ptr128[mVUglob.absclip]);
|
xMOVAPS (t1, ptr128[mVUglob.exponent]);
|
||||||
xMOVAPS(t1, Ft);
|
xPAND (t1, Fs);
|
||||||
xPOR(t1, ptr128[mVUglob.signbit]);
|
xPXOR (t2, t2);
|
||||||
|
xPCMP.EQD(t1, t2); // Denormal check
|
||||||
|
xPANDN (t1, Fs); // If denormal, set to zero, which can't be greater than any nonnegative denormal in Ft
|
||||||
|
xPAND (Ft, ptr128[mVUglob.absclip]);
|
||||||
|
|
||||||
xCMPNLE.PS(t1, Fs); // -w, -z, -y, -x
|
xMOVAPS (Fs, ptr128[mVUglob.signbit]);
|
||||||
xCMPLT.PS(Ft, Fs); // +w, +z, +y, +x
|
xPXOR (Fs, t1); // Negate
|
||||||
|
xPCMP.GTD(t1, Ft); // +w, +z, +y, +x
|
||||||
|
xPCMP.GTD(Fs, Ft); // -w, -z, -y, -x
|
||||||
|
|
||||||
xMOVAPS(Fs, Ft); // Fs = +w, +z, +y, +x
|
xPBLEND.W (Fs, t1, 0x55); // Squish together
|
||||||
xUNPCK.LPS(Ft, t1); // Ft = -y,+y,-x,+x
|
xPACK.SSWB(Fs, Fs); // Convert u16 to u8
|
||||||
xUNPCK.HPS(Fs, t1); // Fs = -w,+w,-z,+z
|
xPMOVMSKB (gprT2, Fs); // Get bitmask
|
||||||
|
xAND (gprT2, 0x3f); // Mask unused stuff
|
||||||
xMOVMSKPS(gprT2, Fs); // -w,+w,-z,+z
|
xAND (gprT1, 0xffffff);
|
||||||
xAND(gprT2, 0x3);
|
xOR (gprT1, gprT2);
|
||||||
xSHL(gprT2, 4);
|
|
||||||
xOR(gprT1, gprT2);
|
|
||||||
|
|
||||||
xMOVMSKPS(gprT2, Ft); // -y,+y,-x,+x
|
|
||||||
xAND(gprT2, 0xf);
|
|
||||||
xOR(gprT1, gprT2);
|
|
||||||
xAND(gprT1, 0xffffff);
|
|
||||||
|
|
||||||
mVUallocCFLAGb(mVU, gprT1, cFLAG.write);
|
mVUallocCFLAGb(mVU, gprT1, cFLAG.write);
|
||||||
mVU.regAlloc->clearNeeded(Fs);
|
mVU.regAlloc->clearNeeded(Fs);
|
||||||
mVU.regAlloc->clearNeeded(Ft);
|
mVU.regAlloc->clearNeeded(Ft);
|
||||||
mVU.regAlloc->clearNeeded(t1);
|
mVU.regAlloc->clearNeeded(t1);
|
||||||
|
mVU.regAlloc->clearNeeded(t2);
|
||||||
mVU.profiler.EmitOp(opCLIP);
|
mVU.profiler.EmitOp(opCLIP);
|
||||||
}
|
}
|
||||||
pass3
|
pass3
|
||||||
|
|
Loading…
Reference in New Issue