From 7fd0f67f93c41f9934870d58b89e68f5a5c3c851 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Thu, 5 Mar 2009 01:19:54 +0000 Subject: [PATCH] pork chop sandwiches! git-svn-id: http://pcsx2.googlecode.com/svn/trunk@682 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/pcsx2.rc | 4 +-- pcsx2/x86/microVU.cpp | 8 ++++-- pcsx2/x86/microVU.h | 1 - pcsx2/x86/microVU_Alloc.h | 8 +++--- pcsx2/x86/microVU_Alloc.inl | 5 ++-- pcsx2/x86/microVU_Misc.h | 50 ++++++++++++++++++++++++++----------- pcsx2/x86/microVU_Misc.inl | 35 +++++++++++++++++++++++--- pcsx2/x86/microVU_Upper.inl | 42 +++++++++++++++---------------- 8 files changed, 103 insertions(+), 50 deletions(-) diff --git a/pcsx2/windows/pcsx2.rc b/pcsx2/windows/pcsx2.rc index 79b4dbee64..89a7f40458 100644 --- a/pcsx2/windows/pcsx2.rc +++ b/pcsx2/windows/pcsx2.rc @@ -357,9 +357,9 @@ BEGIN DEFPUSHBUTTON "OK",IDOK,217,242,50,14 PUSHBUTTON "Cancel",IDCANCEL,278,242,50,14 CTEXT "These hacks will speed up emulation but reduce emulation compatibility or cause visual errors. If you have problems, disable all these and try again!",IDC_HACKDESC,18,7,286,19 - GROUPBOX "EmotionEngine (EE) Sync Hacks",IDC_STATIC,7,31,159,180 + GROUPBOX "EmotionEngine (EE) Sync Hacks",IDC_STATIC,7,31,159,185 GROUPBOX "Miscellaneous",IDC_STATIC,7,220,194,33 - LTEXT "Important: X2 and X3 sync hacks *will* cause choppy/skippy audio on many FMV movies.",IDC_STATIC,13,188,149,21 + LTEXT "Important: X2 and X3 sync hacks *will* cause choppy/skippy audio on many FMV movies.",IDC_STATIC,20,188,137,25 LTEXT "Known to work well with a couple games, namely Shadow of the Colossus (but breaks most other games).",IDC_STATIC,25,158,133,28 LTEXT "Big speedup! 
Works well with many games.",IDC_STATIC,25,124,125,19 LTEXT "Most compatible option - recommended for everyone with high-end machines.",IDC_STATIC,25,55,136,19 diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 827905c666..5e51b802ec 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -19,16 +19,20 @@ // Micro VU recompiler! - author: cottonvibes(@gmail.com) #include "PrecompiledHeader.h" -#include "microVU.h" #ifdef PCSX2_MICROVU +#include "microVU.h" //------------------------------------------------------------------ -// VU Micro - Global Variables +// Micro VU - Global Variables //------------------------------------------------------------------ PCSX2_ALIGNED16(microVU microVU0); PCSX2_ALIGNED16(microVU microVU1); +PCSX2_ALIGNED16(const u32 mVU_signbit[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; +PCSX2_ALIGNED16(const u32 mVU_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; +PCSX2_ALIGNED16(const u32 mVU_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; + //------------------------------------------------------------------ // Micro VU - Main Functions //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index a326e59eb0..ce69dc76b1 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -22,7 +22,6 @@ #include "VU.h" #include "ix86/ix86.h" #include "microVU_Alloc.h" -//#include struct microBlock { u32 pipelineState; // FMACx|y|z|w | FDiv | EFU | IALU | BRANCH // Still thinking of how I'm going to do this diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 588c59a33c..9c85fd7e10 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -45,10 +45,12 @@ struct microAllocInfo { // bit 5 = Read Q1/P1 or backup? // bit 6 = Write to Q2/P2? // bit 7 = Write Fd/Acc/Result to backup memory? - // bit 8 = Update Status Flags? - // bit 9 = Update Mac Flags? 
- // bit 10 = Used with bit 11 to make a 2-bit key for status/mac flag instance + // bit 8 = Update Mac Flags? + // bit 9 = Update Status Flags? + // bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance // bit 11 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3) + // bit 12 = Used with bit 13 to make a 2-bit key for status flag instance + // bit 13 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3) u32 curPC; }; diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 16c8a50326..9e4251c0a3 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -45,10 +45,11 @@ else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ } -microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft, const bool makeFd) { +microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft) { microVU* mVU = mVUx; Fs = xmmFs; Ft = xmmFt; + Fd = xmmFs; if (_XYZW_SS) { if (!_Fs_) { getZeroSS(Fs); } else { getReg(Fs, _Fs_); } @@ -69,8 +70,6 @@ microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft, const bool makeFd) { else { getReg(Ft, _Ft_); } } } - if (makeFdFs) {Fd = Fs;} - else {Fd = xmmFd;} } microVUt(void) mVUallocFMAC1b(int& Fd) { diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 47c64bfe32..3992088ac0 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -18,6 +18,13 @@ #pragma once +//------------------------------------------------------------------ +// Global Variables +//------------------------------------------------------------------ +PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_maxvals[4]); + //------------------------------------------------------------------ // Helper Macros //------------------------------------------------------------------ @@ -40,14 +47,23 @@ #define _Imm11_ (s32)(mVU->code & 0x400 ? 
0xfffffc00 | (mVU->code & 0x3ff) : mVU->code & 0x3ff) #define _UImm11_ (s32)(mVU->code & 0x7ff) -#define xmmT1 0 // XMM0 // Temp Reg -#define xmmFd 1 // XMM1 // Holds the Value of Fd -#define xmmFs 2 // XMM2 // Holds the Value of Fs -#define xmmFt 3 // XMM3 // Holds the Value of Ft -#define xmmACC1 4 // XMM4 // Holds the Value of ACC -#define xmmACC2 5 // XMM5 // Holds the Backup Value of ACC -#define xmmPQ 6 // XMM6 // Holds the Value and Backup Values of P and Q regs -#define xmmF 7 // XMM7 // Holds 4 instances of the status and mac flags (macflagX4::statusflagX4) +#define xmmT1 0 // Temp Reg +#define xmmFs 1 // Holds the Value of Fs (writes back result Fd) +#define xmmFt 2 // Holds the Value of Ft +#define xmmACC1 3 // Holds the Value of ACC +#define xmmACC2 4 // Holds the Backup Value of ACC +#define xmmPQ 5 // Holds the Value and Backup Values of P and Q regs +#define xmmVI 6 // Holds VI regs 8, 9, 10, 11, 12, 13, 14, and 15 +#define xmmF 7 // Holds 4 instances of the status and mac flags (macflagX4::statusflagX4) + +#define gprT1 0 // Temp Reg +#define gprT2 1 // Temp Reg +#define gprT3 2 // Temp Reg +#define gprVI7 3 // VI 7 +#define gprESP 4 // Don't use? +#define gprVI5 5 // VI 6::5 +#define gprVI3 6 // VI 4::3 +#define gprVI1 7 // VI 2::1 // Template Stuff #define mVUx (vuIndex ? 
&microVU1 : &microVU0) @@ -57,11 +73,17 @@ #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo -#define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0)) -#define getFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1)) -#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2)) -#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3)) -#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7)) -#define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8)) +#define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0)) +#define getFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1)) +#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2)) +#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3)) +#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7)) +#define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8)) +#define doMac (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<8)) +#define doStatus (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<9)) +#define fmInstance (((mVUallocInfo.info[mVUallocInfo.curPC] >> 10) & 3) + 4) // Mac flag instance key (bits 10-11), offset by 4 +#define fsInstance (((mVUallocInfo.info[mVUallocInfo.curPC] >> 12) & 3) + 0) // Status flag instance key (bits 12-13) +#define fpmInstance (((((mVUallocInfo.info[mVUallocInfo.curPC] >> 10) & 3) - 1) & 0x3) + 4) // Previous mac flag instance (wraps 0 -> 3), offset by 4 +#define fpsInstance (((((mVUallocInfo.info[mVUallocInfo.curPC] >> 12) & 3) - 1) & 0x3) + 0) // Previous status flag instance (wraps 0 -> 3) #include "microVU_Misc.inl" diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index b31b1fda76..6f5ba445a2 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -24,11 +24,40 @@ //------------------------------------------------------------------ // Used for Result Clamping -microVUx(void) mVUclamp1(int reg, int regTemp, int xyzw) { +microVUx(void) mVUclamp1(int reg, int regT1, int xyzw) { + switch (xyzw) { + case 1: case 2: case 4: case 8: + SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals); + SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals); + break; + default: + SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals); +
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals); + break; + } } // Used for Operand Clamping -microVUx(void) mVUclamp2(int reg, int regTemp, int xyzw) { +microVUx(void) mVUclamp2(int reg, int regT1, int xyzw) { + if (CHECK_VU_SIGN_OVERFLOW) { + switch (xyzw) { + case 1: case 2: case 4: case 8: + SSE_MOVSS_XMM_to_XMM(regT1, reg); + SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit); + SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals); + SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals); + SSE_ORPS_XMM_to_XMM(reg, regT1); + break; + default: + SSE_MOVAPS_XMM_to_XMM(regT1, reg); + SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit); + SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals); + SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals); + SSE_ORPS_XMM_to_XMM(reg, regT1); + break; + } + } + else mVUclamp1(reg, regT1, xyzw); } //------------------------------------------------------------------ @@ -123,4 +152,4 @@ microVUx(void) mVUsaveReg(int reg, u32 offset, int xyzw) { } } -#endif //PCSX2_MICROVU \ No newline at end of file +#endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 21dbf7aa91..1b422e0c15 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -22,56 +22,54 @@ // mVUupdateFlags() - Updates status/mac flags //------------------------------------------------------------------ +#define AND_XYZW (_XYZW_SS ? (1) : (doMac ? 
(_X_Y_Z_W) : (flipMask[_X_Y_Z_W]))) + microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { microVU* mVU = mVUx; static u8 *pjmp, *pjmp2; - static u32 *pjmp32; - static u32 macaddr, stataddr, prevstataddr; - static int x86macflag, x86statflag, x86temp; + static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; //SysPrintf ("mVUupdateFlags\n"); if( !(doFlags) ) return; - //macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0); - //stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address - //prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address - - - SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); // Flip wzyx to xyzw - MOV32MtoR(x86statflag, prevstataddr); // Load the previous status in to x86statflag - AND16ItoR(x86statflag, 0xff0); // Keep Sticky and D/I flags + if (!doMac) { regT1 = reg; } + else SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); // Flip wzyx to xyzw + if (doStatus) { + SSE_PEXTRW_XMM_to_R32(gprT1, xmmF, fpsInstance); // Get Prev Status Flag + AND16ItoR(gprT1, 0xff0); // Keep Sticky and D/I flags + } //-------------------------Check for Signed flags------------------------------ // The following code makes sure the Signed Bit isn't set with Negative Zero SSE_XORPS_XMM_to_XMM(regT2, regT2); // Clear regT2 SSE_CMPEQPS_XMM_to_XMM(regT2, regT1); // Set all F's if each vector is zero - SSE_MOVMSKPS_XMM_to_R32(EAX, regT2); // Used for Zero Flag Calculation + SSE_MOVMSKPS_XMM_to_R32(gprT3, regT2); // Used for Zero Flag Calculation SSE_ANDNPS_XMM_to_XMM(regT2, regT1); - SSE_MOVMSKPS_XMM_to_R32(x86macflag, regT2); // Move the sign bits of the t1reg + SSE_MOVMSKPS_XMM_to_R32(gprT2, regT2); // Move the sign bits of the t1reg - AND16ItoR(x86macflag, _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation + AND16ItoR(gprT2, AND_XYZW ); // Grab "Is Signed" bits from the previous calculation pjmp = JZ8(0); // Skip if none are - OR16ItoR(x86statflag, 0x82); // SS, S flags - SHL16ItoR(x86macflag, 4); + if (doMac) 
SHL16ItoR(gprT2, 4); + if (doStatus) OR16ItoR(gprT1, 0x82); // SS, S flags if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking x86SetJ8(pjmp); //-------------------------Check for Zero flags------------------------------ - AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation + AND16ItoR(gprT3, AND_XYZW ); // Grab "Is Zero" bits from the previous calculation pjmp = JZ8(0); // Skip if none are - OR16ItoR(x86statflag, 0x41); // ZS, Z flags - OR32RtoR(x86macflag, EAX); + if (doMac) OR32RtoR(gprT2, gprT3); + if (doStatus) OR16ItoR(gprT1, 0x41); // ZS, Z flags x86SetJ8(pjmp); //-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------ if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here - MOV16RtoM(macaddr, x86macflag); - MOV16RtoM(stataddr, x86statflag); + if (doMac) SSE_PINSRW_R32_to_XMM(xmmF, gprT2, fmInstance); // Set Mac Flag + if (doStatus) SSE_PINSRW_R32_to_XMM(xmmF, gprT1, fsInstance); // Set Status Flag } //------------------------------------------------------------------ @@ -81,7 +79,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { #define mVU_FMAC1(operation) { \ if (isNOP) return; \ int Fd, Fs, Ft; \ - mVUallocFMAC1a(Fd, Fs, Ft, 1); \ + mVUallocFMAC1a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \