From 7a0e3dca12d231f0592df6f639923871b7841b08 Mon Sep 17 00:00:00 2001
From: cottonvibes
Date: Sun, 15 Mar 2009 10:03:34 +0000
Subject: [PATCH] implemented all vu lower instructions (second pass).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@792 96395faa-99c1-11dd-bbfe-3dabce05a288
---
 pcsx2/x86/microVU.cpp       |   2 +-
 pcsx2/x86/microVU.h         |  14 +--
 pcsx2/x86/microVU_Alloc.inl |  16 ++++
 pcsx2/x86/microVU_Lower.inl | 171 +++++++++++++++++++++++++++++++-----
 pcsx2/x86/microVU_Misc.h    |   5 +-
 5 files changed, 178 insertions(+), 30 deletions(-)

diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp
index 2ce53e7a8c..d61690c514 100644
--- a/pcsx2/x86/microVU.cpp
+++ b/pcsx2/x86/microVU.cpp
@@ -64,7 +64,7 @@ PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = {0.000030517578125, 0.000030517578
 // Micro VU - Main Functions
 //------------------------------------------------------------------
 
-// Only run this once! ;)
+// Only run this once per VU! ;)
 microVUt(void) mVUinit(VURegs* vuRegsPtr) {
 
 	microVU* mVU = mVUx;
diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h
index dcae7f4377..f76f271585 100644
--- a/pcsx2/x86/microVU.h
+++ b/pcsx2/x86/microVU.h
@@ -20,6 +20,7 @@
 #define _EmitterId_ (vuIndex+1)
 #include "Common.h"
 #include "VU.h"
+#include "GS.h"
 #include "ix86/ix86.h"
 #include "microVU_Alloc.h"
 
@@ -104,15 +105,16 @@ struct microVU {
 	u32 microSize; // VU Micro Memory Size
 	u32 progSize; // VU Micro Program Size (microSize/8)
 	u32 cacheAddr; // VU Cache Start Address
-	static const u32 cacheSize = 0x400000; // VU Cache Size
+	static const u32 cacheSize = 0x500000; // VU Cache Size
 
 	microProgManager<0x800> prog; // Micro Program Data
 
-	VURegs* regs; // VU Regs Struct
-	u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
-	u8* ptr; // Pointer to next place to write recompiled code to
-	u32 code; // Contains the current Instruction
-	u32 iReg; // iReg
+	VURegs* regs;    // VU Regs Struct
+	u8* cache;       // Dynarec Cache Start (where we will start writing the recompiled code to)
+	u8* ptr;         // Pointer to next place to write recompiled code to
+	u32 code;        // Contains the current Instruction
+	u32 iReg;        // iReg (only used in recompilation, not execution)
+	u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
 
 /*
 	uptr x86eax; // Accumulator register. Used in arithmetic operations.
diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl
index ccea38603b..6da7d4472e 100644
--- a/pcsx2/x86/microVU_Alloc.inl
+++ b/pcsx2/x86/microVU_Alloc.inl
@@ -741,6 +741,16 @@ microVUt(void) mVUallocMFLAGb(int reg, int fInstance) {
 	OR32RtoR(fInstance, reg);
 }
 
+microVUt(void) mVUallocCFLAGa(int reg, int fInstance) { // Emit: reg = clipFlag[fInstance]
+	microVU* mVU = mVUx;
+	MOV32MtoR(reg, (uptr)&mVU->clipFlag[fInstance]); // Memory operand is the flag's address, not its current value
+}
+
+microVUt(void) mVUallocCFLAGb(int reg, int fInstance) { // Emit: clipFlag[fInstance] = reg
+	microVU* mVU = mVUx;
+	MOV32RtoM((uptr)&mVU->clipFlag[fInstance], reg); // Memory operand is the flag's address, not its current value
+}
+
 //------------------------------------------------------------------
 // VI Reg Allocators
 //------------------------------------------------------------------
@@ -788,4 +798,10 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
 	if (!_reg_) { getZero(reg); } \
 	else { mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], _X_Y_Z_W); } \
 }
+
+// VF to GPR: loads the single 32-bit field _fxf_ of VF[_reg_] into GPRreg (VF0 fields 0..2 are emitted as zero)
+#define getReg8(GPRreg, _reg_, _fxf_) { \
+	if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \
+	else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[_fxf_]); } \
+}
 #endif //PCSX2_MICROVU
diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl
index 9a3579aae5..1002a578f5 100644
--- a/pcsx2/x86/microVU_Lower.inl
+++ b/pcsx2/x86/microVU_Lower.inl
@@ -207,11 +207,11 @@ microVUf(void) mVU_EATANxz() {
 		mVU_EATAN_();
 	}
 }
-#define eexpHelper(addr) { \
-	SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \
-	SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmT1); \
-	SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \
-	SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \
+#define eexpHelper(addr) {						\
+	SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs);			\
+	SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmT1);		\
+	SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr);	\
+	SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt);			\
 }
 microVUf(void) mVU_EEXP() {
 	microVU* mVU = mVUx;
@@ -334,11 +334,11 @@ microVUf(void) mVU_ESADD() {
 		SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
 	}
 }
-#define esinHelper(addr) { \
-	SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); \
-	SSE_MOVAPS_XMM_to_XMM(xmmFs, xmmT1); \
-	SSE_MULSS_M32_to_XMM(xmmFs, (uptr)addr); \
-	SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); \
+#define esinHelper(addr) {						\
+	SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt);			\
+	SSE_MOVAPS_XMM_to_XMM(xmmFs, xmmT1);		\
+	SSE_MULSS_M32_to_XMM(xmmFs, (uptr)addr);	\
+	SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs);			\
 }
 microVUf(void) mVU_ESIN() {
 	microVU* mVU = mVUx;
@@ -390,11 +390,56 @@ microVUf(void) mVU_ESUM() {
 	}
 }
 
-microVUf(void) mVU_FCAND() {}
-microVUf(void) mVU_FCEQ() {}
-microVUf(void) mVU_FCOR() {}
-microVUf(void) mVU_FCSET() {}
-microVUf(void) mVU_FCGET() {}
+microVUf(void) mVU_FCAND() { // VI[1] = ((clip flag & Imm24) != 0)
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocCFLAGa(gprT2, fvcInstance);
+		XOR32RtoR(gprT1, gprT1); // Clear before AND so the flags SETNZ reads come from the AND
+		AND32ItoR(gprT2, _Imm24_);
+		SETNZ8R(gprT1); // 1 if any masked bit is set
+		mVUallocVIb(gprT1, 1);
+	}
+}
+microVUf(void) mVU_FCEQ() { // VI[1] = (clip flag == Imm24)
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocCFLAGa(gprT2, fvcInstance);
+		XOR32RtoR(gprT1, gprT1); // Clear before CMP so the flags SETE reads come from the CMP
+		CMP32ItoR(gprT2, _Imm24_);
+		SETE8R(gprT1); // 1 only on equality (SETNZ here would yield 1 on inequality)
+		mVUallocVIb(gprT1, 1);
+	}
+}
+microVUf(void) mVU_FCGET() { // VI[_Ft_] = low 12 bits of the clip flag
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocCFLAGa(gprT1, fvcInstance);
+		AND32ItoR(gprT1, 0xfff);
+		mVUallocVIb(gprT1, _Ft_);
+	}
+}
+microVUf(void) mVU_FCOR() { // VI[1] = ((clip flag | Imm24) has all 24 low bits set)
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocCFLAGa(gprT1, fvcInstance);
+		OR32ItoR(gprT1, _Imm24_);
+		ADD32ItoR(gprT1, 1); // If 24 1's will make 25th bit 1, else 0
+		SHR32ItoR(gprT1, 24); // Get the 25th bit (also clears the rest of the garbage in the reg)
+		mVUallocVIb(gprT1, 1);
+	}
+}
+microVUf(void) mVU_FCSET() { // clip flag = Imm24
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		MOV32ItoR(gprT1, _Imm24_);
+		mVUallocCFLAGb(gprT1, fcInstance);
+	}
+}
 
 microVUf(void) mVU_FMAND() {
 	microVU* mVU = mVUx;
@@ -800,15 +845,76 @@ microVUf(void) mVU_SQI() {
 	}
 }
 
-microVUf(void) mVU_RINIT() {}
-microVUf(void) mVU_RGET() {}
-microVUf(void) mVU_RNEXT() {}
-microVUf(void) mVU_RXOR() {}
+microVUf(void) mVU_RINIT() { // R = 0x3f800000 | (low 23 bits of the selected VF field)
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		if (_Fs_ || (_Fsf_ == 3)) { // Other fields of VF0 read as zero, handled by the else branch
+			getReg8(gprR, _Fs_, _Fsf_);
+			AND32ItoR(gprR, 0x007fffff); // Keep the low 23 bits
+			OR32ItoR (gprR, 0x3f800000); // 0x3f800000 is the bit pattern of 1.0f
+		}
+		else MOV32ItoR(gprR, 0x3f800000);
+	}
+}
+microVUt(void) mVU_RGET_() { // Emit stores of gprR into every selected field of VF[_Ft_]; nothing is emitted when _Ft_ is 0
+	microVU* mVU = mVUx;
+	if (_Ft_) {
+		if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR);
+		if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR);
+		if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR);
+		if (_W) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[3], gprR);
+	}
+}
+microVUf(void) mVU_RGET() { // Copy R into VF[_Ft_] without advancing it
+	microVU* mVU = mVUx;
+	if (recPass == 0) { /*if (!_Ft_) nop();*/ }
+	else { mVU_RGET_(); }
+}
+microVUf(void) mVU_RNEXT() { // Advance R by one step, then copy it into VF[_Ft_]
+	microVU* mVU = mVUx;
+	if (recPass == 0) { /*if (!_Ft_) nop();*/ }
+	else {
+		// algorithm from www.project-fao.org
+		MOV32RtoR(gprT1, gprR);
+		SHR32ItoR(gprT1, 4);
+		AND32ItoR(gprT1, 1); // gprT1 = bit 4 of R
 
-microVUf(void) mVU_WAITP() {}
-microVUf(void) mVU_WAITQ() {}
+		MOV32RtoR(gprT2, gprR);
+		SHR32ItoR(gprT2, 22);
+		AND32ItoR(gprT2, 1); // gprT2 = bit 22 of R
+
+		SHL32ItoR(gprR, 1);
+		XOR32RtoR(gprT1, gprT2);
+		XOR32RtoR(gprR, gprT1); // New bit 0 = old bit 4 ^ old bit 22
+		AND32ItoR(gprR, 0x007fffff); // Keep the low 23 bits
+		OR32ItoR (gprR, 0x3f800000); // 0x3f800000 is the bit pattern of 1.0f
+		mVU_RGET_();
+	}
+}
+microVUf(void) mVU_RXOR() { // R ^= low 23 bits of the selected VF field
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		if (_Fs_ || (_Fsf_ == 3)) { // Other fields of VF0 read as zero, so the XOR would change nothing
+			getReg8(gprT1, _Fs_, _Fsf_);
+			AND32ItoR(gprT1, 0x7fffff);
+			XOR32RtoR(gprR, gprT1);
+		}
+	}
+}
+
+microVUf(void) mVU_WAITP() { // Emits no code in either pass yet
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {}
+}
+microVUf(void) mVU_WAITQ() { // Emits no code in either pass yet
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {}
+}
 
-microVUf(void) mVU_XGKICK() {}
 microVUf(void) mVU_XTOP() {
 	microVU* mVU = mVUx;
 	if (recPass == 0) {}
@@ -825,4 +931,25 @@ microVUf(void) mVU_XITOP() {
 		mVUallocVIb(gprT1, _Ft_);
 	}
 }
+
+microVUt(void) __fastcall mVU_XGKICK_(u32 addr) { // Runtime helper: hands the packet at VU memory offset addr to the MTGS thread on GIF path 1
+	microVU* mVU = mVUx; // NOTE(review): addr is used as a byte offset here, while getVUmem() scales VI values by 16 -- confirm units
+	u32 *data = (u32*)(mVU->regs->Mem + (addr&0x3fff)); // Packet start, masked into a 0x4000-byte window
+	u32 size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff)) >> 4); // Third argument: 16-byte units left before offset 0x4000
+	u8 *pDest = mtgsThread->GetDataPacketPtr();
+	memcpy_aligned(pDest, data, size<<4); // Copy from the same masked pointer that PrepDataPacket was given
+	mtgsThread->SendDataPacket();
+}
+void __fastcall mVU_XGKICK0(u32 addr) { mVU_XGKICK_<0>(addr); }
+void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); }
+
+microVUf(void) mVU_XGKICK() { // Emit a call to the helper matching vuIndex, with VI[_Fs_] as its argument
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocVIa(gprT2, _Fs_); // gprT2 = ECX for __fastcall
+		if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0);
+		else CALLFunc((uptr)mVU_XGKICK1);
+	}
+}
 #endif //PCSX2_MICROVU
diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h
index a7cfc6330d..e28ec9c4e2 100644
--- a/pcsx2/x86/microVU_Misc.h
+++ b/pcsx2/x86/microVU_Misc.h
@@ -83,6 +83,7 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]);
 #define _Imm12_ (((mVU->code >> 21 ) & 0x1) << 11) | (mVU->code & 0x7ff)
 #define _Imm5_ (((mVU->code & 0x400) ? 0xfff0 : 0) | ((mVU->code >> 6) & 0xf))
 #define _Imm15_ (((mVU->code >> 10) & 0x7800) | (mVU->code & 0x7ff))
+#define _Imm24_ ((u32)(mVU->code & 0xffffff))
 
 #define getVUmem(x) (((vuIndex == 1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16)
 #define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12)))
@@ -107,7 +108,7 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]);
 
 #define gprT1 0 // Temp Reg
 #define gprT2 1 // Temp Reg
-#define gprT3 2 // Temp Reg?
+#define gprR 2 // R Reg
 #define gprF0 3 // MAC Flag::Status Flag 0
 #define gprESP 4 // Don't use?
 #define gprF1 5 // MAC Flag::Status Flag 1
@@ -140,6 +141,8 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]);
 #define fpsInstance (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<12)) >> 12) - 1) & 0x3)
 #define fvmInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<14)) >> 14)
 #define fvsInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<16)) >> 16)
+#define fvcInstance 1//((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<14)) >> 14)
+#define fcInstance 1//((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<14)) >> 14)
 
 //#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<13))
 //#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<14))