diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp
index cc28ab22a9..2ce53e7a8c 100644
--- a/pcsx2/x86/microVU.cpp
+++ b/pcsx2/x86/microVU.cpp
@@ -29,36 +29,36 @@
 PCSX2_ALIGNED16(microVU microVU0);
 PCSX2_ALIGNED16(microVU microVU1);
 
-PCSX2_ALIGNED16(u32 mVU_absclip[4])   = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
-PCSX2_ALIGNED16(u32 mVU_signbit[4])   = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
-PCSX2_ALIGNED16(u32 mVU_minvals[4])   = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff};
-PCSX2_ALIGNED16(u32 mVU_maxvals[4])   = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff};
-PCSX2_ALIGNED16(u32 mVU_one[4])       = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000};
-PCSX2_ALIGNED16(u32 mVU_T1[4])        = {0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5};
-PCSX2_ALIGNED16(u32 mVU_T2[4])        = {0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c};
-PCSX2_ALIGNED16(u32 mVU_T3[4])        = {0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6};
-PCSX2_ALIGNED16(u32 mVU_T4[4])        = {0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63};
-PCSX2_ALIGNED16(u32 mVU_T5[4])        = {0x3dc577df, 0x3dc577df, 0x3dc577df, 0x3dc577df};
-PCSX2_ALIGNED16(u32 mVU_T6[4])        = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xbd6501c4};
-PCSX2_ALIGNED16(u32 mVU_T7[4])        = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652};
-PCSX2_ALIGNED16(u32 mVU_T8[4])        = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7};
-PCSX2_ALIGNED16(u32 mVU_Pi4[4])       = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb};
-PCSX2_ALIGNED16(u32 mVU_S2[4])        = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4};
-PCSX2_ALIGNED16(u32 mVU_S3[4])        = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e};
-PCSX2_ALIGNED16(u32 mVU_S4[4])        = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f};
-PCSX2_ALIGNED16(u32 mVU_S5[4])        = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14};
-PCSX2_ALIGNED16(u32 mVU_E1[4])        = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8};
-PCSX2_ALIGNED16(u32 mVU_E2[4])        = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4};
-PCSX2_ALIGNED16(u32 mVU_E3[4])        = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff};
-PCSX2_ALIGNED16(u32 mVU_E4[4])        = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553};
-PCSX2_ALIGNED16(u32 mVU_E5[4])        = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510};
-PCSX2_ALIGNED16(u32 mVU_E6[4])        = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac};
-PCSX2_ALIGNED16(float mVU_FTOI_4[4])  = {16.0, 16.0, 16.0, 16.0};
-PCSX2_ALIGNED16(float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
-PCSX2_ALIGNED16(float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
-PCSX2_ALIGNED16(float mVU_ITOF_4[4])  = {0.0625f, 0.0625f, 0.0625f, 0.0625f};
-PCSX2_ALIGNED16(float mVU_ITOF_12[4]) = {0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625};
-PCSX2_ALIGNED16(float mVU_ITOF_15[4]) = {0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125};
+PCSX2_ALIGNED16(const u32 mVU_absclip[4])   = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
+PCSX2_ALIGNED16(const u32 mVU_signbit[4])   = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+PCSX2_ALIGNED16(const u32 mVU_minvals[4])   = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff};
+PCSX2_ALIGNED16(const u32 mVU_maxvals[4])   = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff};
+PCSX2_ALIGNED16(const u32 mVU_one[4])       = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000};
+PCSX2_ALIGNED16(const u32 mVU_T1[4])        = {0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5};
+PCSX2_ALIGNED16(const u32 mVU_T2[4])        = {0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c};
+PCSX2_ALIGNED16(const u32 mVU_T3[4])        = {0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6};
+PCSX2_ALIGNED16(const u32 mVU_T4[4])        = {0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63};
+PCSX2_ALIGNED16(const u32 mVU_T5[4])        = {0x3dc577df, 0x3dc577df, 0x3dc577df, 0x3dc577df};
+PCSX2_ALIGNED16(const u32 mVU_T6[4])        = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xbd6501c4};
+PCSX2_ALIGNED16(const u32 mVU_T7[4])        = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652};
+PCSX2_ALIGNED16(const u32 mVU_T8[4])        = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7};
+PCSX2_ALIGNED16(const u32 mVU_Pi4[4])       = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb};
+PCSX2_ALIGNED16(const u32 mVU_S2[4])        = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4};
+PCSX2_ALIGNED16(const u32 mVU_S3[4])        = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e};
+PCSX2_ALIGNED16(const u32 mVU_S4[4])        = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f};
+PCSX2_ALIGNED16(const u32 mVU_S5[4])        = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14};
+PCSX2_ALIGNED16(const u32 mVU_E1[4])        = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8};
+PCSX2_ALIGNED16(const u32 mVU_E2[4])        = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4};
+PCSX2_ALIGNED16(const u32 mVU_E3[4])        = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff};
+PCSX2_ALIGNED16(const u32 mVU_E4[4])        = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553};
+PCSX2_ALIGNED16(const u32 mVU_E5[4])        = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510};
+PCSX2_ALIGNED16(const u32 mVU_E6[4])        = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac};
+PCSX2_ALIGNED16(const float mVU_FTOI_4[4])  = {16.0, 16.0, 16.0, 16.0};
+PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
+PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
+PCSX2_ALIGNED16(const float mVU_ITOF_4[4])  = {0.0625f, 0.0625f, 0.0625f, 0.0625f};
+PCSX2_ALIGNED16(const float mVU_ITOF_12[4]) = {0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625};
+PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = {0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125};
 
 //------------------------------------------------------------------
 // Micro VU - Main Functions
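The tables above are IEEE-754 bit patterns broadcast across all four SSE lanes: mVU_absclip clears the sign bit, mVU_signbit isolates it, and mVU_maxvals/mVU_minvals are +FLT_MAX/-FLT_MAX, used for clamping because the real VUs never produce NaN or infinity. mVU_Pi4 is pi/4, the FTOI/ITOF tables are the 2^4, 2^12, and 2^15 scale factors for the VU's fixed-point conversion instructions, and the T/S/E tables appear to be polynomial coefficients for the EFU routines (EATAN/ESIN/EEXP series). A standalone sanity check of the clamp constants (illustrative, not part of the patch):

    #include <cassert>
    #include <cfloat>
    #include <cstring>

    int main() {
        float f;
        const unsigned int maxBits = 0x7f7fffff, minBits = 0xff7fffff;
        std::memcpy(&f, &maxBits, sizeof(f)); assert(f ==  FLT_MAX); // largest finite float
        std::memcpy(&f, &minBits, sizeof(f)); assert(f == -FLT_MAX); // most negative finite float
        return 0;
    }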
diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h
index d31d659694..dcae7f4377 100644
--- a/pcsx2/x86/microVU.h
+++ b/pcsx2/x86/microVU.h
@@ -100,11 +100,13 @@ struct microProgManager {
 };
 
 struct microVU {
-	int index;     // VU Index (VU0 or VU1)
+	u32 index;     // VU Index (VU0 or VU1)
 	u32 microSize; // VU Micro Memory Size
 	u32 progSize;  // VU Micro Program Size (microSize/8)
 	u32 cacheAddr; // VU Cache Start Address
 	static const u32 cacheSize = 0x400000; // VU Cache Size
+
+	microProgManager<0x800> prog; // Micro Program Data
 
 	VURegs* regs;  // VU Regs Struct
 	u8*     cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
@@ -122,7 +124,6 @@ struct microVU {
 	uptr x86esi; // Source register. Used as a pointer to a source in stream operations.
 	uptr x86edi; // Destination register. Used as a pointer to a destination in stream operations.
 	*/
-	microProgManager<0x800> prog; // Micro Program Data
 };
 
 // microVU rec structs
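Moving prog next to the other live members keeps the struct's active state together; it previously sat just after the commented-out x86 register block. The 0x800 template argument appears to be VU1's progSize: 0x4000 bytes of VU1 micro memory divided by 8, per the progSize comment above it. A hypothetical compile-time check of that assumed relationship (not in the patch):

    // Assumes VU1 microSize == 0x4000; progSize is documented as microSize/8.
    static_assert(0x4000 / 8 == 0x800, "microProgManager argument should equal VU1 progSize");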
diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl
index 4441c8e3c9..ccea38603b 100644
--- a/pcsx2/x86/microVU_Alloc.inl
+++ b/pcsx2/x86/microVU_Alloc.inl
@@ -769,7 +769,7 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
 }
 
 //------------------------------------------------------------------
-// Div/Sqrt/Rsqrt Allocator Helpers
+// Lower Instruction Allocator Helpers
 //------------------------------------------------------------------
 
 #define getReg5(reg, _reg_, _fxf_) { \
@@ -783,4 +783,9 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
 	} \
 }
 
+// Doesn't Clamp
+#define getReg7(reg, _reg_) { \
+	if (!_reg_) { getZero(reg); } \
+	else { mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], _X_Y_Z_W); } \
+}
 #endif //PCSX2_MICROVU
diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl
index 76fc48ba0e..9a3579aae5 100644
--- a/pcsx2/x86/microVU_Lower.inl
+++ b/pcsx2/x86/microVU_Lower.inl
@@ -560,11 +560,6 @@ microVUf(void) mVU_IBNE() {}
 microVUf(void) mVU_JR()   {}
 microVUf(void) mVU_JALR() {}
 
-microVUf(void) mVU_ILW()  {}
-microVUf(void) mVU_ISW()  {}
-microVUf(void) mVU_ILWR() {}
-microVUf(void) mVU_ISWR() {}
-
 microVUf(void) mVU_MOVE() {
 	microVU* mVU = mVUx;
 	if (recPass == 0) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
@@ -609,6 +604,89 @@ microVUf(void) mVU_MR32() {
 		mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
 	}
 }
+
+microVUf(void) mVU_ILW() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) { /*If (!_Ft_) nop();*/ }
+	else {
+		if (!_Fs_) {
+			MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS );
+			mVUallocVIb(gprT1, _Ft_);
+		}
+		else {
+			mVUallocVIa(gprT1, _Fs_);
+			ADD32ItoR(gprT1, _Imm11_);
+			mVUaddrFix(gprT1);
+			MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
+			if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
+			mVUallocVIb(gprT1, _Ft_);
+		}
+	}
+}
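For reference, the behaviour this recompiles: ILW adds the signed 11-bit immediate to vi[is], wraps the address (mVUaddrFix/getVUmem), and reads the one 32-bit element selected by the dest field (see offsetSS below), keeping the low 16 bits in vi[it]. A hypothetical interpreter-level equivalent, with vuWrapAddr() standing in for the wrap logic (helper names are illustrative, not PCSX2 API):

    void interpILW(VURegs* regs, int it, int is, s32 imm11, int elem /*0..3 = x..w*/) {
        u32 addr = vuWrapAddr(regs->VI[is].US[0] + imm11); // byte address of the quadword
        u32 word;
        memcpy(&word, &regs->Mem[addr + elem * 4], sizeof(word));
        regs->VI[it].US[0] = (u16)word; // integer registers hold 16 bits
    }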
+microVUf(void) mVU_ILWR() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) { /*If (!_Ft_) nop();*/ }
+	else {
+		if (!_Fs_) {
+			MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS );
+			mVUallocVIb(gprT1, _Ft_);
+		}
+		else {
+			mVUallocVIa(gprT1, _Fs_);
+			mVUaddrFix(gprT1);
+			MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
+			if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
+			mVUallocVIb(gprT1, _Ft_);
+		}
+	}
+}
+microVUf(void) mVU_ISW() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		if (!_Fs_) {
+			int imm = getVUmem(_Imm11_);
+			mVUallocVIa(gprT1, _Ft_);
+			if (_X) MOV32RtoM((uptr)mVU->regs->Mem + imm,      gprT1);
+			if (_Y) MOV32RtoM((uptr)mVU->regs->Mem + imm + 4,  gprT1);
+			if (_Z) MOV32RtoM((uptr)mVU->regs->Mem + imm + 8,  gprT1);
+			if (_W) MOV32RtoM((uptr)mVU->regs->Mem + imm + 12, gprT1);
+		}
+		else {
+			mVUallocVIa(gprT1, _Fs_);
+			mVUallocVIa(gprT2, _Ft_);
+			ADD32ItoR(gprT1, _Imm11_);
+			mVUaddrFix(gprT1);
+			if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem);
+			if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
+			if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
+			if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
+		}
+	}
+}
+microVUf(void) mVU_ISWR() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		if (!_Fs_) {
+			mVUallocVIa(gprT1, _Ft_);
+			if (_X) MOV32RtoM((uptr)mVU->regs->Mem,    gprT1);
+			if (_Y) MOV32RtoM((uptr)mVU->regs->Mem+4,  gprT1);
+			if (_Z) MOV32RtoM((uptr)mVU->regs->Mem+8,  gprT1);
+			if (_W) MOV32RtoM((uptr)mVU->regs->Mem+12, gprT1);
+		}
+		else {
+			mVUallocVIa(gprT1, _Fs_);
+			mVUallocVIa(gprT2, _Ft_);
+			mVUaddrFix(gprT1);
+			if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem);
+			if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
+			if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
+			if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
+		}
+	}
+}
+
 microVUf(void) mVU_LQ() {
 	microVU* mVU = mVUx;
 	if (recPass == 0) { /*If (!_Ft_) nop();*/ }
@@ -667,10 +745,60 @@ microVUf(void) mVU_LQI() {
 		}
 	}
 }
-microVUf(void) mVU_SQ() {}
-microVUf(void) mVU_SQD() {}
-microVUf(void) mVU_SQI() {}
-//microVUf(void) mVU_LOI() {}
+microVUf(void) mVU_SQ() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		if (!_Ft_) {
+			getReg7(xmmFs, _Fs_);
+			mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W);
+		}
+		else {
+			mVUallocVIa(gprT1, _Ft_);
+			ADD32ItoR(gprT1, _Imm11_);
+			mVUaddrFix(gprT1);
+			getReg7(xmmFs, _Fs_);
+			mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
+		}
+	}
+}
+microVUf(void) mVU_SQD() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		if (!_Ft_) {
+			getReg7(xmmFs, _Fs_);
+			mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W);
+		}
+		else {
+			mVUallocVIa(gprT1, _Ft_);
+			SUB16ItoR(gprT1, 1);
+			mVUallocVIb(gprT1, _Ft_); // ToDo: Backup to memory check.
+			mVUaddrFix(gprT1);
+			getReg7(xmmFs, _Fs_);
+			mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
+		}
+	}
+}
+microVUf(void) mVU_SQI() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		if (!_Ft_) {
+			getReg7(xmmFs, _Fs_);
+			mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W);
+		}
+		else {
+			mVUallocVIa(gprT1, _Ft_);
+			MOV32RtoR(gprT2, gprT1);
+			mVUaddrFix(gprT1);
+			getReg7(xmmFs, _Fs_);
+			mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
+			ADD16ItoR(gprT2, 1);
+			mVUallocVIb(gprT2, _Ft_); // ToDo: Backup to memory check.
+		}
+	}
+}
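SQD and SQI differ only in when vi[it] is updated: SQD decrements before the store, SQI stores first and increments afterwards. That ordering is why mVU_SQI copies gprT1 into gprT2 before mVUaddrFix: the wrapped, scaled address is consumed by the store, while the unmodified value plus one is what gets written back to the register. Hypothetical interpreter equivalents (vuWrapAddr/storeVF are illustrative helpers, as above):

    void interpSQD(VURegs* regs, int fs, int it) {
        regs->VI[it].US[0]--;                              // pre-decrement
        storeVF(regs, fs, vuWrapAddr(regs->VI[it].US[0])); // then store VF[fs]
    }
    void interpSQI(VURegs* regs, int fs, int it) {
        storeVF(regs, fs, vuWrapAddr(regs->VI[it].US[0])); // store VF[fs] first
        regs->VI[it].US[0]++;                              // then post-increment
    }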
 
 microVUf(void) mVU_RINIT() {}
 microVUf(void) mVU_RGET()  {}
diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h
index d8412016eb..a7cfc6330d 100644
--- a/pcsx2/x86/microVU_Misc.h
+++ b/pcsx2/x86/microVU_Misc.h
@@ -22,35 +22,35 @@
 // Global Variables
 //------------------------------------------------------------------
 
-PCSX2_ALIGNED16_EXTERN(u32 mVU_absclip[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_signbit[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_minvals[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_maxvals[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_T1[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_T2[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_T3[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_T4[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_T5[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_T6[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_T7[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_T8[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_Pi4[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_S2[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_S3[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_S4[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_S5[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_E1[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_E2[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_E3[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_E4[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_E5[4]);
-PCSX2_ALIGNED16_EXTERN(u32 mVU_E6[4]);
-PCSX2_ALIGNED16_EXTERN(float mVU_FTOI_4[4]);
-PCSX2_ALIGNED16_EXTERN(float mVU_FTOI_12[4]);
-PCSX2_ALIGNED16_EXTERN(float mVU_FTOI_15[4]);
-PCSX2_ALIGNED16_EXTERN(float mVU_ITOF_4[4]);
-PCSX2_ALIGNED16_EXTERN(float mVU_ITOF_12[4]);
-PCSX2_ALIGNED16_EXTERN(float mVU_ITOF_15[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_maxvals[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T1[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T2[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T3[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T4[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T5[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T6[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_S2[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_S3[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_S4[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_S5[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_E1[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_E2[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_E3[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_E4[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_E5[4]);
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_E6[4]);
+PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_4[4]);
+PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_12[4]);
+PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_15[4]);
+PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_4[4]);
+PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_12[4]);
+PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]);
 
 //------------------------------------------------------------------
 // Helper Macros
@@ -85,6 +85,7 @@ PCSX2_ALIGNED16_EXTERN(float mVU_ITOF_15[4]);
 #define _Imm15_	(((mVU->code >> 10) & 0x7800) | (mVU->code & 0x7ff))
 
 #define getVUmem(x)	(((vuIndex == 1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16)
+#define offsetSS	((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8 : 12)))
 
 #define xmmT1	0 // Temp Reg
 #define xmmFs	1 // Holds the Value of Fs (writes back result Fd)
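getVUmem encodes each unit's data-memory wrap: VU1 wraps at 16KB (0x3ff quadwords), while VU0 wraps at 4KB except that quadwords 0x400-0x43f appear to map to the VU1-register window, hence the 0x43f mask. The new offsetSS macro gives the byte offset of the single enabled dest field inside a 16-byte quadword, which is what the single-slot ILW/ISW paths add to the base address. Spelled out as a plain function (assuming _X/_Y/_Z/_W are the usual one-bit dest tests):

    // dest .x -> +0, .y -> +4, .z -> +8, .w -> +12 bytes into the quadword
    u32 elemOffset(bool x, bool y, bool z) {
        return x ? 0 : (y ? 4 : (z ? 8 : 12)); // mirrors offsetSS
    }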
diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl
index 174ebe4fd2..e229a09dae 100644
--- a/pcsx2/x86/microVU_Misc.inl
+++ b/pcsx2/x86/microVU_Misc.inl
@@ -93,48 +93,93 @@ microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
 	}
 }
 
-microVUx(void) mVUsaveReg(int reg, u32 offset, int xyzw) {
+microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) {
 	switch ( xyzw ) {
-	case 5:  SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
-	         SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-	         SSE_MOVSS_XMM_to_M32(offset+4, reg);
-	         SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
-	         break; // YW
-	case 6:  SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
-	         SSE_MOVLPS_XMM_to_M64(offset+4, xmmT1);
-	         break; // YZ
-	case 7:  SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
-	         SSE_MOVHPS_XMM_to_M64(offset+4, xmmT1);
-	         SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
-	         break; // YZW
-	case 9:  SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-	         SSE_MOVSS_XMM_to_M32(offset, reg);
-	         if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
-	         else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
-	         SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
-	         break; // XW
-	case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-	         SSE_MOVSS_XMM_to_M32(offset, reg);
-	         SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
-	         break; //XZ
-	case 11: SSE_MOVSS_XMM_to_M32(offset, reg);
-	         SSE_MOVHPS_XMM_to_M64(offset+8, reg);
-	         break; //XZW
-	case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
-	         SSE_MOVHPS_XMM_to_M64(offset, xmmT1);
-	         SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
-	         break; // XYW
-	case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-	         SSE_MOVLPS_XMM_to_M64(offset, reg);
-	         SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
-	         break; // XYZ
-	case 8:  SSE_MOVSS_XMM_to_M32(offset, reg);     break; // X
-	case 4:  SSE_MOVSS_XMM_to_M32(offset+4, reg);   break; // Y
-	case 2:  SSE_MOVSS_XMM_to_M32(offset+8, reg);   break; // Z
-	case 1:  SSE_MOVSS_XMM_to_M32(offset+12, reg);  break; // W
-	case 12: SSE_MOVLPS_XMM_to_M64(offset, reg);    break; // XY
-	case 3:  SSE_MOVHPS_XMM_to_M64(offset+8, reg);  break; // ZW
-	default: SSE_MOVAPS_XMM_to_M128(offset, reg);   break; // XYZW
+		case 5:  SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
+		         SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
+		         SSE_MOVSS_XMM_to_M32(offset+4, reg);
+		         SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
+		         break; // YW
+		case 6:  SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
+		         SSE_MOVLPS_XMM_to_M64(offset+4, xmmT1);
+		         break; // YZ
+		case 7:  SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
+		         SSE_MOVHPS_XMM_to_M64(offset+4, xmmT1);
+		         SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
+		         break; // YZW
+		case 9:  SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
+		         SSE_MOVSS_XMM_to_M32(offset, reg);
+		         if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
+		         else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
+		         SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
+		         break; // XW
+		case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
+		         SSE_MOVSS_XMM_to_M32(offset, reg);
+		         SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
+		         break; //XZ
+		case 11: SSE_MOVSS_XMM_to_M32(offset, reg);
+		         SSE_MOVHPS_XMM_to_M64(offset+8, reg);
+		         break; //XZW
+		case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
+		         SSE_MOVHPS_XMM_to_M64(offset, xmmT1);
+		         SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
+		         break; // XYW
+		case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
+		         SSE_MOVLPS_XMM_to_M64(offset, reg);
+		         SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
+		         break; // XYZ
+		case 8:  SSE_MOVSS_XMM_to_M32(offset, reg);     break; // X
+		case 4:  SSE_MOVSS_XMM_to_M32(offset+4, reg);   break; // Y
+		case 2:  SSE_MOVSS_XMM_to_M32(offset+8, reg);   break; // Z
+		case 1:  SSE_MOVSS_XMM_to_M32(offset+12, reg);  break; // W
+		case 12: SSE_MOVLPS_XMM_to_M64(offset, reg);    break; // XY
+		case 3:  SSE_MOVHPS_XMM_to_M64(offset+8, reg);  break; // ZW
+		default: SSE_MOVAPS_XMM_to_M128(offset, reg);   break; // XYZW
+	}
+}
+
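In these switches xyzw is the 4-bit dest mask with X in the high bit (X=8, Y=4, Z=2, W=1): case 12 is XY and maps to a single MOVLPS of the low 64 bits, case 3 is ZW via MOVHPS of the high 64 bits, and non-contiguous combinations such as case 5 (YW) need a PSHUFD shuffle first because SSE offers no masked store here. The assumed encoding, matching the case comments:

    enum VUDestMask { DEST_W = 1, DEST_Z = 2, DEST_Y = 4, DEST_X = 8 };
    // (DEST_X | DEST_Y) == 12 -> low 64 bits; (DEST_Z | DEST_W) == 3 -> high 64 bits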
+microVUx(void) mVUsaveReg2(int reg, int gprReg, uptr offset, int xyzw) {
+	switch ( xyzw ) {
+		case 5:  SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
+		         SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
+		         break; // YW
+		case 6:  SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
+		         SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
+		         break; // YZ
+		case 7:  SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
+		         SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
+		         break; // YZW
+		case 9:  SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
+		         if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
+		         else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
+		         break; // XW
+		case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
+		         break; //XZ
+		case 11: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
+		         SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg);
+		         break; //XZW
+		case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
+		         SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
+		         break; // XYW
+		case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
+		         SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg);
+		         SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
+		         break; // XYZ
+		case 8:  SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);     break; // X
+		case 4:  SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg);   break; // Y
+		case 2:  SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg);   break; // Z
+		case 1:  SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg);  break; // W
+		case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg);    break; // XY
+		case 3:  SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg);  break; // ZW
+		default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg);         break; // XYZW
 	}
 }
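mVUsaveReg2 mirrors mVUsaveReg case-for-case but stores through gprReg plus a displacement instead of an absolute address, which the new SQ/SQD/SQI paths need once the target address lives in a VI register at runtime. The two call shapes, as used by mVU_SQ above:

    // vi[it] is zero: the address folds to a compile-time constant.
    mVUsaveReg (xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W);
    // vi[it] is live: gprT1 holds the wrapped address, Mem is the displacement.
    mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);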