Implemented more microVU lower opcodes (ILW, ILWR, ISW, ISWR, SQ, SQD, SQI).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@760 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-03-13 02:03:18 +00:00
parent 71c4561f86
commit cbe108e99d
6 changed files with 292 additions and 112 deletions

View File

@ -29,36 +29,36 @@
PCSX2_ALIGNED16(microVU microVU0);
PCSX2_ALIGNED16(microVU microVU1);
PCSX2_ALIGNED16(u32 mVU_absclip[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
PCSX2_ALIGNED16(u32 mVU_signbit[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
PCSX2_ALIGNED16(u32 mVU_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff};
PCSX2_ALIGNED16(u32 mVU_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff};
PCSX2_ALIGNED16(u32 mVU_one[4]) = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000};
PCSX2_ALIGNED16(u32 mVU_T1[4]) = {0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5};
PCSX2_ALIGNED16(u32 mVU_T2[4]) = {0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c};
PCSX2_ALIGNED16(u32 mVU_T3[4]) = {0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6};
PCSX2_ALIGNED16(u32 mVU_T4[4]) = {0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63};
PCSX2_ALIGNED16(u32 mVU_T5[4]) = {0x3dc577df, 0x3dc577df, 0x3dc577df, 0x3dc577df};
PCSX2_ALIGNED16(u32 mVU_T6[4]) = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xbd6501c4};
PCSX2_ALIGNED16(u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652};
PCSX2_ALIGNED16(u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7};
PCSX2_ALIGNED16(u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb};
PCSX2_ALIGNED16(u32 mVU_S2[4]) = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4};
PCSX2_ALIGNED16(u32 mVU_S3[4]) = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e};
PCSX2_ALIGNED16(u32 mVU_S4[4]) = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f};
PCSX2_ALIGNED16(u32 mVU_S5[4]) = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14};
PCSX2_ALIGNED16(u32 mVU_E1[4]) = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8};
PCSX2_ALIGNED16(u32 mVU_E2[4]) = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4};
PCSX2_ALIGNED16(u32 mVU_E3[4]) = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff};
PCSX2_ALIGNED16(u32 mVU_E4[4]) = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553};
PCSX2_ALIGNED16(u32 mVU_E5[4]) = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510};
PCSX2_ALIGNED16(u32 mVU_E6[4]) = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac};
PCSX2_ALIGNED16(float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0};
PCSX2_ALIGNED16(float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
PCSX2_ALIGNED16(float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
PCSX2_ALIGNED16(float mVU_ITOF_4[4]) = {0.0625f, 0.0625f, 0.0625f, 0.0625f};
PCSX2_ALIGNED16(float mVU_ITOF_12[4]) = {0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625};
PCSX2_ALIGNED16(float mVU_ITOF_15[4]) = {0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125};
// Read-only 4-lane constant tables; every table repeats one value in all four lanes.
// The u32 tables hold raw IEEE-754 single-precision bit patterns.

// Bit masks: everything except the sign bit / only the sign bit.
PCSX2_ALIGNED16(const u32 mVU_absclip[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
PCSX2_ALIGNED16(const u32 mVU_signbit[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
// Bit patterns of -FLT_MAX and +FLT_MAX (largest-magnitude finite singles).
PCSX2_ALIGNED16(const u32 mVU_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff};
PCSX2_ALIGNED16(const u32 mVU_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff};
// Bit pattern of 1.0f.
PCSX2_ALIGNED16(const u32 mVU_one[4]) = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000};
// T1..T8 and Pi4 (bit pattern of pi/4): presumably polynomial coefficients for an
// arctangent approximation -- TODO confirm against the code that consumes them.
PCSX2_ALIGNED16(const u32 mVU_T1[4]) = {0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5};
PCSX2_ALIGNED16(const u32 mVU_T2[4]) = {0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c};
PCSX2_ALIGNED16(const u32 mVU_T3[4]) = {0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6};
PCSX2_ALIGNED16(const u32 mVU_T4[4]) = {0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63};
PCSX2_ALIGNED16(const u32 mVU_T5[4]) = {0x3dc577df, 0x3dc577df, 0x3dc577df, 0x3dc577df};
PCSX2_ALIGNED16(const u32 mVU_T6[4]) = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xbd6501c4};
PCSX2_ALIGNED16(const u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652};
PCSX2_ALIGNED16(const u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7};
PCSX2_ALIGNED16(const u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb};
// S2..S5: presumably coefficients for a sine approximation -- TODO confirm.
PCSX2_ALIGNED16(const u32 mVU_S2[4]) = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4};
PCSX2_ALIGNED16(const u32 mVU_S3[4]) = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e};
PCSX2_ALIGNED16(const u32 mVU_S4[4]) = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f};
PCSX2_ALIGNED16(const u32 mVU_S5[4]) = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14};
// E1..E6: presumably coefficients for an exponential approximation -- TODO confirm.
PCSX2_ALIGNED16(const u32 mVU_E1[4]) = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8};
PCSX2_ALIGNED16(const u32 mVU_E2[4]) = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4};
PCSX2_ALIGNED16(const u32 mVU_E3[4]) = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff};
PCSX2_ALIGNED16(const u32 mVU_E4[4]) = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553};
PCSX2_ALIGNED16(const u32 mVU_E5[4]) = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510};
PCSX2_ALIGNED16(const u32 mVU_E6[4]) = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac};
// Scale factors 2^4, 2^12 and 2^15 (named FTOI_n) ...
PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0};
PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
// ... and their reciprocals 2^-4, 2^-12 and 2^-15 (named ITOF_n).
PCSX2_ALIGNED16(const float mVU_ITOF_4[4]) = {0.0625f, 0.0625f, 0.0625f, 0.0625f};
PCSX2_ALIGNED16(const float mVU_ITOF_12[4]) = {0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625};
PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = {0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125};
//------------------------------------------------------------------
// Micro VU - Main Functions

View File

@ -100,11 +100,13 @@ struct microProgManager {
};
struct microVU {
int index; // VU Index (VU0 or VU1)
u32 index; // VU Index (VU0 or VU1)
u32 microSize; // VU Micro Memory Size
u32 progSize; // VU Micro Program Size (microSize/8)
u32 cacheAddr; // VU Cache Start Address
static const u32 cacheSize = 0x400000; // VU Cache Size
microProgManager<0x800> prog; // Micro Program Data
VURegs* regs; // VU Regs Struct
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
@ -122,7 +124,6 @@ struct microVU {
uptr x86esi; // Source register. Used as a pointer to a source in stream operations.
uptr x86edi; // Destination register. Used as a pointer to a destination in stream operations.
*/
microProgManager<0x800> prog; // Micro Program Data
};
// microVU rec structs

View File

@ -769,7 +769,7 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
}
//------------------------------------------------------------------
// Div/Sqrt/Rsqrt Allocator Helpers
// Lower Instruction Allocator Helpers
//------------------------------------------------------------------
#define getReg5(reg, _reg_, _fxf_) { \
@ -783,4 +783,9 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
} \
}
// getReg7(reg, _reg_): fetches VF[_reg_] into 'reg'. Doesn't Clamp.
// When _reg_ is 0 it calls getZero(reg) instead of reading memory; otherwise it calls
// mVUloadReg on &mVU->regs->VF[_reg_].UL[0] with the current instruction's _X_Y_Z_W mask.
// Expands to a braced block and relies on 'mVU' and 'vuIndex' being in scope at the call site.
#define getReg7(reg, _reg_) { \
if (!_reg_) { getZero(reg); } \
else { mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], _X_Y_Z_W); } \
}
#endif //PCSX2_MICROVU

View File

@ -560,11 +560,6 @@ microVUf(void) mVU_IBNE() {}
microVUf(void) mVU_JR() {}
microVUf(void) mVU_JALR() {}
microVUf(void) mVU_ILW() {}
microVUf(void) mVU_ISW() {}
microVUf(void) mVU_ILWR() {}
microVUf(void) mVU_ISWR() {}
microVUf(void) mVU_MOVE() {
microVU* mVU = mVUx;
if (recPass == 0) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
@ -609,6 +604,89 @@ microVUf(void) mVU_MR32() {
mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
// ILW recompiler. Pass 0 emits nothing. On later passes it emits code that reads a
// value from mVU->regs->Mem (lane chosen by offsetSS) into gprT1 and then hands
// gprT1 to mVUallocVIb for _Ft_.
microVUf(void) mVU_ILW() {
	microVU* mVU = mVUx;
	if (recPass == 0) { /*If (!_Ft_) nop();*/ return; }

	if (!_Fs_) {
		// No base register: the address is fully known at recompile time.
		MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS );
	}
	else {
		// Address = fix(_Fs_ value + _Imm11_), computed by the emitted code in gprT1.
		mVUallocVIa<vuIndex>(gprT1, _Fs_);
		ADD32ItoR(gprT1, _Imm11_);
		mVUaddrFix<vuIndex>(gprT1);
		MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
		if (isMMX(_Ft_)) { AND32ItoR(gprT1, 0xffff); }
	}
	// Both paths finish by passing the loaded value in gprT1 to mVUallocVIb.
	mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
// ILWR recompiler: same shape as ILW but without an immediate displacement.
// Pass 0 emits nothing; otherwise the loaded value ends up in gprT1 and is
// handed to mVUallocVIb for _Ft_.
microVUf(void) mVU_ILWR() {
	microVU* mVU = mVUx;
	if (recPass == 0) { /*If (!_Ft_) nop();*/ return; }

	if (!_Fs_) {
		// No base register: read straight from the start of Mem (+ lane offset).
		MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS );
	}
	else {
		// Address = fix(_Fs_ value), computed by the emitted code in gprT1.
		mVUallocVIa<vuIndex>(gprT1, _Fs_);
		mVUaddrFix<vuIndex>(gprT1);
		MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
		if (isMMX(_Ft_)) { AND32ItoR(gprT1, 0xffff); }
	}
	mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
// ISW recompiler. Pass 0 emits nothing. Otherwise emits one 32-bit store of the
// _Ft_ value per enabled lane (_X/_Y/_Z/_W -> +0/+4/+8/+12) into mVU->regs->Mem.
microVUf(void) mVU_ISW() {
	microVU* mVU = mVUx;
	if (recPass == 0) { return; }

	if (!_Fs_) {
		// No base register: destination address is a recompile-time constant.
		int imm = getVUmem(_Imm11_);
		mVUallocVIa<vuIndex>(gprT1, _Ft_);
		const uptr dst = (uptr)mVU->regs->Mem + imm;
		if (_X) { MOV32RtoM(dst,      gprT1); }
		if (_Y) { MOV32RtoM(dst + 4,  gprT1); }
		if (_Z) { MOV32RtoM(dst + 8,  gprT1); }
		if (_W) { MOV32RtoM(dst + 12, gprT1); }
		return;
	}

	// gprT1 = fix(_Fs_ value + _Imm11_) is the run-time address, gprT2 the value to store.
	mVUallocVIa<vuIndex>(gprT1, _Fs_);
	mVUallocVIa<vuIndex>(gprT2, _Ft_);
	ADD32ItoR(gprT1, _Imm11_);
	mVUaddrFix<vuIndex>(gprT1);
	const uptr memBase = (uptr)mVU->regs->Mem;
	if (_X) { MOV32RtoRmOffset(gprT1, gprT2, memBase); }
	if (_Y) { MOV32RtoRmOffset(gprT1, gprT2, memBase+4); }
	if (_Z) { MOV32RtoRmOffset(gprT1, gprT2, memBase+8); }
	if (_W) { MOV32RtoRmOffset(gprT1, gprT2, memBase+12); }
}
// ISWR recompiler: like ISW but without an immediate displacement.
// Pass 0 emits nothing. Otherwise emits one 32-bit store of the _Ft_ value per
// enabled lane (_X/_Y/_Z/_W -> +0/+4/+8/+12) into mVU->regs->Mem.
microVUf(void) mVU_ISWR() {
	microVU* mVU = mVUx;
	if (recPass == 0) { return; }

	if (!_Fs_) {
		// No base register: store at the very start of Mem.
		mVUallocVIa<vuIndex>(gprT1, _Ft_);
		const uptr dst = (uptr)mVU->regs->Mem;
		if (_X) { MOV32RtoM(dst,    gprT1); }
		if (_Y) { MOV32RtoM(dst+4,  gprT1); }
		if (_Z) { MOV32RtoM(dst+8,  gprT1); }
		if (_W) { MOV32RtoM(dst+12, gprT1); }
		return;
	}

	// gprT1 = fix(_Fs_ value) is the run-time address, gprT2 the value to store.
	mVUallocVIa<vuIndex>(gprT1, _Fs_);
	mVUallocVIa<vuIndex>(gprT2, _Ft_);
	mVUaddrFix<vuIndex>(gprT1);
	const uptr memBase = (uptr)mVU->regs->Mem;
	if (_X) { MOV32RtoRmOffset(gprT1, gprT2, memBase); }
	if (_Y) { MOV32RtoRmOffset(gprT1, gprT2, memBase+4); }
	if (_Z) { MOV32RtoRmOffset(gprT1, gprT2, memBase+8); }
	if (_W) { MOV32RtoRmOffset(gprT1, gprT2, memBase+12); }
}
microVUf(void) mVU_LQ() {
microVU* mVU = mVUx;
if (recPass == 0) { /*If (!_Ft_) nop();*/ }
@ -667,10 +745,60 @@ microVUf(void) mVU_LQI() {
}
}
}
microVUf(void) mVU_SQ() {}
microVUf(void) mVU_SQD() {}
microVUf(void) mVU_SQI() {}
//microVUf(void) mVU_LOI() {}
// SQ recompiler. Pass 0 emits nothing. Otherwise fetches VF[_Fs_] into xmmFs
// (getReg7, no clamping) and emits a store of the _X_Y_Z_W lanes into mVU->regs->Mem.
microVUf(void) mVU_SQ() {
	microVU* mVU = mVUx;
	if (recPass == 0) { return; }

	if (!_Ft_) {
		// No base register: destination address is a recompile-time constant.
		getReg7(xmmFs, _Fs_);
		const uptr dst = (uptr)mVU->regs->Mem + getVUmem(_Imm11_);
		mVUsaveReg<vuIndex>(xmmFs, dst, _X_Y_Z_W);
		return;
	}

	// Run-time address: gprT1 = fix(_Ft_ value + _Imm11_), used relative to Mem.
	mVUallocVIa<vuIndex>(gprT1, _Ft_);
	ADD32ItoR(gprT1, _Imm11_);
	mVUaddrFix<vuIndex>(gprT1);
	getReg7(xmmFs, _Fs_);
	mVUsaveReg2<vuIndex>(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
}
// SQD recompiler. Pass 0 emits nothing. With a non-zero _Ft_ the emitted code
// decrements the _Ft_ value by 1, writes it back through mVUallocVIb, and then
// uses the decremented value as the store address for VF[_Fs_].
microVUf(void) mVU_SQD() {
	microVU* mVU = mVUx;
	if (recPass == 0) { return; }

	if (!_Ft_) {
		// _Ft_ is 0: store at the very start of Mem, nothing is decremented.
		getReg7(xmmFs, _Fs_);
		const uptr dst = (uptr)mVU->regs->Mem;
		mVUsaveReg<vuIndex>(xmmFs, dst, _X_Y_Z_W);
		return;
	}

	mVUallocVIa<vuIndex>(gprT1, _Ft_);
	SUB16ItoR(gprT1, 1);
	mVUallocVIb<vuIndex>(gprT1, _Ft_); // ToDo: Backup to memory check.
	mVUaddrFix<vuIndex>(gprT1);
	getReg7(xmmFs, _Fs_);
	mVUsaveReg2<vuIndex>(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
}
// SQI recompiler. Pass 0 emits nothing. With a non-zero _Ft_ the emitted code
// stores VF[_Fs_] at the address taken from the _Ft_ value, then writes the
// _Ft_ value + 1 back through mVUallocVIb (a copy is kept in gprT2 for that).
microVUf(void) mVU_SQI() {
	microVU* mVU = mVUx;
	if (recPass == 0) { return; }

	if (!_Ft_) {
		// _Ft_ is 0: store at the very start of Mem, nothing is incremented.
		getReg7(xmmFs, _Fs_);
		const uptr dst = (uptr)mVU->regs->Mem;
		mVUsaveReg<vuIndex>(xmmFs, dst, _X_Y_Z_W);
		return;
	}

	mVUallocVIa<vuIndex>(gprT1, _Ft_);
	MOV32RtoR(gprT2, gprT1);	// keep the unfixed value for the increment below
	mVUaddrFix<vuIndex>(gprT1);
	getReg7(xmmFs, _Fs_);
	mVUsaveReg2<vuIndex>(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
	ADD16ItoR(gprT2, 1);
	mVUallocVIb<vuIndex>(gprT2, _Ft_); // ToDo: Backup to memory check.
}
microVUf(void) mVU_RINIT() {}
microVUf(void) mVU_RGET() {}

View File

@ -22,35 +22,35 @@
// Global Variables
//------------------------------------------------------------------
PCSX2_ALIGNED16_EXTERN(u32 mVU_absclip[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_signbit[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_minvals[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_maxvals[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_T1[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_T2[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_T3[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_T4[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_T5[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_T6[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_T7[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_T8[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_Pi4[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_S2[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_S3[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_S4[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_S5[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_E1[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_E2[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_E3[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_E4[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_E5[4]);
PCSX2_ALIGNED16_EXTERN(u32 mVU_E6[4]);
PCSX2_ALIGNED16_EXTERN(float mVU_FTOI_4[4]);
PCSX2_ALIGNED16_EXTERN(float mVU_FTOI_12[4]);
PCSX2_ALIGNED16_EXTERN(float mVU_FTOI_15[4]);
PCSX2_ALIGNED16_EXTERN(float mVU_ITOF_4[4]);
PCSX2_ALIGNED16_EXTERN(float mVU_ITOF_12[4]);
PCSX2_ALIGNED16_EXTERN(float mVU_ITOF_15[4]);
// Declarations of the read-only 4-lane constant tables.
// In C++ a const object at namespace scope has internal linkage unless it was
// previously declared extern, so every table that other translation units use
// must be declared here. mVU_one was missing from this list and is added below.
PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_maxvals[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_one[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T1[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T2[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T3[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T4[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T5[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T6[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S2[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S3[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S4[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S5[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E1[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E2[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E3[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E4[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E5[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E6[4]);
PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_4[4]);
PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_12[4]);
PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_15[4]);
PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_4[4]);
PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_12[4]);
PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]);
//------------------------------------------------------------------
// Helper Macros
@ -85,6 +85,7 @@ PCSX2_ALIGNED16_EXTERN(float mVU_ITOF_15[4]);
// 15-bit immediate assembled from two fields of the current instruction word.
#define _Imm15_ (((mVU->code >> 10) & 0x7800) | (mVU->code & 0x7ff))
// Turns a quadword index into a byte offset (index * 16) after masking it:
//   vuIndex == 1 : index & 0x3ff
//   otherwise    : index & 0x43f when index >= 0x400, else index & 0xff
// The argument is parenthesized so that expressions such as (a | b) or (a + b)
// are masked as a whole instead of being split by operator precedence.
#define getVUmem(x) (((vuIndex == 1) ? ((x) & 0x3ff) : (((x) >= 0x400) ? ((x) & 0x43f) : ((x) & 0xff))) * 16)
// Byte offset of the first enabled lane in X, Y, Z, W order (W when none of X/Y/Z is set).
#define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12)))
#define xmmT1 0 // Temp Reg
#define xmmFs 1 // Holds the Value of Fs (writes back result Fd)

View File

@ -93,48 +93,93 @@ microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
}
}
// Emits code that stores selected lanes of XMM register 'reg' to the 16-byte
// vector at absolute address 'offset' (X at +0, Y at +4, Z at +8, W at +12).
//   reg    : source XMM register
//   offset : destination address
//   xyzw   : lane mask, X=8 Y=4 Z=2 W=1; any value without its own case takes
//            the full 128-bit aligned store
// xmmT1 is used as scratch. The YW case (5) shuffles 'reg' itself in place, so the
// caller must not rely on 'reg' afterwards for that mask.
microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) {
	switch ( xyzw ) {
		case 5:		SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
					SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
					SSE_MOVSS_XMM_to_M32(offset+4, reg);
					SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
					break; // YW
		case 6:		SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
					SSE_MOVLPS_XMM_to_M64(offset+4, xmmT1);
					break; // YZ
		case 7:		SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
					SSE_MOVHPS_XMM_to_M64(offset+4, xmmT1);
					SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
					break; // YZW
		case 9:		SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); // xmmT1 low half = {Z, W}
					SSE_MOVSS_XMM_to_M32(offset, reg);
					// Bring W (lane 1) down to lane 0. The former SSE3 path used MOVSLDUP,
					// which duplicates the even lanes and so left Z in lane 0, storing Z
					// into the W slot. PSHUFD 0x55 selects lane 1 on every CPU.
					SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
					SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
					break; // XW
		case 10:	SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
					SSE_MOVSS_XMM_to_M32(offset, reg);
					SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
					break; //XZ
		case 11:	SSE_MOVSS_XMM_to_M32(offset, reg);
					SSE_MOVHPS_XMM_to_M64(offset+8, reg);
					break; //XZW
		case 13:	SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
					SSE_MOVHPS_XMM_to_M64(offset, xmmT1);
					SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
					break; // XYW
		case 14:	SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
					SSE_MOVLPS_XMM_to_M64(offset, reg);
					SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
					break; // XYZ
		case 8:		SSE_MOVSS_XMM_to_M32(offset, reg);		break; // X
		case 4:		SSE_MOVSS_XMM_to_M32(offset+4, reg);	break; // Y
		case 2:		SSE_MOVSS_XMM_to_M32(offset+8, reg);	break; // Z
		case 1:		SSE_MOVSS_XMM_to_M32(offset+12, reg);	break; // W
		case 12:	SSE_MOVLPS_XMM_to_M64(offset, reg);		break; // XY
		case 3:		SSE_MOVHPS_XMM_to_M64(offset+8, reg);	break; // ZW
		default:	SSE_MOVAPS_XMM_to_M128(offset, reg);	break; // XYZW
	}
}
// Register-relative variant of mVUsaveReg: emits code that stores selected lanes
// of XMM register 'reg' to the 16-byte vector at [gprReg + offset]
// (X at +0, Y at +4, Z at +8, W at +12).
//   reg    : source XMM register
//   gprReg : general-purpose register holding the run-time part of the address
//   offset : constant displacement; taken as uptr (like mVUsaveReg and
//            mVUloadReg2) because callers pass (uptr)mVU->regs->Mem
//   xyzw   : lane mask, X=8 Y=4 Z=2 W=1; any value without its own case takes
//            the full 128-bit aligned store
// xmmT1 is used as scratch. The YW case (5) shuffles 'reg' itself in place.
microVUx(void) mVUsaveReg2(int reg, int gprReg, uptr offset, int xyzw) {
	switch ( xyzw ) {
		case 5:		SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
					SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg);
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
					break; // YW
		case 6:		SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
					SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
					break; // YZ
		case 7:		SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
					SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
					break; // YZW
		case 9:		SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); // xmmT1 low half = {Z, W}
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
					// Bring W (lane 1) down to lane 0. The former SSE3 path used MOVSLDUP,
					// which duplicates the even lanes and so left Z in lane 0, storing Z
					// into the W slot. PSHUFD 0x55 selects lane 1 on every CPU.
					SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
					break; // XW
		case 10:	SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
					break; //XZ
		case 11:	SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
					SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg);
					break; //XZW
		case 13:	SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
					SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1);
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
					break; // XYW
		case 14:	SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
					SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg);
					SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
					break; // XYZ
		case 8:		SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);		break; // X
		case 4:		SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg);	break; // Y
		case 2:		SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg);	break; // Z
		case 1:		SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg);	break; // W
		case 12:	SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg);	break; // XY
		case 3:		SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg);	break; // ZW
		default:	SSE_MOVAPSRtoRmOffset(gprReg, offset, reg);			break; // XYZW
	}
}