mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Implemented Jake's suggestion for proper alignment in the microVU struct.
- Minor optimizations/changes.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1188 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c67bfbe648
commit
9390d5583a
|
@ -181,7 +181,7 @@ microVUt(int) mVUfindLeastUsedProg() {
|
|||
|
||||
mVUclearProg<vuIndex>(smallidx); // Clear old data if overwriting old program
|
||||
mVUcacheProg<vuIndex>(smallidx); // Cache Micro Program
|
||||
Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval );
|
||||
//Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval );
|
||||
return smallidx;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,6 +92,11 @@ struct microProgManager {
|
|||
|
||||
#define mVUcacheSize (0x2000000 / ((vuIndex) ? 1 : 4))
|
||||
struct microVU {
|
||||
|
||||
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ
|
||||
|
||||
u32 index; // VU Index (VU0 or VU1)
|
||||
u32 microSize; // VU Micro Memory Size
|
||||
u32 progSize; // VU Micro Program Size (microSize/4)
|
||||
|
@ -113,16 +118,6 @@ struct microVU {
|
|||
u32 q; // Holds current Q instance index
|
||||
u32 totalCycles; // Total Cycles that mVU is expected to run for
|
||||
u32 cycles; // Cycles Counter
|
||||
|
||||
// WARNING! MSVC does not reliably guarantee alignment on structure or class member variables,
|
||||
// failing around 10-20% of the time to align (random depending on various circumstances).
|
||||
// GCC fails to align the members at all, failing about 50-80% of the time (barring occasional
|
||||
// random luck). If you want these to be guaranteed aligned, move them to the top of the
|
||||
// struct, and ensure the struct itself is aligned. :) -- air
|
||||
|
||||
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
|
||||
PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ
|
||||
};
|
||||
|
||||
// microVU rec structs
|
||||
|
@ -139,7 +134,7 @@ microVUt(void) mVUreset();
|
|||
microVUt(void) mVUclose();
|
||||
microVUt(void) mVUclear(u32, u32);
|
||||
|
||||
// Prototypes for Linux.
|
||||
// Prototypes for Linux
|
||||
void __fastcall mVUcleanUpVU0();
|
||||
void __fastcall mVUcleanUpVU1();
|
||||
void* __fastcall mVUcompileVU0(u32 startPC, uptr pState);
|
||||
|
@ -150,7 +145,7 @@ microVUf(void) mVUopL();
|
|||
// Private Functions
|
||||
microVUt(void) mVUclearProg(microVU* mVU, int progIndex);
|
||||
microVUt(int) mVUfindLeastUsedProg(microVU* mVU);
|
||||
microVUt(int) mVUsearchProg(/*microVU* mVU*/);
|
||||
microVUt(int) mVUsearchProg();
|
||||
microVUt(void) mVUcacheProg(microVU* mVU, int progIndex);
|
||||
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles);
|
||||
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles);
|
||||
|
|
|
@ -27,8 +27,7 @@
|
|||
mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles); \
|
||||
CMP16ItoM((uptr)&mVU->branch, 0); \
|
||||
incPC2(1); \
|
||||
if( mVUblocks[iPC/2] == NULL ) \
|
||||
mVUblocks[iPC/2] = new microBlockManager(); \
|
||||
if (!mVUblocks[iPC/2]) { mVUblocks[iPC/2] = new microBlockManager(); } \
|
||||
bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \
|
||||
incPC2(-1); \
|
||||
if (bBlock) { nJMPcc((uptr)bBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \
|
||||
|
@ -159,11 +158,12 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
microVU* mVU = mVUx;
|
||||
u8* thisPtr = x86Ptr;
|
||||
|
||||
if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUprint("microVU: invalid startPC"); }
|
||||
if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { Console::Error("microVU%d: invalid startPC", params vuIndex); }
|
||||
startPC &= (vuIndex ? 0x3ff8 : 0xff8);
|
||||
|
||||
if( mVUblocks[startPC/8] == NULL )
|
||||
if (mVUblocks[startPC/8] == NULL) {
|
||||
mVUblocks[startPC/8] = new microBlockManager();
|
||||
}
|
||||
|
||||
// Searches for Existing Compiled Block (if found, then returns; else, compile)
|
||||
microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
|
||||
|
@ -275,7 +275,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
if (bBlock) { // Branch non-taken has already been compiled
|
||||
incPC(-3); // Go back to branch opcode (to get branch imm addr)
|
||||
|
||||
if( mVUblocks[branchAddr/8] == NULL )
|
||||
if (mVUblocks[branchAddr/8] == NULL)
|
||||
mVUblocks[branchAddr/8] = new microBlockManager();
|
||||
|
||||
// Check if branch-block has already been compiled
|
||||
|
|
|
@ -105,20 +105,20 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) {
|
|||
return;*/
|
||||
|
||||
switch ( xyzw ) {
|
||||
case 5: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xe1); //WZXY
|
||||
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY
|
||||
SSE_MOVSS_XMM_to_M32(offset+4, reg);
|
||||
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xff); //WWWW
|
||||
SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
|
||||
SSE_MOVSS_XMM_to_M32(offset+12, reg);
|
||||
break; // YW
|
||||
case 6: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc9);
|
||||
case 6: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xc9);
|
||||
SSE_MOVLPS_XMM_to_M64(offset+4, reg);
|
||||
break; // YZ
|
||||
case 7: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x93); //ZYXW
|
||||
case 7: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x93); //ZYXW
|
||||
SSE_MOVHPS_XMM_to_M64(offset+4, reg);
|
||||
SSE_MOVSS_XMM_to_M32(offset+12, reg);
|
||||
break; // YZW
|
||||
case 9: SSE_MOVSS_XMM_to_M32(offset, reg);
|
||||
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xff); //WWWW
|
||||
SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
|
||||
SSE_MOVSS_XMM_to_M32(offset+12, reg);
|
||||
break; // XW
|
||||
case 10: SSE_MOVSS_XMM_to_M32(offset, reg);
|
||||
|
@ -128,7 +128,7 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) {
|
|||
case 11: SSE_MOVSS_XMM_to_M32(offset, reg);
|
||||
SSE_MOVHPS_XMM_to_M64(offset+8, reg);
|
||||
break; //XZW
|
||||
case 13: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x4b); //YXZW
|
||||
case 13: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x4b); //YXZW
|
||||
SSE_MOVHPS_XMM_to_M64(offset, reg);
|
||||
SSE_MOVSS_XMM_to_M32(offset+12, reg);
|
||||
break; // XYW
|
||||
|
@ -163,20 +163,20 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
|
|||
return;*/
|
||||
|
||||
switch ( xyzw ) {
|
||||
case 5: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xe1); //WZXY
|
||||
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY
|
||||
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4);
|
||||
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xff); //WWWW
|
||||
SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
|
||||
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
|
||||
break; // YW
|
||||
case 6: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc9);
|
||||
case 6: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xc9);
|
||||
SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset+4);
|
||||
break; // YZ
|
||||
case 7: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x93); //ZYXW
|
||||
case 7: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x93); //ZYXW
|
||||
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+4);
|
||||
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
|
||||
break; // YZW
|
||||
case 9: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
|
||||
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xff); //WWWW
|
||||
SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
|
||||
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
|
||||
break; // XW
|
||||
case 10: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
|
||||
|
@ -186,7 +186,7 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
|
|||
case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
|
||||
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8);
|
||||
break; //XZW
|
||||
case 13: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x4b); //YXZW
|
||||
case 13: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x4b); //YXZW
|
||||
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset);
|
||||
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
|
||||
break; // XYW
|
||||
|
@ -316,7 +316,7 @@ microVUt(void) mVUcheckSflag(int progIndex) {
|
|||
}
|
||||
}
|
||||
|
||||
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
|
||||
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0x7fffffff, 0x80000000, 0x7fffffff, 0x80000000};
|
||||
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
|
||||
|
||||
// Warning: Modifies xmmT1 and xmmT2
|
||||
|
@ -332,19 +332,18 @@ void MIN_MAX_(x86SSERegType to, x86SSERegType from, bool min) {
|
|||
if (min) SSE2_MINPD_XMM_to_XMM(xmmT1, xmmT2);
|
||||
else SSE2_MAXPD_XMM_to_XMM(xmmT1, xmmT2);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x88);
|
||||
mVUmergeRegs(to, xmmT1, 0xc);
|
||||
|
||||
// ZW
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmT1, to, 0xfa);
|
||||
SSE2_PAND_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK1);
|
||||
SSE2_POR_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK2);
|
||||
SSE2_PSHUFD_XMM_to_XMM(to, to, 0xfa);
|
||||
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
|
||||
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0xfa);
|
||||
SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
|
||||
SSE2_POR_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK2);
|
||||
if (min) SSE2_MINPD_XMM_to_XMM(xmmT1, xmmT2);
|
||||
else SSE2_MAXPD_XMM_to_XMM(xmmT1, xmmT2);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x88);
|
||||
mVUmergeRegs(to, xmmT1, 0x3);
|
||||
if (min) SSE2_MINPD_XMM_to_XMM(to, xmmT2);
|
||||
else SSE2_MAXPD_XMM_to_XMM(to, xmmT2);
|
||||
SSE2_PSHUFD_XMM_to_XMM(to, to, 0x88);
|
||||
SSE2_MOVSD_XMM_to_XMM (to, xmmT1);
|
||||
}
|
||||
|
||||
// Warning: Modifies from and to's upper 3 vectors
|
||||
|
|
Loading…
Reference in New Issue