-implemented Jake's suggestion for proper alignment in the microVU struct.
-minor optimizations/changes

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1188 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2009-05-15 04:51:53 +00:00
parent c67bfbe648
commit 9390d5583a
4 changed files with 37 additions and 43 deletions

View File

@@ -181,7 +181,7 @@ microVUt(int) mVUfindLeastUsedProg() {
mVUclearProg<vuIndex>(smallidx); // Clear old data if overwriting old program
mVUcacheProg<vuIndex>(smallidx); // Cache Micro Program
-Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval );
+//Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval );
return smallidx;
}
}

View File

@@ -65,7 +65,7 @@ public:
}
};
template<u32 progSize> // progSize = VU program memory size / 4
struct microProgram {
u32 data[progSize];
u32 used; // Number of times it's been used
@@ -92,6 +92,11 @@ struct microProgManager {
#define mVUcacheSize (0x2000000 / ((vuIndex) ? 1 : 4))
struct microVU {
+PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
+PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
+PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ
u32 index; // VU Index (VU0 or VU1)
u32 microSize; // VU Micro Memory Size
u32 progSize; // VU Micro Program Size (microSize/4)
@@ -113,16 +118,6 @@ struct microVU {
u32 q; // Holds current Q instance index
u32 totalCycles; // Total Cycles that mVU is expected to run for
u32 cycles; // Cycles Counter
-// WARNING! MSVC does not reliably guarantee alignment on structure or class member variables,
-// failing around 10-20% of the time to align (random depending on various circumstances).
-// GCC fails to align the members at all, failing about 50-80% of the time (barring occasional
-// random luck). If you want these to be guaranteed aligned, move them to the top of the
-// struct, and ensure the struct itself is aligned. :) -- air
-PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
-PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
-PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ
};
// microVU rec structs
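
The hunk above is the core of the alignment fix: the three PCSX2_ALIGNED16 members move from the middle of microVU to the top of the struct, as air's (now removed) warning suggests, so their offsets become fixed multiples of 16 and only the struct instance itself still has to be aligned. Below is a minimal sketch of that layout rule, using C++11 alignas in place of the project's PCSX2_ALIGNED16 macro; the struct and member names are illustrative, not PCSX2 code.

// --- illustrative sketch, not part of the commit ---
#include <cstddef>
#include <cstdint>
#include <cstdio>

typedef uint32_t u32;

// Aligned members first: their offsets are 0, 16, 32, ..., so they stay
// 16-byte aligned whenever the struct instance itself is.
struct alignas(16) microVUSketch {
    alignas(16) u32 macFlag[4];  // offset 0
    alignas(16) u32 clipFlag[4]; // offset 16
    alignas(16) u32 xmmPQb[4];   // offset 32
    u32 index;                   // plain members follow; nothing below
    u32 microSize;               // can disturb the offsets above
};

int main() {
    static microVUSketch mVU; // static storage honors alignas
    printf("clipFlag offset: %zu (instance at %p)\n",
           offsetof(microVUSketch, clipFlag), (void*)&mVU);
    return 0;
}
// --- end sketch ---

With the members at known 16-byte offsets, the recompiler can touch them with aligned SSE moves without risking the intermittent misalignment the removed comment describes.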
@@ -139,7 +134,7 @@ microVUt(void) mVUreset();
microVUt(void) mVUclose();
microVUt(void) mVUclear(u32, u32);
-// Prototypes for Linux.
+// Prototypes for Linux
void __fastcall mVUcleanUpVU0();
void __fastcall mVUcleanUpVU1();
void* __fastcall mVUcompileVU0(u32 startPC, uptr pState);
@@ -150,7 +145,7 @@ microVUf(void) mVUopL();
// Private Functions
microVUt(void) mVUclearProg(microVU* mVU, int progIndex);
microVUt(int) mVUfindLeastUsedProg(microVU* mVU);
-microVUt(int) mVUsearchProg(/*microVU* mVU*/);
+microVUt(int) mVUsearchProg();
microVUt(void) mVUcacheProg(microVU* mVU, int progIndex);
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles);
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles);

View File

@@ -27,8 +27,7 @@
mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles); \
CMP16ItoM((uptr)&mVU->branch, 0); \
incPC2(1); \
-if( mVUblocks[iPC/2] == NULL ) \
-mVUblocks[iPC/2] = new microBlockManager(); \
+if (!mVUblocks[iPC/2]) { mVUblocks[iPC/2] = new microBlockManager(); } \
bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \
incPC2(-1); \
if (bBlock) { nJMPcc((uptr)bBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \
@@ -159,11 +158,12 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
microVU* mVU = mVUx;
u8* thisPtr = x86Ptr;
-if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUprint("microVU: invalid startPC"); }
+if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { Console::Error("microVU%d: invalid startPC", params vuIndex); }
startPC &= (vuIndex ? 0x3ff8 : 0xff8);
-if( mVUblocks[startPC/8] == NULL )
+if (mVUblocks[startPC/8] == NULL) {
mVUblocks[startPC/8] = new microBlockManager();
+}
// Searches for Existing Compiled Block (if found, then returns; else, compile)
microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
@@ -275,7 +275,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
if (bBlock) { // Branch non-taken has already been compiled
incPC(-3); // Go back to branch opcode (to get branch imm addr)
-if( mVUblocks[branchAddr/8] == NULL )
+if (mVUblocks[branchAddr/8] == NULL)
mVUblocks[branchAddr/8] = new microBlockManager();
// Check if branch-block has already been compiled

View File

@@ -105,20 +105,20 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) {
return;*/
switch ( xyzw ) {
-case 5: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xe1); //WZXY
+case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY
SSE_MOVSS_XMM_to_M32(offset+4, reg);
-SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xff); //WWWW
+SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break; // YW
-case 6: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc9);
+case 6: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xc9);
SSE_MOVLPS_XMM_to_M64(offset+4, reg);
break; // YZ
-case 7: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x93); //ZYXW
+case 7: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x93); //ZYXW
SSE_MOVHPS_XMM_to_M64(offset+4, reg);
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break; // YZW
case 9: SSE_MOVSS_XMM_to_M32(offset, reg);
-SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xff); //WWWW
+SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break; // XW
case 10: SSE_MOVSS_XMM_to_M32(offset, reg);
@@ -128,7 +128,7 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) {
case 11: SSE_MOVSS_XMM_to_M32(offset, reg);
SSE_MOVHPS_XMM_to_M64(offset+8, reg);
break; //XZW
-case 13: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x4b); //YXZW
+case 13: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x4b); //YXZW
SSE_MOVHPS_XMM_to_M64(offset, reg);
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break; // XYW
@@ -163,20 +163,20 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
return;*/
switch ( xyzw ) {
-case 5: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xe1); //WZXY
+case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4);
-SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xff); //WWWW
+SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
break; // YW
-case 6: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc9);
+case 6: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xc9);
SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset+4);
break; // YZ
-case 7: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x93); //ZYXW
+case 7: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x93); //ZYXW
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+4);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
break; // YZW
case 9: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
-SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xff); //WWWW
+SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
break; // XW
case 10: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
@@ -186,7 +186,7 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8);
break; //XZW
-case 13: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x4b); //YXZW
+case 13: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x4b); //YXZW
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12);
break; // XYW
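
Every change in these two hunks swaps SSE_SHUFPS_XMM_to_XMM(reg, reg, imm) for SSE2_PSHUFD_XMM_to_XMM(reg, reg, imm). When source and destination are the same register the two instructions encode the same permutation, so the substitution is behavior-preserving; PSHUFD also allows distinct source and destination registers, which can save a MOVAPS copy elsewhere. A small standalone intrinsics check of the equivalence (test code, not from PCSX2):

// --- illustrative sketch, not part of the commit ---
#include <emmintrin.h>
#include <cstdio>

int main() {
    __m128 v = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);

    // SHUFPS reg, reg, 0xe1: lanes become (y, x, z, w) when dst == src.
    __m128 a = _mm_shuffle_ps(v, v, 0xe1);

    // PSHUFD reg, reg, 0xe1: the same permutation as an integer shuffle.
    __m128 b = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v), 0xe1));

    float fa[4], fb[4];
    _mm_storeu_ps(fa, a);
    _mm_storeu_ps(fb, b);
    printf("shufps: %g %g %g %g\n", fa[0], fa[1], fa[2], fa[3]);
    printf("pshufd: %g %g %g %g\n", fb[0], fb[1], fb[2], fb[3]);
    return 0;
}
// --- end sketch ---

Whether PSHUFD is also faster depends on the microarchitecture (it issues in the integer domain, which on some CPUs of this era dodged a slow SHUFPS, at the cost of a possible domain-crossing bypass delay); the functional equivalence is what makes the swap safe.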
@@ -316,7 +316,7 @@ microVUt(void) mVUcheckSflag(int progIndex) {
}
}
-static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
+static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0x7fffffff, 0x80000000, 0x7fffffff, 0x80000000};
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
// Warning: Modifies xmmT1 and xmmT2
@@ -330,28 +330,27 @@ void MIN_MAX_(x86SSERegType to, x86SSERegType from, bool min) {
SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK2);
if (min) SSE2_MINPD_XMM_to_XMM(xmmT1, xmmT2);
else SSE2_MAXPD_XMM_to_XMM(xmmT1, xmmT2);
SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x88);
-mVUmergeRegs(to, xmmT1, 0xc);
// ZW
-SSE2_PSHUFD_XMM_to_XMM(xmmT1, to, 0xfa);
-SSE2_PAND_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK1);
-SSE2_POR_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK2);
+SSE2_PSHUFD_XMM_to_XMM(to, to, 0xfa);
+SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
+SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0xfa);
SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK2);
-if (min) SSE2_MINPD_XMM_to_XMM(xmmT1, xmmT2);
-else SSE2_MAXPD_XMM_to_XMM(xmmT1, xmmT2);
-SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x88);
-mVUmergeRegs(to, xmmT1, 0x3);
+if (min) SSE2_MINPD_XMM_to_XMM(to, xmmT2);
+else SSE2_MAXPD_XMM_to_XMM(to, xmmT2);
+SSE2_PSHUFD_XMM_to_XMM(to, to, 0x88);
+SSE2_MOVSD_XMM_to_XMM (to, xmmT1);
}
// Warning: Modifies from and to's upper 3 vectors
void MIN_MAX_SS(x86SSERegType to, x86SSERegType from, bool min) {
SSE2_PSHUFD_XMM_to_XMM(to, to, 0x50);
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
SSE2_PSHUFD_XMM_to_XMM(from, from, 0x50);
SSE2_PAND_M128_to_XMM (from, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (from, (uptr)MIN_MAX_MASK2);
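
Some context for the MIN_MAX_ changes: the VU has no NaNs or infinities, so min/max must order raw 32-bit patterns (including denormals and NaN bit patterns, which x86's MINPS/MAXPS would not order that way). The recompiler therefore widens each float into a double with a forced exponent — PAND with MIN_MAX_MASK1 keeps the sign bit in the high dword and the magnitude bits in the low dword, POR with MIN_MAX_MASK2 sets a safe exponent — and compares with MINPD/MAXPD. The mask tweak above narrows lanes 0/2 from 0xffffffff to 0x7fffffff, keeping the duplicated sign bit out of the double's mantissa. The scalar sketch below is my reading of that construction, with invented names, and it glosses over the ±0 tie case.

// --- illustrative sketch, not part of the commit ---
#include <cstdint>
#include <cstdio>
#include <cstring>

typedef uint32_t u32;

// Reference ordering: compare the raw bits as sign-magnitude integers; for
// same-construction operands this matches numeric float order and has no
// NaN/denormal special cases.
static u32 vuMin(u32 a, u32 b) {
    int64_t ka = (a & 0x80000000) ? -(int64_t)(a & 0x7fffffff) : (int64_t)a;
    int64_t kb = (b & 0x80000000) ? -(int64_t)(b & 0x7fffffff) : (int64_t)b;
    return (ka < kb) ? a : b;
}

// What the mask sequence builds per lane: sign in bit 63, a constant exponent
// from the 0x40000000 OR, and the float's magnitude bits in the low mantissa.
// Doubles built this way are never NaN and order like the raw bits do.
static double widen(u32 f) {
    uint64_t bits = ((uint64_t)((f & 0x80000000u) | 0x40000000u) << 32)
                  | (f & 0x7fffffffu);
    double d;
    std::memcpy(&d, &bits, sizeof(d));
    return d;
}

int main() {
    u32 a = 0xc0400000; // -3.0f
    u32 b = 0x40000000; // +2.0f
    printf("vuMin picks 0x%08x\n", vuMin(a, b));       // -3.0f wins
    printf("widen agrees: %d\n", widen(a) < widen(b)); // prints 1
    return 0;
}
// --- end sketch ---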