From 76d639ff399cc5421cbd0f919241ca4ba80357f1 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Wed, 1 Sep 2010 16:15:18 +0000 Subject: [PATCH] microVU: * Remove need for packed structs through use of unions. * Streamlined the microBlockManager's linked list (less heap allocs and simpler iterations). * Use two 32 bit compares for fast block compares, instead of 6-7 individual u8 compares. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3717 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.h | 85 +++++++++++++++++------------------ pcsx2/x86/microVU_Compile.inl | 9 ---- pcsx2/x86/microVU_IR.h | 56 ++++++++++++++--------- pcsx2/x86/microVU_Misc.h | 3 -- 4 files changed, 75 insertions(+), 78 deletions(-) diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index ef09c08990..ff594de09f 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -33,86 +33,83 @@ using namespace x86Emitter; #include "microVU_IR.h" struct microBlockLink { - microBlock* block; + microBlock block; microBlockLink* next; }; class microBlockManager { private: - microBlockLink blockList; + microBlockLink* blockList; microBlockLink* blockEnd; int listI; public: microBlockManager() { - listI = -1; - blockList.block = NULL; - blockList.next = NULL; - blockEnd = &blockList; + listI = 0; + blockEnd = blockList = NULL; } ~microBlockManager() { reset(); } void reset() { - if (listI >= 0) { - microBlockLink* linkI = &blockList; - microBlockLink* linkD = NULL; - for (int i = 0; i <= listI; i++) { - safe_aligned_free(linkI->block); - linkI = linkI->next; - safe_delete(linkD); - linkD = linkI; - } - safe_delete(linkI); + microBlockLink* linkI = blockList; + while( linkI != NULL ) + { + microBlockLink* freeI = linkI; + linkI = linkI->next; + _aligned_free(freeI); } - listI = -1; - blockEnd = &blockList; + listI = 0; + blockEnd = blockList = NULL; }; microBlock* add(microBlock* pBlock) { microBlock* thisBlock = search(&pBlock->pState); if (!thisBlock) { listI++; - blockEnd->block = 
(microBlock*)_aligned_malloc(sizeof(microBlock), 16); - blockEnd->next = new microBlockLink; - memcpy_const(blockEnd->block, pBlock, sizeof(microBlock)); - thisBlock = blockEnd->block; - blockEnd = blockEnd->next; + microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16); + newBlock->next = NULL; + + if (blockEnd) { + blockEnd->next = newBlock; + blockEnd = newBlock; + } + else { + blockEnd = blockList = newBlock; + } + + memcpy_const(&newBlock->block, pBlock, sizeof(microBlock)); + thisBlock = &newBlock->block; } return thisBlock; } __ri microBlock* search(microRegInfo* pState) { - microBlockLink* linkI = &blockList; if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) - for (int i = 0; i <= listI; i++) { - if (mVUquickSearch((void*)pState, (void*)&linkI->block->pState, sizeof(microRegInfo))) return linkI->block; - linkI = linkI->next; + for (microBlockLink* linkI = blockList; linkI != NULL; linkI = linkI->next) { + if (mVUquickSearch((void*)pState, (void*)&linkI->block.pState, sizeof(microRegInfo))) + return &linkI->block; } } else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) - for (int i = 0; i <= listI; i++) { - if ((linkI->block->pState.q == pState->q) - && (linkI->block->pState.p == pState->p) - && ((linkI->block->pState.vi15 == pState->vi15) || !doConstProp) - && (linkI->block->pState.flags == pState->flags) - && (linkI->block->pState.xgkick == pState->xgkick) - && (linkI->block->pState.viBackUp == pState->viBackUp) - && (linkI->block->pState.blockType == pState->blockType) - && !(linkI->block->pState.needExactMatch & 7)) { return linkI->block; } - linkI = linkI->next; + for (microBlockLink* linkI = blockList; linkI != NULL; linkI = linkI->next) { + if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue; + if (linkI->block.pState.simple32[0] != pState->simple32[0]) continue; + if (linkI->block.pState.simple32[1] != pState->simple32[1]) continue; + if 
(linkI->block.pState.needExactMatch & 7) continue; + return &linkI->block; } } return NULL; } void printInfo(int pc) { if (listI < 7) return; - microBlockLink* linkI = &blockList; + microBlockLink* linkI = blockList; for (int i = 0; i <= listI; i++) { u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo)/4; - for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block->pState.VI)[j]; - for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block->pState.VF[j].reg; - for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block->pState)[j]; + for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j]; + for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg; + for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j]; DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%08x][viBackup=%02d]" - "[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block->pState.q, - linkI->block->pState.p, linkI->block->pState.xgkick, linkI->block->pState.vi15, linkI->block->pState.viBackUp, - linkI->block->pState.flags, linkI->block->pState.needExactMatch, linkI->block->pState.blockType, viCRC, vfCRC); + "[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block.pState.q, + linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.viBackUp, + linkI->block.pState.flags, linkI->block.pState.needExactMatch, linkI->block.pState.blockType, viCRC, vfCRC); linkI = linkI->next; } } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 9dc72fe64a..aa90a016b5 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -214,18 +214,9 @@ static __ri void eBitWarning(mV) { //------------------------------------------------------------------ // Cycles / Pipeline State / Early Exit from Execution //------------------------------------------------------------------ -#ifndef __LINUX__ __fi 
void optimizeReg(u8& rState) { rState = (rState==1) ? 0 : rState; } __fi void calcCycles(u8& reg, u8 x) { reg = ((reg > x) ? (reg - x) : 0); } __fi void tCycles(u8& dest, u8& src) { dest = aMax(dest, src); } -#else -// Fixme: I don't really feel like dealing with 'cannot bind packed field' errors right now, -// so Linux will use the old defines for the moment. -#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; } -#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } -#define tCycles(dest, src) { dest = aMax(dest, src); } -#endif - __fi void incP(mV) { mVU->p ^= 1; } __fi void incQ(mV) { mVU->q ^= 1; } diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index e07d5253c2..498c39b024 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -25,34 +25,46 @@ union regInfo { }; }; -#ifdef _MSC_VER -# pragma pack(1) -#endif +union __aligned16 microRegInfo { // Ordered for Faster Compares + struct + { + u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set) -struct __aligned16 microRegInfo { // Ordered for Faster Compares - u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set) - u8 needExactMatch; // If set, block needs an exact match of pipeline state - u8 q; - u8 p; - u8 r; - u8 xgkick; - u8 viBackUp; // VI reg number that was written to on branch-delay slot - u8 VI[16]; - regInfo VF[32]; - u8 flags; // clip x2 :: status x2 - u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending) - u8 padding[5]; // 160 bytes -} __packed; + union + { + struct + { + u8 needExactMatch; // If set, block needs an exact match of pipeline state + u8 q; + u8 p; + u8 r; + u8 xgkick; + u8 viBackUp; // VI reg number that was written to on branch-delay slot + u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending) + }; + u32 simple32[2]; + }; + + struct + { + u8 VI[16]; + regInfo VF[32]; + u8 flags; // clip x2 :: status x2 + }; + }; + + u128 full128[160/sizeof(u128)]; + 
u64 full64[160/sizeof(u64)]; + u32 full32[160/sizeof(u32)]; +}; + +C_ASSERT( sizeof(microRegInfo) == 160 ); struct __aligned16 microBlock { microRegInfo pState; // Detailed State of Pipeline microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes) u8* x86ptrStart; // Start of code -} __packed; - -#ifdef _MSC_VER -# pragma pack() -#endif +}; struct microTempRegInfo { regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 0aa263cdde..ef58dbe8ef 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -153,9 +153,6 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst; //------------------------------------------------------------------ // Define mVUquickSearch //------------------------------------------------------------------ -// FIXME: I changed the below saerchXMM extern from __aligned16 to __pagealigned. -// This *probably* fixes the crashing bugs in linux when using the optimized memcmp. -// Needs testing... --air #ifndef __LINUX__ extern __pagealigned u8 mVUsearchXMM[__pagesize]; typedef u32 (__fastcall *mVUCall)(void*, void*);