* Remove need for packed structs through use of unions.
 * Streamlined the microBlockManager's linked list (fewer heap allocs and simpler iterations).
 * Use two 32-bit compares for fast block compares, instead of 6-7 individual u8 compares.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3717 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-09-01 16:15:18 +00:00
parent 1ded55f4cf
commit 76d639ff39
4 changed files with 75 additions and 78 deletions

View File

@ -33,86 +33,83 @@ using namespace x86Emitter;
#include "microVU_IR.h" #include "microVU_IR.h"
struct microBlockLink { struct microBlockLink {
microBlock* block; microBlock block;
microBlockLink* next; microBlockLink* next;
}; };
class microBlockManager { class microBlockManager {
private: private:
microBlockLink blockList; microBlockLink* blockList;
microBlockLink* blockEnd; microBlockLink* blockEnd;
int listI; int listI;
public: public:
microBlockManager() { microBlockManager() {
listI = -1; listI = 0;
blockList.block = NULL; blockEnd = blockList = NULL;
blockList.next = NULL;
blockEnd = &blockList;
} }
~microBlockManager() { reset(); } ~microBlockManager() { reset(); }
void reset() { void reset() {
if (listI >= 0) { microBlockLink* linkI = blockList;
microBlockLink* linkI = &blockList; while( linkI != NULL )
microBlockLink* linkD = NULL; {
for (int i = 0; i <= listI; i++) { microBlockLink* freeI = linkI;
safe_aligned_free(linkI->block);
linkI = linkI->next; linkI = linkI->next;
safe_delete(linkD); _aligned_free(freeI);
linkD = linkI;
} }
safe_delete(linkI); listI = 0;
} blockEnd = blockList = NULL;
listI = -1;
blockEnd = &blockList;
}; };
microBlock* add(microBlock* pBlock) { microBlock* add(microBlock* pBlock) {
microBlock* thisBlock = search(&pBlock->pState); microBlock* thisBlock = search(&pBlock->pState);
if (!thisBlock) { if (!thisBlock) {
listI++; listI++;
blockEnd->block = (microBlock*)_aligned_malloc(sizeof(microBlock), 16); microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
blockEnd->next = new microBlockLink; newBlock->next = NULL;
memcpy_const(blockEnd->block, pBlock, sizeof(microBlock));
thisBlock = blockEnd->block; if (blockEnd) {
blockEnd = blockEnd->next; blockEnd->next = newBlock;
blockEnd = newBlock;
}
else {
blockEnd = blockList = newBlock;
}
memcpy_const(&newBlock->block, pBlock, sizeof(microBlock));
thisBlock = &newBlock->block;
} }
return thisBlock; return thisBlock;
} }
__ri microBlock* search(microRegInfo* pState) { __ri microBlock* search(microRegInfo* pState) {
microBlockLink* linkI = &blockList;
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
for (int i = 0; i <= listI; i++) { for (microBlockLink* linkI = blockList; linkI != NULL; linkI = linkI->next) {
if (mVUquickSearch((void*)pState, (void*)&linkI->block->pState, sizeof(microRegInfo))) return linkI->block; if (mVUquickSearch((void*)pState, (void*)&linkI->block.pState, sizeof(microRegInfo)))
linkI = linkI->next; return &linkI->block;
} }
} }
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
for (int i = 0; i <= listI; i++) { for (microBlockLink* linkI = blockList; linkI != NULL; linkI = linkI->next) {
if ((linkI->block->pState.q == pState->q) if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue;
&& (linkI->block->pState.p == pState->p) if (linkI->block.pState.simple32[0] != pState->simple32[0]) continue;
&& ((linkI->block->pState.vi15 == pState->vi15) || !doConstProp) if (linkI->block.pState.simple32[1] != pState->simple32[1]) continue;
&& (linkI->block->pState.flags == pState->flags) if (linkI->block.pState.needExactMatch & 7) continue;
&& (linkI->block->pState.xgkick == pState->xgkick) return &linkI->block;
&& (linkI->block->pState.viBackUp == pState->viBackUp)
&& (linkI->block->pState.blockType == pState->blockType)
&& !(linkI->block->pState.needExactMatch & 7)) { return linkI->block; }
linkI = linkI->next;
} }
} }
return NULL; return NULL;
} }
void printInfo(int pc) { void printInfo(int pc) {
if (listI < 7) return; if (listI < 7) return;
microBlockLink* linkI = &blockList; microBlockLink* linkI = blockList;
for (int i = 0; i <= listI; i++) { for (int i = 0; i <= listI; i++) {
u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo)/4; u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo)/4;
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block->pState.VI)[j]; for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block->pState.VF[j].reg; for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg;
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block->pState)[j]; for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%08x][viBackup=%02d]" DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%08x][viBackup=%02d]"
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block->pState.q, "[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block.pState.q,
linkI->block->pState.p, linkI->block->pState.xgkick, linkI->block->pState.vi15, linkI->block->pState.viBackUp, linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.viBackUp,
linkI->block->pState.flags, linkI->block->pState.needExactMatch, linkI->block->pState.blockType, viCRC, vfCRC); linkI->block.pState.flags, linkI->block.pState.needExactMatch, linkI->block.pState.blockType, viCRC, vfCRC);
linkI = linkI->next; linkI = linkI->next;
} }
} }

View File

@ -214,18 +214,9 @@ static __ri void eBitWarning(mV) {
//------------------------------------------------------------------ //------------------------------------------------------------------
// Cycles / Pipeline State / Early Exit from Execution // Cycles / Pipeline State / Early Exit from Execution
//------------------------------------------------------------------ //------------------------------------------------------------------
#ifndef __LINUX__
__fi void optimizeReg(u8& rState) { rState = (rState==1) ? 0 : rState; } __fi void optimizeReg(u8& rState) { rState = (rState==1) ? 0 : rState; }
__fi void calcCycles(u8& reg, u8 x) { reg = ((reg > x) ? (reg - x) : 0); } __fi void calcCycles(u8& reg, u8 x) { reg = ((reg > x) ? (reg - x) : 0); }
__fi void tCycles(u8& dest, u8& src) { dest = aMax(dest, src); } __fi void tCycles(u8& dest, u8& src) { dest = aMax(dest, src); }
#else
// Fixme: I don't really feel like dealing with 'cannot bind packed field' errors right now,
// so Linux will use the old defines for the moment.
#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
#define tCycles(dest, src) { dest = aMax(dest, src); }
#endif
__fi void incP(mV) { mVU->p ^= 1; } __fi void incP(mV) { mVU->p ^= 1; }
__fi void incQ(mV) { mVU->q ^= 1; } __fi void incQ(mV) { mVU->q ^= 1; }

View File

@ -25,34 +25,46 @@ union regInfo {
}; };
}; };
#ifdef _MSC_VER union __aligned16 microRegInfo { // Ordered for Faster Compares
# pragma pack(1) struct
#endif {
struct __aligned16 microRegInfo { // Ordered for Faster Compares
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set) u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
union
{
struct
{
u8 needExactMatch; // If set, block needs an exact match of pipeline state u8 needExactMatch; // If set, block needs an exact match of pipeline state
u8 q; u8 q;
u8 p; u8 p;
u8 r; u8 r;
u8 xgkick; u8 xgkick;
u8 viBackUp; // VI reg number that was written to on branch-delay slot u8 viBackUp; // VI reg number that was written to on branch-delay slot
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
};
u32 simple32[2];
};
struct
{
u8 VI[16]; u8 VI[16];
regInfo VF[32]; regInfo VF[32];
u8 flags; // clip x2 :: status x2 u8 flags; // clip x2 :: status x2
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending) };
u8 padding[5]; // 160 bytes };
} __packed;
u128 full128[160/sizeof(u128)];
u64 full64[160/sizeof(u64)];
u32 full32[160/sizeof(u32)];
};
C_ASSERT( sizeof(microRegInfo) == 160 );
struct __aligned16 microBlock { struct __aligned16 microBlock {
microRegInfo pState; // Detailed State of Pipeline microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes) microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code u8* x86ptrStart; // Start of code
} __packed; };
#ifdef _MSC_VER
# pragma pack()
#endif
struct microTempRegInfo { struct microTempRegInfo {
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction

View File

@ -153,9 +153,6 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
//------------------------------------------------------------------ //------------------------------------------------------------------
// Define mVUquickSearch // Define mVUquickSearch
//------------------------------------------------------------------ //------------------------------------------------------------------
// FIXME: I changed the below searchXMM extern from __aligned16 to __pagealigned.
// This *probably* fixes the crashing bugs in linux when using the optimized memcmp.
// Needs testing... --air
#ifndef __LINUX__ #ifndef __LINUX__
extern __pagealigned u8 mVUsearchXMM[__pagesize]; extern __pagealigned u8 mVUsearchXMM[__pagesize];
typedef u32 (__fastcall *mVUCall)(void*, void*); typedef u32 (__fastcall *mVUCall)(void*, void*);