* Remove need for packed structs through use of unions.
 * Streamlined the microBlockManager's linked list (fewer heap allocs and simpler iterations).
 * Use two 32 bit compares for fast block compares, instead of 6-7 individual u8 compares.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3717 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-09-01 16:15:18 +00:00
parent 1ded55f4cf
commit 76d639ff39
4 changed files with 75 additions and 78 deletions

View File

@ -33,86 +33,83 @@ using namespace x86Emitter;
#include "microVU_IR.h"
// Singly linked list node used by microBlockManager to hold one microBlock.
// NOTE(review): this listing looks like a diff rendered without +/- markers, so two forms of
// the `block` member are shown back to back; presumably only one exists in the real header
// (the pointer form before the change, the embedded form after it) -- confirm against the repo.
struct microBlockLink {
microBlock* block; // block referenced through a separately allocated pointer
microBlock block; // block stored inline in the node itself
microBlockLink* next; // following node; the NULL-terminated walks stop when this is NULL
};
class microBlockManager {
private:
microBlockLink blockList;
microBlockLink* blockList;
microBlockLink* blockEnd;
int listI;
public:
// Constructs an empty block manager.
// NOTE(review): diff listing without +/- markers -- two initialisation variants are interleaved
// below; presumably only one of them is present in either revision of the file.
microBlockManager() {
// Variant for a by-value `blockList` head node: counter at -1, head cleared, tail -> head.
listI = -1;
blockList.block = NULL;
blockList.next = NULL;
blockEnd = &blockList;
// Variant for a pointer `blockList` head: counter at 0, head and tail both NULL.
listI = 0;
blockEnd = blockList = NULL;
}
// Destructor: delegates all cleanup to reset().
~microBlockManager() { reset(); }
// Frees the nodes of the block list and puts the manager back into its empty state.
// NOTE(review): this listing looks like a diff rendered without +/- markers. A counted walk
// (guarded by `listI >= 0`, using safe_aligned_free/safe_delete) is followed by a
// NULL-terminated walk (using _aligned_free), and two different "empty state" assignments
// appear at the end. Presumably each revision contains only one of the two forms; as shown
// the braces do not balance.
void reset() {
if (listI >= 0) {
microBlockLink* linkI = &blockList;
microBlockLink* linkD = NULL;
for (int i = 0; i <= listI; i++) {
safe_aligned_free(linkI->block);
linkI = linkI->next;
// linkD trails linkI by one step, so the by-value head node is never passed to safe_delete.
safe_delete(linkD);
linkD = linkI;
}
safe_delete(linkI);
microBlockLink* linkI = blockList;
while( linkI != NULL )
{
// Fetch the successor before releasing the current node.
microBlockLink* freeI = linkI;
linkI = linkI->next;
_aligned_free(freeI);
}
listI = -1;
blockEnd = &blockList;
listI = 0;
blockEnd = blockList = NULL;
};
// Returns the stored block whose pipeline state matches pBlock->pState (as decided by
// search()); when none exists, appends a copy of *pBlock to the list and returns the copy.
// The returned pointer refers to storage owned by the manager, not to pBlock.
// NOTE(review): this listing looks like a diff rendered without +/- markers -- two append
// implementations are interleaved inside the `if (!thisBlock)` body; presumably only one is
// present in either revision.
microBlock* add(microBlock* pBlock) {
microBlock* thisBlock = search(&pBlock->pState);
if (!thisBlock) {
listI++;
// Form 1: block allocated on its own (16-byte aligned) plus a fresh trailing link via new.
blockEnd->block = (microBlock*)_aligned_malloc(sizeof(microBlock), 16);
blockEnd->next = new microBlockLink;
memcpy_const(blockEnd->block, pBlock, sizeof(microBlock));
thisBlock = blockEnd->block;
blockEnd = blockEnd->next;
// Form 2: a single 16-byte aligned allocation holds the link and the block together.
// NOTE(review): the allocation result is dereferenced without a NULL check.
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
newBlock->next = NULL;
if (blockEnd) {
// Non-empty list: hook the new node after the current tail.
blockEnd->next = newBlock;
blockEnd = newBlock;
}
else {
// Empty list: the new node is both head and tail.
blockEnd = blockList = newBlock;
}
memcpy_const(&newBlock->block, pBlock, sizeof(microBlock));
thisBlock = &newBlock->block;
}
return thisBlock;
}
// Looks for a stored block whose pipeline state matches pState and returns a pointer to it,
// or NULL when no stored block matches.
//   pState->needExactMatch != 0 : the whole microRegInfo must match (via mVUquickSearch).
//   pState->needExactMatch == 0 : only a subset of the state is compared.
// NOTE(review): this listing looks like a diff rendered without +/- markers. Each branch shows
// a counted walk over `linkI->block->pState` followed by a NULL-terminated walk over
// `linkI->block.pState`; presumably only one form is present in either revision.
__ri microBlock* search(microRegInfo* pState) {
microBlockLink* linkI = &blockList;
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
for (int i = 0; i <= listI; i++) {
if (mVUquickSearch((void*)pState, (void*)&linkI->block->pState, sizeof(microRegInfo))) return linkI->block;
linkI = linkI->next;
for (microBlockLink* linkI = blockList; linkI != NULL; linkI = linkI->next) {
// A non-zero mVUquickSearch result is treated as "states are equal".
if (mVUquickSearch((void*)pState, (void*)&linkI->block.pState, sizeof(microRegInfo)))
return &linkI->block;
}
}
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
for (int i = 0; i <= listI; i++) {
// Field-by-field form: q, p, vi15 (ignored when doConstProp is off), flags, xgkick,
// viBackUp and blockType must agree, and the stored block must not itself require an
// exact match (low three bits of needExactMatch clear).
if ((linkI->block->pState.q == pState->q)
&& (linkI->block->pState.p == pState->p)
&& ((linkI->block->pState.vi15 == pState->vi15) || !doConstProp)
&& (linkI->block->pState.flags == pState->flags)
&& (linkI->block->pState.xgkick == pState->xgkick)
&& (linkI->block->pState.viBackUp == pState->viBackUp)
&& (linkI->block->pState.blockType == pState->blockType)
&& !(linkI->block->pState.needExactMatch & 7)) { return linkI->block; }
linkI = linkI->next;
for (microBlockLink* linkI = blockList; linkI != NULL; linkI = linkI->next) {
// vi15 only has to agree when constant propagation (doConstProp) is enabled.
if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue;
// Two 32-bit compares over simple32[0..1].
// NOTE(review): per the microRegInfo layout shown elsewhere in this change, simple32
// overlays needExactMatch, q, p, r, xgkick, viBackUp and blockType. Compared with the
// field-by-field form above, `r` is now part of the comparison and `flags` (which sits
// outside that union) is not -- confirm this difference is intended.
if (linkI->block.pState.simple32[0] != pState->simple32[0]) continue;
if (linkI->block.pState.simple32[1] != pState->simple32[1]) continue;
if (linkI->block.pState.needExactMatch & 7) continue;
return &linkI->block;
}
}
return NULL;
}
// Debug dump: when listI is at least 7, writes one DevCon line per stored block containing
// pc, the block index, selected pipeline-state fields and three subtractive checksums
// (first 4 u32 words of VI, the 32 VF[].reg values, and the whole microRegInfo as u32 words).
// Does nothing when listI < 7.
// NOTE(review): this listing looks like a diff rendered without +/- markers, so the
// `linkI->block->pState` lines and the `linkI->block.pState` lines are two forms of the same
// statements; presumably only one form is present in either revision.
// NOTE(review): with the pointer-head list (listI starts at 0, is incremented once per node
// in add(), and the last node's next is NULL) the list holds listI nodes, yet the loop below
// runs listI + 1 times -- the final iteration would dereference a NULL linkI. Confirm and
// consider `i < listI` or a NULL-terminated walk.
void printInfo(int pc) {
if (listI < 7) return;
microBlockLink* linkI = &blockList;
microBlockLink* linkI = blockList;
for (int i = 0; i <= listI; i++) {
// z = number of u32 words in a microRegInfo.
u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo)/4;
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block->pState.VI)[j];
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block->pState.VF[j].reg;
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block->pState)[j];
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg;
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%08x][viBackup=%02d]"
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block->pState.q,
linkI->block->pState.p, linkI->block->pState.xgkick, linkI->block->pState.vi15, linkI->block->pState.viBackUp,
linkI->block->pState.flags, linkI->block->pState.needExactMatch, linkI->block->pState.blockType, viCRC, vfCRC);
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block.pState.q,
linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.viBackUp,
linkI->block.pState.flags, linkI->block.pState.needExactMatch, linkI->block.pState.blockType, viCRC, vfCRC);
linkI = linkI->next;
}
}

View File

@ -214,18 +214,9 @@ static __ri void eBitWarning(mV) {
//------------------------------------------------------------------
// Cycles / Pipeline State / Early Exit from Execution
//------------------------------------------------------------------
#ifndef __LINUX__
// Collapses a register state of exactly 1 down to 0; every other value is left untouched.
__fi void optimizeReg(u8& rState) {
	if (rState == 1) {
		rState = 0;
	}
}
// Reduces reg by x, clamping the result to 0 whenever reg does not exceed x.
__fi void calcCycles(u8& reg, u8 x) {
	if (reg > x) {
		reg = reg - x;
	}
	else {
		reg = 0;
	}
}
// Overwrites dest with aMax(dest, src). aMax is defined outside this view
// (presumably it yields the larger of its two arguments -- confirm).
__fi void tCycles(u8& dest, u8& src) { dest = aMax(dest, src); }
#else
// Fixme: I don't really feel like dealing with 'cannot bind packed field' errors right now,
// so Linux will use the old defines for the moment.
#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
#define tCycles(dest, src) { dest = aMax(dest, src); }
#endif
// Toggle helpers: each XORs a field of mVU with 1, flipping its lowest bit.
// `mV` is a parameter-list macro defined outside this view (presumably it declares `mVU`).
__fi void incP(mV) { mVU->p ^= 1; } // flips bit 0 of mVU->p
__fi void incQ(mV) { mVU->q ^= 1; } // flips bit 0 of mVU->q

View File

@ -25,34 +25,46 @@ union regInfo {
};
};
#ifdef _MSC_VER
# pragma pack(1)
#endif
// Pipeline-state record stored per micro block; a C_ASSERT next to this definition pins its
// size at 160 bytes.
// NOTE(review): this listing looks like a diff rendered without +/- markers. Two definitions
// are interleaved: a flat `struct ... __packed` with explicit padding (presumably the
// pre-change form) and a union built from anonymous structs with whole-record views
// (presumably the post-change form). Only one exists in either revision.
union __aligned16 microRegInfo { // Ordered for Faster Compares
struct
{
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
struct __aligned16 microRegInfo { // Ordered for Faster Compares
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
u8 needExactMatch; // If set, block needs an exact match of pipeline state
u8 q;
u8 p;
u8 r;
u8 xgkick;
u8 viBackUp; // VI reg number that was written to on branch-delay slot
u8 VI[16];
regInfo VF[32];
u8 flags; // clip x2 :: status x2
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
u8 padding[5]; // 160 bytes
} __packed;
// The fields in this inner union can be read either by name or as two u32 words
// (simple32), which is what allows them to be compared with two 32-bit compares.
// NOTE(review): seven u8 fields are named here; assuming u8 is 1 byte and u32 is 4, one
// byte of simple32[1] has no named field -- confirm it is always initialised consistently.
union
{
struct
{
u8 needExactMatch; // If set, block needs an exact match of pipeline state
u8 q;
u8 p;
u8 r;
u8 xgkick;
u8 viBackUp; // VI reg number that was written to on branch-delay slot
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
};
u32 simple32[2];
};
// Fields outside simple32: not covered by the two-word compare.
struct
{
u8 VI[16];
regInfo VF[32];
u8 flags; // clip x2 :: status x2
};
};
// Whole-record views of the same 160 bytes at 128-, 64- and 32-bit granularity.
u128 full128[160/sizeof(u128)];
u64 full64[160/sizeof(u64)];
u32 full32[160/sizeof(u32)];
};
C_ASSERT( sizeof(microRegInfo) == 160 );
// One recompiled micro block: its pipeline state at entry and at exit, plus a pointer to the
// start of its code.
// NOTE(review): this listing looks like a diff rendered without +/- markers -- the
// `} __packed;` terminator with the trailing MSVC `#pragma pack()` reset and the plain `};`
// terminator are presumably the pre-change and post-change endings of the same struct.
struct __aligned16 microBlock {
microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code
} __packed;
#ifdef _MSC_VER
# pragma pack()
#endif
};
struct microTempRegInfo {
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction

View File

@ -153,9 +153,6 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
//------------------------------------------------------------------
// Define mVUquickSearch
//------------------------------------------------------------------
// FIXME: I changed the below searchXMM extern from __aligned16 to __pagealigned.
// This *probably* fixes the crashing bugs in linux when using the optimized memcmp.
// Needs testing... --air
#ifndef __LINUX__
extern __pagealigned u8 mVUsearchXMM[__pagesize];
typedef u32 (__fastcall *mVUCall)(void*, void*);