mirror of https://github.com/PCSX2/pcsx2.git
microVU:
* Remove need for packed structs through use of unions. * Streamlined the microBlockManager's linked list (fewer heap allocs and simpler iterations). * Use two 32-bit compares for fast block compares, instead of 6-7 individual u8 compares. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3717 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
1ded55f4cf
commit
76d639ff39
|
@ -33,86 +33,83 @@ using namespace x86Emitter;
|
|||
#include "microVU_IR.h"
|
||||
|
||||
struct microBlockLink {
|
||||
microBlock* block;
|
||||
microBlock block;
|
||||
microBlockLink* next;
|
||||
};
|
||||
|
||||
class microBlockManager {
|
||||
private:
|
||||
microBlockLink blockList;
|
||||
microBlockLink* blockList;
|
||||
microBlockLink* blockEnd;
|
||||
int listI;
|
||||
|
||||
public:
|
||||
microBlockManager() {
|
||||
listI = -1;
|
||||
blockList.block = NULL;
|
||||
blockList.next = NULL;
|
||||
blockEnd = &blockList;
|
||||
listI = 0;
|
||||
blockEnd = blockList = NULL;
|
||||
}
|
||||
~microBlockManager() { reset(); }
|
||||
void reset() {
|
||||
if (listI >= 0) {
|
||||
microBlockLink* linkI = &blockList;
|
||||
microBlockLink* linkD = NULL;
|
||||
for (int i = 0; i <= listI; i++) {
|
||||
safe_aligned_free(linkI->block);
|
||||
linkI = linkI->next;
|
||||
safe_delete(linkD);
|
||||
linkD = linkI;
|
||||
}
|
||||
safe_delete(linkI);
|
||||
microBlockLink* linkI = blockList;
|
||||
while( linkI != NULL )
|
||||
{
|
||||
microBlockLink* freeI = linkI;
|
||||
linkI = linkI->next;
|
||||
_aligned_free(freeI);
|
||||
}
|
||||
listI = -1;
|
||||
blockEnd = &blockList;
|
||||
listI = 0;
|
||||
blockEnd = blockList = NULL;
|
||||
};
|
||||
microBlock* add(microBlock* pBlock) {
|
||||
microBlock* thisBlock = search(&pBlock->pState);
|
||||
if (!thisBlock) {
|
||||
listI++;
|
||||
blockEnd->block = (microBlock*)_aligned_malloc(sizeof(microBlock), 16);
|
||||
blockEnd->next = new microBlockLink;
|
||||
memcpy_const(blockEnd->block, pBlock, sizeof(microBlock));
|
||||
thisBlock = blockEnd->block;
|
||||
blockEnd = blockEnd->next;
|
||||
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
|
||||
newBlock->next = NULL;
|
||||
|
||||
if (blockEnd) {
|
||||
blockEnd->next = newBlock;
|
||||
blockEnd = newBlock;
|
||||
}
|
||||
else {
|
||||
blockEnd = blockList = newBlock;
|
||||
}
|
||||
|
||||
memcpy_const(&newBlock->block, pBlock, sizeof(microBlock));
|
||||
thisBlock = &newBlock->block;
|
||||
}
|
||||
return thisBlock;
|
||||
}
|
||||
__ri microBlock* search(microRegInfo* pState) {
|
||||
microBlockLink* linkI = &blockList;
|
||||
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
|
||||
for (int i = 0; i <= listI; i++) {
|
||||
if (mVUquickSearch((void*)pState, (void*)&linkI->block->pState, sizeof(microRegInfo))) return linkI->block;
|
||||
linkI = linkI->next;
|
||||
for (microBlockLink* linkI = blockList; linkI != NULL; linkI = linkI->next) {
|
||||
if (mVUquickSearch((void*)pState, (void*)&linkI->block.pState, sizeof(microRegInfo)))
|
||||
return &linkI->block;
|
||||
}
|
||||
}
|
||||
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
|
||||
for (int i = 0; i <= listI; i++) {
|
||||
if ((linkI->block->pState.q == pState->q)
|
||||
&& (linkI->block->pState.p == pState->p)
|
||||
&& ((linkI->block->pState.vi15 == pState->vi15) || !doConstProp)
|
||||
&& (linkI->block->pState.flags == pState->flags)
|
||||
&& (linkI->block->pState.xgkick == pState->xgkick)
|
||||
&& (linkI->block->pState.viBackUp == pState->viBackUp)
|
||||
&& (linkI->block->pState.blockType == pState->blockType)
|
||||
&& !(linkI->block->pState.needExactMatch & 7)) { return linkI->block; }
|
||||
linkI = linkI->next;
|
||||
for (microBlockLink* linkI = blockList; linkI != NULL; linkI = linkI->next) {
|
||||
if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue;
|
||||
if (linkI->block.pState.simple32[0] != pState->simple32[0]) continue;
|
||||
if (linkI->block.pState.simple32[1] != pState->simple32[1]) continue;
|
||||
if (linkI->block.pState.needExactMatch & 7) continue;
|
||||
return &linkI->block;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
void printInfo(int pc) {
|
||||
if (listI < 7) return;
|
||||
microBlockLink* linkI = &blockList;
|
||||
microBlockLink* linkI = blockList;
|
||||
for (int i = 0; i <= listI; i++) {
|
||||
u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo)/4;
|
||||
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block->pState.VI)[j];
|
||||
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block->pState.VF[j].reg;
|
||||
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block->pState)[j];
|
||||
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
|
||||
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg;
|
||||
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
|
||||
DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%08x][viBackup=%02d]"
|
||||
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block->pState.q,
|
||||
linkI->block->pState.p, linkI->block->pState.xgkick, linkI->block->pState.vi15, linkI->block->pState.viBackUp,
|
||||
linkI->block->pState.flags, linkI->block->pState.needExactMatch, linkI->block->pState.blockType, viCRC, vfCRC);
|
||||
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block.pState.q,
|
||||
linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.viBackUp,
|
||||
linkI->block.pState.flags, linkI->block.pState.needExactMatch, linkI->block.pState.blockType, viCRC, vfCRC);
|
||||
linkI = linkI->next;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -214,18 +214,9 @@ static __ri void eBitWarning(mV) {
|
|||
//------------------------------------------------------------------
|
||||
// Cycles / Pipeline State / Early Exit from Execution
|
||||
//------------------------------------------------------------------
|
||||
#ifndef __LINUX__
|
||||
// Resets a register state of exactly 1 to 0; any other value is left as it is.
__fi void optimizeReg(u8& rState) {
	if (rState == 1) rState = 0;
}
|
||||
// Subtracts x from reg, clamping the result at 0 instead of letting it wrap.
__fi void calcCycles(u8& reg, u8 x) {
	if (reg > x) reg = reg - x;
	else         reg = 0;
}
|
||||
// Stores aMax(dest, src) back into dest.
// NOTE(review): aMax is defined elsewhere; presumably it yields the larger operand -- confirm.
__fi void tCycles(u8& dest, u8& src) {
	const u8 merged = aMax(dest, src);
	dest = merged;
}
|
||||
#else
|
||||
// Fixme: I don't really feel like dealing with 'cannot bind packed field' errors right now,
|
||||
// so Linux will use the old defines for the moment.
|
||||
#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
|
||||
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
|
||||
#define tCycles(dest, src) { dest = aMax(dest, src); }
|
||||
#endif
|
||||
|
||||
// Toggles the lowest bit of mVU->p (0 <-> 1 when only that bit is in use).
__fi void incP(mV) {
	mVU->p = mVU->p ^ 1;
}
|
||||
// Toggles the lowest bit of mVU->q (0 <-> 1 when only that bit is in use).
__fi void incQ(mV) {
	mVU->q = mVU->q ^ 1;
}
|
||||
|
||||
|
|
|
@ -25,34 +25,46 @@ union regInfo {
|
|||
};
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma pack(1)
|
||||
#endif
|
||||
union __aligned16 microRegInfo { // Ordered for Faster Compares
|
||||
struct
|
||||
{
|
||||
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
|
||||
|
||||
struct __aligned16 microRegInfo { // Ordered for Faster Compares
|
||||
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
|
||||
u8 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||
u8 q;
|
||||
u8 p;
|
||||
u8 r;
|
||||
u8 xgkick;
|
||||
u8 viBackUp; // VI reg number that was written to on branch-delay slot
|
||||
u8 VI[16];
|
||||
regInfo VF[32];
|
||||
u8 flags; // clip x2 :: status x2
|
||||
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
|
||||
u8 padding[5]; // 160 bytes
|
||||
} __packed;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u8 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||
u8 q;
|
||||
u8 p;
|
||||
u8 r;
|
||||
u8 xgkick;
|
||||
u8 viBackUp; // VI reg number that was written to on branch-delay slot
|
||||
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
|
||||
};
|
||||
u32 simple32[2];
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
u8 VI[16];
|
||||
regInfo VF[32];
|
||||
u8 flags; // clip x2 :: status x2
|
||||
};
|
||||
};
|
||||
|
||||
u128 full128[160/sizeof(u128)];
|
||||
u64 full64[160/sizeof(u64)];
|
||||
u32 full32[160/sizeof(u32)];
|
||||
};
|
||||
|
||||
C_ASSERT( sizeof(microRegInfo) == 160 );
|
||||
|
||||
struct __aligned16 microBlock {
|
||||
microRegInfo pState; // Detailed State of Pipeline
|
||||
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
||||
u8* x86ptrStart; // Start of code
|
||||
} __packed;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma pack()
|
||||
#endif
|
||||
};
|
||||
|
||||
struct microTempRegInfo {
|
||||
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
|
||||
|
|
|
@ -153,9 +153,6 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
|
|||
//------------------------------------------------------------------
|
||||
// Define mVUquickSearch
|
||||
//------------------------------------------------------------------
|
||||
// FIXME: I changed the below searchXMM extern from __aligned16 to __pagealigned.
|
||||
// This *probably* fixes the crashing bugs in linux when using the optimized memcmp.
|
||||
// Needs testing... --air
|
||||
#ifndef __LINUX__
|
||||
extern __pagealigned u8 mVUsearchXMM[__pagesize];
|
||||
typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||
|
|
Loading…
Reference in New Issue