- Implemented a faster block-compare algorithm.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1334 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-06-06 06:40:29 +00:00
parent 01d1573fdc
commit c4531a5c31
5 changed files with 90 additions and 17 deletions

View File

@ -27,17 +27,18 @@
#include "microVU_IR.h"
#include "microVU_Misc.h"
#define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...)
class microBlockManager {
private:
static const int MaxBlocks = mMaxBlocks - 1;
int listSize; // Total Items - 1
int listI; // Index to Add new block
microBlock blockList[mMaxBlocks];
microBlock* blockList;
public:
microBlockManager() { reset(); }
~microBlockManager() {}
microBlockManager() { blockList = (microBlock*)_aligned_malloc(sizeof(microBlock)*mMaxBlocks, 16); reset(); }
~microBlockManager() { if (blockList) { _aligned_free(blockList); } }
void reset() { listSize = -1; listI = -1; };
microBlock* add(microBlock* pBlock) {
microBlock* thisBlock = search(&pBlock->pState);
@ -54,7 +55,7 @@ public:
if (listSize < 0) return NULL;
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
for (int i = 0; i <= listSize; i++) {
if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo)/* - 4*/)) return &blockList[i];
if (mVUquickSearch((void*)pState, (void*)&blockList[i].pState, sizeof(microRegInfo))) return &blockList[i];
}
}
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
@ -158,6 +159,7 @@ microVUf(void) mVUcacheProg(int progIndex);
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles);
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles);
// recCall Function Pointer
typedef void (__fastcall *mVUrecCall)(u32, u32);

View File

@ -171,7 +171,7 @@ microVUt(void) mVUincCycles(mV, int x) {
}
microVUt(void) mVUsetCycles(mV) {
incCycles(mVUstall);
mVUincCycles(mVU, mVUstall);
// If upper Op && lower Op write to same VF reg:
if ((mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) && mVUregsTemp.VFreg[0]) {
if (mVUregsTemp.r || mVUregsTemp.VI) mVUlow.noWriteVF = 1;
@ -297,7 +297,7 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
for (int branch = 0; mVUcount < (vuIndex ? (0x3fff/8) : (0xfff/8)); ) {
incPC(1);
startLoop();
incCycles(1);
mVUincCycles(mVU, 1);
mVUopU(mVU, 0);
if (curI & _Ebit_) { branch = 1; mVUup.eBit = 1; }
if (curI & _DTbit_) { branch = 4; }
@ -424,7 +424,7 @@ eBitTemination:
int lMac = findFlagInst(xMac, 0x7fffffff);
int lClip = findFlagInst(xClip, 0x7fffffff);
memset(&mVUinfo, 0, sizeof(mVUinfo));
incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
mVUcycles -= 100;
if (mVUinfo.doDivFlag) {
int flagReg;

View File

@ -32,15 +32,16 @@ union regInfo {
#pragma pack(push, 1)
#pragma warning(disable:4996)
#endif
struct microRegInfo {
regInfo VF[32];
u8 VI[32];
__declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
u32 needExactMatch; // If set, block needs an exact match of pipeline state
u8 q;
u8 p;
u8 r;
u8 xgkick;
u8 VI[16];
regInfo VF[32];
u8 flags; // clip x2 :: status x2
u32 needExactMatch; // If set, block needs an exact match of pipeline state (needs to be last 2 bytes in struct)
u8 padding[7]; // 160 bytes
#if defined(_MSC_VER)
};
#pragma pack(pop)
@ -59,7 +60,7 @@ struct microTempRegInfo {
u8 xgkick; // Holds the cycle info for XGkick
};
struct microBlock {
__declspec(align(16)) struct microBlock {
microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code
@ -91,10 +92,10 @@ struct microLowerOp {
microVFreg VF_read[2]; // VF Vectors read by this instruction
microVIreg VI_write; // VI reg written to by this instruction
microVIreg VI_read[2]; // VI regs read by this instruction
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
bool isNOP; // This instruction is a NOP
bool isFSSET; // This instruction is a FSSET
bool useSflag; // This instruction uses/reads Sflag
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
bool memReadIs; // Read Is (VI reg) from memory (used by branches)

View File

@ -160,6 +160,18 @@ declareAllVariables
#define pass3 if (recPass == 2)
#define pass4 if (recPass == 3)
// Define mVUquickSearch
#if defined(_MSC_VER)
extern u32 __fastcall mVUsearchXMM(void *dest, void *src);
#define mVUquickSearch(dest, src, size) (mVUsearchXMM(dest, src) == 0xf)
#else
#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
// Note: GCC might not guarantee alignment on microRegInfo,
// so to be safe I'm using normal memcmp. If at least 8-byte
// alignment is guaranteed, the function below is faster.
// #define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
#endif
// Misc Macros...
#define mVUprogI mVU->prog.prog[progIndex]
#define mVUcurProg mVU->prog.prog[mVU->prog.cur]
@ -188,7 +200,6 @@ declareAllVariables
#define setCode() { mVU->code = curI; }
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
#define incCycles(x) { mVUincCycles(mVU, x); }
#define bSaveAddr (((xPC + (2 * 8)) & ((isVU1) ? 0x3ff8:0xff8)) / 8)
#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & ((isVU1) ? 0x3ff8 : 0xff8))
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))

View File

@ -60,7 +60,7 @@ void mVUclamp2(int reg, int regT1, int xyzw) {
}
//------------------------------------------------------------------
// Micro VU - Misc Functions
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
//------------------------------------------------------------------
void mVUunpack_xyzw(int dstreg, int srcreg, int xyzw) {
@ -261,6 +261,10 @@ void mVUmergeRegs(int dest, int src, int xyzw) {
}
}
//------------------------------------------------------------------
// Micro VU - Misc Functions
//------------------------------------------------------------------
// Transforms the Address in gprReg to valid VU0/VU1 Address
microVUt(void) mVUaddrFix(mV, int gprReg) {
if (mVU == &microVU1) {
@ -295,6 +299,10 @@ microVUt(void) mVUrestoreRegs(mV) {
MOV32ItoR(gprR, Roffset); // Restore gprR
}
//------------------------------------------------------------------
// Micro VU - Custom SSE Instructions
//------------------------------------------------------------------
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
@ -425,3 +433,54 @@ void SSE_ADD2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
// ADD2PS pseudo-op: a thin wrapper that simply emits a standard packed-single
// ADDPS for the given register pair (mirrors SSE_ADD2SS_XMM_to_XMM above).
void SSE_ADD2PS_XMM_to_XMM(x86SSERegType dstReg, x86SSERegType srcReg)
{
	SSE_ADDPS_XMM_to_XMM(dstReg, srcReg);
}
//------------------------------------------------------------------
// Micro VU - Custom Quick Search
//------------------------------------------------------------------
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
// Compares two 160-byte, 16-byte-aligned structures (microRegInfo pairs) using
// SSE. __fastcall passes dest in ecx and src in edx; being __declspec(naked),
// the routine emits no prologue/epilogue and returns straight from the asm.
// Returns the movmskps sign mask in eax: 0xf means every compared dword was
// equal (see the mVUquickSearch macro, which tests for == 0xf).
// NOTE(review): clobbers eax and xmm0-xmm7 without preserving them -- assumes
// the caller treats these as volatile; confirm against the project's register
// conventions. Both pointers MUST be 16-byte aligned (movaps faults otherwise).
static __declspec(naked) u32 __fastcall mVUsearchXMM(void *dest, void *src) {
__asm {
// First 32 bytes: compare and early-out if any dword differs. The struct
// is "Ordered for Faster Compares", so the frequently-differing pipeline
// fields sit at the front and most mismatches bail out here.
movaps xmm0, [ecx]
pcmpeqd xmm0, [edx]
movaps xmm1, [ecx + 0x10]
pcmpeqd xmm1, [edx + 0x10]
pand xmm0, xmm1
movmskps eax, xmm0
// eax is 0..15; anything below 0xf means at least one dword mismatched.
cmp eax, 0xf
jl exitPoint
// Remaining 128 bytes (offsets 0x20-0x9F), compared in four independent
// 32-byte pairs to keep the pcmpeqd/pand chains free of dependencies,
// then AND-reduced into xmm0 for a single final mask extraction.
movaps xmm0, [ecx + 0x20]
pcmpeqd xmm0, [edx + 0x20]
movaps xmm1, [ecx + 0x30]
pcmpeqd xmm1, [edx + 0x30]
pand xmm0, xmm1

movaps xmm2, [ecx + 0x40]
pcmpeqd xmm2, [edx + 0x40]
movaps xmm3, [ecx + 0x50]
pcmpeqd xmm3, [edx + 0x50]
pand xmm2, xmm3

movaps xmm4, [ecx + 0x60]
pcmpeqd xmm4, [edx + 0x60]
movaps xmm5, [ecx + 0x70]
pcmpeqd xmm5, [edx + 0x70]
pand xmm4, xmm5

movaps xmm6, [ecx + 0x80]
pcmpeqd xmm6, [edx + 0x80]
movaps xmm7, [ecx + 0x90]
pcmpeqd xmm7, [edx + 0x90]
pand xmm6, xmm7

// Reduce all partial results; eax == 0xf only if all 160 bytes matched.
pand xmm0, xmm2
pand xmm4, xmm6
pand xmm0, xmm4
movmskps eax, xmm0

exitPoint:
ret
}
}