mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Implemented a faster block-compare algorithm. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1334 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
01d1573fdc
commit
c4531a5c31
|
@ -27,17 +27,18 @@
|
|||
#include "microVU_IR.h"
|
||||
#include "microVU_Misc.h"
|
||||
|
||||
|
||||
#define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...)
|
||||
class microBlockManager {
|
||||
private:
|
||||
static const int MaxBlocks = mMaxBlocks - 1;
|
||||
int listSize; // Total Items - 1
|
||||
int listI; // Index to Add new block
|
||||
microBlock blockList[mMaxBlocks];
|
||||
microBlock* blockList;
|
||||
|
||||
public:
|
||||
microBlockManager() { reset(); }
|
||||
~microBlockManager() {}
|
||||
microBlockManager() { blockList = (microBlock*)_aligned_malloc(sizeof(microBlock)*mMaxBlocks, 16); reset(); }
|
||||
~microBlockManager() { if (blockList) { _aligned_free(blockList); } }
|
||||
void reset() { listSize = -1; listI = -1; };
|
||||
microBlock* add(microBlock* pBlock) {
|
||||
microBlock* thisBlock = search(&pBlock->pState);
|
||||
|
@ -54,7 +55,7 @@ public:
|
|||
if (listSize < 0) return NULL;
|
||||
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
|
||||
for (int i = 0; i <= listSize; i++) {
|
||||
if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo)/* - 4*/)) return &blockList[i];
|
||||
if (mVUquickSearch((void*)pState, (void*)&blockList[i].pState, sizeof(microRegInfo))) return &blockList[i];
|
||||
}
|
||||
}
|
||||
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
|
||||
|
@ -158,6 +159,7 @@ microVUf(void) mVUcacheProg(int progIndex);
|
|||
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles);
|
||||
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles);
|
||||
|
||||
// recCall Function Pointer
|
||||
typedef void (__fastcall *mVUrecCall)(u32, u32);
|
||||
|
||||
|
||||
|
|
|
@ -171,7 +171,7 @@ microVUt(void) mVUincCycles(mV, int x) {
|
|||
}
|
||||
|
||||
microVUt(void) mVUsetCycles(mV) {
|
||||
incCycles(mVUstall);
|
||||
mVUincCycles(mVU, mVUstall);
|
||||
// If upper Op && lower Op write to same VF reg:
|
||||
if ((mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) && mVUregsTemp.VFreg[0]) {
|
||||
if (mVUregsTemp.r || mVUregsTemp.VI) mVUlow.noWriteVF = 1;
|
||||
|
@ -297,7 +297,7 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
|||
for (int branch = 0; mVUcount < (vuIndex ? (0x3fff/8) : (0xfff/8)); ) {
|
||||
incPC(1);
|
||||
startLoop();
|
||||
incCycles(1);
|
||||
mVUincCycles(mVU, 1);
|
||||
mVUopU(mVU, 0);
|
||||
if (curI & _Ebit_) { branch = 1; mVUup.eBit = 1; }
|
||||
if (curI & _DTbit_) { branch = 4; }
|
||||
|
@ -424,7 +424,7 @@ eBitTemination:
|
|||
int lMac = findFlagInst(xMac, 0x7fffffff);
|
||||
int lClip = findFlagInst(xClip, 0x7fffffff);
|
||||
memset(&mVUinfo, 0, sizeof(mVUinfo));
|
||||
incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
|
||||
mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
|
||||
mVUcycles -= 100;
|
||||
if (mVUinfo.doDivFlag) {
|
||||
int flagReg;
|
||||
|
|
|
@ -32,15 +32,16 @@ union regInfo {
|
|||
#pragma pack(push, 1)
|
||||
#pragma warning(disable:4996)
|
||||
#endif
|
||||
struct microRegInfo {
|
||||
regInfo VF[32];
|
||||
u8 VI[32];
|
||||
__declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
|
||||
u32 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||
u8 q;
|
||||
u8 p;
|
||||
u8 r;
|
||||
u8 xgkick;
|
||||
u8 flags; // clip x2 :: status x2
|
||||
u32 needExactMatch; // If set, block needs an exact match of pipeline state (needs to be last 2 bytes in struct)
|
||||
u8 VI[16];
|
||||
regInfo VF[32];
|
||||
u8 flags; // clip x2 :: status x2
|
||||
u8 padding[7]; // 160 bytes
|
||||
#if defined(_MSC_VER)
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
@ -59,9 +60,9 @@ struct microTempRegInfo {
|
|||
u8 xgkick; // Holds the cycle info for XGkick
|
||||
};
|
||||
|
||||
struct microBlock {
|
||||
__declspec(align(16)) struct microBlock {
|
||||
microRegInfo pState; // Detailed State of Pipeline
|
||||
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
||||
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
||||
u8* x86ptrStart; // Start of code
|
||||
};
|
||||
|
||||
|
@ -91,10 +92,10 @@ struct microLowerOp {
|
|||
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
||||
microVIreg VI_write; // VI reg written to by this instruction
|
||||
microVIreg VI_read[2]; // VI regs read by this instruction
|
||||
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
|
||||
bool isNOP; // This instruction is a NOP
|
||||
bool isFSSET; // This instruction is a FSSET
|
||||
bool useSflag; // This instruction uses/reads Sflag
|
||||
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
|
||||
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
|
||||
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
|
||||
bool memReadIs; // Read Is (VI reg) from memory (used by branches)
|
||||
|
|
|
@ -160,6 +160,18 @@ declareAllVariables
|
|||
#define pass3 if (recPass == 2)
|
||||
#define pass4 if (recPass == 3)
|
||||
|
||||
// Define mVUquickSearch(dest, src, size): evaluates to non-zero when the
// two memory regions compare equal.
|
||||
#if defined(_MSC_VER)
|
||||
// MSVC build: use the hand-written SSE compare. mVUsearchXMM returns a
// 4-bit mask in which 0xf means every compared dword matched.
// The 'size' argument is ignored in this branch; the compared length is
// hard-coded inside mVUsearchXMM (offsets 0x00-0x9f, i.e. 160 bytes).
extern u32 __fastcall mVUsearchXMM(void *dest, void *src);
|
||||
#define mVUquickSearch(dest, src, size) (mVUsearchXMM(dest, src) == 0xf)
|
||||
#else
|
||||
// Other compilers: plain memcmp over 'size' bytes.
#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
|
||||
// Note: GCC might not guarantee alignment on microRegInfo,
|
||||
// so to be safe I'm using normal memcmp. If at least 8-byte
|
||||
// alignment is guaranteed, the function below is faster.
|
||||
// #define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
|
||||
#endif
|
||||
|
||||
// Misc Macros...
|
||||
#define mVUprogI mVU->prog.prog[progIndex]
|
||||
#define mVUcurProg mVU->prog.prog[mVU->prog.cur]
|
||||
|
@ -188,7 +200,6 @@ declareAllVariables
|
|||
#define setCode() { mVU->code = curI; }
|
||||
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
|
||||
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
|
||||
#define incCycles(x) { mVUincCycles(mVU, x); }
|
||||
#define bSaveAddr (((xPC + (2 * 8)) & ((isVU1) ? 0x3ff8:0xff8)) / 8)
|
||||
#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & ((isVU1) ? 0x3ff8 : 0xff8))
|
||||
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
|
||||
|
|
|
@ -60,7 +60,7 @@ void mVUclamp2(int reg, int regT1, int xyzw) {
|
|||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Misc Functions
|
||||
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
|
||||
//------------------------------------------------------------------
|
||||
|
||||
void mVUunpack_xyzw(int dstreg, int srcreg, int xyzw) {
|
||||
|
@ -261,6 +261,10 @@ void mVUmergeRegs(int dest, int src, int xyzw) {
|
|||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Misc Functions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Transforms the Address in gprReg to valid VU0/VU1 Address
|
||||
microVUt(void) mVUaddrFix(mV, int gprReg) {
|
||||
if (mVU == &microVU1) {
|
||||
|
@ -295,6 +299,10 @@ microVUt(void) mVUrestoreRegs(mV) {
|
|||
MOV32ItoR(gprR, Roffset); // Restore gprR
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Custom SSE Instructions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
|
||||
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
|
||||
|
||||
|
@ -425,3 +433,54 @@ void SSE_ADD2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
|||
void SSE_ADD2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
||||
SSE_ADDPS_XMM_to_XMM(to, from);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Custom Quick Search
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
|
||||
// Compares the 0xA0 (160) bytes at 'dest' against the 0xA0 bytes at 'src',
// 16 bytes at a time, using SSE registers xmm0-xmm7.
//
// dest, src: addressed below through ecx and edx respectively (the
//            __fastcall register arguments). Both must be 16-byte aligned,
//            since every load/compare uses aligned 16-byte memory operands.
// Returns (in eax): a 4-bit movmskps mask. 0xf means every compared dword
//            was equal; any other value means a mismatch was found. Callers
//            are expected to test only for == 0xf.
//
// The function is declared naked, so no prologue/epilogue is emitted and the
// body supplies its own 'ret'.
// NOTE(review): the MSVC branch of the mVUquickSearch definition declares
// this function 'extern', while this definition is 'static' -- confirm the
// linkage mismatch is intended.
static __declspec(naked) u32 __fastcall mVUsearchXMM(void *dest, void *src) {
|
||||

||||
__asm {
|
||||
// Bytes 0x00-0x1f: pcmpeqd sets a dword lane to all ones where the two
// inputs match; pand keeps a lane set only if both 16-byte chunks matched.
movaps xmm0, [ecx]
|
||||
pcmpeqd xmm0, [edx]
|
||||
movaps xmm1, [ecx + 0x10]
|
||||
pcmpeqd xmm1, [edx + 0x10]
|
||||
pand xmm0, xmm1
|
||||

||||
// Early exit: movmskps gathers the top bit of each lane into eax. If the
// mask is below 0xf the first 0x20 bytes already differ, so return it as-is.
movmskps eax, xmm0
|
||||
cmp eax, 0xf
|
||||
jl exitPoint
|
||||

||||
// Bytes 0x20-0x3f -> xmm0
movaps xmm0, [ecx + 0x20]
|
||||
pcmpeqd xmm0, [edx + 0x20]
|
||||
movaps xmm1, [ecx + 0x30]
|
||||
pcmpeqd xmm1, [edx + 0x30]
|
||||
pand xmm0, xmm1
|
||||

||||
// Bytes 0x40-0x5f -> xmm2
movaps xmm2, [ecx + 0x40]
|
||||
pcmpeqd xmm2, [edx + 0x40]
|
||||
movaps xmm3, [ecx + 0x50]
|
||||
pcmpeqd xmm3, [edx + 0x50]
|
||||
pand xmm2, xmm3
|
||||

||||
// Bytes 0x60-0x7f -> xmm4
movaps xmm4, [ecx + 0x60]
|
||||
pcmpeqd xmm4, [edx + 0x60]
|
||||
movaps xmm5, [ecx + 0x70]
|
||||
pcmpeqd xmm5, [edx + 0x70]
|
||||
pand xmm4, xmm5
|
||||

||||
// Bytes 0x80-0x9f -> xmm6
movaps xmm6, [ecx + 0x80]
|
||||
pcmpeqd xmm6, [edx + 0x80]
|
||||
movaps xmm7, [ecx + 0x90]
|
||||
pcmpeqd xmm7, [edx + 0x90]
|
||||
pand xmm6, xmm7
|
||||

||||
// Fold the four partial results into xmm0 and extract the final mask.
// The first 0x20 bytes already passed, so 0xf here means all 0xA0 bytes match.
pand xmm0, xmm2
|
||||
pand xmm4, xmm6
|
||||
pand xmm0, xmm4
|
||||
movmskps eax, xmm0
|
||||
exitPoint:
|
||||
// eax holds the result mask on both paths.
ret
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue