more microVU "stuff"

<gigaherz>if the commit log has "stuff" in it, it's vu work from cotton

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1009 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2009-04-18 23:56:32 +00:00
parent 07c22b357c
commit 924869f765
9 changed files with 174 additions and 131 deletions


@@ -43,6 +43,8 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
mVU->microSize = (vuIndex ? 0x4000 : 0x1000);
mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4;
mVU->cache = NULL;
memset(&mVU->prog, 0, sizeof(mVU->prog));
mVUlog((vuIndex) ? "microVU1: init" : "microVU0: init");
mVUreset<vuIndex>();
}
@@ -53,17 +55,13 @@ microVUt(void) mVUreset() {
microVU* mVU = mVUx;
mVUclose<vuIndex>(); // Close
// Create Block Managers
for (int i = 0; i <= mVU->prog.max; i++) {
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
mVU->prog.prog[i].block[j] = new microBlockManager();
}
}
mVUlog((vuIndex) ? "microVU1: reset" : "microVU0: reset");
// Dynarec Cache
mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache));
memset(mVU->cache, 0xcc, mVU->cacheSize);
// Setup Entrance/Exit Points
x86SetPtr(mVU->cache);
mVUdispatcherA<vuIndex>();
@@ -76,6 +74,13 @@ microVUt(void) mVUreset() {
mVU->prog.cur = -1;
mVU->prog.total = -1;
// Create Block Managers
for (int i = 0; i <= mVU->prog.max; i++) {
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
mVU->prog.prog[i].block[j] = new microBlockManager();
}
}
// Setup Dynarec Cache Limits for Each Program
u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes
for (int i = 0; i <= mVU->prog.max; i++) {
@@ -90,6 +95,7 @@ microVUt(void) mVUreset() {
microVUt(void) mVUclose() {
microVU* mVU = mVUx;
mVUlog((vuIndex) ? "microVU1: close" : "microVU0: close");
if ( mVU->cache ) { HostSys::Munmap( mVU->cache, mVU->cacheSize ); mVU->cache = NULL; }
@@ -149,6 +155,7 @@ __forceinline int mVUfindLeastUsedProg(microVU* mVU) {
}
mVUclearProg(mVU, j); // Clear old data if overwriting old program
mVUcacheProg(mVU, j); // Cache Micro Program
mVUlog("microVU: Program Cache got Full!");
return j;
}
}
@@ -160,7 +167,7 @@ __forceinline int mVUsearchProg(microVU* mVU) {
//if (i == mVU->prog.cur) continue; // We can skip the current program. (ToDo: Verify that games don't clear, and send the same microprogram :/)
//if (mVU->prog.prog[i]) // ToDo: Implement Cycles
if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) {
if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); }
//if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); }
mVU->prog.cur = i;
mVU->prog.cleared = 0;
mVU->prog.prog[i].used++;
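
The lookup this hunk touches is the whole caching strategy in miniature: a microprogram sent to the VU is identified by comparing its complete contents against every cached copy. A condensed sketch of that probe, using the field names visible in this diff (the loop bound and the miss path are assumptions, not part of the hunk):

// Hypothetical condensed view of mVUsearchProg's cache probe
int findCachedProg(microVU* mVU) {
    for (int i = 0; i <= mVU->prog.max; i++) {
        if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) {
            mVU->prog.cur = i;        // reuse the already-compiled program
            mVU->prog.cleared = 0;
            mVU->prog.prog[i].used++; // feeds the least-used eviction heuristic
            return 1;
        }
    }
    return 0; // miss: the caller picks a victim slot via mVUfindLeastUsedProg()
}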


@@ -89,7 +89,7 @@ struct microVU {
u32 index; // VU Index (VU0 or VU1)
u32 microSize; // VU Micro Memory Size
u32 progSize; // VU Micro Program Size (microSize/4)
static const u32 cacheSize = 0x500000; // VU Cache Size
static const u32 cacheSize = 0x800000; // VU Cache Size
microProgManager<0x4000> prog; // Micro Program Data
@@ -105,6 +105,7 @@ struct microVU {
u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR)
u32 p; // Holds current P instance index
u32 q; // Holds current Q instance index
u32 tempBackup;
};
// microVU rec structs
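
The new tempBackup field pairs with the mVUsetupBranch change later in this commit: instead of PUSH/POP of gprESP around the flag-merging sequence, the stack pointer is spilled to this fixed slot and reloaded afterwards, presumably so the save no longer lives on the very stack the surrounding code is manipulating. The pattern, as emitted (a sketch; the emitter calls are the ones shown in the diff):

// Save/restore pattern used by mVUsetupBranch
MOV32RtoM((uptr)&mVU->tempBackup, gprESP); // spill ESP to microVU::tempBackup
// ...flag-instance merge sequence from the diff runs here...
MOV32MtoR(gprESP, (uptr)&mVU->tempBackup); // reload ESP from the slot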


@@ -36,7 +36,7 @@ struct microRegInfo {
u8 p;
u8 r;
u8 xgkick;
u8 needExactMatch; // This block needs an exact match of pipeline state
u8 needExactMatch; // If set, block needs an exact match of pipeline state
};
struct microTempRegInfo {
@@ -51,10 +51,9 @@ struct microTempRegInfo {
};
struct microBlock {
microRegInfo pState; // Detailed State of Pipeline
u8* x86ptrStart; // Start of code
//u8* x86ptrEnd; // End of code (first byte outside of block)
//u32 size; // Number of 64bit VU Instructions in Block
microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code
};
template<u32 pSize>
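
Blocks are looked up by the pipeline state they are entered with (pState). The new pStateEnd snapshot exists because JR/JALR jump to a register-defined address that is unknown at compile time: the recompiler records the state a block ends with, and at run time hands it to the compiler as the entry state of whatever block the jump lands on. A hypothetical C view of that hand-off, matching the call emitted later in this commit (__fastcall: first argument in ECX, second in EDX):

// Run-time side of a JR/JALR link (sketch; mVUcompileVU0 for the VU0 case)
u32   targetPC = mVU->branch & (vuIndex ? 0x3ff8 : 0xff8);          // ensure valid jump address
void* code     = mVUcompileVU1(targetPC, (uptr)&pBlock->pStateEnd); // compile or fetch the target
// ...the generated code then JMPRs straight to the returned pointer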


@@ -282,7 +282,6 @@ microVUt(void) mVUallocFMAC9a(int& Fd, int& ACC, int& Fs, int& Ft) {
Ft = xmmFt;
Fd = xmmT1;
ACC = xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
if (_X_Y_Z_W == 8) {
getReg6(Fs, _Fs_);
if (_Ft_ == _Fs_) { Ft = Fs; }
@@ -296,6 +295,7 @@ microVUt(void) mVUallocFMAC9a(int& Fd, int& ACC, int& Fs, int& Ft) {
else if (!_Ft_) { getZero4(Ft); }
else { getReg4(Ft, _Ft_); }
}
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
}
microVUt(void) mVUallocFMAC9b(int& Fd) {
@@ -344,7 +344,6 @@ microVUt(void) mVUallocFMAC11a(int& Fd, int& ACC, int& Fs, int& Ft) {
Ft = xmmFt;
Fd = xmmT1;
ACC = xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
if (_X_Y_Z_W == 8) {
getReg6(Fs, _Fs_);
if ( (_Ft_ == _Fs_) && _bc_x) { Ft = Fs; }
@@ -358,6 +357,7 @@ microVUt(void) mVUallocFMAC11a(int& Fd, int& ACC, int& Fs, int& Ft) {
if (!_Ft_) { getZero3(Ft); }
else { getReg3(Ft, _Ft_); }
}
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
}
microVUt(void) mVUallocFMAC11b(int& Fd) {
@@ -394,11 +394,11 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) {
Ft = xmmFt;
Fd = xmmT1;
ACC = xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
}
microVUt(void) mVUallocFMAC13b(int& Fd) {
@@ -415,8 +415,7 @@ microVUt(void) mVUallocFMAC14a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
Ft = xmmFt;
ACCw = xmmACC;
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
if (_X_Y_Z_W == 8) {
getReg6(Fs, _Fs_);
if (_Ft_ == _Fs_) { Ft = Fs; }
@@ -430,6 +429,7 @@ microVUt(void) mVUallocFMAC14a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
else if (!_Ft_) { getZero4(Ft); }
else { getReg4(Ft, _Ft_); }
}
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
}
microVUt(void) mVUallocFMAC14b(int& ACCw, int& ACCr) {
@@ -448,8 +448,7 @@ microVUt(void) mVUallocFMAC15a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
Ft = xmmFt;
ACCw = xmmACC;
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
if (_X_Y_Z_W == 8) {
getReg6(Fs, _Fs_);
if ((_Ft_ == _Fs_) && _bc_x) { Ft = Fs; }
@@ -463,6 +462,7 @@ microVUt(void) mVUallocFMAC15a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
if (!_Ft_) { getZero3(Ft); }
else { getReg3(Ft, _Ft_); }
}
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
}
microVUt(void) mVUallocFMAC15b(int& ACCw, int& ACCr) {
@@ -479,11 +479,11 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
Ft = xmmFt;
ACCw = xmmACC;
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
}
microVUt(void) mVUallocFMAC16b(int& ACCw, int& ACCr) {
@@ -542,8 +542,7 @@ microVUt(void) mVUallocFMAC19a(int& Fd, int& ACC, int& Fs, int& Ft) {
Ft = xmmFt;
Fd = xmmT1;
ACC = xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
@@ -552,6 +551,7 @@ microVUt(void) mVUallocFMAC19a(int& Fd, int& ACC, int& Fs, int& Ft) {
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
}
microVUt(void) mVUallocFMAC19b(int& Fd) {
@@ -629,11 +629,11 @@ microVUt(void) mVUallocFMAC25a(int& Fd, int& ACC, int& Fs, int& Ft) {
Ft = xmmFt;
Fd = xmmT1;
ACC = xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
getQreg(Ft);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
}
microVUt(void) mVUallocFMAC25b(int& Fd) {
@@ -650,11 +650,11 @@ microVUt(void) mVUallocFMAC26a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
Ft = xmmFt;
ACCw = xmmACC;
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
getQreg(Ft);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
}
microVUt(void) mVUallocFMAC26b(int& ACCw, int& ACCr) {
@@ -748,7 +748,7 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
#define getReg5(reg, _reg_, _fxf_) { \
if (!_reg_) { \
if (_fxf_ < 3) { SSE_XORPS_XMM_to_XMM(reg, reg); } \
else { mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 3); } \
else { mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 1); } \
} \
else { \
mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \
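
The getReg5 change above is a one-bit mask fix. mVUloadReg takes an xyzw component mask with x=8, y=4, z=2, w=1 (the same encoding the mVUsaveReg2 cases later in this diff use), and _fxf_ selects a single component, so the general branch computes 1 << (3 - _fxf_). Worked out for the special case this hunk corrects:

// For _fxf_ == 3 (the W component):
//   1 << (3 - 3) == 1      -> W only, consistent with the general branch
//   old constant 3 == 2 | 1 -> Z and W, one component too many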


@@ -23,19 +23,15 @@
// Helper Macros
//------------------------------------------------------------------
#define createBlock(blockEndPtr) { \
block.pipelineState = pipelineState; \
block.x86ptrStart = x86ptrStart; \
block.x86ptrEnd = blockEndPtr; \
/*block.x86ptrBranch;*/ \
if (!(pipelineState & 1)) { \
memcpy_fast(&block.pState, pState, sizeof(microRegInfo)); \
} \
}
#define branchCase(JMPcc) \
CMP16ItoM((uptr)mVU->branch, 0); \
ajmp = JMPcc((uptr)0); \
#define branchCase(JMPcc, nJMPcc) \
mVUsetupBranch<vuIndex>(bStatus, bMac); \
mVUlog("mVUcompile branchCase"); \
CMP16ItoM((uptr)&mVU->branch, 0); \
incPC2(1); \
pBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \
incPC2(-1); \
if (pBlock) { nJMPcc((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \
else { ajmp = JMPcc((uptr)0); } \
break
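
The reworked branchCase takes both a condition and its inverse so conditional branches can link to already-compiled code. It steps to the next instruction pair (incPC2(1) ... incPC2(-1)) to look up the fall-through block: if that block exists, it emits the inverted jump straight to it and lets the taken path be compiled inline right after; otherwise it emits the original condition as a placeholder jump that the second pass patches once the taken target exists. The +6 in the displacement is the length of a Jcc rel32 instruction (two opcode bytes plus a 4-byte offset, with rel32 measured from the end of the instruction), just as the JMP32 calls later in this diff use +5 for E9 plus rel32. In outline:

// Shape of the code branchCase emits (sketch; names as in the macro)
if (pBlock) nJMPcc((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 6)); // jump to existing fall-through
else        ajmp = JMPcc((uptr)0); // placeholder rel32, patched once the taken target is compiled
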
#define flagSetMacro(xFlag, pFlag, xF, yF, zF) { \
@@ -134,7 +130,7 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) {
for (; (ii > 0 && i >= 0); i--, ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; }
}
if (doMac && (j >= 0)) {
for (; (jj > 0 && j >= 0); j--, jj--) { xMac = (xMac-1) & 3; bMac[i] = xMac; }
for (; (jj > 0 && j >= 0); j--, jj--) { xMac = (xMac-1) & 3; bMac[j] = xMac; }
}
incPC2(-2);
}
@@ -146,9 +142,10 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) {
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) {
microVU* mVU = mVUx;
mVUlog("mVUsetupBranch");
PUSH32R(gprR); // Backup gprR
PUSH32R(gprESP); // Backup gprESP
MOV32RtoM((uptr)&mVU->tempBackup, gprESP);
MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
@@ -175,7 +172,7 @@ microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) {
OR32RtoR(gprF2, getFlagReg2(bMac[2]));
OR32RtoR(gprF3, getFlagReg2(bMac[3]));
POP32R(gprESP); // Restore gprESP
MOV32MtoR(gprESP, (uptr)&mVU->tempBackup);
POP32R(gprR); // Restore gprR
// Shuffle P/Q regs since every block starts at instance #0
@@ -210,7 +207,7 @@ microVUt(void) mVUincCycles(int x) {
microVUt(void) mVUsetCycles() {
microVU* mVU = mVUx;
incCycles(mVUstall);
if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg
if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg
mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP
mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector
mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y);
@@ -229,10 +226,14 @@ microVUt(void) mVUsetCycles() {
microVUt(void) mVUdivSet() {
microVU* mVU = mVUx;
int flagReg1, flagReg2;
getFlagReg(flagReg1, fsInstance);
if (!doStatus) { getFlagReg(flagReg2, fpsInstance); MOV16RtoR(flagReg1, flagReg2); }
AND16ItoR(flagReg1, 0xfcf);
OR16MtoR (flagReg1, (uptr)&mVU->divFlag);
if (doDivFlag) {
getFlagReg(flagReg1, fsInstance);
if (!doStatus) { getFlagReg(flagReg2, fpsInstance); MOV16RtoR(flagReg1, flagReg2); }
MOV32RtoR(gprT1, flagReg1);
AND32ItoR(gprT1, 0xffff0fcf);
OR32MtoR (gprT1, (uptr)&mVU->divFlag);
MOV32RtoR(flagReg1, gprT1);
}
}
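
The rewritten mVUdivSet performs the same merge but widens it: the old 16-bit AND/OR discarded everything above bit 15 of the flag register, while the new sequence stages the value in gprT1 and masks with 0xffff0fcf so the upper halfword survives, and the whole update is now gated on doDivFlag. In C terms the emitted code computes, roughly:

// Hypothetical C view of the emitted merge
flagReg1 = (flagReg1 & 0xffff0fcf) | mVU->divFlag;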
//------------------------------------------------------------------
@@ -243,13 +244,17 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
microVU* mVU = mVUx;
u8* thisPtr = x86Ptr;
if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUlog("microVU: invalid startPC"); }
//mVUlog("mVUcompile Search");
// Searches for Existing Compiled Block (if found, then returns; else, compile)
microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
if (pBlock) { return pBlock->x86ptrStart; }
mVUlog("mVUcompile First Pass");
// First Pass
iPC = startPC / 4;
setCode();
mVUbranch = 0;
mVUstartPC = iPC;
mVUcount = 0;
@@ -257,13 +262,17 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
mVU->p = 0; // All blocks start at p index #0
mVU->q = 0; // All blocks start at q index #0
memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info
mVUblock.x86ptrStart = thisPtr;
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
for (int branch = 0;; ) {
incPC(1);
startLoop();
mVUopU<vuIndex, 0>();
if (curI & _Ebit_) { branch = 1; }
if (curI & _MDTbit_) { branch = 2; }
if (curI & _Ibit_) { incPC(1); mVUinfo |= _isNOP; }
else { incPC(1); mVUopL<vuIndex, 0>(); }
if (curI & _Ibit_) { mVUinfo |= _isNOP; }
else { incPC(-1); mVUopL<vuIndex, 0>(); incPC(1); }
mVUsetCycles<vuIndex>();
if (mVU->p) { mVUinfo |= _readP; }
if (mVU->q) { mVUinfo |= _readQ; }
@@ -276,86 +285,107 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
mVUcount++;
}
mVUlog("mVUcompile mVUsetFlags");
// Sets Up Flag instances
int bStatus[4]; int bMac[4];
mVUsetFlags<vuIndex>(bStatus, bMac);
mVUlog("mVUcompile Second Pass");
//write8(0xcc);
// Second Pass
iPC = mVUstartPC;
setCode();
mVUbranch = 0;
int test = 0;
for (bool x = 1; x; ) {
if (isEOB) { x = 0; }
if (isNOP) { doUpperOp(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
else if (!swapOps) { doUpperOp(); incPC(1); mVUopL<vuIndex, 1>(); }
else { incPC(1); mVUopL<vuIndex, 1>(); incPC(-1); doUpperOp(); incPC(1); }
if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); mVU->iReg = curI; incPC(-1); } }
else if (!swapOps) { incPC(1); doUpperOp(); incPC(-1); mVUopL<vuIndex, 1>(); incPC(1); }
else { mVUopL<vuIndex, 1>(); incPC(1); doUpperOp(); }
test++;
if (test > 0x3ff) { mVUlog("microVU: Possible infinite compiling loop!"); x = 0; test = 0; }
if (!isBdelay) { incPC(1); }
else {
u32* ajmp = 0;
switch (mVUbranch) {
case 3: branchCase(JZ32); // IBEQ
case 4: branchCase(JGE32); // IBGEZ
case 5: branchCase(JG32); // IBGTZ
case 6: branchCase(JLE32); // IBLEQ
case 7: branchCase(JL32); // IBLTZ
case 8: branchCase(JNZ32); // IBNEQ
case 3: branchCase(JZ32, JNZ32); // IBEQ
case 4: branchCase(JGE32, JNGE32); // IBGEZ
case 5: branchCase(JG32, JNG32); // IBGTZ
case 6: branchCase(JLE32, JNLE32); // IBLEQ
case 7: branchCase(JL32, JNL32); // IBLTZ
case 8: branchCase(JNZ32, JZ32); // IBNEQ
case 1: case 2: // B/BAL
incPC(-2); // Go back to branch opcode (to get branch imm addr)
mVUlog("mVUcompile B/BAL");
incPC(-3); // Go back to branch opcode (to get branch imm addr)
mVUsetupBranch<vuIndex>(bStatus, bMac);
// Check if branch-block has already been compiled
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) {
ajmp = JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5));
mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
}
else {
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add block
if (!vuIndex) mVUcompileVU0(branchAddr, (uptr)&pBlock->pState);
else mVUcompileVU1(branchAddr, (uptr)&pBlock->pState);
}
//incPC(+2);
if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); }
else if (!vuIndex) { mVUcompileVU0(branchAddr, (uptr)&mVUregs); }
else { mVUcompileVU1(branchAddr, (uptr)&mVUregs); }
return thisPtr;
case 9: case 10: // JR/JALR
mVUlog("mVUcompile JR/JALR");
memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
mVUsetupBranch<vuIndex>(bStatus, bMac);
PUSH32R(gprR); // Backup EDX
//MOV32MtoR(gprT1, (uptr)&mVUcurProg.x86ptr); // Get last x86ptr for this program
//MOV32RtoM((uptr)&x86Ptr, gprT1); // Setup x86Ptr to write to correct address
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address
pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager
MOV32ItoR(gprR, (u32)&pBlock->pState); // Get pState (EDX second argument for __fastcall)
MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall)
AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address
MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall)
if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState)
if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState)
else CALLFunc((uptr)mVUcompileVU1);
POP32R(gprR); // Restore EDX
JMPR(gprT1); // Jump to rec-code address
return thisPtr;
}
// Conditional Branches
mVUlog("mVUcompile conditional branch");
if (pBlock) { // Branch non-taken has already been compiled
incPC(-3); // Go back to branch opcode (to get branch imm addr)
// Check if branch-block has already been compiled
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); }
else if (!vuIndex) { mVUcompileVU0(branchAddr, (uptr)&mVUregs); }
else { mVUcompileVU1(branchAddr, (uptr)&mVUregs); }
}
else {
uptr jumpAddr;
incPC(1); // Get PC for branch not-taken
if (!vuIndex) mVUcompileVU0(xPC, (uptr)&mVUregs);
else mVUcompileVU1(xPC, (uptr)&mVUregs);
incPC(-4); // Go back to branch opcode (to get branch imm addr)
if (!vuIndex) jumpAddr = (uptr)mVUcompileVU0(branchAddr, (uptr)&mVUregs);
else jumpAddr = (uptr)mVUcompileVU1(branchAddr, (uptr)&mVUregs);
*ajmp = (jumpAddr - ((uptr)ajmp + 4));
}
return thisPtr;
}
}
mVUlog("mVUcompile ebit");
// Do E-bit end stuff here
incCycles(55); // Ensures Valid P/Q instances
mVUcycles -= 55;
if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); }
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ);
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2);
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ);
//MOV32ItoM((uptr)&mVU->p, mVU->p);
//MOV32ItoM((uptr)&mVU->q, mVU->q);
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ);
AND32ItoM((uptr)&microVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif
MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC);
MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC);
JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
//ToDo: Save pipeline state?
return thisPtr;
}
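
The E-bit epilogue above is the only path by which a block hands control back: it forces enough cycles for the P/Q pipelines to settle, writes the final Q and P instances back from xmmPQ, clears the VU-busy bits, records where execution stopped, and jumps to the exit stub. A rough C outline (finalQ/finalP are hypothetical names for the xmmPQ lanes being stored):

// Hypothetical C outline of the E-bit shutdown sequence
mVU->regs->VI[REG_Q].UL = finalQ;                                // from xmmPQ, shuffled if a Q was pending
mVU->regs->VI[REG_P].UL = finalP;                                // xmmPQ instance (mVU->p ? 3 : 2)
microVU0.regs->VI[REG_VPU_STAT].UL &= vuIndex ? ~0x100 : ~0x001; // clear VBS1/VBS0
mVU->regs->vifRegs->stat &= ~0x4;                                // tell VIF the VU is idle
mVU->regs->VI[REG_TPC].UL = xPC;                                 // termination PC
// ...then JMP32 to mVU->exitFunct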


@@ -22,6 +22,8 @@
// Dispatcher Functions
//------------------------------------------------------------------
void testFunction() { mVUlog("microVU: Entered Execution Mode"); }
// Generates the code for entering recompiled blocks
microVUt(void) mVUdispatcherA() {
static u32 PCSX2_ALIGNED16(vuMXCSR);
@@ -43,9 +45,9 @@ microVUt(void) mVUdispatcherA() {
SSE_LDMXCSR((uptr)&vuMXCSR);
// Load Regs
MOV32MtoR(gprR, (uptr)&mVU->regs->VI[REG_R]);
MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG]);
MOV32MtoR(gprF1, (uptr)&mVU->regs->VI[REG_MAC_FLAG]);
MOV32MtoR(gprR, (uptr)&mVU->regs->VI[REG_R].UL);
MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL);
MOV32MtoR(gprF1, (uptr)&mVU->regs->VI[REG_MAC_FLAG].UL);
SHL32ItoR(gprF0, 16);
AND32ItoR(gprF1, 0xffff);
OR32RtoR (gprF0, gprF1);
@@ -54,16 +56,21 @@ microVUt(void) mVUdispatcherA() {
MOV32RtoR(gprF3, gprF0);
for (int i = 0; i < 8; i++) {
MOVQMtoR(i, (uptr)&mVU->regs->VI[i+1]);
MOVQMtoR(i, (uptr)&mVU->regs->VI[i+1].UL);
}
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC);
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)&mVU_maxvals[0]);
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)&mVU_minvals[0]);
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P]);
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q]);
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P].UL);
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q].UL);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
//PUSH32R(EAX);
//CALLFunc((uptr)testFunction);
//POP32R(EAX);
//write8(0xcc);
// Jump to Recompiled Code Block
JMPR(EAX);
}
@@ -86,18 +93,18 @@ microVUt(void) mVUdispatcherB() {
MOV32RtoR(gprT1, gprF0); // ToDo: Ensure Correct Flag instances
AND32ItoR(gprT1, 0xffff);
SHR32ItoR(gprF0, 16);
MOV32RtoM((uptr)&mVU->regs->VI[REG_R], gprR);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG], gprT1);
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG], gprF0);
MOV32RtoM((uptr)&mVU->regs->VI[REG_R].UL, gprR);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1);
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprF0);
for (int i = 0; i < 8; i++) {
MOVDMMXtoM((uptr)&mVU->regs->VI[i+1], i);
MOVDMMXtoM((uptr)&mVU->regs->VI[i+1].UL, i);
}
SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC, xmmACC);
//SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances
SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC.UL[0], xmmACC);
//SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); // ToDo: Ensure Correct Q/P instances
//SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP
//SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ);
//SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ);
// Restore cpu state
POP32R(EDI);
@@ -105,6 +112,8 @@ microVUt(void) mVUdispatcherB() {
POP32R(EBP);
POP32R(EBX);
//write8(0xcc);
EMMS();
RET();
@@ -119,7 +128,7 @@ microVUt(void) mVUdispatcherB() {
microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
microVU* mVU = mVUx;
mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles);
//mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles);
// ToDo: Implement Cycles
mVUsearchProg(mVU); // Find and set correct program
@@ -135,6 +144,7 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
microVUt(void) mVUcleanUp() {
microVU* mVU = mVUx;
//mVUlog("microVU: Program exited successfully!");
mVUcurProg.x86ptr = x86Ptr;
mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start));
}
@@ -147,7 +157,7 @@ void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.start
void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.startFunct)(startPC, cycles); }
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); }
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); }
void mVUcleanUpVU0() { mVUcleanUp<0>(); }
void mVUcleanUpVU1() { mVUcleanUp<1>(); }
void __fastcall mVUcleanUpVU0() { mVUcleanUp<0>(); }
void __fastcall mVUcleanUpVU1() { mVUcleanUp<1>(); }
#endif //PCSX2_MICROVU
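
One easy-to-miss change down here: the cleanup entry points gain __fastcall. On 32-bit MSVC, __fastcall passes the first two 32-bit arguments in ECX and EDX, which is exactly why the JR/JALR path loads gprT2 and gprR before its CALLFunc; declaring every function the generated code calls with one explicit convention keeps the C side and the emitted call sites in agreement. For instance:

// Convention assumed throughout the dispatcher glue (32-bit MSVC __fastcall)
void* __fastcall mVUexecuteVU1(u32 startPC /* ECX */, u32 cycles /* EDX */);
void  __fastcall mVUcleanUpVU1(); // takes no arguments, but declared to match its call sites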


@@ -415,7 +415,7 @@ microVUf(void) mVU_ESUM() {
microVUf(void) mVU_FCAND() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) {mVUlog("clip broken");}
else {
mVUallocCFLAGa<vuIndex>(gprT1, fvcInstance);
AND32ItoR(gprT1, _Imm24_);
@@ -427,7 +427,7 @@ microVUf(void) mVU_FCEQ() {
microVUf(void) mVU_FCEQ() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) {mVUlog("clip broken");}
else {
mVUallocCFLAGa<vuIndex>(gprT1, fvcInstance);
XOR32ItoR(gprT1, _Imm24_);
@@ -439,7 +439,7 @@ microVUf(void) mVU_FCGET() {
microVUf(void) mVU_FCGET() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) {mVUlog("clip broken");}
else {
mVUallocCFLAGa<vuIndex>(gprT1, fvcInstance);
AND32ItoR(gprT1, 0xfff);
@@ -449,7 +449,7 @@ microVUf(void) mVU_FCOR() {
microVUf(void) mVU_FCOR() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) {mVUlog("clip broken");}
else {
mVUallocCFLAGa<vuIndex>(gprT1, fvcInstance);
OR32ItoR(gprT1, _Imm24_);
@@ -461,7 +461,7 @@ microVUf(void) mVU_FCSET() {
microVUf(void) mVU_FCSET() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) {mVUlog("clip broken");}
else {
MOV32ItoR(gprT1, _Imm24_);
mVUallocCFLAGb<vuIndex>(gprT1, fcInstance);
@@ -1007,7 +1007,7 @@ microVUf(void) mVU_XTOP() {
microVU* mVU = mVUx;
if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); }
else {
MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->top);
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->top);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
@@ -1016,7 +1016,7 @@ microVUf(void) mVU_XITOP() {
microVU* mVU = mVUx;
if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); }
else {
MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->itop );
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->itop);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
@@ -1025,16 +1025,13 @@ microVUf(void) mVU_XITOP() {
// XGkick
//------------------------------------------------------------------
microVUt(void) __fastcall mVU_XGKICK_(u32 addr) {
microVU* mVU = mVUx;
u32 *data = (u32*)(mVU->regs->Mem + (addr&0x3fff));
void __fastcall mVU_XGKICK_(u32 addr) {
u32 *data = (u32*)(microVU1.regs->Mem + (addr&0x3fff));
u32 size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff)) >> 4);
u8 *pDest = mtgsThread->GetDataPacketPtr();
memcpy_aligned(pDest, mVU->regs->Mem + addr, size<<4);
memcpy_aligned(pDest, microVU1.regs->Mem + addr, size<<4);
mtgsThread->SendDataPacket();
}
void __fastcall mVU_XGKICK0(u32 addr) { mVU_XGKICK_<0>(addr); }
void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); }
microVUf(void) mVU_XGKICK() {
microVU* mVU = mVUx;
@@ -1042,8 +1039,7 @@ microVUf(void) mVU_XGKICK() {
else {
mVUallocVIa<vuIndex>(gprT2, _Fs_); // gprT2 = ECX for __fastcall
PUSH32R(gprR); // gprR = EDX is volatile so backup
if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0);
else CALLFunc((uptr)mVU_XGKICK1);
CALLFunc((uptr)mVU_XGKICK_);
POP32R(gprR); // Restore
}
}


@@ -169,7 +169,7 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
break; // XYW
case 14: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVHLPS_XMM_to_XMM(reg, reg);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8);
break; // XYZ
case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X
case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y
@@ -242,19 +242,19 @@ microVUx(void) mVUmergeRegs(int dest, int src, int xyzw) {
// Transforms the Address in gprReg to valid VU0/VU1 Address
microVUt(void) mVUaddrFix(int gprReg) {
if ( vuIndex == 1 ) {
AND32ItoR(EAX, 0x3ff); // wrap around
SHL32ItoR(EAX, 4);
AND32ItoR(gprReg, 0x3ff); // wrap around
SHL32ItoR(gprReg, 4);
}
else {
u8 *jmpA, *jmpB;
CMP32ItoR(EAX, 0x400);
CMP32ItoR(gprReg, 0x400);
jmpA = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs
AND32ItoR(EAX, 0x43f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
AND32ItoR(gprReg, 0x43f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
jmpB = JMP8(0);
x86SetJ8(jmpA);
AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around
AND32ItoR(gprReg, 0xff); // if addr < 0x4000, wrap around
x86SetJ8(jmpB);
SHL32ItoR(EAX, 4); // multiply by 16 (shift left by 4)
SHL32ItoR(gprReg, 4); // multiply by 16 (shift left by 4)
}
}
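
For reference, the transformation mVUaddrFix emits reduces to a small pure function. A C sketch of the equivalent logic (addresses are in 16-byte units before the final shift, as the comments above imply):

// C equivalent of the emitted address fix (sketch)
u32 vuAddrFix(u32 addr, int vuIndex) {
    if (vuIndex) return (addr & 0x3ff) << 4;       // VU1: wrap within 16KB data mem
    if (addr >= 0x400) return (addr & 0x43f) << 4; // VU0: 0x4000+ reads VU1's VF/VI regs
    return (addr & 0xff) << 4;                     // VU0: wrap within 4KB data mem
}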


@@ -584,7 +584,7 @@ microVUf(void) mVU_ITOF12() { mVU_ITOFx<vuIndex, recPass>((uptr)mVU_ITOF_12); }
microVUf(void) mVU_ITOF15() { mVU_ITOFx<vuIndex, recPass>((uptr)mVU_ITOF_15); }
microVUf(void) mVU_CLIP() {
microVU* mVU = mVUx;
if (!recPass) { mVUanalyzeFMAC4<vuIndex>(_Fs_, _Ft_); }
if (!recPass) { mVUanalyzeFMAC4<vuIndex>(_Fs_, _Ft_); mVUlog("clip broken"); }
else {
int Fs, Ft;
mVUallocFMAC17a<vuIndex>(Fs, Ft);