mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Finished implementing regAlloc. Sadly, the speed gain wasn't great (0% to 2% in the games I tried). I think the speedup should be bigger on a CPU that supports SSE4.1, but I don't have one to test :p git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1573 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
cc3130660a
commit
37675b6c49
|
@ -136,7 +136,6 @@ struct microVU {
|
||||||
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
|
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
|
||||||
PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
|
PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
|
||||||
PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ
|
PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ
|
||||||
PCSX2_ALIGNED16(u32 xmmVFb[4]); // Backup for VF regs
|
|
||||||
|
|
||||||
u32 index; // VU Index (VU0 or VU1)
|
u32 index; // VU Index (VU0 or VU1)
|
||||||
u32 vuMemSize; // VU Main Memory Size (in bytes)
|
u32 vuMemSize; // VU Main Memory Size (in bytes)
|
||||||
|
|
|
@ -259,6 +259,7 @@ microVUt(void) mVUendProgram(mV, int isEbit, int* xStatus, int* xMac, int* xClip
|
||||||
int fClip = (isEbit) ? findFlagInst(xClip, 0x7fffffff) : cI;
|
int fClip = (isEbit) ? findFlagInst(xClip, 0x7fffffff) : cI;
|
||||||
int qInst = 0;
|
int qInst = 0;
|
||||||
int pInst = 0;
|
int pInst = 0;
|
||||||
|
mVU->regAlloc->flushAll();
|
||||||
|
|
||||||
if (isEbit) {
|
if (isEbit) {
|
||||||
mVUprint("mVUcompile ebit");
|
mVUprint("mVUcompile ebit");
|
||||||
|
@ -370,6 +371,7 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
||||||
mVUsetupRange(mVU, startPC, 1);
|
mVUsetupRange(mVU, startPC, 1);
|
||||||
|
|
||||||
// Reset regAlloc
|
// Reset regAlloc
|
||||||
|
mVU->regAlloc->flushAll();
|
||||||
mVU->regAlloc->reset();
|
mVU->regAlloc->reset();
|
||||||
|
|
||||||
// First Pass
|
// First Pass
|
||||||
|
@ -435,6 +437,7 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
||||||
else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); }
|
else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); }
|
||||||
else { doSwapOp(mVU); }
|
else { doSwapOp(mVU); }
|
||||||
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
|
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
|
||||||
|
if (!doRegAlloc) { mVU->regAlloc->flushAll(); }
|
||||||
|
|
||||||
if (!mVUinfo.isBdelay) { incPC(1); }
|
if (!mVUinfo.isBdelay) { incPC(1); }
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -181,7 +181,6 @@ private:
|
||||||
int findFreeRegRec(int startIdx) {
|
int findFreeRegRec(int startIdx) {
|
||||||
for (int i = startIdx; i < xmmTotal; i++) {
|
for (int i = startIdx; i < xmmTotal; i++) {
|
||||||
if (!xmmReg[i].isNeeded) {
|
if (!xmmReg[i].isNeeded) {
|
||||||
if ((i+1) >= xmmTotal) return i;
|
|
||||||
int x = findFreeRegRec(i+1);
|
int x = findFreeRegRec(i+1);
|
||||||
if (x == -1) return i;
|
if (x == -1) return i;
|
||||||
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
|
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
|
||||||
|
@ -223,16 +222,19 @@ public:
|
||||||
xmmReg[reg].xyzw = 0;
|
xmmReg[reg].xyzw = 0;
|
||||||
xmmReg[reg].isNeeded = 0;
|
xmmReg[reg].isNeeded = 0;
|
||||||
}
|
}
|
||||||
void writeBackReg(int reg) {
|
void writeBackReg(int reg, bool invalidateRegs = 1) {
|
||||||
if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0
|
if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0
|
||||||
if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
|
if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
|
||||||
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
||||||
|
if (invalidateRegs) {
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
if (i == reg) continue;
|
if ((i == reg) || xmmReg[i].isNeeded) continue;
|
||||||
if (xmmReg[i].reg == xmmReg[reg].reg) {
|
if (xmmReg[i].reg == xmmReg[reg].reg) {
|
||||||
|
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon::Error("microVU Error: writeBackReg() [%d]", params xmmReg[i].reg);
|
||||||
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
|
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
|
||||||
xmmReg[reg].count = counter;
|
xmmReg[reg].count = counter;
|
||||||
xmmReg[reg].xyzw = 0;
|
xmmReg[reg].xyzw = 0;
|
||||||
|
@ -272,7 +274,7 @@ public:
|
||||||
if (vfLoadReg >= 0) { // Search For Cached Regs
|
if (vfLoadReg >= 0) { // Search For Cached Regs
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
if ((xmmReg[i].reg == vfLoadReg) && (!xmmReg[i].xyzw // Reg Was Not Modified
|
if ((xmmReg[i].reg == vfLoadReg) && (!xmmReg[i].xyzw // Reg Was Not Modified
|
||||||
|| (/*!xmmReg[i].isNeeded &&*/ xmmReg[i].reg && (xmmReg[i].xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
|| (xmmReg[i].reg && (xmmReg[i].xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
||||||
int z = i;
|
int z = i;
|
||||||
if (vfWriteReg >= 0) { // Reg will be modified
|
if (vfWriteReg >= 0) { // Reg will be modified
|
||||||
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
||||||
|
@ -287,7 +289,7 @@ public:
|
||||||
}
|
}
|
||||||
else { // Don't clone reg, but shuffle to adjust for SS ops
|
else { // Don't clone reg, but shuffle to adjust for SS ops
|
||||||
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(z); }
|
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(z); }
|
||||||
else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
|
if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
|
||||||
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
|
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
|
||||||
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
|
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,6 @@ mVUop(mVU_DIV) {
|
||||||
pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); }
|
pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); }
|
||||||
pass2 {
|
pass2 {
|
||||||
u8 *ajmp, *bjmp, *cjmp, *djmp;
|
u8 *ajmp, *bjmp, *cjmp, *djmp;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
|
@ -80,7 +79,6 @@ mVUop(mVU_DIV) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
|
pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
|
||||||
}
|
}
|
||||||
|
@ -89,7 +87,6 @@ mVUop(mVU_SQRT) {
|
||||||
pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); }
|
pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); }
|
||||||
pass2 {
|
pass2 {
|
||||||
u8 *ajmp;
|
u8 *ajmp;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
||||||
|
|
||||||
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
|
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
|
||||||
|
@ -102,7 +99,6 @@ mVUop(mVU_SQRT) {
|
||||||
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
|
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
|
||||||
|
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); }
|
pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); }
|
||||||
}
|
}
|
||||||
|
@ -111,7 +107,6 @@ mVUop(mVU_RSQRT) {
|
||||||
pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); }
|
pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); }
|
||||||
pass2 {
|
pass2 {
|
||||||
u8 *ajmp, *bjmp, *cjmp, *djmp;
|
u8 *ajmp, *bjmp, *cjmp, *djmp;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
|
@ -147,7 +142,6 @@ mVUop(mVU_RSQRT) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
|
pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
|
||||||
}
|
}
|
||||||
|
@ -195,7 +189,6 @@ mVUop(mVU_EATAN) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->clearNeeded(t2);
|
mVU->regAlloc->clearNeeded(t2);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("EATAN P"); }
|
pass3 { mVUlog("EATAN P"); }
|
||||||
}
|
}
|
||||||
|
@ -203,7 +196,6 @@ mVUop(mVU_EATAN) {
|
||||||
mVUop(mVU_EATANxy) {
|
mVUop(mVU_EATANxy) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
int t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
int Fs = mVU->regAlloc->allocReg();
|
int Fs = mVU->regAlloc->allocReg();
|
||||||
int t2 = mVU->regAlloc->allocReg();
|
int t2 = mVU->regAlloc->allocReg();
|
||||||
|
@ -217,7 +209,6 @@ mVUop(mVU_EATANxy) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->clearNeeded(t2);
|
mVU->regAlloc->clearNeeded(t2);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("EATANxy P"); }
|
pass3 { mVUlog("EATANxy P"); }
|
||||||
}
|
}
|
||||||
|
@ -225,7 +216,6 @@ mVUop(mVU_EATANxy) {
|
||||||
mVUop(mVU_EATANxz) {
|
mVUop(mVU_EATANxz) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
int t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
int Fs = mVU->regAlloc->allocReg();
|
int Fs = mVU->regAlloc->allocReg();
|
||||||
int t2 = mVU->regAlloc->allocReg();
|
int t2 = mVU->regAlloc->allocReg();
|
||||||
|
@ -239,7 +229,6 @@ mVUop(mVU_EATANxz) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->clearNeeded(t2);
|
mVU->regAlloc->clearNeeded(t2);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("EATANxz P"); }
|
pass3 { mVUlog("EATANxz P"); }
|
||||||
}
|
}
|
||||||
|
@ -254,7 +243,6 @@ mVUop(mVU_EATANxz) {
|
||||||
mVUop(mVU_EEXP) {
|
mVUop(mVU_EEXP) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
int t2 = mVU->regAlloc->allocReg();
|
int t2 = mVU->regAlloc->allocReg();
|
||||||
|
@ -282,7 +270,6 @@ mVUop(mVU_EEXP) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->clearNeeded(t2);
|
mVU->regAlloc->clearNeeded(t2);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("EEXP P"); }
|
pass3 { mVUlog("EEXP P"); }
|
||||||
}
|
}
|
||||||
|
@ -306,14 +293,12 @@ microVUt(void) mVU_sumXYZ(int PQ, int Fs) {
|
||||||
mVUop(mVU_ELENG) {
|
mVUop(mVU_ELENG) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(xmmPQ, Fs);
|
mVU_sumXYZ(xmmPQ, Fs);
|
||||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ELENG P"); }
|
pass3 { mVUlog("ELENG P"); }
|
||||||
}
|
}
|
||||||
|
@ -321,7 +306,6 @@ mVUop(mVU_ELENG) {
|
||||||
mVUop(mVU_ERCPR) {
|
mVUop(mVU_ERCPR) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
|
@ -330,7 +314,6 @@ mVUop(mVU_ERCPR) {
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ERCPR P"); }
|
pass3 { mVUlog("ERCPR P"); }
|
||||||
}
|
}
|
||||||
|
@ -338,7 +321,6 @@ mVUop(mVU_ERCPR) {
|
||||||
mVUop(mVU_ERLENG) {
|
mVUop(mVU_ERLENG) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(xmmPQ, Fs);
|
mVU_sumXYZ(xmmPQ, Fs);
|
||||||
|
@ -348,7 +330,6 @@ mVUop(mVU_ERLENG) {
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ERLENG P"); }
|
pass3 { mVUlog("ERLENG P"); }
|
||||||
}
|
}
|
||||||
|
@ -356,7 +337,6 @@ mVUop(mVU_ERLENG) {
|
||||||
mVUop(mVU_ERSADD) {
|
mVUop(mVU_ERSADD) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(xmmPQ, Fs);
|
mVU_sumXYZ(xmmPQ, Fs);
|
||||||
|
@ -365,7 +345,6 @@ mVUop(mVU_ERSADD) {
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ERSADD P"); }
|
pass3 { mVUlog("ERSADD P"); }
|
||||||
}
|
}
|
||||||
|
@ -373,7 +352,6 @@ mVUop(mVU_ERSADD) {
|
||||||
mVUop(mVU_ERSQRT) {
|
mVUop(mVU_ERSQRT) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
|
@ -382,7 +360,6 @@ mVUop(mVU_ERSQRT) {
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ERSQRT P"); }
|
pass3 { mVUlog("ERSQRT P"); }
|
||||||
}
|
}
|
||||||
|
@ -390,13 +367,11 @@ mVUop(mVU_ERSQRT) {
|
||||||
mVUop(mVU_ESADD) {
|
mVUop(mVU_ESADD) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(xmmPQ, Fs);
|
mVU_sumXYZ(xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ESADD P"); }
|
pass3 { mVUlog("ESADD P"); }
|
||||||
}
|
}
|
||||||
|
@ -411,7 +386,6 @@ mVUop(mVU_ESADD) {
|
||||||
mVUop(mVU_ESIN) {
|
mVUop(mVU_ESIN) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
int t2 = mVU->regAlloc->allocReg();
|
int t2 = mVU->regAlloc->allocReg();
|
||||||
|
@ -433,7 +407,6 @@ mVUop(mVU_ESIN) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->clearNeeded(t2);
|
mVU->regAlloc->clearNeeded(t2);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ESIN P"); }
|
pass3 { mVUlog("ESIN P"); }
|
||||||
}
|
}
|
||||||
|
@ -441,13 +414,11 @@ mVUop(mVU_ESIN) {
|
||||||
mVUop(mVU_ESQRT) {
|
mVUop(mVU_ESQRT) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ESQRT P"); }
|
pass3 { mVUlog("ESQRT P"); }
|
||||||
}
|
}
|
||||||
|
@ -455,7 +426,6 @@ mVUop(mVU_ESQRT) {
|
||||||
mVUop(mVU_ESUM) {
|
mVUop(mVU_ESUM) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
|
@ -467,7 +437,6 @@ mVUop(mVU_ESUM) {
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ESUM P"); }
|
pass3 { mVUlog("ESUM P"); }
|
||||||
}
|
}
|
||||||
|
@ -753,14 +722,12 @@ mVUop(mVU_ISUBIU) {
|
||||||
mVUop(mVU_MFIR) {
|
mVUop(mVU_MFIR) {
|
||||||
pass1 { if (!_Ft_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_, mVUlow.VI_read[0]); analyzeReg2(_Ft_, mVUlow.VF_write, 1); }
|
pass1 { if (!_Ft_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_, mVUlow.VI_read[0]); analyzeReg2(_Ft_, mVUlow.VF_write, 1); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVUallocVIa(mVU, gprT1, _Is_);
|
||||||
MOVSX32R16toR(gprT1, gprT1);
|
MOVSX32R16toR(gprT1, gprT1);
|
||||||
SSE2_MOVD_R_to_XMM(Ft, gprT1);
|
SSE2_MOVD_R_to_XMM(Ft, gprT1);
|
||||||
if (!_XYZW_SS) { mVUunpack_xyzw(Ft, Ft, 0); }
|
if (!_XYZW_SS) { mVUunpack_xyzw(Ft, Ft, 0); }
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("MFIR.%s vf%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
|
pass3 { mVUlog("MFIR.%s vf%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
|
||||||
}
|
}
|
||||||
|
@ -768,11 +735,9 @@ mVUop(mVU_MFIR) {
|
||||||
mVUop(mVU_MFP) {
|
mVUop(mVU_MFP) {
|
||||||
pass1 { mVUanalyzeMFP(mVU, _Ft_); }
|
pass1 { mVUanalyzeMFP(mVU, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
getPreg(Ft);
|
getPreg(Ft);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_); }
|
pass3 { mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_); }
|
||||||
}
|
}
|
||||||
|
@ -780,10 +745,8 @@ mVUop(mVU_MFP) {
|
||||||
mVUop(mVU_MOVE) {
|
mVUop(mVU_MOVE) {
|
||||||
pass1 { mVUanalyzeMOVE(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeMOVE(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
|
pass3 { mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
|
||||||
}
|
}
|
||||||
|
@ -791,14 +754,12 @@ mVUop(mVU_MOVE) {
|
||||||
mVUop(mVU_MR32) {
|
mVUop(mVU_MR32) {
|
||||||
pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0))));
|
if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0))));
|
||||||
else SSE2_PSHUFD_XMM_to_XMM(Ft, Fs, 0x39);
|
else SSE2_PSHUFD_XMM_to_XMM(Ft, Fs, 0x39);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("MR32.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
|
pass3 { mVUlog("MR32.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
|
||||||
}
|
}
|
||||||
|
@ -806,12 +767,10 @@ mVUop(mVU_MR32) {
|
||||||
mVUop(mVU_MTIR) {
|
mVUop(mVU_MTIR) {
|
||||||
pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeReg5(_Fs_, _Fsf_, mVUlow.VF_read[0]); analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
|
pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeReg5(_Fs_, _Fsf_, mVUlow.VF_read[0]); analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
SSE2_MOVD_XMM_to_R(gprT1, Fs);
|
SSE2_MOVD_XMM_to_R(gprT1, Fs);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVUallocVIb(mVU, gprT1, _It_);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("MTIR vi%02d, vf%02d%s", _Ft_, _Fs_, _Fsf_String); }
|
pass3 { mVUlog("MTIR vi%02d, vf%02d%s", _Ft_, _Fs_, _Fsf_String); }
|
||||||
}
|
}
|
||||||
|
@ -914,7 +873,6 @@ mVUop(mVU_ISWR) {
|
||||||
mVUop(mVU_LQ) {
|
mVUop(mVU_LQ) {
|
||||||
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 0); }
|
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 0); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
if (_Is_) {
|
if (_Is_) {
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVUallocVIa(mVU, gprT1, _Is_);
|
||||||
|
@ -924,7 +882,6 @@ mVUop(mVU_LQ) {
|
||||||
}
|
}
|
||||||
else mVUloadReg(Ft, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W);
|
else mVUloadReg(Ft, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("LQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
|
pass3 { mVUlog("LQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
|
||||||
}
|
}
|
||||||
|
@ -932,7 +889,6 @@ mVUop(mVU_LQ) {
|
||||||
mVUop(mVU_LQD) {
|
mVUop(mVU_LQD) {
|
||||||
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); }
|
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
if (_Is_) {
|
if (_Is_) {
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVUallocVIa(mVU, gprT1, _Is_);
|
||||||
SUB16ItoR(gprT1, 1);
|
SUB16ItoR(gprT1, 1);
|
||||||
|
@ -949,7 +905,6 @@ mVUop(mVU_LQD) {
|
||||||
mVUloadReg(Ft, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
mVUloadReg(Ft, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
}
|
}
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("LQD.%s vf%02d, --vi%02d", _XYZW_String, _Ft_, _Is_); }
|
pass3 { mVUlog("LQD.%s vf%02d, --vi%02d", _XYZW_String, _Ft_, _Is_); }
|
||||||
}
|
}
|
||||||
|
@ -957,7 +912,6 @@ mVUop(mVU_LQD) {
|
||||||
mVUop(mVU_LQI) {
|
mVUop(mVU_LQI) {
|
||||||
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); }
|
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
if (_Is_) {
|
if (_Is_) {
|
||||||
mVUallocVIa(mVU, (!mVUlow.noWriteVF) ? gprT1 : gprT2, _Is_);
|
mVUallocVIa(mVU, (!mVUlow.noWriteVF) ? gprT1 : gprT2, _Is_);
|
||||||
if (!mVUlow.noWriteVF) {
|
if (!mVUlow.noWriteVF) {
|
||||||
|
@ -975,7 +929,6 @@ mVUop(mVU_LQI) {
|
||||||
mVUloadReg(Ft, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
mVUloadReg(Ft, (uptr)mVU->regs->Mem, _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
}
|
}
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("LQI.%s vf%02d, vi%02d++", _XYZW_String, _Ft_, _Fs_); }
|
pass3 { mVUlog("LQI.%s vf%02d, vi%02d++", _XYZW_String, _Ft_, _Fs_); }
|
||||||
}
|
}
|
||||||
|
@ -987,7 +940,6 @@ mVUop(mVU_LQI) {
|
||||||
mVUop(mVU_SQ) {
|
mVUop(mVU_SQ) {
|
||||||
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 0); }
|
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 0); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
if (_It_) {
|
if (_It_) {
|
||||||
mVUallocVIa(mVU, gprT1, _It_);
|
mVUallocVIa(mVU, gprT1, _It_);
|
||||||
|
@ -997,7 +949,6 @@ mVUop(mVU_SQ) {
|
||||||
}
|
}
|
||||||
else mVUsaveReg(Fs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W, 1);
|
else mVUsaveReg(Fs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W, 1);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("SQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Fs_, _Ft_, _Imm11_); }
|
pass3 { mVUlog("SQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Fs_, _Ft_, _Imm11_); }
|
||||||
}
|
}
|
||||||
|
@ -1005,7 +956,6 @@ mVUop(mVU_SQ) {
|
||||||
mVUop(mVU_SQD) {
|
mVUop(mVU_SQD) {
|
||||||
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 1); }
|
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 1); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
if (_It_) {
|
if (_It_) {
|
||||||
mVUallocVIa(mVU, gprT1, _It_);
|
mVUallocVIa(mVU, gprT1, _It_);
|
||||||
|
@ -1016,7 +966,6 @@ mVUop(mVU_SQD) {
|
||||||
}
|
}
|
||||||
else mVUsaveReg(Fs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1);
|
else mVUsaveReg(Fs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("SQD.%s vf%02d, --vi%02d", _XYZW_String, _Fs_, _Ft_); }
|
pass3 { mVUlog("SQD.%s vf%02d, --vi%02d", _XYZW_String, _Fs_, _Ft_); }
|
||||||
}
|
}
|
||||||
|
@ -1024,7 +973,6 @@ mVUop(mVU_SQD) {
|
||||||
mVUop(mVU_SQI) {
|
mVUop(mVU_SQI) {
|
||||||
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 1); }
|
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 1); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
if (_It_) {
|
if (_It_) {
|
||||||
mVUallocVIa(mVU, gprT1, _It_);
|
mVUallocVIa(mVU, gprT1, _It_);
|
||||||
|
@ -1036,7 +984,6 @@ mVUop(mVU_SQI) {
|
||||||
}
|
}
|
||||||
else mVUsaveReg(Fs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1);
|
else mVUsaveReg(Fs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("SQI.%s vf%02d, vi%02d++", _XYZW_String, _Fs_, _Ft_); }
|
pass3 { mVUlog("SQI.%s vf%02d, vi%02d++", _XYZW_String, _Fs_, _Ft_); }
|
||||||
}
|
}
|
||||||
|
@ -1049,14 +996,12 @@ mVUop(mVU_RINIT) {
|
||||||
pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
|
pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (_Fs_ || (_Fsf_ == 3)) {
|
if (_Fs_ || (_Fsf_ == 3)) {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
SSE2_MOVD_XMM_to_R(gprT1, Fs);
|
SSE2_MOVD_XMM_to_R(gprT1, Fs);
|
||||||
AND32ItoR(gprT1, 0x007fffff);
|
AND32ItoR(gprT1, 0x007fffff);
|
||||||
OR32ItoR (gprT1, 0x3f800000);
|
OR32ItoR (gprT1, 0x3f800000);
|
||||||
MOV32RtoM(Rmem, gprT1);
|
MOV32RtoM(Rmem, gprT1);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
else MOV32ItoM(Rmem, 0x3f800000);
|
else MOV32ItoM(Rmem, 0x3f800000);
|
||||||
}
|
}
|
||||||
|
@ -1065,12 +1010,10 @@ mVUop(mVU_RINIT) {
|
||||||
|
|
||||||
microVUt(void) mVU_RGET_(mV, int Rreg) {
|
microVUt(void) mVU_RGET_(mV, int Rreg) {
|
||||||
if (!mVUlow.noWriteVF) {
|
if (!mVUlow.noWriteVF) {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
SSE2_MOVD_R_to_XMM(Ft, Rreg);
|
SSE2_MOVD_R_to_XMM(Ft, Rreg);
|
||||||
if (!_XYZW_SS) mVUunpack_xyzw(Ft, Ft, 0);
|
if (!_XYZW_SS) mVUunpack_xyzw(Ft, Ft, 0);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1109,13 +1052,11 @@ mVUop(mVU_RXOR) {
|
||||||
pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
|
pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (_Fs_ || (_Fsf_ == 3)) {
|
if (_Fs_ || (_Fsf_ == 3)) {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
SSE2_MOVD_XMM_to_R(gprT1, Fs);
|
SSE2_MOVD_XMM_to_R(gprT1, Fs);
|
||||||
AND32ItoR(gprT1, 0x7fffff);
|
AND32ItoR(gprT1, 0x7fffff);
|
||||||
XOR32RtoM(Rmem, gprT1);
|
XOR32RtoM(Rmem, gprT1);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("RXOR R, vf%02d%s", _Fs_, _Fsf_String); }
|
pass3 { mVUlog("RXOR R, vf%02d%s", _Fs_, _Fsf_String); }
|
||||||
|
|
|
@ -267,6 +267,9 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||||
#define mVUdumpProg 0&&
|
#define mVUdumpProg 0&&
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Reg Alloc
|
||||||
|
#define doRegAlloc 1 // Set to 0 to flush every 64bit Instruction (Turns off regAlloc)
|
||||||
|
|
||||||
// Speed Hacks
|
// Speed Hacks
|
||||||
#define CHECK_VU_CONSTHACK 0 // Only use for GoW (will be slower on other games)
|
#define CHECK_VU_CONSTHACK 0 // Only use for GoW (will be slower on other games)
|
||||||
#define CHECK_VU_FLAGHACK (u32)Config.Hacks.vuFlagHack // (Can cause Infinite loops, SPS, etc...)
|
#define CHECK_VU_FLAGHACK (u32)Config.Hacks.vuFlagHack // (Can cause Infinite loops, SPS, etc...)
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
|
|
||||||
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
|
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
|
||||||
microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) {
|
microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) {
|
||||||
int sReg, mReg = gprT1, xyzw = _X_Y_Z_W;
|
int sReg, mReg = gprT1, xyzw = _X_Y_Z_W, regT1b = 0;
|
||||||
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
|
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
|
||||||
|
|
||||||
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
|
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
|
||||||
|
@ -42,7 +42,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) {
|
||||||
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
|
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
|
||||||
if (sFLAG.doNonSticky) AND32ItoR(sReg, 0xfffc00ff); // Clear O,U,S,Z flags
|
if (sFLAG.doNonSticky) AND32ItoR(sReg, 0xfffc00ff); // Clear O,U,S,Z flags
|
||||||
}
|
}
|
||||||
if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); }
|
if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; }
|
||||||
|
|
||||||
//-------------------------Check for Signed flags------------------------------
|
//-------------------------Check for Signed flags------------------------------
|
||||||
|
|
||||||
|
@ -75,6 +75,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) {
|
||||||
OR32RtoR (sReg, mReg);
|
OR32RtoR (sReg, mReg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (regT1b) mVU->regAlloc->clearNeeded(regT1);
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
@ -136,12 +137,11 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co
|
||||||
pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); }
|
pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); }
|
||||||
pass2 {
|
pass2 {
|
||||||
int Fs, Ft, ACC;
|
int Fs, Ft, ACC;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
setupFtReg(mVU, Ft, opCase);
|
setupFtReg(mVU, Ft, opCase);
|
||||||
|
|
||||||
if (isACC) {
|
if (isACC) {
|
||||||
ACC = mVU->regAlloc->allocReg((_X_Y_Z_W == 0xf) ? -1 : 32, 32, 0xf, 0);
|
|
||||||
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
|
ACC = mVU->regAlloc->allocReg((_X_Y_Z_W == 0xf) ? -1 : 32, 32, 0xf, 0);
|
||||||
if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W));
|
if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W));
|
||||||
}
|
}
|
||||||
else { Fs = mVU->regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); }
|
else { Fs = mVU->regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); }
|
||||||
|
@ -165,10 +165,8 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co
|
||||||
}
|
}
|
||||||
else mVUupdateFlags(mVU, Fs, (((opCase==2)&&(!_XYZW_SS)) ? Ft : -1));
|
else mVUupdateFlags(mVU, Fs, (((opCase==2)&&(!_XYZW_SS)) ? Ft : -1));
|
||||||
|
|
||||||
//if (isACC) SSE_MOVAPS_XMM_to_XMM(xmmACC, ACC); // For Testing
|
|
||||||
mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First
|
mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVU_printOP(mVU, opCase, opName, isACC); }
|
pass3 { mVU_printOP(mVU, opCase, opName, isACC); }
|
||||||
}
|
}
|
||||||
|
@ -178,11 +176,10 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op
|
||||||
pass1 { setupPass1(mVU, opCase, 1, 0); }
|
pass1 { setupPass1(mVU, opCase, 1, 0); }
|
||||||
pass2 {
|
pass2 {
|
||||||
int Fs, Ft, ACC;
|
int Fs, Ft, ACC;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
setupFtReg(mVU, Ft, opCase);
|
setupFtReg(mVU, Ft, opCase);
|
||||||
|
|
||||||
ACC = mVU->regAlloc->allocReg(32, 32, 0xf, 0);
|
|
||||||
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
|
ACC = mVU->regAlloc->allocReg(32, 32, 0xf, 0);
|
||||||
|
|
||||||
if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); }
|
if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); }
|
||||||
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
||||||
|
@ -209,11 +206,9 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op
|
||||||
mVU->regAlloc->clearNeeded(tempACC);
|
mVU->regAlloc->clearNeeded(tempACC);
|
||||||
}
|
}
|
||||||
|
|
||||||
//SSE_MOVAPS_XMM_to_XMM(xmmACC, ACC); // For Testing
|
|
||||||
mVU->regAlloc->clearNeeded(ACC);
|
mVU->regAlloc->clearNeeded(ACC);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVU_printOP(mVU, opCase, opName, 1); }
|
pass3 { mVU_printOP(mVU, opCase, opName, 1); }
|
||||||
}
|
}
|
||||||
|
@ -223,7 +218,6 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) {
|
||||||
pass1 { setupPass1(mVU, opCase, 0, 0); }
|
pass1 { setupPass1(mVU, opCase, 0, 0); }
|
||||||
pass2 {
|
pass2 {
|
||||||
int Fs, Ft, ACC;
|
int Fs, Ft, ACC;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
setupFtReg(mVU, Ft, opCase);
|
setupFtReg(mVU, Ft, opCase);
|
||||||
|
|
||||||
ACC = mVU->regAlloc->allocReg(32);
|
ACC = mVU->regAlloc->allocReg(32);
|
||||||
|
@ -243,10 +237,9 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) {
|
||||||
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
||||||
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
||||||
|
|
||||||
mVU->regAlloc->clearNeeded(ACC);
|
|
||||||
mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First
|
mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
mVU->regAlloc->clearNeeded(ACC);
|
||||||
}
|
}
|
||||||
pass3 { mVU_printOP(mVU, opCase, opName, 0); }
|
pass3 { mVU_printOP(mVU, opCase, opName, 0); }
|
||||||
}
|
}
|
||||||
|
@ -256,7 +249,6 @@ void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) {
|
||||||
pass1 { setupPass1(mVU, opCase, 0, 0); }
|
pass1 { setupPass1(mVU, opCase, 0, 0); }
|
||||||
pass2 {
|
pass2 {
|
||||||
int Fs, Ft, Fd;
|
int Fs, Ft, Fd;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
setupFtReg(mVU, Ft, opCase);
|
setupFtReg(mVU, Ft, opCase);
|
||||||
|
|
||||||
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
|
@ -276,7 +268,6 @@ void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) {
|
||||||
mVU->regAlloc->clearNeeded(Fd); // Always Clear Written Reg First
|
mVU->regAlloc->clearNeeded(Fd); // Always Clear Written Reg First
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVU_printOP(mVU, opCase, opName, 0); }
|
pass3 { mVU_printOP(mVU, opCase, opName, 0); }
|
||||||
}
|
}
|
||||||
|
@ -286,11 +277,9 @@ mVUop(mVU_ABS) {
|
||||||
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (!_Ft_) return;
|
if (!_Ft_) return;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, ((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
|
||||||
SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip);
|
SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ABS"); mVUlogFtFs(); }
|
pass3 { mVUlog("ABS"); mVUlogFtFs(); }
|
||||||
}
|
}
|
||||||
|
@ -299,9 +288,8 @@ mVUop(mVU_ABS) {
|
||||||
mVUop(mVU_OPMULA) {
|
mVUop(mVU_OPMULA) {
|
||||||
pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W);
|
|
||||||
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W);
|
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W);
|
||||||
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W);
|
||||||
|
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
|
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
|
||||||
|
@ -310,7 +298,6 @@ mVUop(mVU_OPMULA) {
|
||||||
|
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("OPMULA"); mVUlogACC(); mVUlogFt(); }
|
pass3 { mVUlog("OPMULA"); mVUlogACC(); mVUlogFt(); }
|
||||||
}
|
}
|
||||||
|
@ -319,7 +306,6 @@ mVUop(mVU_OPMULA) {
|
||||||
mVUop(mVU_OPMSUB) {
|
mVUop(mVU_OPMSUB) {
|
||||||
pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf);
|
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf);
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
int ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
|
int ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
|
||||||
|
@ -333,8 +319,6 @@ mVUop(mVU_OPMSUB) {
|
||||||
mVU->regAlloc->clearNeeded(ACC);
|
mVU->regAlloc->clearNeeded(ACC);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("OPMSUB"); mVUlogFd(); mVUlogFt(); }
|
pass3 { mVUlog("OPMSUB"); mVUlogFd(); mVUlogFt(); }
|
||||||
}
|
}
|
||||||
|
@ -344,8 +328,7 @@ void mVU_FTOIx(mP, uptr addr, const char* opName) {
|
||||||
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (!_Ft_) return;
|
if (!_Ft_) return;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, ((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
int t2 = mVU->regAlloc->allocReg();
|
int t2 = mVU->regAlloc->allocReg();
|
||||||
|
|
||||||
|
@ -363,7 +346,6 @@ void mVU_FTOIx(mP, uptr addr, const char* opName) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->clearNeeded(t2);
|
mVU->regAlloc->clearNeeded(t2);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog(opName); mVUlogFtFs(); }
|
pass3 { mVUlog(opName); mVUlogFtFs(); }
|
||||||
}
|
}
|
||||||
|
@ -373,15 +355,13 @@ void mVU_ITOFx(mP, uptr addr, const char* opName) {
|
||||||
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (!_Ft_) return;
|
if (!_Ft_) return;
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, ((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
|
||||||
|
|
||||||
SSE2_CVTDQ2PS_XMM_to_XMM(Fs, Fs);
|
SSE2_CVTDQ2PS_XMM_to_XMM(Fs, Fs);
|
||||||
if (addr) { SSE_MULPS_M128_to_XMM(Fs, addr); }
|
if (addr) { SSE_MULPS_M128_to_XMM(Fs, addr); }
|
||||||
//mVUclamp2(Fs, xmmT1, 15); // Clamp (not sure if this is needed)
|
//mVUclamp2(Fs, xmmT1, 15); // Clamp (not sure if this is needed)
|
||||||
|
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog(opName); mVUlogFtFs(); }
|
pass3 { mVUlog(opName); mVUlogFtFs(); }
|
||||||
}
|
}
|
||||||
|
@ -390,7 +370,6 @@ void mVU_ITOFx(mP, uptr addr, const char* opName) {
|
||||||
mVUop(mVU_CLIP) {
|
mVUop(mVU_CLIP) {
|
||||||
pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1);
|
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1);
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
|
@ -424,7 +403,6 @@ mVUop(mVU_CLIP) {
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("CLIP"); mVUlogCLIP(); }
|
pass3 { mVUlog("CLIP"); mVUlogCLIP(); }
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue