microVU: more work in progress stuff...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1542 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2009-07-18 13:57:21 +00:00
parent 5474df1479
commit 81009002bb
9 changed files with 201 additions and 263 deletions

View File

@@ -50,6 +50,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr, int vuIndex) {
mVU->prog.max = mMaxProg - 1;
mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64);
mVU->prog.progList = new int[mMaxProg];
+mVU->regAlloc = new microRegAlloc(mVU->regs);
mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
@@ -113,6 +114,7 @@ microVUt(void) mVUclose(mV) {
safe_aligned_free(mVU->prog.prog);
}
safe_delete_array(mVU->prog.progList);
+safe_delete(mVU->regAlloc);
}
// Clears Block Data in specified range

View File

@@ -150,7 +150,8 @@ struct microVU {
u32 progSize; // VU Micro Memory Size (in u32's)
u32 cacheSize; // VU Cache Size
microProgManager prog; // Micro Program Data
+microRegAlloc* regAlloc; // Reg Alloc Class
FILE* logFile; // Log File Pointer
VURegs* regs; // VU Regs Struct

View File

@@ -756,244 +756,3 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \
else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[0]); } \
}
//------------------------------------------------------------------
// Reg Alloc
//------------------------------------------------------------------
struct microXMM {
int reg; // VF Reg Number Stored
int xyzw; // xyzw to write back
int count; // Count of when first cached
bool isNeeded; // Is needed for current instruction
bool isTemp; // Is Temp Reg
};
#define xmmTotal 6 // Don't allocate PQ/ACC?
class microRegAlloc {
private:
microXMM xmmReg[xmmTotal];
VURegs* vuRegs;
int counter;
void clearReg(int reg) {
xmmReg[reg].reg = 0;
xmmReg[reg].count = 0;
xmmReg[reg].isNeeded = 0;
xmmReg[reg].isTemp = 1;
}
int findFreeRegRec(int startIdx) {
for (int i = startIdx; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded) {
if ((i+1) >= xmmTotal) return i;
int x = findFreeRegRec(i+1);
if (x == -1) return i;
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
}
}
return -1;
}
int findFreeReg() {
for (int i = 0; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
return i; // Reg is not needed and was a temp reg
}
}
int x = findFreeRegRec(0);
if (x < 0) { DevCon::Error("microVU Allocation Error!"); return 0; }
return x;
}
public:
microRegAlloc(VURegs* vuRegsPtr) {
vuRegs = vuRegsPtr;
reset();
}
void reset() {
for (int i = 0; i < xmmTotal; i++) {
clearReg(i);
}
counter = 0;
}
void writeBackReg(int reg) {
if (xmmReg[reg].reg && xmmReg[reg].xyzw) {
mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
for (int i = 0; i < xmmTotal; i++) {
if (i == reg) continue;
if (!xmmReg[i].isTemp && xmmReg[i].reg == xmmReg[reg].reg) {
clearReg(i); // Invalidate any Cached Regs
}
}
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg
xmmReg[reg].count = counter;
xmmReg[reg].xyzw = 0;
xmmReg[reg].isNeeded = 0;
xmmReg[reg].isTemp = 0;
return;
}
}
clearReg(reg); // Clear Written Back Reg
}
void clearNeeded(int reg) {
xmmReg[reg].isNeeded = 0;
}
int allocReg(int vfReg = -1, bool writeBack = 0, int xyzw = 0, int vfWriteBack = 0) {
counter++;
for (int i = 0; i < xmmTotal; i++) {
if ((vfReg >= 0) && (!xmmReg[i].isTemp) && (xmmReg[i].reg == vfReg)) {
if (writeBack) {
int z = findFreeReg();
writeBackReg(z);
if (xyzw == 8) SSE2_SHUFPD_XMM_to_XMM(z, i, 0);
else if (xyzw == 4) SSE2_SHUFPD_XMM_to_XMM(z, i, 1);
else if (xyzw == 2) SSE2_SHUFPD_XMM_to_XMM(z, i, 2);
else if (xyzw == 1) SSE2_SHUFPD_XMM_to_XMM(z, i, 3);
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
xmmReg[z].reg = vfWriteBack;
xmmReg[z].count = counter;
xmmReg[z].xyzw = xyzw;
xmmReg[z].isNeeded = 1;
xmmReg[z].isTemp = 1;
return z;
}
xmmReg[i].isNeeded = 1;
return i;
}
}
int x = findFreeReg();
writeBackReg(x);
if (vfReg >= 0) {
if (writeBack) {
mVUloadReg(x, (uptr)&vuRegs->VF[vfReg].UL[0], xyzw);
xmmReg[x].reg = vfWriteBack;
xmmReg[x].count = counter;
xmmReg[x].xyzw = xyzw;
xmmReg[x].isNeeded = 1;
xmmReg[x].isTemp = 1;
}
else {
SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfReg].UL[0]); // Load Reg from Mem
xmmReg[x].reg = vfReg;
xmmReg[x].count = counter;
xmmReg[x].xyzw = 0;
xmmReg[x].isNeeded = 1;
xmmReg[x].isTemp = 0;
}
}
else { // Is Temp Reg
xmmReg[x].reg = 0;
xmmReg[x].count = counter;
xmmReg[x].xyzw = 0;
xmmReg[x].isNeeded = 1;
xmmReg[x].isTemp = 1;
}
return x;
}
};
/*
struct microXMM {
int reg; // VF Reg Number Stored
int xyzw; // Current xyzw Order
int validXYZW; // Vectors that are valid
bool isNeeded; // Is needed for current instruction
bool isTemp; // Is Temp Reg
bool isWritten; // The reg has been written to
};
#define xmmTotal 7 // Don't allocate Last XMM Reg for PQ instances (will change this later)
class microRegAlloc {
private:
microXMM xmmReg[xmmTotal];
VURegs* vuRegs;
int vfStats[32];
int findFreeReg(int startIdx) {
for (int i = startIdx; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
return i; // Reg is not needed and was a temp reg
}
}
for (int i = startIdx; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded) {
if ((i+1) >= xmmTotal) return i;
int x = findFreeReg(i+1);
if (x == -1) return i;
return ((vfStats[xmmReg[i].reg] < vfStats[xmmReg[x].reg]) ? i : x);
}
}
return -1;
}
public:
microRegAlloc(VURegs* vuRegsPtr) {
vuRegs = vuRegsPtr;
reset();
}
void reset() {
for (int i = 0; i < xmmTotal; i++) {
xmmReg[i].reg = 0;
xmmReg[i].isNeeded = 0;
xmmReg[i].isTemp = 1;
}
}
void loadStats(int* vfRegData) {
for (int i = 0; i < 32; i++) {
vfStats[i] = vfRegData[i];
}
}
void changeXYZW(int reg, int newXYZW) {
if (xmmReg[reg].xyzw != newXYZW) {
int shuffleReg = 0;
int xyzw = xmmReg[reg].xyzw;
if (((xyzw >> 6) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (3<<0);
else if (((xyzw >> 4) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (2<<0);
else if (((xyzw >> 2) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (1<<0);
if (((xyzw >> 6) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (3<<2);
else if (((xyzw >> 4) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (2<<2);
else if (((xyzw >> 2) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (1<<2);
if (((xyzw >> 6) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (3<<4);
else if (((xyzw >> 4) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (2<<4);
else if (((xyzw >> 2) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (1<<4);
if (((xyzw >> 6) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (3<<6);
else if (((xyzw >> 4) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (2<<6);
else if (((xyzw >> 2) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (1<<6);
SSE2_PSHUFD_XMM_to_XMM(reg, reg, shuffleReg);
}
}
int writeBack(int reg) {
if (!xmmReg[reg].isTemp && xmmReg[reg].reg && xmmReg[reg].isWritten) {
changeXYZW(reg, 0xe4);
SSE_MOVAPS_XMM_to_M128(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0]); // Write Back Reg to Mem
}
}
int allocReg(int vfReg, bool cache, int xyzw, int vfWriteBack) {
for (int i = 0; i < xmmTotal; i++) {
if (!xmmReg[i].isTemp && xmmReg[i].reg == vfReg) {
if ((xmmReg[i].validXYZW != 0xf) && xmmReg[i].isWritten) {
// write back partial reg
//if (!forWriting) { get reg from mem }
}
xmmReg[i].isWritten |= (forWriting) ? 1 : 0;
xmmReg[i].isNeeded = 1;
return i;
}
}
int x = findFreeReg(0);
if (!xmmReg[x].isTemp && xmmReg[x].reg && xmmReg[x].isWritten) {
changeXYZW(x, 0xe4);
SSE_MOVAPS_XMM_to_M128(x, (uptr)&vuRegs->VF[xmmReg[x].reg].UL[0]); // Write Back Reg to Mem
}
if (vfReg != -1) {
SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[xmmReg[x].reg].UL[0], x); // Load Reg from Mem
xmmReg[x].isNeeded = 1;
xmmReg[x].isTemp = 0;
xmmReg[x].xyzw = 0xe4;
}
else { // Is Temp Reg
xmmReg[x].isNeeded = 1;
xmmReg[x].isTemp = 1;
}
return x;
}
};
*/

View File

@@ -69,8 +69,6 @@ void mVUdispatcherA(mV) {
SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT1);
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
-SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
-SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P].UL);
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q].UL);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ

View File

@@ -156,3 +156,142 @@ struct microIR {
u32 startPC; // Start PC for Cur Block
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
};
//------------------------------------------------------------------
// Reg Alloc
//------------------------------------------------------------------
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
void mVUloadReg(int reg, uptr offset, int xyzw);
struct microXMM {
int reg; // VF Reg Number Stored
int xyzw; // xyzw to write back
int count; // Count of when first cached
bool isNeeded; // Is needed for current instruction
bool isTemp; // Is Temp Reg
};
#define xmmTotal 6 // Don't allocate PQ/ACC?
class microRegAlloc {
private:
microXMM xmmReg[xmmTotal];
VURegs* vuRegs;
int counter;
void clearReg(int reg) {
xmmReg[reg].reg = 0;
xmmReg[reg].count = 0;
xmmReg[reg].isNeeded = 0;
xmmReg[reg].isTemp = 1;
}
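// Recursively picks the reg with the lowest 'count' (least-recently cached) among regs not flagged isNeeded; returns -1 if every reg from startIdx onward is needed.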
int findFreeRegRec(int startIdx) {
for (int i = startIdx; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded) {
if ((i+1) >= xmmTotal) return i;
int x = findFreeRegRec(i+1);
if (x == -1) return i;
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
}
}
return -1;
}
int findFreeReg() {
for (int i = 0; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
return i; // Reg is not needed and was a temp reg
}
}
int x = findFreeRegRec(0);
if (x < 0) { DevCon::Error("microVU Allocation Error!"); return 0; }
return x;
}
public:
microRegAlloc(VURegs* vuRegsPtr) {
vuRegs = vuRegsPtr;
reset();
}
void reset() {
for (int i = 0; i < xmmTotal; i++) {
clearReg(i);
}
counter = 0;
}
void writeBackReg(int reg) {
if (xmmReg[reg].reg && (xmmReg[reg].xyzw || (xmmReg[reg].reg >= 32))) {
if (xmmReg[reg].reg == 32) SSE_MOVAPS_XMM_to_M128((uptr)&vuRegs->ACC.UL[0], reg);
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
for (int i = 0; i < xmmTotal; i++) {
if (i == reg) continue;
if (!xmmReg[i].isTemp && xmmReg[i].reg == xmmReg[reg].reg) {
clearReg(i); // Invalidate any Cached Regs
}
}
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg
xmmReg[reg].count = counter;
xmmReg[reg].xyzw = 0;
xmmReg[reg].isNeeded = 0;
xmmReg[reg].isTemp = 0;
return;
}
}
clearReg(reg); // Clear Written Back Reg
}
void clearNeeded(int reg) {
xmmReg[reg].isNeeded = 0;
}
int allocReg(int vfReg = -1, bool writeBack = 0, int xyzw = 0, int vfWriteBack = 0) {
counter++;
for (int i = 0; i < xmmTotal; i++) {
if ((vfReg >= 0) && (!xmmReg[i].isTemp) && (xmmReg[i].reg == vfReg)) {
if (writeBack) {
int z = findFreeReg();
writeBackReg(z);
if (xyzw == 8) SSE2_PSHUFD_XMM_to_XMM(z, i, 0);
else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
xmmReg[z].reg = vfWriteBack;
xmmReg[z].count = counter;
xmmReg[z].xyzw = xyzw;
xmmReg[z].isNeeded = 1;
xmmReg[z].isTemp = 1;
return z;
}
xmmReg[i].count = counter;
xmmReg[i].isNeeded = 1;
return i;
}
}
int x = findFreeReg();
writeBackReg(x);
if (vfReg >= 0) {
if (writeBack) {
mVUloadReg(x, (uptr)&vuRegs->VF[vfReg].UL[0], xyzw);
xmmReg[x].reg = vfWriteBack;
xmmReg[x].count = counter;
xmmReg[x].xyzw = xyzw;
xmmReg[x].isNeeded = 1;
xmmReg[x].isTemp = 1;
}
else {
if (vfReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]); // Load ACC from Mem
else SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfReg].UL[0]); // Load Reg from Mem
xmmReg[x].reg = vfReg;
xmmReg[x].count = counter;
xmmReg[x].xyzw = 0;
xmmReg[x].isNeeded = 1;
xmmReg[x].isTemp = 0;
}
}
else { // Is Temp Reg
xmmReg[x].reg = 0;
xmmReg[x].count = counter;
xmmReg[x].xyzw = 0;
xmmReg[x].isNeeded = 1;
xmmReg[x].isTemp = 1;
}
return x;
}
};
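// Usage sketch (illustrative only; mirrors the call pattern of mVU_FMACa later in this commit,
// and assumes the usual _Fs_/_Ft_/_Fd_/_X_Y_Z_W decode macros are in scope):
//   int Ft = mVU->regAlloc->allocReg(_Ft_);                    // cache/load Ft into an xmm reg
//   int Fs = mVU->regAlloc->allocReg(_Fs_, 1, _X_Y_Z_W, _Fd_); // load Fs; result gets written back as Fd
//   SSE_ADDPS_XMM_to_XMM(Fs, Ft);                              // emit the operation into Fs
//   mVU->regAlloc->clearNeeded(Ft);                            // Ft is done for this instruction
//   mVU->regAlloc->writeBackReg(Fs);                           // flush (or keep cached) the Fd result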

View File

@@ -60,9 +60,9 @@ mVUop(mVU_DIV) {
MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide (only when not 0/0)
x86SetJ8(bjmp);
-SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
+SSE_XORPS_XMM_to_XMM (xmmFs, xmmFt);
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
-SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // If division by zero, then xmmFs = +/- fmax
+SSE_ORPS_M128_to_XMM (xmmFs, (uptr)mVU_maxvals); // If division by zero, then xmmFs = +/- fmax
djmp = JMP8(0);
x86SetJ8(cjmp);
@@ -87,7 +87,7 @@ mVUop(mVU_SQRT) {
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
testNeg(xmmFt, gprT1, ajmp); // Check for negative sqrt
-if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
+if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(xmmFt, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFt);
@@ -119,7 +119,7 @@ mVUop(mVU_RSQRT) {
x86SetJ8(cjmp);
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
-SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // xmmFs = +/-Max
+SSE_ORPS_M128_to_XMM (xmmFs, (uptr)mVU_maxvals); // xmmFs = +/-Max
djmp = JMP8(0);
x86SetJ8(ajmp);

View File

@@ -118,10 +118,10 @@ declareAllVariables
#define xmmT1 0 // Temp Reg
#define xmmFs 1 // Holds the Value of Fs (writes back result Fd)
#define xmmFt 2 // Holds the Value of Ft
-#define xmmACC 3 // Holds ACC
-#define xmmMax 4 // Holds mVU_maxvals
-#define xmmMin 5 // Holds mVU_minvals
-#define xmmT2 6 // Temp Reg?
+#define xmmT2 3 // Temp Reg?
+#define xmmT3 4 // Temp Reg?
+#define xmmT4 5 // Temp Reg?
+#define xmmACC 6 // Holds ACC
#define xmmPQ 7 // Holds the Value and Backup Values of P and Q regs
#define gprT1 0 // Temp Reg
@@ -141,7 +141,7 @@ declareAllVariables
// Recursive Inline
#ifndef __LINUX__
-#define __recInline __forceinline
+#define __recInline __releaseinline
#else
#define __recInline inline
#endif

View File

@@ -26,12 +26,12 @@
void mVUclamp1(int reg, int regT1, int xyzw) {
switch (xyzw) {
case 1: case 2: case 4: case 8:
-SSE_MINSS_XMM_to_XMM(reg, xmmMax);
-SSE_MAXSS_XMM_to_XMM(reg, xmmMin);
+SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
+SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals);
break;
default:
-SSE_MINPS_XMM_to_XMM(reg, xmmMax);
-SSE_MAXPS_XMM_to_XMM(reg, xmmMin);
+SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals);
+SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals);
break;
}
}
@@ -43,15 +43,15 @@ void mVUclamp2(int reg, int regT1, int xyzw) {
case 1: case 2: case 4: case 8:
SSE_MOVSS_XMM_to_XMM(regT1, reg);
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit);
-SSE_MINSS_XMM_to_XMM(reg, xmmMax);
-SSE_MAXSS_XMM_to_XMM(reg, xmmMin);
+SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
+SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals);
SSE_ORPS_XMM_to_XMM(reg, regT1);
break;
default:
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit);
-SSE_MINPS_XMM_to_XMM(reg, xmmMax);
-SSE_MAXPS_XMM_to_XMM(reg, xmmMin);
+SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals);
+SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals);
SSE_ORPS_XMM_to_XMM(reg, regT1);
break;
}
@@ -294,8 +294,6 @@ microVUt(void) mVUbackupRegs(mV) {
microVUt(void) mVUrestoreRegs(mV) {
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->xmmPQb[0]);
-SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
-SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
MOV32ItoR(gprR, Roffset); // Restore gprR
}

View File

@@ -440,6 +440,47 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
#define opCase1 if (opCase == 1) // Normal
#define opCase2 if (opCase == 2) // BC Opcodes
#define opCase3 if (opCase == 3) // I Opcodes
#define opCase4 if (opCase == 4) // Q Opcodes
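// shuffleXYZW(mask) below gives a PSHUFD immediate that swaps the single selected component (X=8, Y=4, Z=2, W=1) into the low (x) slot; applying the same shuffle twice restores the original order.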
#define shuffleXYZW(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4))))
static void (*SSE_PS[]) (x86SSERegType, x86SSERegType) = {
SSE_ADDPS_XMM_to_XMM, // 0
SSE_SUBPS_XMM_to_XMM, // 1
SSE_MULPS_XMM_to_XMM, // 2
SSE_MAXPS_XMM_to_XMM, // 3
SSE_MINPS_XMM_to_XMM // 4
};
static void (*SSE_SS[]) (x86SSERegType, x86SSERegType) = {
SSE_ADDSS_XMM_to_XMM, // 0
SSE_SUBSS_XMM_to_XMM, // 1
SSE_MULSS_XMM_to_XMM, // 2
SSE_MAXSS_XMM_to_XMM, // 3
SSE_MINSS_XMM_to_XMM // 4
};
void mVU_FMACa(microVU* mVU, int opCase, int opType, bool updateFlags) {
int Fs, Ft;
opCase1 { Ft = mVU->regAlloc->allocReg(_Ft_); if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleXYZW(_X_Y_Z_W)); } }
opCase2 { Ft = mVU->regAlloc->allocReg(_Ft_); mVU->regAlloc->clearNeeded(Ft); Ft = mVU->regAlloc->allocReg(); }
opCase3 { Ft = mVU->regAlloc->allocReg(); getIreg(Ft, 1); }
opCase4 { Ft = mVU->regAlloc->allocReg(); getQreg(Ft); }
Fs = mVU->regAlloc->allocReg(_Fs_, 1, _X_Y_Z_W, _Fd_);
if (_XYZW_SS) SSE_SS[opType](Fs, Ft);
else SSE_PS[opType](Fs, Ft);
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleXYZW(_X_Y_Z_W)); } }
mVU->regAlloc->clearNeeded(Ft);
mVU->regAlloc->writeBackReg(Fs);
}
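// Hypothetical dispatch example (not shown in this commit): an upper opcode could route through mVU_FMACa as
//   mVUop(mVU_ADD) { pass2 { mVU_FMACa(mVU, 1, 0, 1); } } // opCase 1 = normal form, opType 0 = ADD (see the SSE_PS/SSE_SS tables above)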
//------------------------------------------------------------------
// Micro VU Micromode Upper instructions
//------------------------------------------------------------------