mirror of https://github.com/PCSX2/pcsx2.git
microVU: more work in progress stuff...
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1542 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
5474df1479
commit
81009002bb
|
@ -50,6 +50,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr, int vuIndex) {
|
|||
mVU->prog.max = mMaxProg - 1;
|
||||
mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64);
|
||||
mVU->prog.progList = new int[mMaxProg];
|
||||
mVU->regAlloc = new microRegAlloc(mVU->regs);
|
||||
mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
|
||||
|
||||
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
|
||||
|
@ -113,6 +114,7 @@ microVUt(void) mVUclose(mV) {
|
|||
safe_aligned_free(mVU->prog.prog);
|
||||
}
|
||||
safe_delete_array(mVU->prog.progList);
|
||||
safe_delete(mVU->regAlloc);
|
||||
}
|
||||
|
||||
// Clears Block Data in specified range
|
||||
|
|
|
@ -150,7 +150,8 @@ struct microVU {
|
|||
u32 progSize; // VU Micro Memory Size (in u32's)
|
||||
u32 cacheSize; // VU Cache Size
|
||||
|
||||
microProgManager prog; // Micro Program Data
|
||||
microProgManager prog; // Micro Program Data
|
||||
microRegAlloc* regAlloc; // Reg Alloc Class
|
||||
|
||||
FILE* logFile; // Log File Pointer
|
||||
VURegs* regs; // VU Regs Struct
|
||||
|
|
|
@ -756,244 +756,3 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
|
|||
if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \
|
||||
else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[0]); } \
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Reg Alloc
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Bookkeeping record describing what one host XMM register currently holds.
struct microXMM {
	int  reg;      // Number of the VF reg stored in this XMM
	int  xyzw;     // Vector mask to use when the value is written back
	int  count;    // Allocation-counter value recorded when the reg was cached
	bool isNeeded; // True while the current instruction still needs this XMM
	bool isTemp;   // True when the XMM is a temp rather than a cached VF reg
};
|
||||
|
||||
#define xmmTotal 6 // Don't allocate PQ/ACC?
|
||||
class microRegAlloc {
|
||||
private:
|
||||
microXMM xmmReg[xmmTotal];
|
||||
VURegs* vuRegs;
|
||||
int counter;
|
||||
void clearReg(int reg) {
|
||||
xmmReg[reg].reg = 0;
|
||||
xmmReg[reg].count = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
xmmReg[reg].isTemp = 1;
|
||||
}
|
||||
int findFreeRegRec(int startIdx) {
|
||||
for (int i = startIdx; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded) {
|
||||
if ((i+1) >= xmmTotal) return i;
|
||||
int x = findFreeRegRec(i+1);
|
||||
if (x == -1) return i;
|
||||
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
int findFreeReg() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
|
||||
return i; // Reg is not needed and was a temp reg
|
||||
}
|
||||
}
|
||||
int x = findFreeRegRec(0);
|
||||
if (x < 0) { DevCon::Error("microVU Allocation Error!"); return 0; }
|
||||
return x;
|
||||
}
|
||||
|
||||
public:
|
||||
microRegAlloc(VURegs* vuRegsPtr) {
|
||||
vuRegs = vuRegsPtr;
|
||||
reset();
|
||||
}
|
||||
void reset() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
clearReg(i);
|
||||
}
|
||||
counter = 0;
|
||||
}
|
||||
void writeBackReg(int reg) {
|
||||
if (xmmReg[reg].reg && xmmReg[reg].xyzw) {
|
||||
mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (i = reg) continue;
|
||||
if (!xmmReg[i].isTemp && xmmReg[i].reg == xmmReg[reg].reg) {
|
||||
clearReg(i); // Invalidate any Cached Regs
|
||||
}
|
||||
}
|
||||
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg
|
||||
xmmReg[reg].count = counter;
|
||||
xmmReg[reg].xyzw = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
xmmReg[reg].isTemp = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
clearReg(reg); // Clear Written Back Reg
|
||||
}
|
||||
void clearNeeded(int reg) {
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
}
|
||||
int allocReg(int vfReg = -1, bool writeBack = 0, int xyzw = 0, int vfWriteBack = 0) {
|
||||
counter++;
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if ((vfReg >= 0) && (!xmmReg[i].isTemp) && (xmmReg[i].reg == vfReg)) {
|
||||
if (writeBack) {
|
||||
int z = findFreeReg();
|
||||
writeBackReg(z);
|
||||
if (xyzw == 8) SSE2_SHUFPD_XMM_to_XMM(z, i, 0);
|
||||
else if (xyzw == 4) SSE2_SHUFPD_XMM_to_XMM(z, i, 1);
|
||||
else if (xyzw == 2) SSE2_SHUFPD_XMM_to_XMM(z, i, 2);
|
||||
else if (xyzw == 1) SSE2_SHUFPD_XMM_to_XMM(z, i, 3);
|
||||
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
|
||||
xmmReg[z].reg = vfWriteBack;
|
||||
xmmReg[z].count = counter;
|
||||
xmmReg[z].xyzw = xyzw;
|
||||
xmmReg[z].isNeeded = 1;
|
||||
xmmReg[z].isTemp = 1;
|
||||
return z;
|
||||
}
|
||||
xmmReg[i].isNeeded = 1;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
int x = findFreeReg();
|
||||
writeBackReg(x);
|
||||
if (vfReg >= 0) {
|
||||
if (writeBack) {
|
||||
mVUloadReg(x, (uptr)&vuRegs->VF[vfReg].UL[0], xyzw);
|
||||
xmmReg[x].reg = vfWriteBack;
|
||||
xmmReg[x].count = counter;
|
||||
xmmReg[x].xyzw = xyzw;
|
||||
xmmReg[x].isNeeded = 1;
|
||||
xmmReg[x].isTemp = 1;
|
||||
}
|
||||
else {
|
||||
SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[vfReg].UL[0], x);
|
||||
xmmReg[x].reg = vfReg;
|
||||
xmmReg[x].count = counter;
|
||||
xmmReg[x].xyzw = 0;
|
||||
xmmReg[x].isNeeded = 1;
|
||||
xmmReg[x].isTemp = 0;
|
||||
}
|
||||
}
|
||||
else { // Is Temp Reg
|
||||
xmmReg[x].reg = 0;
|
||||
xmmReg[x].count = counter;
|
||||
xmmReg[x].xyzw = 0;
|
||||
xmmReg[x].isNeeded = 1;
|
||||
xmmReg[x].isTemp = 1;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
struct microXMM {
|
||||
int reg; // VF Reg Number Stored
|
||||
int xyzw; // Current xyzw Order
|
||||
int validXYZW; // Vectors that are valid
|
||||
bool isNeeded; // Is needed for current instruction
|
||||
bool isTemp; // Is Temp Reg
|
||||
bool isWritten; // The reg has been written to
|
||||
};
|
||||
|
||||
#define xmmTotal 7 // Don't allocate Last XMM Reg for PQ instances (will change this later)
|
||||
class microRegAlloc {
|
||||
private:
|
||||
microXMM xmmReg[xmmTotal];
|
||||
VURegs* vuRegs;
|
||||
int vfStats[32];
|
||||
int findFreeReg(int startIdx) {
|
||||
for (int i = startIdx; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
|
||||
return i; // Reg is not needed and was a temp reg
|
||||
}
|
||||
}
|
||||
for (int i = startIdx; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded) {
|
||||
if ((i+1) >= xmmTotal) return i;
|
||||
int x = findFreeReg(i+1);
|
||||
if (x == -1) return i;
|
||||
return ((vfStats[xmmReg[i].reg] < vfStats[xmmReg[x].reg]) ? i : x);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public:
|
||||
microRegAlloc(VURegs* vuRegsPtr) {
|
||||
vuRegs = vuRegsPtr;
|
||||
reset();
|
||||
}
|
||||
void reset() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
xmmReg[i].reg = 0;
|
||||
xmmReg[i].isNeeded = 0;
|
||||
xmmReg[i].isTemp = 1;
|
||||
}
|
||||
}
|
||||
void loadStats(int* vfRegData) {
|
||||
for (int i = 0; i < 32; i++) {
|
||||
vfStats[i] = vfRegData[i];
|
||||
}
|
||||
}
|
||||
void changeXYZW(int reg, int newXYZW) {
|
||||
if (xmmReg[reg].xyzw != newXYZW) {
|
||||
int shuffleReg = 0;
|
||||
int xyzw = xmmReg[reg].xyzw;
|
||||
if (((xyzw >> 6) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (3<<0);
|
||||
else if (((xyzw >> 4) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (2<<0);
|
||||
else if (((xyzw >> 2) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (1<<0);
|
||||
if (((xyzw >> 6) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (3<<2);
|
||||
else if (((xyzw >> 4) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (2<<2);
|
||||
else if (((xyzw >> 2) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (1<<2);
|
||||
if (((xyzw >> 6) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (3<<4);
|
||||
else if (((xyzw >> 4) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (2<<4);
|
||||
else if (((xyzw >> 2) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (1<<4);
|
||||
if (((xyzw >> 6) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (3<<6);
|
||||
else if (((xyzw >> 4) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (2<<6);
|
||||
else if (((xyzw >> 2) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (1<<6);
|
||||
SSE2_PSHUFD_XMM_to_XMM(reg, reg, shuffleReg);
|
||||
}
|
||||
}
|
||||
int writeBack(int reg) {
|
||||
if (!xmmReg[reg].isTemp && xmmReg[reg].reg && xmmReg[reg].isWritten) {
|
||||
changeXYZW(reg, 0xe4);
|
||||
SSE_MOVAPS_XMM_to_M128(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0]); // Write Back Reg to Mem
|
||||
}
|
||||
}
|
||||
int allocReg(int vfReg, bool cache, int xyzw, int vfWriteBack) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isTemp && xmmReg[i].reg == vfReg) {
|
||||
if ((xmmReg[i].validXYZW != 0xf) && xmmReg[i].isWritten) {
|
||||
// write back partial reg
|
||||
//if (!forWriting) { get reg from mem }
|
||||
}
|
||||
xmmReg[i].isWritten |= (forWriting) ? 1 : 0;
|
||||
xmmReg[i].isNeeded = 1;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
int x = findFreeReg(0);
|
||||
if (!xmmReg[x].isTemp && xmmReg[x].reg && xmmReg[x].isWritten) {
|
||||
changeXYZW(x, 0xe4);
|
||||
SSE_MOVAPS_XMM_to_M128(x, (uptr)&vuRegs->VF[xmmReg[x].reg].UL[0]); // Write Back Reg to Mem
|
||||
}
|
||||
if (vfReg != -1) {
|
||||
SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[xmmReg[x].reg].UL[0], x); // Load Reg from Mem
|
||||
xmmReg[x].isNeeded = 1;
|
||||
xmmReg[x].isTemp = 0;
|
||||
xmmReg[x].xyzw = 0xe4;
|
||||
}
|
||||
else { // Is Temp Reg
|
||||
xmmReg[x].isNeeded = 1;
|
||||
xmmReg[x].isTemp = 1;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
*/
|
||||
|
|
|
@ -69,8 +69,6 @@ void mVUdispatcherA(mV) {
|
|||
SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT1);
|
||||
|
||||
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P].UL);
|
||||
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q].UL);
|
||||
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
|
||||
|
|
|
@ -156,3 +156,142 @@ struct microIR {
|
|||
u32 startPC; // Start PC for Cur Block
|
||||
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Reg Alloc
|
||||
//------------------------------------------------------------------
|
||||
|
||||
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
|
||||
void mVUloadReg(int reg, uptr offset, int xyzw);
|
||||
|
||||
// Bookkeeping record describing what one host XMM register currently holds.
struct microXMM {
	int  reg;      // Number of the VF reg stored in this XMM
	int  xyzw;     // Vector mask to use when the value is written back
	int  count;    // Allocation-counter value recorded when the reg was cached
	bool isNeeded; // True while the current instruction still needs this XMM
	bool isTemp;   // True when the XMM is a temp rather than a cached VF reg
};
|
||||
|
||||
#define xmmTotal 6 // Don't allocate PQ/ACC?
|
||||
class microRegAlloc {
|
||||
private:
|
||||
microXMM xmmReg[xmmTotal];
|
||||
VURegs* vuRegs;
|
||||
int counter;
|
||||
void clearReg(int reg) {
|
||||
xmmReg[reg].reg = 0;
|
||||
xmmReg[reg].count = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
xmmReg[reg].isTemp = 1;
|
||||
}
|
||||
int findFreeRegRec(int startIdx) {
|
||||
for (int i = startIdx; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded) {
|
||||
if ((i+1) >= xmmTotal) return i;
|
||||
int x = findFreeRegRec(i+1);
|
||||
if (x == -1) return i;
|
||||
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
int findFreeReg() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
|
||||
return i; // Reg is not needed and was a temp reg
|
||||
}
|
||||
}
|
||||
int x = findFreeRegRec(0);
|
||||
if (x < 0) { DevCon::Error("microVU Allocation Error!"); return 0; }
|
||||
return x;
|
||||
}
|
||||
|
||||
public:
|
||||
microRegAlloc(VURegs* vuRegsPtr) {
|
||||
vuRegs = vuRegsPtr;
|
||||
reset();
|
||||
}
|
||||
void reset() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
clearReg(i);
|
||||
}
|
||||
counter = 0;
|
||||
}
|
||||
void writeBackReg(int reg) {
|
||||
if (xmmReg[reg].reg && (xmmReg[reg].xyzw || (xmmReg[reg].reg >= 32))) {
|
||||
if (xmmReg[reg].reg == 32) SSE_MOVAPS_XMM_to_M128((uptr)&vuRegs->ACC.UL[0], reg);
|
||||
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (i = reg) continue;
|
||||
if (!xmmReg[i].isTemp && xmmReg[i].reg == xmmReg[reg].reg) {
|
||||
clearReg(i); // Invalidate any Cached Regs
|
||||
}
|
||||
}
|
||||
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg
|
||||
xmmReg[reg].count = counter;
|
||||
xmmReg[reg].xyzw = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
xmmReg[reg].isTemp = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
clearReg(reg); // Clear Written Back Reg
|
||||
}
|
||||
void clearNeeded(int reg) {
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
}
|
||||
int allocReg(int vfReg = -1, bool writeBack = 0, int xyzw = 0, int vfWriteBack = 0) {
|
||||
counter++;
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if ((vfReg >= 0) && (!xmmReg[i].isTemp) && (xmmReg[i].reg == vfReg)) {
|
||||
if (writeBack) {
|
||||
int z = findFreeReg();
|
||||
writeBackReg(z);
|
||||
if (xyzw == 8) SSE2_PSHUFD_XMM_to_XMM(z, i, 0);
|
||||
else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
|
||||
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
|
||||
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
|
||||
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
|
||||
xmmReg[z].reg = vfWriteBack;
|
||||
xmmReg[z].count = counter;
|
||||
xmmReg[z].xyzw = xyzw;
|
||||
xmmReg[z].isNeeded = 1;
|
||||
xmmReg[z].isTemp = 1;
|
||||
return z;
|
||||
}
|
||||
xmmReg[i].count = counter;
|
||||
xmmReg[i].isNeeded = 1;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
int x = findFreeReg();
|
||||
writeBackReg(x);
|
||||
if (vfReg >= 0) {
|
||||
if (writeBack) {
|
||||
mVUloadReg(x, (uptr)&vuRegs->VF[vfReg].UL[0], xyzw);
|
||||
xmmReg[x].reg = vfWriteBack;
|
||||
xmmReg[x].count = counter;
|
||||
xmmReg[x].xyzw = xyzw;
|
||||
xmmReg[x].isNeeded = 1;
|
||||
xmmReg[x].isTemp = 1;
|
||||
}
|
||||
else {
|
||||
if (vfReg == 32) SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->ACC.UL[0], x);
|
||||
else SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[vfReg].UL[0], x);
|
||||
xmmReg[x].reg = vfReg;
|
||||
xmmReg[x].count = counter;
|
||||
xmmReg[x].xyzw = 0;
|
||||
xmmReg[x].isNeeded = 1;
|
||||
xmmReg[x].isTemp = 0;
|
||||
}
|
||||
}
|
||||
else { // Is Temp Reg
|
||||
xmmReg[x].reg = 0;
|
||||
xmmReg[x].count = counter;
|
||||
xmmReg[x].xyzw = 0;
|
||||
xmmReg[x].isNeeded = 1;
|
||||
xmmReg[x].isTemp = 1;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -60,9 +60,9 @@ mVUop(mVU_DIV) {
|
|||
MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide (only when not 0/0)
|
||||
x86SetJ8(bjmp);
|
||||
|
||||
SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
|
||||
SSE_XORPS_XMM_to_XMM (xmmFs, xmmFt);
|
||||
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
|
||||
SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // If division by zero, then xmmFs = +/- fmax
|
||||
SSE_ORPS_M128_to_XMM (xmmFs, (uptr)mVU_maxvals); // If division by zero, then xmmFs = +/- fmax
|
||||
|
||||
djmp = JMP8(0);
|
||||
x86SetJ8(cjmp);
|
||||
|
@ -87,7 +87,7 @@ mVUop(mVU_SQRT) {
|
|||
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
|
||||
testNeg(xmmFt, gprT1, ajmp); // Check for negative sqrt
|
||||
|
||||
if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
|
||||
if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(xmmFt, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
|
||||
SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
|
||||
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
|
||||
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFt);
|
||||
|
@ -119,7 +119,7 @@ mVUop(mVU_RSQRT) {
|
|||
x86SetJ8(cjmp);
|
||||
|
||||
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
|
||||
SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // xmmFs = +/-Max
|
||||
SSE_ORPS_M128_to_XMM (xmmFs, (uptr)mVU_maxvals); // xmmFs = +/-Max
|
||||
|
||||
djmp = JMP8(0);
|
||||
x86SetJ8(ajmp);
|
||||
|
|
|
@ -118,10 +118,10 @@ declareAllVariables
|
|||
#define xmmT1 0 // Temp Reg
|
||||
#define xmmFs 1 // Holds the Value of Fs (writes back result Fd)
|
||||
#define xmmFt 2 // Holds the Value of Ft
|
||||
#define xmmACC 3 // Holds ACC
|
||||
#define xmmMax 4 // Holds mVU_maxvals
|
||||
#define xmmMin 5 // Holds mVU_minvals
|
||||
#define xmmT2 6 // Temp Reg?
|
||||
#define xmmT2 3 // Temp Reg?
|
||||
#define xmmT3 4 // Temp Reg?
|
||||
#define xmmT4 5 // Temp Reg?
|
||||
#define xmmACC 6 // Holds ACC
|
||||
#define xmmPQ 7 // Holds the Value and Backup Values of P and Q regs
|
||||
|
||||
#define gprT1 0 // Temp Reg
|
||||
|
@ -141,7 +141,7 @@ declareAllVariables
|
|||
|
||||
// Recursive Inline
|
||||
#ifndef __LINUX__
|
||||
#define __recInline __forceinline
|
||||
#define __recInline __releaseinline
|
||||
#else
|
||||
#define __recInline inline
|
||||
#endif
|
||||
|
|
|
@ -26,12 +26,12 @@
|
|||
void mVUclamp1(int reg, int regT1, int xyzw) {
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MINSS_XMM_to_XMM(reg, xmmMax);
|
||||
SSE_MAXSS_XMM_to_XMM(reg, xmmMin);
|
||||
SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
|
||||
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals);
|
||||
break;
|
||||
default:
|
||||
SSE_MINPS_XMM_to_XMM(reg, xmmMax);
|
||||
SSE_MAXPS_XMM_to_XMM(reg, xmmMin);
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -43,15 +43,15 @@ void mVUclamp2(int reg, int regT1, int xyzw) {
|
|||
case 1: case 2: case 4: case 8:
|
||||
SSE_MOVSS_XMM_to_XMM(regT1, reg);
|
||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit);
|
||||
SSE_MINSS_XMM_to_XMM(reg, xmmMax);
|
||||
SSE_MAXSS_XMM_to_XMM(reg, xmmMin);
|
||||
SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
|
||||
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals);
|
||||
SSE_ORPS_XMM_to_XMM(reg, regT1);
|
||||
break;
|
||||
default:
|
||||
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
|
||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit);
|
||||
SSE_MINPS_XMM_to_XMM(reg, xmmMax);
|
||||
SSE_MAXPS_XMM_to_XMM(reg, xmmMin);
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals);
|
||||
SSE_ORPS_XMM_to_XMM(reg, regT1);
|
||||
break;
|
||||
}
|
||||
|
@ -294,8 +294,6 @@ microVUt(void) mVUbackupRegs(mV) {
|
|||
// Emits the instructions that reload the fixed-purpose registers: ACC from the VU
// register file, PQ from mVU->xmmPQb, the max/min value vectors, and gprR.
microVUt(void) mVUrestoreRegs(mV) {
	// Vector registers, reloaded from memory in this order.
	SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
	SSE_MOVAPS_M128_to_XMM(xmmPQ,  (uptr)&mVU->xmmPQb[0]);
	SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
	SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);

	// gprR is reloaded with the immediate Roffset.
	MOV32ItoR(gprR, Roffset);
}
|
||||
|
||||
|
|
|
@ -440,6 +440,47 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
|
|||
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
||||
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
||||
|
||||
// Statement prefixes that run the following block only for one opcode form.
// They expect an integer named 'opCase' to be in scope at the point of use.
#define opCase1 if (opCase == 1) // Normal
#define opCase2 if (opCase == 2) // BC Opcodes
#define opCase3 if (opCase == 3) // I Opcodes
#define opCase4 if (opCase == 4) // Q Opcodes

// Maps a single-lane xyzw mask to a PSHUFD immediate:
//   1 -> 0x27 (swap lanes 0 and 3), 2 -> 0xc6 (swap lanes 0 and 2),
//   4 -> 0xe1 (swap lanes 0 and 1), anything else -> 0xe4 (identity).
// Each swap is its own inverse, so applying the same shuffle twice restores the reg.
// The argument is parenthesized so that expressions such as (a | b) compare as a
// whole instead of binding to '==' first.
#define shuffleXYZW(x) (((x)==1)?(0x27):(((x)==2)?(0xc6):(((x)==4)?(0xe1):(0xe4))))
|
||||
|
||||
// Packed-single (PS) XMM-to-XMM emitters, indexed by opType.
static void (*SSE_PS[]) (x86SSERegType, x86SSERegType) = {
	SSE_ADDPS_XMM_to_XMM, // [0] add
	SSE_SUBPS_XMM_to_XMM, // [1] subtract
	SSE_MULPS_XMM_to_XMM, // [2] multiply
	SSE_MAXPS_XMM_to_XMM, // [3] maximum
	SSE_MINPS_XMM_to_XMM  // [4] minimum
};
|
||||
|
||||
// Scalar-single (SS) XMM-to-XMM emitters, indexed by opType.
static void (*SSE_SS[]) (x86SSERegType, x86SSERegType) = {
	SSE_ADDSS_XMM_to_XMM, // [0] add
	SSE_SUBSS_XMM_to_XMM, // [1] subtract
	SSE_MULSS_XMM_to_XMM, // [2] multiply
	SSE_MAXSS_XMM_to_XMM, // [3] maximum
	SSE_MINSS_XMM_to_XMM  // [4] minimum
};
|
||||
|
||||
// Emits one FMAC-style operation using the register allocator.
//   opCase      - operand form: 1 = Normal, 2 = BC, 3 = I, 4 = Q.
//   opType      - index into the SSE_SS / SSE_PS emitter tables.
//   updateFlags - not used by this function.
// The Ft operand is obtained according to opCase, Fs is allocated as a write-back
// register targeting _Fd_, the operation is emitted as Fs = Fs op Ft, and Fs is
// then written back.
void mVU_FMACa(microVU* mVU, int opCase, int opType, bool updateFlags) {
	int ftReg;
	int fsReg;

	switch (opCase) {
		case 1: // Normal: use the Ft reg; for single-lane ops on a lane other than
		        // mask 8, shuffle the wanted lane into place first.
			ftReg = mVU->regAlloc->allocReg(_Ft_);
			if (_XYZW_SS && _X_Y_Z_W != 8) {
				SSE2_PSHUFD_XMM_to_XMM(ftReg, ftReg, shuffleXYZW(_X_Y_Z_W));
			}
			break;
		case 2: // BC Opcodes: allocate Ft, release it, then take a temp reg.
			ftReg = mVU->regAlloc->allocReg(_Ft_);
			mVU->regAlloc->clearNeeded(ftReg);
			ftReg = mVU->regAlloc->allocReg();
			break;
		case 3: // I Opcodes: temp reg filled by getIreg.
			ftReg = mVU->regAlloc->allocReg();
			getIreg(ftReg, 1);
			break;
		case 4: // Q Opcodes: temp reg filled by getQreg.
			ftReg = mVU->regAlloc->allocReg();
			getQreg(ftReg);
			break;
		default:
			break;
	}

	fsReg = mVU->regAlloc->allocReg(_Fs_, 1, _X_Y_Z_W, _Fd_);

	if (_XYZW_SS) {
		SSE_SS[opType](fsReg, ftReg);
	}
	else {
		SSE_PS[opType](fsReg, ftReg);
	}

	// Undo the lane shuffle applied to Ft above (the shuffle is its own inverse).
	if (opCase == 1) {
		if (_XYZW_SS && _X_Y_Z_W != 8) {
			SSE2_PSHUFD_XMM_to_XMM(ftReg, ftReg, shuffleXYZW(_X_Y_Z_W));
		}
	}

	mVU->regAlloc->clearNeeded(ftReg);
	mVU->regAlloc->writeBackReg(fsReg);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU Micromode Upper instructions
|
||||
//------------------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue