mirror of https://github.com/PCSX2/pcsx2.git
microVU: more work in progress stuff...
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1542 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
5474df1479
commit
81009002bb
|
@ -50,6 +50,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr, int vuIndex) {
|
||||||
mVU->prog.max = mMaxProg - 1;
|
mVU->prog.max = mMaxProg - 1;
|
||||||
mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64);
|
mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64);
|
||||||
mVU->prog.progList = new int[mMaxProg];
|
mVU->prog.progList = new int[mMaxProg];
|
||||||
|
mVU->regAlloc = new microRegAlloc(mVU->regs);
|
||||||
mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
|
mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
|
||||||
|
|
||||||
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
|
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
|
||||||
|
@ -113,6 +114,7 @@ microVUt(void) mVUclose(mV) {
|
||||||
safe_aligned_free(mVU->prog.prog);
|
safe_aligned_free(mVU->prog.prog);
|
||||||
}
|
}
|
||||||
safe_delete_array(mVU->prog.progList);
|
safe_delete_array(mVU->prog.progList);
|
||||||
|
safe_delete(mVU->regAlloc);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clears Block Data in specified range
|
// Clears Block Data in specified range
|
||||||
|
|
|
@ -150,7 +150,8 @@ struct microVU {
|
||||||
u32 progSize; // VU Micro Memory Size (in u32's)
|
u32 progSize; // VU Micro Memory Size (in u32's)
|
||||||
u32 cacheSize; // VU Cache Size
|
u32 cacheSize; // VU Cache Size
|
||||||
|
|
||||||
microProgManager prog; // Micro Program Data
|
microProgManager prog; // Micro Program Data
|
||||||
|
microRegAlloc* regAlloc; // Reg Alloc Class
|
||||||
|
|
||||||
FILE* logFile; // Log File Pointer
|
FILE* logFile; // Log File Pointer
|
||||||
VURegs* regs; // VU Regs Struct
|
VURegs* regs; // VU Regs Struct
|
||||||
|
|
|
@ -756,244 +756,3 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
|
||||||
if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \
|
if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \
|
||||||
else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[0]); } \
|
else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[0]); } \
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
|
||||||
// Reg Alloc
|
|
||||||
//------------------------------------------------------------------
|
|
||||||
|
|
||||||
struct microXMM {
|
|
||||||
int reg; // VF Reg Number Stored
|
|
||||||
int xyzw; // xyzw to write back
|
|
||||||
int count; // Count of when first cached
|
|
||||||
bool isNeeded; // Is needed for current instruction
|
|
||||||
bool isTemp; // Is Temp Reg
|
|
||||||
};
|
|
||||||
|
|
||||||
#define xmmTotal 6 // Don't allocate PQ/ACC?
|
|
||||||
class microRegAlloc {
|
|
||||||
private:
|
|
||||||
microXMM xmmReg[xmmTotal];
|
|
||||||
VURegs* vuRegs;
|
|
||||||
int counter;
|
|
||||||
void clearReg(int reg) {
|
|
||||||
xmmReg[reg].reg = 0;
|
|
||||||
xmmReg[reg].count = 0;
|
|
||||||
xmmReg[reg].isNeeded = 0;
|
|
||||||
xmmReg[reg].isTemp = 1;
|
|
||||||
}
|
|
||||||
int findFreeRegRec(int startIdx) {
|
|
||||||
for (int i = startIdx; i < xmmTotal; i++) {
|
|
||||||
if (!xmmReg[i].isNeeded) {
|
|
||||||
if ((i+1) >= xmmTotal) return i;
|
|
||||||
int x = findFreeRegRec(i+1);
|
|
||||||
if (x == -1) return i;
|
|
||||||
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
int findFreeReg() {
|
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
|
||||||
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
|
|
||||||
return i; // Reg is not needed and was a temp reg
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int x = findFreeRegRec(0);
|
|
||||||
if (x < 0) { DevCon::Error("microVU Allocation Error!"); return 0; }
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
microRegAlloc(VURegs* vuRegsPtr) {
|
|
||||||
vuRegs = vuRegsPtr;
|
|
||||||
reset();
|
|
||||||
}
|
|
||||||
void reset() {
|
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
|
||||||
clearReg(i);
|
|
||||||
}
|
|
||||||
counter = 0;
|
|
||||||
}
|
|
||||||
void writeBackReg(int reg) {
|
|
||||||
if (xmmReg[reg].reg && xmmReg[reg].xyzw) {
|
|
||||||
mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
|
||||||
if (i = reg) continue;
|
|
||||||
if (!xmmReg[i].isTemp && xmmReg[i].reg == xmmReg[reg].reg) {
|
|
||||||
clearReg(i); // Invalidate any Cached Regs
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg
|
|
||||||
xmmReg[reg].count = counter;
|
|
||||||
xmmReg[reg].xyzw = 0;
|
|
||||||
xmmReg[reg].isNeeded = 0;
|
|
||||||
xmmReg[reg].isTemp = 0;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
clearReg(reg); // Clear Written Back Reg
|
|
||||||
}
|
|
||||||
void clearNeeded(int reg) {
|
|
||||||
xmmReg[reg].isNeeded = 0;
|
|
||||||
}
|
|
||||||
int allocReg(int vfReg = -1, bool writeBack = 0, int xyzw = 0, int vfWriteBack = 0) {
|
|
||||||
counter++;
|
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
|
||||||
if ((vfReg >= 0) && (!xmmReg[i].isTemp) && (xmmReg[i].reg == vfReg)) {
|
|
||||||
if (writeBack) {
|
|
||||||
int z = findFreeReg();
|
|
||||||
writeBackReg(z);
|
|
||||||
if (xyzw == 8) SSE2_SHUFPD_XMM_to_XMM(z, i, 0);
|
|
||||||
else if (xyzw == 4) SSE2_SHUFPD_XMM_to_XMM(z, i, 1);
|
|
||||||
else if (xyzw == 2) SSE2_SHUFPD_XMM_to_XMM(z, i, 2);
|
|
||||||
else if (xyzw == 1) SSE2_SHUFPD_XMM_to_XMM(z, i, 3);
|
|
||||||
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
|
|
||||||
xmmReg[z].reg = vfWriteBack;
|
|
||||||
xmmReg[z].count = counter;
|
|
||||||
xmmReg[z].xyzw = xyzw;
|
|
||||||
xmmReg[z].isNeeded = 1;
|
|
||||||
xmmReg[z].isTemp = 1;
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
xmmReg[i].isNeeded = 1;
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int x = findFreeReg();
|
|
||||||
writeBackReg(x);
|
|
||||||
if (vfReg >= 0) {
|
|
||||||
if (writeBack) {
|
|
||||||
mVUloadReg(x, (uptr)&vuRegs->VF[vfReg].UL[0], xyzw);
|
|
||||||
xmmReg[x].reg = vfWriteBack;
|
|
||||||
xmmReg[x].count = counter;
|
|
||||||
xmmReg[x].xyzw = xyzw;
|
|
||||||
xmmReg[x].isNeeded = 1;
|
|
||||||
xmmReg[x].isTemp = 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[vfReg].UL[0], x);
|
|
||||||
xmmReg[x].reg = vfReg;
|
|
||||||
xmmReg[x].count = counter;
|
|
||||||
xmmReg[x].xyzw = 0;
|
|
||||||
xmmReg[x].isNeeded = 1;
|
|
||||||
xmmReg[x].isTemp = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else { // Is Temp Reg
|
|
||||||
xmmReg[x].reg = 0;
|
|
||||||
xmmReg[x].count = counter;
|
|
||||||
xmmReg[x].xyzw = 0;
|
|
||||||
xmmReg[x].isNeeded = 1;
|
|
||||||
xmmReg[x].isTemp = 1;
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
struct microXMM {
|
|
||||||
int reg; // VF Reg Number Stored
|
|
||||||
int xyzw; // Current xyzw Order
|
|
||||||
int validXYZW; // Vectors that are valid
|
|
||||||
bool isNeeded; // Is needed for current instruction
|
|
||||||
bool isTemp; // Is Temp Reg
|
|
||||||
bool isWritten; // The reg has been written to
|
|
||||||
};
|
|
||||||
|
|
||||||
#define xmmTotal 7 // Don't allocate Last XMM Reg for PQ instances (will change this later)
|
|
||||||
class microRegAlloc {
|
|
||||||
private:
|
|
||||||
microXMM xmmReg[xmmTotal];
|
|
||||||
VURegs* vuRegs;
|
|
||||||
int vfStats[32];
|
|
||||||
int findFreeReg(int startIdx) {
|
|
||||||
for (int i = startIdx; i < xmmTotal; i++) {
|
|
||||||
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
|
|
||||||
return i; // Reg is not needed and was a temp reg
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = startIdx; i < xmmTotal; i++) {
|
|
||||||
if (!xmmReg[i].isNeeded) {
|
|
||||||
if ((i+1) >= xmmTotal) return i;
|
|
||||||
int x = findFreeReg(i+1);
|
|
||||||
if (x == -1) return i;
|
|
||||||
return ((vfStats[xmmReg[i].reg] < vfStats[xmmReg[x].reg]) ? i : x);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
microRegAlloc(VURegs* vuRegsPtr) {
|
|
||||||
vuRegs = vuRegsPtr;
|
|
||||||
reset();
|
|
||||||
}
|
|
||||||
void reset() {
|
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
|
||||||
xmmReg[i].reg = 0;
|
|
||||||
xmmReg[i].isNeeded = 0;
|
|
||||||
xmmReg[i].isTemp = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void loadStats(int* vfRegData) {
|
|
||||||
for (int i = 0; i < 32; i++) {
|
|
||||||
vfStats[i] = vfRegData[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void changeXYZW(int reg, int newXYZW) {
|
|
||||||
if (xmmReg[reg].xyzw != newXYZW) {
|
|
||||||
int shuffleReg = 0;
|
|
||||||
int xyzw = xmmReg[reg].xyzw;
|
|
||||||
if (((xyzw >> 6) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (3<<0);
|
|
||||||
else if (((xyzw >> 4) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (2<<0);
|
|
||||||
else if (((xyzw >> 2) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (1<<0);
|
|
||||||
if (((xyzw >> 6) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (3<<2);
|
|
||||||
else if (((xyzw >> 4) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (2<<2);
|
|
||||||
else if (((xyzw >> 2) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (1<<2);
|
|
||||||
if (((xyzw >> 6) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (3<<4);
|
|
||||||
else if (((xyzw >> 4) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (2<<4);
|
|
||||||
else if (((xyzw >> 2) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (1<<4);
|
|
||||||
if (((xyzw >> 6) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (3<<6);
|
|
||||||
else if (((xyzw >> 4) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (2<<6);
|
|
||||||
else if (((xyzw >> 2) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (1<<6);
|
|
||||||
SSE2_PSHUFD_XMM_to_XMM(reg, reg, shuffleReg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int writeBack(int reg) {
|
|
||||||
if (!xmmReg[reg].isTemp && xmmReg[reg].reg && xmmReg[reg].isWritten) {
|
|
||||||
changeXYZW(reg, 0xe4);
|
|
||||||
SSE_MOVAPS_XMM_to_M128(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0]); // Write Back Reg to Mem
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int allocReg(int vfReg, bool cache, int xyzw, int vfWriteBack) {
|
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
|
||||||
if (!xmmReg[i].isTemp && xmmReg[i].reg == vfReg) {
|
|
||||||
if ((xmmReg[i].validXYZW != 0xf) && xmmReg[i].isWritten) {
|
|
||||||
// write back partial reg
|
|
||||||
//if (!forWriting) { get reg from mem }
|
|
||||||
}
|
|
||||||
xmmReg[i].isWritten |= (forWriting) ? 1 : 0;
|
|
||||||
xmmReg[i].isNeeded = 1;
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int x = findFreeReg(0);
|
|
||||||
if (!xmmReg[x].isTemp && xmmReg[x].reg && xmmReg[x].isWritten) {
|
|
||||||
changeXYZW(x, 0xe4);
|
|
||||||
SSE_MOVAPS_XMM_to_M128(x, (uptr)&vuRegs->VF[xmmReg[x].reg].UL[0]); // Write Back Reg to Mem
|
|
||||||
}
|
|
||||||
if (vfReg != -1) {
|
|
||||||
SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[xmmReg[x].reg].UL[0], x); // Load Reg from Mem
|
|
||||||
xmmReg[x].isNeeded = 1;
|
|
||||||
xmmReg[x].isTemp = 0;
|
|
||||||
xmmReg[x].xyzw = 0xe4;
|
|
||||||
}
|
|
||||||
else { // Is Temp Reg
|
|
||||||
xmmReg[x].isNeeded = 1;
|
|
||||||
xmmReg[x].isTemp = 1;
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
*/
|
|
||||||
|
|
|
@ -69,8 +69,6 @@ void mVUdispatcherA(mV) {
|
||||||
SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT1);
|
SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT1);
|
||||||
|
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
|
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
|
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
|
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P].UL);
|
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P].UL);
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q].UL);
|
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q].UL);
|
||||||
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
|
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
|
||||||
|
|
|
@ -156,3 +156,142 @@ struct microIR {
|
||||||
u32 startPC; // Start PC for Cur Block
|
u32 startPC; // Start PC for Cur Block
|
||||||
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
|
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
// Reg Alloc
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
|
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
|
||||||
|
void mVUloadReg(int reg, uptr offset, int xyzw);
|
||||||
|
|
||||||
|
// Bookkeeping record describing what one host XMM register currently holds.
struct microXMM {
	int  reg;		// Number of the VF register associated with this XMM register
	int  xyzw;		// Vector-field mask that still has to be written back
	int  count;		// Allocation counter value stamped when the register was cached
	bool isNeeded;	// Set while the current instruction still uses the register
	bool isTemp;	// Set when the register is only a temporary (not a cached VF reg)
};
|
||||||
|
|
||||||
|
#define xmmTotal 6 // Don't allocate PQ/ACC?
|
||||||
|
class microRegAlloc {
|
||||||
|
private:
|
||||||
|
microXMM xmmReg[xmmTotal];
|
||||||
|
VURegs* vuRegs;
|
||||||
|
int counter;
|
||||||
|
void clearReg(int reg) {
|
||||||
|
xmmReg[reg].reg = 0;
|
||||||
|
xmmReg[reg].count = 0;
|
||||||
|
xmmReg[reg].isNeeded = 0;
|
||||||
|
xmmReg[reg].isTemp = 1;
|
||||||
|
}
|
||||||
|
int findFreeRegRec(int startIdx) {
|
||||||
|
for (int i = startIdx; i < xmmTotal; i++) {
|
||||||
|
if (!xmmReg[i].isNeeded) {
|
||||||
|
if ((i+1) >= xmmTotal) return i;
|
||||||
|
int x = findFreeRegRec(i+1);
|
||||||
|
if (x == -1) return i;
|
||||||
|
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int findFreeReg() {
|
||||||
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
|
if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) {
|
||||||
|
return i; // Reg is not needed and was a temp reg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int x = findFreeRegRec(0);
|
||||||
|
if (x < 0) { DevCon::Error("microVU Allocation Error!"); return 0; }
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
microRegAlloc(VURegs* vuRegsPtr) {
|
||||||
|
vuRegs = vuRegsPtr;
|
||||||
|
reset();
|
||||||
|
}
|
||||||
|
void reset() {
|
||||||
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
|
clearReg(i);
|
||||||
|
}
|
||||||
|
counter = 0;
|
||||||
|
}
|
||||||
|
void writeBackReg(int reg) {
|
||||||
|
if (xmmReg[reg].reg && (xmmReg[reg].xyzw || (xmmReg[reg].reg >= 32))) {
|
||||||
|
if (xmmReg[reg].reg == 32) SSE_MOVAPS_XMM_to_M128((uptr)&vuRegs->ACC.UL[0], reg);
|
||||||
|
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
||||||
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
|
if (i = reg) continue;
|
||||||
|
if (!xmmReg[i].isTemp && xmmReg[i].reg == xmmReg[reg].reg) {
|
||||||
|
clearReg(i); // Invalidate any Cached Regs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg
|
||||||
|
xmmReg[reg].count = counter;
|
||||||
|
xmmReg[reg].xyzw = 0;
|
||||||
|
xmmReg[reg].isNeeded = 0;
|
||||||
|
xmmReg[reg].isTemp = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clearReg(reg); // Clear Written Back Reg
|
||||||
|
}
|
||||||
|
void clearNeeded(int reg) {
|
||||||
|
xmmReg[reg].isNeeded = 0;
|
||||||
|
}
|
||||||
|
int allocReg(int vfReg = -1, bool writeBack = 0, int xyzw = 0, int vfWriteBack = 0) {
|
||||||
|
counter++;
|
||||||
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
|
if ((vfReg >= 0) && (!xmmReg[i].isTemp) && (xmmReg[i].reg == vfReg)) {
|
||||||
|
if (writeBack) {
|
||||||
|
int z = findFreeReg();
|
||||||
|
writeBackReg(z);
|
||||||
|
if (xyzw == 8) SSE2_PSHUFD_XMM_to_XMM(z, i, 0);
|
||||||
|
else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
|
||||||
|
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
|
||||||
|
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
|
||||||
|
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
|
||||||
|
xmmReg[z].reg = vfWriteBack;
|
||||||
|
xmmReg[z].count = counter;
|
||||||
|
xmmReg[z].xyzw = xyzw;
|
||||||
|
xmmReg[z].isNeeded = 1;
|
||||||
|
xmmReg[z].isTemp = 1;
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
xmmReg[i].count = counter;
|
||||||
|
xmmReg[i].isNeeded = 1;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int x = findFreeReg();
|
||||||
|
writeBackReg(x);
|
||||||
|
if (vfReg >= 0) {
|
||||||
|
if (writeBack) {
|
||||||
|
mVUloadReg(x, (uptr)&vuRegs->VF[vfReg].UL[0], xyzw);
|
||||||
|
xmmReg[x].reg = vfWriteBack;
|
||||||
|
xmmReg[x].count = counter;
|
||||||
|
xmmReg[x].xyzw = xyzw;
|
||||||
|
xmmReg[x].isNeeded = 1;
|
||||||
|
xmmReg[x].isTemp = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (vfReg == 32) SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->ACC.UL[0], x);
|
||||||
|
else SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[vfReg].UL[0], x);
|
||||||
|
xmmReg[x].reg = vfReg;
|
||||||
|
xmmReg[x].count = counter;
|
||||||
|
xmmReg[x].xyzw = 0;
|
||||||
|
xmmReg[x].isNeeded = 1;
|
||||||
|
xmmReg[x].isTemp = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // Is Temp Reg
|
||||||
|
xmmReg[x].reg = 0;
|
||||||
|
xmmReg[x].count = counter;
|
||||||
|
xmmReg[x].xyzw = 0;
|
||||||
|
xmmReg[x].isNeeded = 1;
|
||||||
|
xmmReg[x].isTemp = 1;
|
||||||
|
}
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
|
@ -60,9 +60,9 @@ mVUop(mVU_DIV) {
|
||||||
MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide (only when not 0/0)
|
MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide (only when not 0/0)
|
||||||
x86SetJ8(bjmp);
|
x86SetJ8(bjmp);
|
||||||
|
|
||||||
SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
|
SSE_XORPS_XMM_to_XMM (xmmFs, xmmFt);
|
||||||
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
|
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
|
||||||
SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // If division by zero, then xmmFs = +/- fmax
|
SSE_ORPS_M128_to_XMM (xmmFs, (uptr)mVU_maxvals); // If division by zero, then xmmFs = +/- fmax
|
||||||
|
|
||||||
djmp = JMP8(0);
|
djmp = JMP8(0);
|
||||||
x86SetJ8(cjmp);
|
x86SetJ8(cjmp);
|
||||||
|
@ -87,7 +87,7 @@ mVUop(mVU_SQRT) {
|
||||||
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
|
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
|
||||||
testNeg(xmmFt, gprT1, ajmp); // Check for negative sqrt
|
testNeg(xmmFt, gprT1, ajmp); // Check for negative sqrt
|
||||||
|
|
||||||
if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
|
if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(xmmFt, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
|
||||||
SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
|
SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
|
||||||
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
|
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
|
||||||
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFt);
|
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFt);
|
||||||
|
@ -119,7 +119,7 @@ mVUop(mVU_RSQRT) {
|
||||||
x86SetJ8(cjmp);
|
x86SetJ8(cjmp);
|
||||||
|
|
||||||
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
|
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
|
||||||
SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // xmmFs = +/-Max
|
SSE_ORPS_M128_to_XMM (xmmFs, (uptr)mVU_maxvals); // xmmFs = +/-Max
|
||||||
|
|
||||||
djmp = JMP8(0);
|
djmp = JMP8(0);
|
||||||
x86SetJ8(ajmp);
|
x86SetJ8(ajmp);
|
||||||
|
|
|
@ -118,10 +118,10 @@ declareAllVariables
|
||||||
#define xmmT1 0 // Temp Reg
|
#define xmmT1 0 // Temp Reg
|
||||||
#define xmmFs 1 // Holds the Value of Fs (writes back result Fd)
|
#define xmmFs 1 // Holds the Value of Fs (writes back result Fd)
|
||||||
#define xmmFt 2 // Holds the Value of Ft
|
#define xmmFt 2 // Holds the Value of Ft
|
||||||
#define xmmACC 3 // Holds ACC
|
#define xmmT2 3 // Temp Reg?
|
||||||
#define xmmMax 4 // Holds mVU_maxvals
|
#define xmmT3 4 // Temp Reg?
|
||||||
#define xmmMin 5 // Holds mVU_minvals
|
#define xmmT4 5 // Temp Reg?
|
||||||
#define xmmT2 6 // Temp Reg?
|
#define xmmACC 6 // Holds ACC
|
||||||
#define xmmPQ 7 // Holds the Value and Backup Values of P and Q regs
|
#define xmmPQ 7 // Holds the Value and Backup Values of P and Q regs
|
||||||
|
|
||||||
#define gprT1 0 // Temp Reg
|
#define gprT1 0 // Temp Reg
|
||||||
|
@ -141,7 +141,7 @@ declareAllVariables
|
||||||
|
|
||||||
// Recursive Inline
|
// Recursive Inline
|
||||||
#ifndef __LINUX__
|
#ifndef __LINUX__
|
||||||
#define __recInline __forceinline
|
#define __recInline __releaseinline
|
||||||
#else
|
#else
|
||||||
#define __recInline inline
|
#define __recInline inline
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -26,12 +26,12 @@
|
||||||
void mVUclamp1(int reg, int regT1, int xyzw) {
|
void mVUclamp1(int reg, int regT1, int xyzw) {
|
||||||
switch (xyzw) {
|
switch (xyzw) {
|
||||||
case 1: case 2: case 4: case 8:
|
case 1: case 2: case 4: case 8:
|
||||||
SSE_MINSS_XMM_to_XMM(reg, xmmMax);
|
SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
|
||||||
SSE_MAXSS_XMM_to_XMM(reg, xmmMin);
|
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
SSE_MINPS_XMM_to_XMM(reg, xmmMax);
|
SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals);
|
||||||
SSE_MAXPS_XMM_to_XMM(reg, xmmMin);
|
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,15 +43,15 @@ void mVUclamp2(int reg, int regT1, int xyzw) {
|
||||||
case 1: case 2: case 4: case 8:
|
case 1: case 2: case 4: case 8:
|
||||||
SSE_MOVSS_XMM_to_XMM(regT1, reg);
|
SSE_MOVSS_XMM_to_XMM(regT1, reg);
|
||||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit);
|
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit);
|
||||||
SSE_MINSS_XMM_to_XMM(reg, xmmMax);
|
SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
|
||||||
SSE_MAXSS_XMM_to_XMM(reg, xmmMin);
|
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals);
|
||||||
SSE_ORPS_XMM_to_XMM(reg, regT1);
|
SSE_ORPS_XMM_to_XMM(reg, regT1);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
|
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
|
||||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit);
|
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit);
|
||||||
SSE_MINPS_XMM_to_XMM(reg, xmmMax);
|
SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals);
|
||||||
SSE_MAXPS_XMM_to_XMM(reg, xmmMin);
|
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals);
|
||||||
SSE_ORPS_XMM_to_XMM(reg, regT1);
|
SSE_ORPS_XMM_to_XMM(reg, regT1);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -294,8 +294,6 @@ microVUt(void) mVUbackupRegs(mV) {
|
||||||
microVUt(void) mVUrestoreRegs(mV) {
|
microVUt(void) mVUrestoreRegs(mV) {
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
|
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]);
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->xmmPQb[0]);
|
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->xmmPQb[0]);
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
|
|
||||||
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
|
|
||||||
MOV32ItoR(gprR, Roffset); // Restore gprR
|
MOV32ItoR(gprR, Roffset); // Restore gprR
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -440,6 +440,47 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool
|
||||||
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
||||||
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } }
|
||||||
|
|
||||||
|
// Statement guards: the statement (or braced block) that follows runs only
// when the local variable 'opCase' has the given value.
#define opCase1 if (opCase == 1) // Normal
#define opCase2 if (opCase == 2) // BC Opcodes
#define opCase3 if (opCase == 3) // I Opcodes
#define opCase4 if (opCase == 4) // Q Opcodes

// Maps an xyzw value to a shuffle immediate:
//   1 -> 0x27, 2 -> 0xc6, 4 -> 0xe1, anything else -> 0xe4.
// The argument is parenthesised so that expressions such as (a | b) are
// compared as a whole; note that it may be evaluated up to three times.
#define shuffleXYZW(x) (((x)==1)?(0x27):(((x)==2)?(0xc6):(((x)==4)?(0xe1):(0xe4))))
|
||||||
|
|
||||||
|
static void (*SSE_PS[]) (x86SSERegType, x86SSERegType) = {
|
||||||
|
SSE_ADDPS_XMM_to_XMM, // 0
|
||||||
|
SSE_SUBPS_XMM_to_XMM, // 1
|
||||||
|
SSE_MULPS_XMM_to_XMM, // 2
|
||||||
|
SSE_MAXPS_XMM_to_XMM, // 3
|
||||||
|
SSE_MINPS_XMM_to_XMM // 4
|
||||||
|
};
|
||||||
|
|
||||||
|
static void (*SSE_SS[]) (x86SSERegType, x86SSERegType) = {
|
||||||
|
SSE_ADDSS_XMM_to_XMM, // 0
|
||||||
|
SSE_SUBSS_XMM_to_XMM, // 1
|
||||||
|
SSE_MULSS_XMM_to_XMM, // 2
|
||||||
|
SSE_MAXSS_XMM_to_XMM, // 3
|
||||||
|
SSE_MINSS_XMM_to_XMM // 4
|
||||||
|
};
|
||||||
|
|
||||||
|
// Emits one two-operand arithmetic op through the register allocator.
//   opCase      - selects how the second operand (Ft) is obtained:
//                 1 = VF register, 2 = BC form, 3 = I register, 4 = Q register.
//   opType      - index into the SSE_SS / SSE_PS emitter tables.
//   updateFlags - accepted but not used by this function.
// Fs is allocated as a write-back copy destined for _Fd_, the selected
// emitter is applied as (Fs, Ft), and Fs is handed to writeBackReg().
void mVU_FMACa(microVU* mVU, int opCase, int opType, bool updateFlags) {
	microRegAlloc* const alloc = mVU->regAlloc;
	int Fs, Ft;

	switch (opCase) {
		case 1: // Normal
			Ft = alloc->allocReg(_Ft_);
			// Single-lane op on a lane other than x: swap that lane into place
			if (_XYZW_SS && _X_Y_Z_W != 8) {
				SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleXYZW(_X_Y_Z_W));
			}
			break;
		case 2: // BC Opcodes
			Ft = alloc->allocReg(_Ft_);
			alloc->clearNeeded(Ft);
			Ft = alloc->allocReg();
			break;
		case 3: // I Opcodes
			Ft = alloc->allocReg();
			getIreg(Ft, 1);
			break;
		case 4: // Q Opcodes
			Ft = alloc->allocReg();
			getQreg(Ft);
			break;
	}

	Fs = alloc->allocReg(_Fs_, 1, _X_Y_Z_W, _Fd_);

	if (_XYZW_SS) {
		SSE_SS[opType](Fs, Ft);
	}
	else {
		SSE_PS[opType](Fs, Ft);
	}

	// Apply the same shuffle again to Ft after the operation
	if (opCase == 1) {
		if (_XYZW_SS && _X_Y_Z_W != 8) {
			SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleXYZW(_X_Y_Z_W));
		}
	}

	alloc->clearNeeded(Ft);
	alloc->writeBackReg(Fs);
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Micro VU Micromode Upper instructions
|
// Micro VU Micromode Upper instructions
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
Loading…
Reference in New Issue