- Fixed xmm reg corruption when calling console print functions from recompiler (win-vista+ clobber some xmm regs)
- Tweaked and commented regalloc class

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3723 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2010-09-03 05:02:03 +00:00
parent c2681d7baa
commit 147e4b8409
4 changed files with 154 additions and 107 deletions

View File

@ -65,7 +65,7 @@ public:
if (!thisBlock) {
listI++;
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
memzero(*newBlock);
newBlock->next = NULL;
if (blockEnd) {
blockEnd->next = newBlock;
@ -160,8 +160,8 @@ struct microVU {
__aligned16 u32 macFlag[4]; // 4 instances of mac flag (used in execution)
__aligned16 u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
__aligned16 u32 xmmPQb[4]; // Backup for xmmPQ
__aligned16 u32 xmmCTemp[4]; // Backup used in mVUclamp2()
__aligned16 u32 xmmBackup[8][4]; // Backup for xmm0~xmm7
u32 index; // VU Index (VU0 or VU1)
u32 cop2; // VU is in COP2 mode? (No/Yes)

View File

@ -19,12 +19,12 @@
// Messages Called at Execution Time...
//------------------------------------------------------------------
static void __fastcall mVUbadOp0(mV, u32 PC) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
static void __fastcall mVUbadOp1(mV, u32 PC) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", mVU->prog.cur->idx); }
static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", mVU->prog.cur->idx); }
static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); }
static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); }
static void __fastcall mVUbadOp0(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", pc, prog); }
static void __fastcall mVUbadOp1(u32 prog, u32 pc) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", pc, prog); }
static void __fastcall mVUwarning0(u32 prog) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", prog); }
static void __fastcall mVUwarning1(u32 prog) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", prog); }
static void __fastcall mVUprintPC1(u32 pc) { Console.WriteLn("Block Start PC = 0x%04x", pc); }
static void __fastcall mVUprintPC2(u32 pc) { Console.WriteLn("Block End PC = 0x%04x", pc); }
//------------------------------------------------------------------
// Program Range Checking and Setting up Ranges
@ -170,10 +170,12 @@ static __fi void mVUcheckBadOp(mV) {
// Prints msg when exiting block early if 1st op was a bad opcode (Dawn of Mana Level 2)
static __fi void handleBadOp(mV, int count) {
if (mVUinfo.isBadOp && count == 0) {
xMOV(gprT2, (uptr)mVU);
mVUbackupRegs(mVU, true);
xMOV(gprT2, mVU->prog.cur->idx);
xMOV(gprT3, xPC);
if (!isVU1) xCALL(mVUbadOp0);
else xCALL(mVUbadOp1);
mVUrestoreRegs(mVU, true);
}
}
@ -313,9 +315,11 @@ void mVUsetCycles(mV) {
// Debug helper: when mVUdebugNow is set, emits a call that prints the current block PC.
// isEndPC selects the "Block End PC" printer (mVUprintPC2); otherwise the
// "Block Start PC" printer (mVUprintPC1) is used.
// xmm0~xmm7 are saved to memory before the call and reloaded afterwards.
static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
	if (!mVUdebugNow) return; // Debug printing disabled, emit nothing

	mVUbackupRegs(mVU, true);
	xMOV(gprT2, xPC); // PC is passed as the printer's only argument
	if (isEndPC) { xCALL(mVUprintPC2); }
	else         { xCALL(mVUprintPC1); }
	mVUrestoreRegs(mVU, true);
}
@ -341,15 +345,19 @@ static void mVUtestCycles(microVU* mVU) {
if (isVU0) {
// TEST32ItoM((uptr)&mVU->regs().flags, VUFLAG_MFLAGSET);
// xFowardJZ32 vu0jmp;
// xMOV(gprT2, (uptr)mVU);
// mVUbackupRegs(mVU, true);
// xMOV(gprT2, mVU->prog.cur->idx);
// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
// mVUrestoreRegs(mVU, true);
mVUsavePipelineState(mVU);
mVUendProgram(mVU, NULL, 0);
// vu0jmp.SetTarget();
}
else {
xMOV(gprT2, (uptr)mVU);
// Save xmm0~xmm7 to memory around the console call (the callee may clobber them)
mVUbackupRegs(mVU, true);
xMOV(gprT2, mVU->prog.cur->idx);
xCALL(mVUwarning1);
// Reload the saved registers. This must be a restore, not a second backup:
// backing up again would overwrite the saved copies with whatever the call left behind.
mVUrestoreRegs(mVU, true);
mVUsavePipelineState(mVU);
mVUendProgram(mVU, NULL, 0);
}

View File

@ -28,7 +28,7 @@ union regInfo {
// microRegInfo is carefully ordered for faster compares. The "important" information is
// housed in a union that is accessed via 'quick32' so that several u8 fields can be compared
// using a pair of 32-bit equalities.
// vi15 is only used if microVU constprop is enabled (it is *not* by default). When constprop
// vi15 is only used if microVU const-prop is enabled (it is *not* by default). When const-prop
// is disabled the vi15 field acts as additional padding that is required for 16 byte alignment
// needed by the xmm compare.
union __aligned16 microRegInfo {
@ -44,6 +44,7 @@ union __aligned16 microRegInfo {
u8 xgkick;
u8 viBackUp; // VI reg number that was written to on branch-delay slot
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
u8 r;
};
u32 quick32[2];
};
@ -51,7 +52,6 @@ union __aligned16 microRegInfo {
struct {
u8 VI[16];
regInfo VF[32];
u8 r;
};
};
@ -60,7 +60,7 @@ union __aligned16 microRegInfo {
u32 full32[160/sizeof(u32)];
};
C_ASSERT( sizeof(microRegInfo) == 160 );
C_ASSERT(sizeof(microRegInfo) == 160);
struct __aligned16 microBlock {
microRegInfo pState; // Detailed State of Pipeline
@ -202,7 +202,7 @@ protected:
}
int findFreeRegRec(int startIdx) {
for (int i = startIdx; i < xmmTotal; i++) {
for(int i = startIdx; i < xmmTotal; i++) {
if (!xmmMap[i].isNeeded) {
int x = findFreeRegRec(i+1);
if (x == -1) return i;
@ -213,7 +213,7 @@ protected:
}
int findFreeReg() {
for (int i = 0; i < xmmTotal; i++) {
for(int i = 0; i < xmmTotal; i++) {
if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) {
return i; // Reg is not needed and was a temp reg
}
@ -228,15 +228,19 @@ public:
index = _index;
}
// Fully resets the regalloc by clearing all cached data
void reset() {
for (int i = 0; i < xmmTotal; i++) {
for(int i = 0; i < xmmTotal; i++) {
clearReg(i);
}
counter = 0;
}
// Flushes all allocated registers (i.e. writes-back to memory all modified registers).
// If clearState is 0, then it keeps cached reg data valid
// If clearState is 1, then it invalidates all cached reg data after write-back
void flushAll(bool clearState = 1) {
for (int i = 0; i < xmmTotal; i++) {
for(int i = 0; i < xmmTotal; i++) {
writeBackReg(xmm(i));
if (clearState) clearReg(i);
}
@ -244,7 +248,7 @@ public:
void clearReg(const xmm& reg) { clearReg(reg.Id); }
void clearReg(int regId) {
microMapXMM& clear( xmmMap[regId] );
microMapXMM& clear = xmmMap[regId];
clear.VFreg = -1;
clear.count = 0;
clear.xyzw = 0;
@ -252,95 +256,115 @@ public:
}
void clearRegVF(int VFreg) {
for (int i = 0; i < xmmTotal; i++) {
for(int i = 0; i < xmmTotal; i++) {
if (xmmMap[i].VFreg == VFreg) clearReg(i);
}
}
// Writes back modified reg to memory.
// If all vectors modified, then keeps the VF reg cached in the xmm register.
// If reg was not modified, then keeps the VF reg cached in the xmm register.
void writeBackReg(const xmm& reg, bool invalidateRegs = 1) {
microMapXMM& write( xmmMap[reg.Id] );
microMapXMM& mapX = xmmMap[reg.Id];
if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
if (write.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg);
else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&regs().ACC], write.xyzw, 1);
else mVUsaveReg(reg, ptr[&getVF(write.VFreg)], write.xyzw, 1);
if ((mapX.VFreg > 0) && mapX.xyzw) { // Reg was modified and not Temp or vf0
if (mapX.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg);
elif (mapX.VFreg == 32) mVUsaveReg(reg, ptr[&regs().ACC], mapX.xyzw, 1);
else mVUsaveReg(reg, ptr[&getVF(mapX.VFreg)], mapX.xyzw, 1);
if (invalidateRegs) {
for (int i = 0; i < xmmTotal; i++) {
microMapXMM& imap (xmmMap[i]);
if ((i == reg.Id) || imap.isNeeded) continue;
if (imap.VFreg == write.VFreg) {
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
for(int i = 0; i < xmmTotal; i++) {
microMapXMM& mapI = xmmMap[i];
if ((i == reg.Id) || mapI.isNeeded) continue;
if (mapI.VFreg == mapX.VFreg) {
if (mapI.xyzw && mapI.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", mapI.VFreg);
clearReg(i); // Invalidate any Cached Regs of same vf Reg
}
}
}
if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
write.count = counter;
write.xyzw = 0;
write.isNeeded = 0;
if (mapX.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
mapX.count = counter;
mapX.xyzw = 0;
mapX.isNeeded = 0;
return;
}
clearReg(reg);
}
clearReg(reg); // Clear Reg
elif (mapX.xyzw) clearReg(reg); // Clear reg if modified and is VF0 or temp reg...
}
// Use this when done using the allocated register, it clears its "Needed" status.
// The register that was written to, should be cleared before other registers are cleared.
// This is to guarantee proper merging between registers... When a written-to reg is cleared,
// it invalidates other cached registers of the same VF reg, and merges partial-vector
// writes into them.
void clearNeeded(const xmm& reg) {
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return; // Sometimes xmmPQ hits this
microMapXMM& clear (xmmMap[reg.Id]);
microMapXMM& clear = xmmMap[reg.Id];
clear.isNeeded = 0;
if (clear.xyzw) { // Reg was modified
if (clear.VFreg > 0) {
int mergeRegs = 0;
if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
if (clear.xyzw < 0xf) mergeRegs = 1; // Try to merge partial writes
for(int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
if (i == reg.Id) continue;
microMapXMM& imap (xmmMap[i]);
if (imap.VFreg == clear.VFreg) {
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg);
microMapXMM& mapI = xmmMap[i];
if (mapI.VFreg == clear.VFreg) {
if (mapI.xyzw && mapI.xyzw < 0xf) {
DevCon.Error("microVU Error: clearNeeded() [%d]", mapI.VFreg);
}
if (mergeRegs == 1) {
mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
imap.xyzw = 0xf;
imap.count = counter;
mapI.xyzw = 0xf;
mapI.count = counter;
mergeRegs = 2;
}
else clearReg(i);
else clearReg(i); // Clears when mergeRegs is 0 or 2
}
}
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
if (mergeRegs==2) clearReg(reg); // Clear Current Reg if Merged
elif (mergeRegs==1) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
}
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
}
}
// vfLoadReg = VF reg to be loaded to the xmm register
// vfWriteReg = VF reg that the returned xmm register will be considered as
// xyzw = XYZW vectors that will be modified (and loaded)
// cloneWrite = When loading a reg that will be written to,
// it copies it to its own xmm reg instead of overwriting the cached one...
// Notes:
// To load a temp reg use the default param values, vfLoadReg = -1 and vfWriteReg = -1.
// To load a full reg which won't be modified and you want cached, specify vfLoadReg >= 0 and vfWriteReg = -1
// To load a reg which you don't want written back or cached, specify vfLoadReg >= 0 and vfWriteReg = 0
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
//DevCon.WriteLn("vfLoadReg = %02d, vfWriteReg = %02d, xyzw = %x, clone = %d",vfLoadReg,vfWriteReg,xyzw,(int)cloneWrite);
counter++;
if (vfLoadReg >= 0) { // Search For Cached Regs
for (int i = 0; i < xmmTotal; i++) {
const xmm& xmmi(xmm::GetInstance(i));
microMapXMM& imap (xmmMap[i]);
if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
|| (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
for(int i = 0; i < xmmTotal; i++) {
const xmm& xmmI = xmm::GetInstance(i);
microMapXMM& mapI = xmmMap[i];
if ((mapI.VFreg == vfLoadReg) && (!mapI.xyzw // Reg Was Not Modified
|| (mapI.VFreg && (mapI.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
int z = i;
if (vfWriteReg >= 0) { // Reg will be modified
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
z = findFreeReg();
const xmm& xmmz(xmm::GetInstance(z));
writeBackReg(xmmz);
if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2);
else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3);
else if (z != i) xMOVAPS (xmmz, xmmi);
imap.count = counter; // Reg i was used, so update counter
const xmm& xmmZ = xmm::GetInstance(z);
writeBackReg(xmmZ);
if (xyzw == 4) xPSHUF.D(xmmZ, xmmI, 1);
elif (xyzw == 2) xPSHUF.D(xmmZ, xmmI, 2);
elif (xyzw == 1) xPSHUF.D(xmmZ, xmmI, 3);
elif (z != i) xMOVAPS (xmmZ, xmmI);
mapI.count = counter; // Reg i was used, so update counter
}
else { // Don't clone reg, but shuffle to adjust for SS ops
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); }
if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1);
else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2);
else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3);
if ((vfLoadReg!=vfWriteReg)||(xyzw!=0xf)) writeBackReg(xmmI);
if (xyzw == 4) xPSHUF.D(xmmI, xmmI, 1);
elif (xyzw == 2) xPSHUF.D(xmmI, xmmI, 2);
elif (xyzw == 1) xPSHUF.D(xmmI, xmmI, 3);
}
xmmMap[z].VFreg = vfWriteReg;
xmmMap[z].xyzw = xyzw;
@ -352,26 +376,26 @@ public:
}
}
int x = findFreeReg();
const xmm& xmmx = xmm::GetInstance(x);
writeBackReg(xmmx);
const xmm& xmmX = xmm::GetInstance(x);
writeBackReg(xmmX);
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); }
else if (vfLoadReg == 33) loadIreg (xmmx, xyzw);
else if (vfLoadReg == 32) mVUloadReg(xmmx, ptr[&regs().ACC], xyzw);
else if (vfLoadReg >= 0) mVUloadReg(xmmx, ptr[&getVF(vfLoadReg)], xyzw);
if ((vfLoadReg == 0) && !(xyzw & 1)) xPXOR(xmmX, xmmX);
elif (vfLoadReg == 33) loadIreg (xmmX, xyzw);
elif (vfLoadReg == 32) mVUloadReg(xmmX, ptr[&regs().ACC], xyzw);
elif (vfLoadReg >= 0) mVUloadReg(xmmX, ptr[&getVF(vfLoadReg)], xyzw);
xmmMap[x].VFreg = vfWriteReg;
xmmMap[x].xyzw = xyzw;
}
else { // Reg Will Not Be Modified (always load full reg for caching)
if (vfLoadReg == 33) loadIreg(xmmx, 0xf);
else if (vfLoadReg == 32) xMOVAPS (xmmx, ptr128[&regs().ACC]);
else if (vfLoadReg >= 0) xMOVAPS (xmmx, ptr128[&getVF(vfLoadReg)]);
if (vfLoadReg == 33) loadIreg(xmmX, 0xf);
elif (vfLoadReg == 32) xMOVAPS (xmmX, ptr128[&regs().ACC]);
elif (vfLoadReg >= 0) xMOVAPS (xmmX, ptr128[&getVF(vfLoadReg)]);
xmmMap[x].VFreg = vfLoadReg;
xmmMap[x].xyzw = 0;
}
xmmMap[x].count = counter;
xmmMap[x].isNeeded = 1;
return xmmx;
return xmmX;
}
};

View File

@ -213,8 +213,33 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
// Micro VU - Misc Functions
//------------------------------------------------------------------
// Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI)
//   toMemory == true : stores xmm0~xmm7 into mVU->xmmBackup[0..7]
//   toMemory == false: flushes the regalloc, then stores only xmmPQ into its
//                      own slot of mVU->xmmBackup (indexed by xmmPQ.Id)
__fi void mVUbackupRegs(microVU* mVU, bool toMemory = false)
{
	if (!toMemory) {
		mVU->regAlloc->flushAll(); // Flush Regalloc
		xMOVAPS(ptr128[&mVU->xmmBackup[xmmPQ.Id][0]], xmmPQ);
		return;
	}

	// Full backup: one 128-bit store per xmm register, in register order
	for (int regId = 0; regId < 8; regId++) {
		xMOVAPS(ptr128[&mVU->xmmBackup[regId][0]], xmm(regId));
	}
}
// Restore Volatile Regs (counterpart of mVUbackupRegs)
//   fromMemory == true : reloads xmm0~xmm7 from mVU->xmmBackup[0..7]
//   fromMemory == false: reloads only xmmPQ from its slot of mVU->xmmBackup
__fi void mVUrestoreRegs(microVU* mVU, bool fromMemory = false)
{
	if (!fromMemory) {
		xMOVAPS(xmmPQ, ptr128[&mVU->xmmBackup[xmmPQ.Id][0]]);
		return;
	}

	// Full restore: one 128-bit load per xmm register, in register order
	for (int regId = 0; regId < 8; regId++) {
		xMOVAPS(xmm(regId), ptr128[&mVU->xmmBackup[regId][0]]);
	}
}
// Gets called by mVUaddrFix at execution-time
static void __fastcall mVUwarningRegAccess(mV) { Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", xPC, mVU->prog.cur); }
static void __fastcall mVUwarningRegAccess(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog); }
// Transforms the Address in gprReg to valid VU0/VU1 Address
__fi void mVUaddrFix(mV, const x32& gprReg)
@ -224,6 +249,7 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
xSHL(gprReg, 4);
}
else {
if (IsDevBuild && !isCOP2) mVUbackupRegs(mVU, true);
xTEST(gprReg, 0x400);
xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
xAND(gprReg, 0xff); // if !(addr & 0x4000), wrap around
@ -233,32 +259,21 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
xPUSH(gprT1); // Note: Kernel does it via COP2 to initialize VU1!
xPUSH(gprT2); // So we don't spam console, we'll only check micro-mode...
xPUSH(gprT3);
xMOV(gprT2, (uptr)mVU);
xMOV (gprT2, mVU->prog.cur->idx);
xMOV (gprT3, xPC);
xCALL(mVUwarningRegAccess);
xPOP(gprT3);
xPOP(gprT2);
xPOP(gprT1);
xPOP (gprT3);
xPOP (gprT2);
xPOP (gprT1);
}
xAND(gprReg, 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
jmpB.SetTarget();
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
if (IsDevBuild && !isCOP2) mVUrestoreRegs(mVU, true);
}
}
// Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI)
__fi void mVUbackupRegs(microVU* mVU)
{
mVU->regAlloc->flushAll();
xMOVAPS(ptr128[mVU->xmmPQb], xmmPQ);
}
// Restore Volatile Regs
__fi void mVUrestoreRegs(microVU* mVU)
{
xMOVAPS(xmmPQ, ptr128[mVU->xmmPQb]);
}
//------------------------------------------------------------------
// Micro VU - Custom SSE Instructions
//------------------------------------------------------------------