mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Fixed xmm reg corruption when calling console print functions from the recompiler (on Windows Vista and later, these calls clobber some xmm regs)
- Tweaked and commented the regalloc class

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3723 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c2681d7baa
commit
147e4b8409
|
@ -65,7 +65,7 @@ public:
|
||||||
if (!thisBlock) {
|
if (!thisBlock) {
|
||||||
listI++;
|
listI++;
|
||||||
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
|
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
|
||||||
memzero(*newBlock);
|
newBlock->next = NULL;
|
||||||
|
|
||||||
if (blockEnd) {
|
if (blockEnd) {
|
||||||
blockEnd->next = newBlock;
|
blockEnd->next = newBlock;
|
||||||
|
@ -160,8 +160,8 @@ struct microVU {
|
||||||
|
|
||||||
__aligned16 u32 macFlag[4]; // 4 instances of mac flag (used in execution)
|
__aligned16 u32 macFlag[4]; // 4 instances of mac flag (used in execution)
|
||||||
__aligned16 u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
|
__aligned16 u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
|
||||||
__aligned16 u32 xmmPQb[4]; // Backup for xmmPQ
|
|
||||||
__aligned16 u32 xmmCTemp[4]; // Backup used in mVUclamp2()
|
__aligned16 u32 xmmCTemp[4]; // Backup used in mVUclamp2()
|
||||||
|
__aligned16 u32 xmmBackup[8][4]; // Backup for xmm0~xmm7
|
||||||
|
|
||||||
u32 index; // VU Index (VU0 or VU1)
|
u32 index; // VU Index (VU0 or VU1)
|
||||||
u32 cop2; // VU is in COP2 mode? (No/Yes)
|
u32 cop2; // VU is in COP2 mode? (No/Yes)
|
||||||
|
|
|
@ -19,12 +19,12 @@
|
||||||
// Messages Called at Execution Time...
|
// Messages Called at Execution Time...
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
static void __fastcall mVUbadOp0(mV, u32 PC) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
|
static void __fastcall mVUbadOp0(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", pc, prog); }
|
||||||
static void __fastcall mVUbadOp1(mV, u32 PC) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
|
static void __fastcall mVUbadOp1(u32 prog, u32 pc) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", pc, prog); }
|
||||||
static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", mVU->prog.cur->idx); }
|
static void __fastcall mVUwarning0(u32 prog) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", prog); }
|
||||||
static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", mVU->prog.cur->idx); }
|
static void __fastcall mVUwarning1(u32 prog) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", prog); }
|
||||||
static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); }
|
static void __fastcall mVUprintPC1(u32 pc) { Console.WriteLn("Block Start PC = 0x%04x", pc); }
|
||||||
static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); }
|
static void __fastcall mVUprintPC2(u32 pc) { Console.WriteLn("Block End PC = 0x%04x", pc); }
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Program Range Checking and Setting up Ranges
|
// Program Range Checking and Setting up Ranges
|
||||||
|
@ -170,10 +170,12 @@ static __fi void mVUcheckBadOp(mV) {
|
||||||
// Prints msg when exiting block early if 1st op was a bad opcode (Dawn of Mana Level 2)
|
// Prints msg when exiting block early if 1st op was a bad opcode (Dawn of Mana Level 2)
|
||||||
static __fi void handleBadOp(mV, int count) {
|
static __fi void handleBadOp(mV, int count) {
|
||||||
if (mVUinfo.isBadOp && count == 0) {
|
if (mVUinfo.isBadOp && count == 0) {
|
||||||
xMOV(gprT2, (uptr)mVU);
|
mVUbackupRegs(mVU, true);
|
||||||
|
xMOV(gprT2, mVU->prog.cur->idx);
|
||||||
xMOV(gprT3, xPC);
|
xMOV(gprT3, xPC);
|
||||||
if (!isVU1) xCALL(mVUbadOp0);
|
if (!isVU1) xCALL(mVUbadOp0);
|
||||||
else xCALL(mVUbadOp1);
|
else xCALL(mVUbadOp1);
|
||||||
|
mVUrestoreRegs(mVU, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -313,9 +315,11 @@ void mVUsetCycles(mV) {
|
||||||
// Prints Start/End PC of blocks executed, for debugging...
|
// Prints Start/End PC of blocks executed, for debugging...
|
||||||
static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
|
static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
|
||||||
if (mVUdebugNow) {
|
if (mVUdebugNow) {
|
||||||
|
mVUbackupRegs(mVU, true);
|
||||||
xMOV(gprT2, xPC);
|
xMOV(gprT2, xPC);
|
||||||
if (isEndPC) xCALL(mVUprintPC2);
|
if (isEndPC) xCALL(mVUprintPC2);
|
||||||
else xCALL(mVUprintPC1);
|
else xCALL(mVUprintPC1);
|
||||||
|
mVUrestoreRegs(mVU, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -341,15 +345,19 @@ static void mVUtestCycles(microVU* mVU) {
|
||||||
if (isVU0) {
|
if (isVU0) {
|
||||||
// TEST32ItoM((uptr)&mVU->regs().flags, VUFLAG_MFLAGSET);
|
// TEST32ItoM((uptr)&mVU->regs().flags, VUFLAG_MFLAGSET);
|
||||||
// xFowardJZ32 vu0jmp;
|
// xFowardJZ32 vu0jmp;
|
||||||
// xMOV(gprT2, (uptr)mVU);
|
// mVUbackupRegs(mVU, true);
|
||||||
|
// xMOV(gprT2, mVU->prog.cur->idx);
|
||||||
// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
|
// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
|
||||||
|
// mVUbackupRegs(mVU, true);
|
||||||
mVUsavePipelineState(mVU);
|
mVUsavePipelineState(mVU);
|
||||||
mVUendProgram(mVU, NULL, 0);
|
mVUendProgram(mVU, NULL, 0);
|
||||||
// vu0jmp.SetTarget();
|
// vu0jmp.SetTarget();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
xMOV(gprT2, (uptr)mVU);
|
mVUbackupRegs(mVU, true);
|
||||||
|
xMOV(gprT2, mVU->prog.cur->idx);
|
||||||
xCALL(mVUwarning1);
|
xCALL(mVUwarning1);
|
||||||
|
mVUbackupRegs(mVU, true);
|
||||||
mVUsavePipelineState(mVU);
|
mVUsavePipelineState(mVU);
|
||||||
mVUendProgram(mVU, NULL, 0);
|
mVUendProgram(mVU, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ union regInfo {
|
||||||
// microRegInfo is carefully ordered for faster compares. The "important" information is
|
// microRegInfo is carefully ordered for faster compares. The "important" information is
|
||||||
// housed in a union that is accessed via 'quick32' so that several u8 fields can be compared
|
// housed in a union that is accessed via 'quick32' so that several u8 fields can be compared
|
||||||
// using a pair of 32-bit equalities.
|
// using a pair of 32-bit equalities.
|
||||||
// vi15 is only used if microVU constprop is enabled (it is *not* by default). When constprop
|
// vi15 is only used if microVU const-prop is enabled (it is *not* by default). When constprop
|
||||||
// is disabled the vi15 field acts as additional padding that is required for 16 byte alignment
|
// is disabled the vi15 field acts as additional padding that is required for 16 byte alignment
|
||||||
// needed by the xmm compare.
|
// needed by the xmm compare.
|
||||||
union __aligned16 microRegInfo {
|
union __aligned16 microRegInfo {
|
||||||
|
@ -44,6 +44,7 @@ union __aligned16 microRegInfo {
|
||||||
u8 xgkick;
|
u8 xgkick;
|
||||||
u8 viBackUp; // VI reg number that was written to on branch-delay slot
|
u8 viBackUp; // VI reg number that was written to on branch-delay slot
|
||||||
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
|
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
|
||||||
|
u8 r;
|
||||||
};
|
};
|
||||||
u32 quick32[2];
|
u32 quick32[2];
|
||||||
};
|
};
|
||||||
|
@ -51,7 +52,6 @@ union __aligned16 microRegInfo {
|
||||||
struct {
|
struct {
|
||||||
u8 VI[16];
|
u8 VI[16];
|
||||||
regInfo VF[32];
|
regInfo VF[32];
|
||||||
u8 r;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -228,6 +228,7 @@ public:
|
||||||
index = _index;
|
index = _index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fully resets the regalloc by clearing all cached data
|
||||||
void reset() {
|
void reset() {
|
||||||
for(int i = 0; i < xmmTotal; i++) {
|
for(int i = 0; i < xmmTotal; i++) {
|
||||||
clearReg(i);
|
clearReg(i);
|
||||||
|
@ -235,6 +236,9 @@ public:
|
||||||
counter = 0;
|
counter = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Flushes all allocated registers (i.e. writes-back to memory all modified registers).
|
||||||
|
// If clearState is 0, then it keeps cached reg data valid
|
||||||
|
// If clearState is 1, then it invalidates all cached reg data after write-back
|
||||||
void flushAll(bool clearState = 1) {
|
void flushAll(bool clearState = 1) {
|
||||||
for(int i = 0; i < xmmTotal; i++) {
|
for(int i = 0; i < xmmTotal; i++) {
|
||||||
writeBackReg(xmm(i));
|
writeBackReg(xmm(i));
|
||||||
|
@ -244,7 +248,7 @@ public:
|
||||||
|
|
||||||
void clearReg(const xmm& reg) { clearReg(reg.Id); }
|
void clearReg(const xmm& reg) { clearReg(reg.Id); }
|
||||||
void clearReg(int regId) {
|
void clearReg(int regId) {
|
||||||
microMapXMM& clear( xmmMap[regId] );
|
microMapXMM& clear = xmmMap[regId];
|
||||||
clear.VFreg = -1;
|
clear.VFreg = -1;
|
||||||
clear.count = 0;
|
clear.count = 0;
|
||||||
clear.xyzw = 0;
|
clear.xyzw = 0;
|
||||||
|
@ -257,90 +261,110 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes back modified reg to memory.
|
||||||
|
// If all vectors modified, then keeps the VF reg cached in the xmm register.
|
||||||
|
// If reg was not modified, then keeps the VF reg cached in the xmm register.
|
||||||
void writeBackReg(const xmm& reg, bool invalidateRegs = 1) {
|
void writeBackReg(const xmm& reg, bool invalidateRegs = 1) {
|
||||||
microMapXMM& write( xmmMap[reg.Id] );
|
microMapXMM& mapX = xmmMap[reg.Id];
|
||||||
|
|
||||||
if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
|
if ((mapX.VFreg > 0) && mapX.xyzw) { // Reg was modified and not Temp or vf0
|
||||||
if (write.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg);
|
if (mapX.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg);
|
||||||
else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&regs().ACC], write.xyzw, 1);
|
elif (mapX.VFreg == 32) mVUsaveReg(reg, ptr[&regs().ACC], mapX.xyzw, 1);
|
||||||
else mVUsaveReg(reg, ptr[&getVF(write.VFreg)], write.xyzw, 1);
|
else mVUsaveReg(reg, ptr[&getVF(mapX.VFreg)], mapX.xyzw, 1);
|
||||||
if (invalidateRegs) {
|
if (invalidateRegs) {
|
||||||
for(int i = 0; i < xmmTotal; i++) {
|
for(int i = 0; i < xmmTotal; i++) {
|
||||||
microMapXMM& imap (xmmMap[i]);
|
microMapXMM& mapI = xmmMap[i];
|
||||||
if ((i == reg.Id) || imap.isNeeded) continue;
|
if ((i == reg.Id) || mapI.isNeeded) continue;
|
||||||
if (imap.VFreg == write.VFreg) {
|
if (mapI.VFreg == mapX.VFreg) {
|
||||||
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
|
if (mapI.xyzw && mapI.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", mapI.VFreg);
|
||||||
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
|
if (mapX.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
|
||||||
write.count = counter;
|
mapX.count = counter;
|
||||||
write.xyzw = 0;
|
mapX.xyzw = 0;
|
||||||
write.isNeeded = 0;
|
mapX.isNeeded = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
clearReg(reg);
|
||||||
}
|
}
|
||||||
clearReg(reg); // Clear Reg
|
elif (mapX.xyzw) clearReg(reg); // Clear reg if modified and is VF0 or temp reg...
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use this when done using the allocated register, it clears its "Needed" status.
|
||||||
|
// The register that was written to, should be cleared before other registers are cleared.
|
||||||
|
// This is to guarantee proper merging between registers... When a written-to reg is cleared,
|
||||||
|
// it invalidates other cached registers of the same VF reg, and merges partial-vector
|
||||||
|
// writes into them.
|
||||||
void clearNeeded(const xmm& reg) {
|
void clearNeeded(const xmm& reg) {
|
||||||
|
|
||||||
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
|
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return; // Sometimes xmmPQ hits this
|
||||||
|
|
||||||
microMapXMM& clear (xmmMap[reg.Id]);
|
microMapXMM& clear = xmmMap[reg.Id];
|
||||||
clear.isNeeded = 0;
|
clear.isNeeded = 0;
|
||||||
if (clear.xyzw) { // Reg was modified
|
if (clear.xyzw) { // Reg was modified
|
||||||
if (clear.VFreg > 0) {
|
if (clear.VFreg > 0) {
|
||||||
int mergeRegs = 0;
|
int mergeRegs = 0;
|
||||||
if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
|
if (clear.xyzw < 0xf) mergeRegs = 1; // Try to merge partial writes
|
||||||
for(int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
|
for(int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
|
||||||
if (i == reg.Id) continue;
|
if (i == reg.Id) continue;
|
||||||
microMapXMM& imap (xmmMap[i]);
|
microMapXMM& mapI = xmmMap[i];
|
||||||
if (imap.VFreg == clear.VFreg) {
|
if (mapI.VFreg == clear.VFreg) {
|
||||||
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg);
|
if (mapI.xyzw && mapI.xyzw < 0xf) {
|
||||||
|
DevCon.Error("microVU Error: clearNeeded() [%d]", mapI.VFreg);
|
||||||
|
}
|
||||||
if (mergeRegs == 1) {
|
if (mergeRegs == 1) {
|
||||||
mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
|
mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
|
||||||
imap.xyzw = 0xf;
|
mapI.xyzw = 0xf;
|
||||||
imap.count = counter;
|
mapI.count = counter;
|
||||||
mergeRegs = 2;
|
mergeRegs = 2;
|
||||||
}
|
}
|
||||||
else clearReg(i);
|
else clearReg(i); // Clears when mergeRegs is 0 or 2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mergeRegs==2) clearReg(reg); // Clear Current Reg if Merged
|
if (mergeRegs==2) clearReg(reg); // Clear Current Reg if Merged
|
||||||
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
|
elif (mergeRegs==1) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
|
||||||
}
|
}
|
||||||
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// vfLoadReg = VF reg to be loaded to the xmm register
|
||||||
|
// vfWriteReg = VF reg that the returned xmm register will be considered as
|
||||||
|
// xyzw = XYZW vectors that will be modified (and loaded)
|
||||||
|
// cloneWrite = When loading a reg that will be written to,
|
||||||
|
// it copies it to its own xmm reg instead of overwriting the cached one...
|
||||||
|
// Notes:
|
||||||
|
// To load a temp reg use the default param values, vfLoadReg = -1 and vfWriteReg = -1.
|
||||||
|
// To load a full reg which won't be modified and you want cached, specify vfLoadReg >= 0 and vfWriteReg = -1
|
||||||
|
// To load a reg which you don't want written back or cached, specify vfLoadReg >= 0 and vfWriteReg = 0
|
||||||
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
|
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
|
||||||
|
//DevCon.WriteLn("vfLoadReg = %02d, vfWriteReg = %02d, xyzw = %x, clone = %d",vfLoadReg,vfWriteReg,xyzw,(int)cloneWrite);
|
||||||
counter++;
|
counter++;
|
||||||
if (vfLoadReg >= 0) { // Search For Cached Regs
|
if (vfLoadReg >= 0) { // Search For Cached Regs
|
||||||
for(int i = 0; i < xmmTotal; i++) {
|
for(int i = 0; i < xmmTotal; i++) {
|
||||||
const xmm& xmmi(xmm::GetInstance(i));
|
const xmm& xmmI = xmm::GetInstance(i);
|
||||||
microMapXMM& imap (xmmMap[i]);
|
microMapXMM& mapI = xmmMap[i];
|
||||||
if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
|
if ((mapI.VFreg == vfLoadReg) && (!mapI.xyzw // Reg Was Not Modified
|
||||||
|| (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
|| (mapI.VFreg && (mapI.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
||||||
int z = i;
|
int z = i;
|
||||||
if (vfWriteReg >= 0) { // Reg will be modified
|
if (vfWriteReg >= 0) { // Reg will be modified
|
||||||
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
||||||
z = findFreeReg();
|
z = findFreeReg();
|
||||||
const xmm& xmmz(xmm::GetInstance(z));
|
const xmm& xmmZ = xmm::GetInstance(z);
|
||||||
writeBackReg(xmmz);
|
writeBackReg(xmmZ);
|
||||||
if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
|
if (xyzw == 4) xPSHUF.D(xmmZ, xmmI, 1);
|
||||||
else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
|
elif (xyzw == 2) xPSHUF.D(xmmZ, xmmI, 2);
|
||||||
else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2);
|
elif (xyzw == 1) xPSHUF.D(xmmZ, xmmI, 3);
|
||||||
else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3);
|
elif (z != i) xMOVAPS (xmmZ, xmmI);
|
||||||
else if (z != i) xMOVAPS (xmmz, xmmi);
|
mapI.count = counter; // Reg i was used, so update counter
|
||||||
imap.count = counter; // Reg i was used, so update counter
|
|
||||||
}
|
}
|
||||||
else { // Don't clone reg, but shuffle to adjust for SS ops
|
else { // Don't clone reg, but shuffle to adjust for SS ops
|
||||||
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); }
|
if ((vfLoadReg!=vfWriteReg)||(xyzw!=0xf)) writeBackReg(xmmI);
|
||||||
if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1);
|
if (xyzw == 4) xPSHUF.D(xmmI, xmmI, 1);
|
||||||
else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2);
|
elif (xyzw == 2) xPSHUF.D(xmmI, xmmI, 2);
|
||||||
else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3);
|
elif (xyzw == 1) xPSHUF.D(xmmI, xmmI, 3);
|
||||||
}
|
}
|
||||||
xmmMap[z].VFreg = vfWriteReg;
|
xmmMap[z].VFreg = vfWriteReg;
|
||||||
xmmMap[z].xyzw = xyzw;
|
xmmMap[z].xyzw = xyzw;
|
||||||
|
@ -352,26 +376,26 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int x = findFreeReg();
|
int x = findFreeReg();
|
||||||
const xmm& xmmx = xmm::GetInstance(x);
|
const xmm& xmmX = xmm::GetInstance(x);
|
||||||
writeBackReg(xmmx);
|
writeBackReg(xmmX);
|
||||||
|
|
||||||
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
||||||
if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); }
|
if ((vfLoadReg == 0) && !(xyzw & 1)) xPXOR(xmmX, xmmX);
|
||||||
else if (vfLoadReg == 33) loadIreg (xmmx, xyzw);
|
elif (vfLoadReg == 33) loadIreg (xmmX, xyzw);
|
||||||
else if (vfLoadReg == 32) mVUloadReg(xmmx, ptr[&regs().ACC], xyzw);
|
elif (vfLoadReg == 32) mVUloadReg(xmmX, ptr[&regs().ACC], xyzw);
|
||||||
else if (vfLoadReg >= 0) mVUloadReg(xmmx, ptr[&getVF(vfLoadReg)], xyzw);
|
elif (vfLoadReg >= 0) mVUloadReg(xmmX, ptr[&getVF(vfLoadReg)], xyzw);
|
||||||
xmmMap[x].VFreg = vfWriteReg;
|
xmmMap[x].VFreg = vfWriteReg;
|
||||||
xmmMap[x].xyzw = xyzw;
|
xmmMap[x].xyzw = xyzw;
|
||||||
}
|
}
|
||||||
else { // Reg Will Not Be Modified (always load full reg for caching)
|
else { // Reg Will Not Be Modified (always load full reg for caching)
|
||||||
if (vfLoadReg == 33) loadIreg(xmmx, 0xf);
|
if (vfLoadReg == 33) loadIreg(xmmX, 0xf);
|
||||||
else if (vfLoadReg == 32) xMOVAPS (xmmx, ptr128[&regs().ACC]);
|
elif (vfLoadReg == 32) xMOVAPS (xmmX, ptr128[&regs().ACC]);
|
||||||
else if (vfLoadReg >= 0) xMOVAPS (xmmx, ptr128[&getVF(vfLoadReg)]);
|
elif (vfLoadReg >= 0) xMOVAPS (xmmX, ptr128[&getVF(vfLoadReg)]);
|
||||||
xmmMap[x].VFreg = vfLoadReg;
|
xmmMap[x].VFreg = vfLoadReg;
|
||||||
xmmMap[x].xyzw = 0;
|
xmmMap[x].xyzw = 0;
|
||||||
}
|
}
|
||||||
xmmMap[x].count = counter;
|
xmmMap[x].count = counter;
|
||||||
xmmMap[x].isNeeded = 1;
|
xmmMap[x].isNeeded = 1;
|
||||||
return xmmx;
|
return xmmX;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -213,8 +213,33 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
|
||||||
// Micro VU - Misc Functions
|
// Micro VU - Misc Functions
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI)
|
||||||
|
__fi void mVUbackupRegs(microVU* mVU, bool toMemory = false)
|
||||||
|
{
|
||||||
|
if (toMemory) {
|
||||||
|
for(int i = 0; i < 8; i++) {
|
||||||
|
xMOVAPS(ptr128[&mVU->xmmBackup[i][0]], xmm(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
mVU->regAlloc->flushAll(); // Flush Regalloc
|
||||||
|
xMOVAPS(ptr128[&mVU->xmmBackup[xmmPQ.Id][0]], xmmPQ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore Volatile Regs
|
||||||
|
__fi void mVUrestoreRegs(microVU* mVU, bool fromMemory = false)
|
||||||
|
{
|
||||||
|
if (fromMemory) {
|
||||||
|
for(int i = 0; i < 8; i++) {
|
||||||
|
xMOVAPS(xmm(i), ptr128[&mVU->xmmBackup[i][0]]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else xMOVAPS(xmmPQ, ptr128[&mVU->xmmBackup[xmmPQ.Id][0]]);
|
||||||
|
}
|
||||||
|
|
||||||
// Gets called by mVUaddrFix at execution-time
|
// Gets called by mVUaddrFix at execution-time
|
||||||
static void __fastcall mVUwarningRegAccess(mV) { Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", xPC, mVU->prog.cur); }
|
static void __fastcall mVUwarningRegAccess(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog); }
|
||||||
|
|
||||||
// Transforms the Address in gprReg to valid VU0/VU1 Address
|
// Transforms the Address in gprReg to valid VU0/VU1 Address
|
||||||
__fi void mVUaddrFix(mV, const x32& gprReg)
|
__fi void mVUaddrFix(mV, const x32& gprReg)
|
||||||
|
@ -224,6 +249,7 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
|
||||||
xSHL(gprReg, 4);
|
xSHL(gprReg, 4);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
if (IsDevBuild && !isCOP2) mVUbackupRegs(mVU, true);
|
||||||
xTEST(gprReg, 0x400);
|
xTEST(gprReg, 0x400);
|
||||||
xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
|
xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
|
||||||
xAND(gprReg, 0xff); // if !(addr & 0x4000), wrap around
|
xAND(gprReg, 0xff); // if !(addr & 0x4000), wrap around
|
||||||
|
@ -233,7 +259,8 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
|
||||||
xPUSH(gprT1); // Note: Kernel does it via COP2 to initialize VU1!
|
xPUSH(gprT1); // Note: Kernel does it via COP2 to initialize VU1!
|
||||||
xPUSH(gprT2); // So we don't spam console, we'll only check micro-mode...
|
xPUSH(gprT2); // So we don't spam console, we'll only check micro-mode...
|
||||||
xPUSH(gprT3);
|
xPUSH(gprT3);
|
||||||
xMOV(gprT2, (uptr)mVU);
|
xMOV (gprT2, mVU->prog.cur->idx);
|
||||||
|
xMOV (gprT3, xPC);
|
||||||
xCALL(mVUwarningRegAccess);
|
xCALL(mVUwarningRegAccess);
|
||||||
xPOP (gprT3);
|
xPOP (gprT3);
|
||||||
xPOP (gprT2);
|
xPOP (gprT2);
|
||||||
|
@ -243,22 +270,10 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
|
||||||
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
|
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
|
||||||
jmpB.SetTarget();
|
jmpB.SetTarget();
|
||||||
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
|
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
|
||||||
|
if (IsDevBuild && !isCOP2) mVUrestoreRegs(mVU, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI)
|
|
||||||
__fi void mVUbackupRegs(microVU* mVU)
|
|
||||||
{
|
|
||||||
mVU->regAlloc->flushAll();
|
|
||||||
xMOVAPS(ptr128[mVU->xmmPQb], xmmPQ);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Restore Volatile Regs
|
|
||||||
__fi void mVUrestoreRegs(microVU* mVU)
|
|
||||||
{
|
|
||||||
xMOVAPS(xmmPQ, ptr128[mVU->xmmPQb]);
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Micro VU - Custom SSE Instructions
|
// Micro VU - Custom SSE Instructions
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
Loading…
Reference in New Issue