x86/microVU: Add VI caching

This commit is contained in:
Stenzek 2022-12-25 22:14:15 +10:00 committed by refractionpcsx2
parent 08faba5455
commit 00d768a6bf
14 changed files with 997 additions and 307 deletions

View File

@ -34,6 +34,12 @@ extern thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM];
namespace x86Emitter namespace x86Emitter
{ {
// Bytes of shadow stack space emitted code reserves before calling out to C++ functions.
// Windows requires 32 bytes of shadow stack in the caller's frame; every other target
// is given 0 here.
#ifdef _WIN32
static constexpr int SHADOW_STACK_SIZE = 32;
#else
static constexpr int SHADOW_STACK_SIZE = 0;
#endif
extern void xWrite8(u8 val); extern void xWrite8(u8 val);
extern void xWrite16(u16 val); extern void xWrite16(u16 val);
@ -401,6 +407,8 @@ namespace x86Emitter
pxAssertDev(other.canMapIDTo(4), "Mapping h registers to higher registers can produce unexpected values"); pxAssertDev(other.canMapIDTo(4), "Mapping h registers to higher registers can produce unexpected values");
} }
static const inline xRegister32& GetInstance(uint id);
bool operator==(const xRegister32& src) const { return this->Id == src.Id; } bool operator==(const xRegister32& src) const { return this->Id == src.Id; }
bool operator!=(const xRegister32& src) const { return this->Id != src.Id; } bool operator!=(const xRegister32& src) const { return this->Id != src.Id; }
}; };
@ -421,6 +429,8 @@ namespace x86Emitter
pxAssertDev(other.canMapIDTo(8), "Mapping h registers to higher registers can produce unexpected values"); pxAssertDev(other.canMapIDTo(8), "Mapping h registers to higher registers can produce unexpected values");
} }
static const inline xRegister64& GetInstance(uint id);
bool operator==(const xRegister64& src) const { return this->Id == src.Id; } bool operator==(const xRegister64& src) const { return this->Id == src.Id; }
bool operator!=(const xRegister64& src) const { return this->Id != src.Id; } bool operator!=(const xRegister64& src) const { return this->Id != src.Id; }
}; };
@ -664,6 +674,34 @@ extern const xRegister32
#endif #endif
} }
/// Looks up the shared 32-bit general-purpose register object for a host register index.
/// `id` must be below iREGCNT_GPR (checked with pxAssert).
const xRegister32& xRegister32::GetInstance(uint id)
{
	// Maps a register index to the matching global register object.
	// NOTE(review): the order presumably mirrors the emitter's register Id numbering - confirm.
	static const xRegister32* const s_instances[] = {
		&eax, &ecx, &edx, &ebx, &esp, &ebp, &esi, &edi,
		&r8d, &r9d, &r10d, &r11d, &r12d, &r13d, &r14d, &r15d};

	pxAssert(id < iREGCNT_GPR);

	const xRegister32* const instance = s_instances[id];
	return *instance;
}
/// Looks up the shared 64-bit general-purpose register object for a host register index.
/// `id` must be below iREGCNT_GPR (checked with pxAssert).
const xRegister64& xRegister64::GetInstance(uint id)
{
	// Maps a register index to the matching global register object.
	// NOTE(review): the order presumably mirrors the emitter's register Id numbering - confirm.
	static const xRegister64* const s_instances[] = {
		&rax, &rcx, &rdx, &rbx, &rsp, &rbp, &rsi, &rdi,
		&r8, &r9, &r10, &r11, &r12, &r13, &r14, &r15};

	pxAssert(id < iREGCNT_GPR);

	const xRegister64* const instance = s_instances[id];
	return *instance;
}
bool xRegisterSSE::IsCallerSaved(uint id) bool xRegisterSSE::IsCallerSaved(uint id)
{ {
#ifdef _WIN32 #ifdef _WIN32

View File

@ -144,7 +144,7 @@ int _getFreeXMMreg(u32 maxreg)
case XMMTYPE_VFREG: case XMMTYPE_VFREG:
{ {
if (COP2INST_USEDTEST(xmmregs[i].reg)) if (EEINST_VFUSEDTEST(xmmregs[i].reg))
continue; continue;
} }
break; break;
@ -875,6 +875,16 @@ int _allocIfUsedGPRtoX86(int gprreg, int mode)
return EEINST_USEDTEST(gprreg) ? _allocX86reg(X86TYPE_GPR, gprreg, mode) : -1; return EEINST_USEDTEST(gprreg) ? _allocX86reg(X86TYPE_GPR, gprreg, mode) : -1;
} }
/// Returns the host GPR mapped to VI register `vireg`.
/// An existing mapping is reused; otherwise a register is allocated only when the VI register
/// is still used later in the block. Returns -1 if it is neither mapped nor used again.
int _allocIfUsedVItoX86(int vireg, int mode)
{
	// Reuse the current mapping when there is one.
	const int cached = _checkX86reg(X86TYPE_VIREG, vireg, mode);
	if (cached >= 0)
		return cached;

	// Not worth a host register if nothing later in the block reads it.
	if (!EEINST_VIUSEDTEST(vireg))
		return -1;

	// MODE_COP2 makes the allocator skip host registers reserved for COP2 use.
	return _allocX86reg(X86TYPE_VIREG, vireg, mode | MODE_COP2);
}
int _allocIfUsedGPRtoXMM(int gprreg, int mode) int _allocIfUsedGPRtoXMM(int gprreg, int mode)
{ {
const int mmreg = _checkXMMreg(XMMTYPE_GPRREG, gprreg, mode); const int mmreg = _checkXMMreg(XMMTYPE_GPRREG, gprreg, mode);

View File

@ -30,6 +30,7 @@
#define MODE_READ 1 #define MODE_READ 1
#define MODE_WRITE 2 #define MODE_WRITE 2
#define MODE_CALLEESAVED 0x20 // can't flush reg to mem #define MODE_CALLEESAVED 0x20 // can't flush reg to mem
#define MODE_COP2 0x40 // don't allow using reserved VU registers
#define PROCESS_EE_XMM 0x02 #define PROCESS_EE_XMM 0x02
@ -119,6 +120,9 @@ void _flushConstReg(int reg);
void _validateRegs(); void _validateRegs();
void _writebackX86Reg(int x86reg); void _writebackX86Reg(int x86reg);
void mVUFreeCOP2GPR(int hostreg);
bool mVUIsReservedCOP2(int hostreg);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// XMM (128-bit) Register Allocation Tools // XMM (128-bit) Register Allocation Tools
@ -247,11 +251,17 @@ static __fi bool EEINST_XMMUSEDTEST(u32 reg)
} }
/// Returns true if the specified VF register is used later in the block. /// Returns true if the specified VF register is used later in the block.
static __fi bool COP2INST_USEDTEST(u32 reg) static __fi bool EEINST_VFUSEDTEST(u32 reg)
{ {
return (g_pCurInstInfo->vfregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED; return (g_pCurInstInfo->vfregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
} }
/// Reports whether the given VI register is used again later in the block,
/// i.e. it is flagged as used and the current instruction is not its last use.
static __fi bool EEINST_VIUSEDTEST(u32 reg)
{
	// Isolate the USED and LASTUSE bits; only "USED without LASTUSE" counts.
	const auto usage = g_pCurInstInfo->viregs[reg] & (EEINST_USED | EEINST_LASTUSE);
	return usage == EEINST_USED;
}
/// Returns true if the value should be computed/written back. /// Returns true if the value should be computed/written back.
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block. /// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
static __fi bool EEINST_LIVETEST(u32 reg) static __fi bool EEINST_LIVETEST(u32 reg)
@ -297,6 +307,7 @@ extern u16 g_xmmAllocCounter;
// allocates only if later insts use this register // allocates only if later insts use this register
int _allocIfUsedGPRtoX86(int gprreg, int mode); int _allocIfUsedGPRtoX86(int gprreg, int mode);
int _allocIfUsedVItoX86(int vireg, int mode);
int _allocIfUsedGPRtoXMM(int gprreg, int mode); int _allocIfUsedGPRtoXMM(int gprreg, int mode);
int _allocIfUsedFPUtoXMM(int fpureg, int mode); int _allocIfUsedFPUtoXMM(int fpureg, int mode);

View File

@ -55,6 +55,9 @@ int _getFreeX86reg(int mode)
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(reg)) if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(reg))
continue; continue;
if ((mode & MODE_COP2) && mVUIsReservedCOP2(reg))
continue;
if (x86regs[reg].inuse == 0) if (x86regs[reg].inuse == 0)
{ {
g_x86checknext = (reg + 1) % iREGCNT_GPR; g_x86checknext = (reg + 1) % iREGCNT_GPR;
@ -70,6 +73,9 @@ int _getFreeX86reg(int mode)
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(i)) if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(i))
continue; continue;
if ((mode & MODE_COP2) && mVUIsReservedCOP2(i))
continue;
// should have checked inuse in the previous loop. // should have checked inuse in the previous loop.
pxAssert(x86regs[i].inuse); pxAssert(x86regs[i].inuse);
@ -373,6 +379,13 @@ int _allocX86reg(int type, int reg, int mode)
} }
break; break;
case X86TYPE_VIREG:
{
RALOG("Loading guest VI reg %d to GPR %d", reg, regnum);
xMOVZX(xRegister32(regnum), ptr16[&VU0.VI[reg].US[0]]);
}
break;
default: default:
abort(); abort();
break; break;
@ -536,8 +549,7 @@ void _freeX86regWithoutWriteback(int x86reg)
if (x86regs[x86reg].type == X86TYPE_VIREG) if (x86regs[x86reg].type == X86TYPE_VIREG)
{ {
RALOG("Freeing VI reg %d in host GPR %d\n", x86regs[x86reg].reg, x86reg); RALOG("Freeing VI reg %d in host GPR %d\n", x86regs[x86reg].reg, x86reg);
//mVUFreeCOP2GPR(x86reg); mVUFreeCOP2GPR(x86reg);
abort();
} }
else if (x86regs[x86reg].inuse && x86regs[x86reg].type == X86TYPE_GPR) else if (x86regs[x86reg].inuse && x86regs[x86reg].type == X86TYPE_GPR)
{ {

View File

@ -89,6 +89,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
x86SetPtr(mVU.dispCache); x86SetPtr(mVU.dispCache);
mVUdispatcherAB(mVU); mVUdispatcherAB(mVU);
mVUdispatcherCD(mVU); mVUdispatcherCD(mVU);
mvuGenerateWaitMTVU(mVU);
mVUemitSearch(); mVUemitSearch();
mVU.regs().nextBlockCycles = 0; mVU.regs().nextBlockCycles = 0;

View File

@ -251,6 +251,7 @@ struct microVU
u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit) u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit)
u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume) u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume)
u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit) u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit)
u8* waitMTVU; // Ptr to function to save registers/sync VU1 thread
u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick
u32 code; // Contains the current Instruction u32 code; // Contains the current Instruction
u32 divFlag; // 1 instance of I/D flags u32 divFlag; // 1 instance of I/D flags

View File

@ -116,32 +116,10 @@ __fi void mVUallocCFLAGb(mV, const x32& reg, int fInstance)
// VI Reg Allocators // VI Reg Allocators
//------------------------------------------------------------------ //------------------------------------------------------------------
__ri void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false) void microRegAlloc::writeVIBackup(const xRegisterInt& reg)
{ {
if (!_reg_) microVU& mVU = index ? microVU1 : microVU0;
xXOR(GPRreg, GPRreg); xMOV(ptr32[&mVU.VIbackup], xRegister32(reg));
else if (signext)
xMOVSX(GPRreg, ptr16[&mVU.regs().VI[_reg_].SL]);
else
xMOVZX(GPRreg, ptr16[&mVU.regs().VI[_reg_].UL]);
}
__ri void mVUallocVIb(mV, const x32& GPRreg, int _reg_)
{
if (mVUlow.backupVI) // Backs up reg to memory (used when VI is modified b4 a branch)
{
xMOVZX(gprT3, ptr16[&mVU.regs().VI[_reg_].UL]);
xMOV (ptr32[&mVU.VIbackup], gprT3);
}
if (_reg_ == 0)
{
return;
}
else if (_reg_ < 16)
{
xMOV(ptr16[&mVU.regs().VI[_reg_].UL], xRegister16(GPRreg.Id));
}
} }
//------------------------------------------------------------------ //------------------------------------------------------------------

View File

@ -123,6 +123,81 @@ void mVUdispatcherCD(mV)
"microVU: Dispatcher generation exceeded reserved cache area!"); "microVU: Dispatcher generation exceeded reserved cache area!");
} }
// Emits a thunk at the current emitter position and records its address in mVU.waitMTVU.
// The thunk calls mVUwaitMTVU while preserving host state around the call: it pushes the
// caller-saved GPRs (except rsp and the gprT1/gprT2 temporaries), spills every caller-saved
// XMM register to the stack, makes the call, then restores everything in reverse order and returns.
void mvuGenerateWaitMTVU(mV)
{
mVU.waitMTVU = x86Ptr;
int num_xmms = 0, num_gprs = 0;
// Save caller-saved GPRs with pushes, counting them for the alignment computation below.
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
{
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
continue;
// no need to save temps
if (i == gprT1.GetId() || i == gprT2.GetId())
continue;
xPUSH(xRegister64(i));
num_gprs++;
}
// Count (but do not yet store) the caller-saved XMM registers; the count sizes the spill area.
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
num_xmms++;
}
// We need 16 byte alignment on the stack.
// Since the stack is unaligned at entry to this function, we add 8 when it's even, not odd.
// Layout from rsp upwards: shadow space, XMM spill slots, optional 8-byte alignment pad.
const int stack_size = (num_xmms * sizeof(u128)) + ((~num_gprs & 1) * sizeof(u64)) + SHADOW_STACK_SIZE;
int stack_offset = SHADOW_STACK_SIZE;
if (stack_size > 0)
{
xSUB(rsp, stack_size);
// Spill XMM registers in ascending index order, starting just above the shadow space.
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
xMOVAPS(ptr128[rsp + stack_offset], xRegisterSSE(i));
stack_offset += sizeof(u128);
}
}
xFastCall((void*)mVUwaitMTVU);
// Reload XMM registers in descending index order, starting from the last spill slot.
stack_offset = (num_xmms - 1) * sizeof(u128) + SHADOW_STACK_SIZE;
for (int i = static_cast<int>(iREGCNT_XMM - 1); i >= 0; i--)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
xMOVAPS(xRegisterSSE(i), ptr128[rsp + stack_offset]);
stack_offset -= sizeof(u128);
}
xADD(rsp, stack_size);
// Pop GPRs in the reverse of the push order, applying the same filters as the save loop.
for (int i = static_cast<int>(iREGCNT_GPR - 1); i >= 0; i--)
{
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
continue;
if (i == gprT1.GetId() || i == gprT2.GetId())
continue;
xPOP(xRegister64(i));
}
xRET();
// The thunk is emitted into the dispatcher cache; make sure it did not overrun it.
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
//------------------------------------------------------------------ //------------------------------------------------------------------
// Execution Functions // Execution Functions
//------------------------------------------------------------------ //------------------------------------------------------------------

View File

@ -313,13 +313,15 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC)
} }
else else
{ {
const xRegister32& temp3 = mVU.regAlloc->allocGPR();
xMOV(gprT1, getFlagReg(bStatus[0])); xMOV(gprT1, getFlagReg(bStatus[0]));
xMOV(gprT2, getFlagReg(bStatus[1])); xMOV(gprT2, getFlagReg(bStatus[1]));
xMOV(gprT3, getFlagReg(bStatus[2])); xMOV(temp3, getFlagReg(bStatus[2]));
xMOV(gprF3, getFlagReg(bStatus[3])); xMOV(gprF3, getFlagReg(bStatus[3]));
xMOV(gprF0, gprT1); xMOV(gprF0, gprT1);
xMOV(gprF1, gprT2); xMOV(gprF1, gprT2);
xMOV(gprF2, gprT3); xMOV(gprF2, temp3);
mVU.regAlloc->clearNeeded(temp3);
} }
} }

View File

@ -228,11 +228,25 @@ struct microMapXMM
bool isZero; // Register was loaded from VF00 and doesn't need clamping bool isZero; // Register was loaded from VF00 and doesn't need clamping
}; };
// Per-host-GPR bookkeeping entry used by microRegAlloc to cache VI registers in host registers.
struct microMapGPR
{
// VI register currently held by this host register; negative when it holds no VI value (temp/free).
int VIreg;
// Allocation counter value recorded when the register was last handed out; lower values are evicted first.
int count;
// Set while the register is handed out by allocGPR; cleared again by clearNeeded().
bool isNeeded;
// Host copy was modified and still has to be written back to the VI register in VU state.
bool dirty;
// The allocator knows the value has been zero-extended to 32 bits.
bool isZeroExtended;
// Whether the microVU allocator is allowed to hand out this host register at all.
bool usable;
};
class microRegAlloc class microRegAlloc
{ {
protected: protected:
static const int xmmTotal = 15; // PQ register is reserved static const int xmmTotal = iREGCNT_XMM - 1; // PQ register is reserved
static const int gprTotal = iREGCNT_GPR;
microMapXMM xmmMap[xmmTotal]; microMapXMM xmmMap[xmmTotal];
microMapGPR gprMap[gprTotal];
int counter; // Current allocation count int counter; // Current allocation count
int index; // VU0 or VU1 int index; // VU0 or VU1
@ -251,6 +265,18 @@ protected:
__ri void loadIreg(const xmm& reg, int xyzw) __ri void loadIreg(const xmm& reg, int xyzw)
{ {
for (int i = 0; i < gprTotal; i++)
{
if (gprMap[i].VIreg == REG_I)
{
xMOVDZX(reg, xRegister32(i));
if (!_XYZWss(xyzw))
xSHUF.PS(reg, reg, 0);
return;
}
}
xMOVSSZX(reg, ptr32[&getVI(REG_I)]); xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
if (!_XYZWss(xyzw)) if (!_XYZWss(xyzw))
xSHUF.PS(reg, reg, 0); xSHUF.PS(reg, reg, 0);
@ -290,10 +316,59 @@ protected:
return x; return x;
} }
int findFreeGPRRec(int startIdx)
{
for (int i = startIdx; i < gprTotal; i++)
{
if (gprMap[i].usable && !gprMap[i].isNeeded)
{
int x = findFreeGPRRec(i + 1);
if (x == -1)
return i;
return ((gprMap[i].count < gprMap[x].count) ? i : x);
}
}
return -1;
}
int findFreeGPR(int vireg)
{
if (regAllocCOP2)
return _allocX86reg(X86TYPE_VIREG, vireg, MODE_COP2);
for (int i = 0; i < gprTotal; i++)
{
if (gprMap[i].usable && !gprMap[i].isNeeded && (gprMap[i].VIreg < 0))
{
return i; // Reg is not needed and was a temp reg
}
}
int x = findFreeGPRRec(0);
pxAssertDev(x >= 0, "microVU register allocation failure!");
return x;
}
void writeVIBackup(const xRegisterInt& reg);
public: public:
microRegAlloc(int _index) microRegAlloc(int _index)
{ {
index = _index; index = _index;
// mark gpr registers as usable
std::memset(gprMap, 0, sizeof(gprMap));
for (int i = 0; i < gprTotal; i++)
{
if (i == gprT1.GetId() || i == gprT2.GetId() ||
i == gprF0.GetId() || i == gprF1.GetId() || i == gprF2.GetId() || i == gprF3.GetId() ||
i == rsp.GetId())
{
continue;
}
gprMap[i].usable = true;
}
reset(false); reset(false);
} }
@ -304,9 +379,10 @@ public:
regAllocCOP2 = false; regAllocCOP2 = false;
for (int i = 0; i < xmmTotal; i++) for (int i = 0; i < xmmTotal; i++)
{
clearReg(i); clearReg(i);
} for (int i = 0; i < gprTotal; i++)
clearGPR(i);
counter = 0; counter = 0;
regAllocCOP2 = cop2mode; regAllocCOP2 = cop2mode;
pxmmregs = cop2mode ? xmmregs : nullptr; pxmmregs = cop2mode ? xmmregs : nullptr;
@ -331,13 +407,37 @@ public:
xmmMap[i].xyzw = ((pxmmregs[i].mode & MODE_WRITE) != 0) ? 0xf : 0x0; xmmMap[i].xyzw = ((pxmmregs[i].mode & MODE_WRITE) != 0) ? 0xf : 0x0;
} }
} }
for (int i = 0; i < gprTotal; i++)
{
if (!x86regs[i].inuse || x86regs[i].type != X86TYPE_VIREG)
continue;
// pxAssertRel(armregs[i].reg >= 0, "Valid full register preserved");
if (x86regs[i].reg >= 0)
{
MVURALOG("Preserving VI reg %d in host reg %d across instruction\n", x86regs[i].reg, i);
x86regs[i].needed = false;
gprMap[i].isNeeded = false;
gprMap[i].isZeroExtended = false;
gprMap[i].VIreg = x86regs[i].reg;
gprMap[i].dirty = ((x86regs[i].mode & MODE_WRITE) != 0);
} }
} }
}
gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM;
}
int getXmmCount() int getXmmCount()
{ {
return xmmTotal + 1; return xmmTotal + 1;
} }
// Number of host GPR slots tracked by this allocator.
int getGPRCount() { return gprTotal; }
// Flushes all allocated registers (i.e. writes-back to memory all modified registers). // Flushes all allocated registers (i.e. writes-back to memory all modified registers).
// If clearState is 0, then it keeps cached reg data valid // If clearState is 0, then it keeps cached reg data valid
// If clearState is 1, then it invalidates all cached reg data after write-back // If clearState is 1, then it invalidates all cached reg data after write-back
@ -349,6 +449,36 @@ public:
if (clearState) if (clearState)
clearReg(i); clearReg(i);
} }
for (int i = 0; i < gprTotal; i++)
{
writeBackReg(xRegister32(i), true);
if (clearState)
clearGPR(i);
}
}
void flushCallerSavedRegisters(bool clearNeeded = false)
{
for (int i = 0; i < xmmTotal; i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
writeBackReg(xmm(i));
if (clearNeeded || !xmmMap[i].isNeeded)
clearReg(i);
}
for (int i = 0; i < gprTotal; i++)
{
if (!xRegister32::IsCallerSaved(i))
continue;
writeBackReg(xRegister32(i), true);
if (clearNeeded || !gprMap[i].isNeeded)
clearGPR(i);
}
} }
void flushPartialForCOP2() void flushPartialForCOP2()
@ -378,10 +508,19 @@ public:
clear.isNeeded = 0; clear.isNeeded = 0;
clear.isZero = 0; clear.isZero = 0;
} }
for (int i = 0; i < gprTotal; i++)
{
microMapGPR& clear = gprMap[i];
if (clear.VIreg < 0)
clearGPR(i);
}
} }
void TDwritebackAll(bool clearState = false) void TDwritebackAll()
{ {
// NOTE: We don't clear state here, this happens in an optional branch
for (int i = 0; i < xmmTotal; i++) for (int i = 0; i < xmmTotal; i++)
{ {
microMapXMM& mapX = xmmMap[xmm(i).Id]; microMapXMM& mapX = xmmMap[xmm(i).Id];
@ -396,6 +535,9 @@ public:
mVUsaveReg(xmm(i), ptr[&getVF(mapX.VFreg)], mapX.xyzw, 1); mVUsaveReg(xmm(i), ptr[&getVF(mapX.VFreg)], mapX.xyzw, 1);
} }
} }
for (int i = 0; i < gprTotal; i++)
writeBackReg(xRegister32(i), false);
} }
bool checkVFClamp(int regId) bool checkVFClamp(int regId)
@ -414,11 +556,19 @@ public:
return false; return false;
} }
bool checkCachedGPR(int regId)
{
if (regId < gprTotal)
return gprMap[regId].VIreg >= 0 || gprMap[regId].isNeeded;
else
return false;
}
void clearReg(const xmm& reg) { clearReg(reg.Id); } void clearReg(const xmm& reg) { clearReg(reg.Id); }
void clearReg(int regId) void clearReg(int regId)
{ {
microMapXMM& clear = xmmMap[regId]; microMapXMM& clear = xmmMap[regId];
if (regAllocCOP2) if (regAllocCOP2 && (clear.isNeeded || clear.VFreg >= 0))
{ {
pxAssert(pxmmregs[regId].type == XMMTYPE_VFREG); pxAssert(pxmmregs[regId].type == XMMTYPE_VFREG);
pxmmregs[regId].inuse = false; pxmmregs[regId].inuse = false;
@ -668,4 +818,262 @@ public:
updateCOP2AllocState(x); updateCOP2AllocState(x);
return xmmX; return xmmX;
} }
void clearGPR(const xRegisterInt& reg) { clearGPR(reg.GetId()); }
void clearGPR(int regId)
{
microMapGPR& clear = gprMap[regId];
if (regAllocCOP2)
{
if (x86regs[regId].inuse && x86regs[regId].type == X86TYPE_VIREG)
{
pxAssert(x86regs[regId].reg == static_cast<u8>(clear.VIreg));
_freeX86regWithoutWriteback(regId);
}
}
clear.VIreg = -1;
clear.count = 0;
clear.isNeeded = 0;
clear.dirty = false;
clear.isZeroExtended = false;
}
// Clears the map entry of host GPR `regId`, but only while the allocator runs in COP2 mode;
// does nothing otherwise.
void clearGPRCOP2(int regId)
{
	if (!regAllocCOP2)
		return;

	clearGPR(regId);
}
void updateCOP2AllocState(const xRegisterInt& reg)
{
if (!regAllocCOP2)
return;
const u32 rn = reg.GetId();
const bool dirty = (gprMap[rn].VIreg >= 0 && gprMap[rn].dirty);
pxAssert(x86regs[rn].type == X86TYPE_VIREG);
x86regs[rn].reg = gprMap[rn].VIreg;
x86regs[rn].counter = gprMap[rn].count;
x86regs[rn].mode = dirty ? (MODE_READ | MODE_WRITE) : MODE_READ;
x86regs[rn].needed = gprMap[rn].isNeeded;
}
void writeBackReg(const xRegisterInt& reg, bool clearDirty)
{
microMapGPR& mapX = gprMap[reg.GetId()];
pxAssert(mapX.usable || !mapX.dirty);
if (mapX.dirty)
{
pxAssert(mapX.VIreg > 0);
if (mapX.VIreg < 16)
xMOV(ptr16[&getVI(mapX.VIreg)], xRegister16(reg));
if (clearDirty)
{
mapX.dirty = false;
updateCOP2AllocState(reg);
}
}
}
// Marks host GPR `reg` as no longer needed, which makes it eligible for reuse again.
// The cached value and its dirty state are left untouched. In COP2 mode the matching
// x86regs entry is updated as well.
void clearNeeded(const xRegisterInt& reg)
{
	const auto hostIdx = reg.GetId();
	pxAssert(hostIdx < gprTotal);

	gprMap[hostIdx].isNeeded = false;

	if (!regAllocCOP2)
		return;

	x86regs[hostIdx].needed = false;
}
void unbindAnyVIAllocations(int reg, bool& backup)
{
for (int i = 0; i < gprTotal; i++)
{
microMapGPR& mapI = gprMap[i];
if (mapI.VIreg == reg)
{
if (backup)
{
writeVIBackup(xRegister32(i));
backup = false;
}
// if it's needed, we just unbind the allocation and preserve it, otherwise clear
if (mapI.isNeeded)
{
MVURALOG(" unbind %d to %d for write\n", i, reg);
if (regAllocCOP2)
{
pxAssert(x86regs[i].type == X86TYPE_VIREG && x86regs[i].reg == static_cast<u8>(mapI.VIreg));
x86regs[i].reg = -1;
}
mapI.VIreg = -1;
mapI.dirty = false;
mapI.isZeroExtended = false;
}
else
{
MVURALOG(" clear %d to %d for write\n", i, reg);
clearGPR(i);
}
// shouldn't be any others...
for (int j = i + 1; j < gprTotal; j++)
{
pxAssert(gprMap[j].VIreg != reg);
}
break;
}
}
}
// Hands out a host GPR for a VI register access and emits whatever loads/copies are required.
//
//   viLoadReg     VI register whose current value must be present in the returned register,
//                 or -1 when no value needs to be loaded.
//   viWriteReg    VI register the caller is going to write through the returned register,
//                 or -1 for a read-only / scratch allocation. Writes to VI0 are discarded: a
//                 zeroed scratch register is returned that is never written back.
//   backup        when set, the value viWriteReg had *before* this write is stored to the VI
//                 backup slot (via writeVIBackup / unbindAnyVIAllocations).
//   zext_if_dirty request that the returned value is zero-extended to 32 bits when it comes
//                 from an already cached register.
//
// The returned register is marked as needed; callers release it with clearNeeded().
const xRegister32& allocGPR(int viLoadReg = -1, int viWriteReg = -1, bool backup = false, bool zext_if_dirty = false)
{
	// TODO: When load != write, we should check whether load is used later, and if so, copy it.

	//DevCon.WriteLn("viLoadReg = %02d, viWriteReg = %02d, backup = %d",viLoadReg,viWriteReg,(int)backup);
	const int this_counter = regAllocCOP2 ? (g_x86AllocCounter++) : (counter++);

	if (viWriteReg == 0)
	{
		// write zero register as temp and discard later
		int x = findFreeGPR(-1);
		const xRegister32& gprX = xRegister32::GetInstance(x);
		writeBackReg(gprX, true);
		xXOR(gprX, gprX);
		gprMap[x].VIreg = -1;
		gprMap[x].dirty = false;
		gprMap[x].count = this_counter;
		gprMap[x].isNeeded = true;
		gprMap[x].isZeroExtended = true;
		MVURALOG(" alloc zero to scratch %d\n", x);
		return gprX;
	}

	if (viLoadReg >= 0) // Search For Cached Regs
	{
		for (int i = 0; i < gprTotal; i++)
		{
			microMapGPR& mapI = gprMap[i];
			if (mapI.VIreg == viLoadReg)
			{
				// Refresh the usage count up front, so the findFreeGPR() call below does not
				// see the source register as the oldest allocation and pick it for eviction.
				gprMap[i].count = this_counter;

				if (viWriteReg >= 0) // Reg will be modified
				{
					if (viLoadReg != viWriteReg)
					{
						// kill any allocations of viWriteReg
						// (this also takes the backup, and clears `backup`, if viWriteReg was cached)
						unbindAnyVIAllocations(viWriteReg, backup);

						// allocate a new register for writing to
						int x = findFreeGPR(viWriteReg);
						const xRegister32& gprX = xRegister32::GetInstance(x);
						writeBackReg(gprX, true);

						// viWriteReg was not cached, so its pre-write value only exists in VU state.
						// Fetch it through the fresh register and back it up before that register is
						// overwritten with the source value; otherwise the source value would end up
						// in the backup slot.
						// NOTE(review): if the allocator returned the source register itself
						// (x == i) it cannot be clobbered here, and the backup is still taken from
						// the source value further down, as before.
						if (backup && x != i)
						{
							xMOVZX(gprX, ptr16[&getVI(viWriteReg)]);
							writeVIBackup(gprX);
							backup = false;
						}

						if (zext_if_dirty)
							xMOVZX(gprX, xRegister16(i));
						else
							xMOV(gprX, xRegister32(i));
						gprMap[x].isZeroExtended = zext_if_dirty;
						MVURALOG(" clone write %d in %d to %d for %d\n", viLoadReg, i, x, viWriteReg);

						// From here on `i` refers to the freshly allocated destination register.
						std::swap(x, i);
					}
					else
					{
						// writing to it, no longer zero extended
						gprMap[i].isZeroExtended = false;
					}

					gprMap[i].VIreg = viWriteReg;
					gprMap[i].dirty = true;
				}
				else if (zext_if_dirty && !gprMap[i].isZeroExtended)
				{
					xMOVZX(xRegister32(i), xRegister16(i));
					gprMap[i].isZeroExtended = true;
				}

				gprMap[i].count = this_counter;
				gprMap[i].isNeeded = true;

				// Still pending only when source and destination are the same VI register (the
				// register then holds exactly the pre-write value), or in the x == i case above.
				if (backup)
					writeVIBackup(xRegister32(i));

				if (regAllocCOP2)
				{
					pxAssert(x86regs[i].inuse && x86regs[i].type == X86TYPE_VIREG);
					x86regs[i].reg = gprMap[i].VIreg;
					x86regs[i].mode = gprMap[i].dirty ? (MODE_WRITE | MODE_READ) : (MODE_READ);
				}

				MVURALOG(" returning cached in %d\n", i);
				return xRegister32::GetInstance(i);
			}
		}
	}

	if (viWriteReg >= 0) // Writing a new value, make sure this register isn't cached already
		unbindAnyVIAllocations(viWriteReg, backup);

	int x = findFreeGPR(viLoadReg);
	const xRegister32& gprX = xRegister32::GetInstance(x);
	writeBackReg(gprX, true);

	// The destination differs from the source and was not cached (otherwise the call to
	// unbindAnyVIAllocations above would have cleared `backup`). Its old value has to be read
	// from VU state and backed up now, before the register is loaded with the source value.
	if (backup && viLoadReg >= 0 && viWriteReg > 0 && viLoadReg != viWriteReg)
	{
		xMOVZX(gprX, ptr16[&getVI(viWriteReg)]);
		writeVIBackup(gprX);
		backup = false;
	}

	if (viLoadReg > 0)
		xMOVZX(gprX, ptr16[&getVI(viLoadReg)]);
	else if (viLoadReg == 0)
		xXOR(gprX, gprX);

	gprMap[x].VIreg = viLoadReg;
	gprMap[x].isZeroExtended = true;

	if (viWriteReg >= 0)
	{
		gprMap[x].VIreg = viWriteReg;
		gprMap[x].dirty = true;
		gprMap[x].isZeroExtended = false;

		if (backup)
		{
			// Either nothing was loaded (fetch the destination's old value first), or the
			// loaded register *is* the destination and already holds the pre-write value.
			if (viLoadReg < 0 && viWriteReg > 0)
				xMOVZX(gprX, ptr16[&getVI(viWriteReg)]);

			writeVIBackup(gprX);
		}
	}

	gprMap[x].count = this_counter;
	gprMap[x].isNeeded = true;

	if (regAllocCOP2)
	{
		pxAssert(x86regs[x].inuse && x86regs[x].type == X86TYPE_VIREG);
		x86regs[x].reg = gprMap[x].VIreg;
		x86regs[x].mode = gprMap[x].dirty ? (MODE_WRITE | MODE_READ) : (MODE_READ);
	}

	MVURALOG(" returning new %d\n", x);
	return gprX;
}
void moveVIToGPR(const xRegisterInt& reg, int vi, bool signext = false)
{
pxAssert(vi >= 0);
if (vi == 0)
{
xXOR(xRegister32(reg), xRegister32(reg));
return;
}
// TODO: Check liveness/usedness before allocating.
// TODO: Check whether zero-extend is needed everywhere heae. Loadstores are.
const xRegister32& srcreg = allocGPR(vi);
if (signext)
xMOVSX(xRegister32(reg), xRegister16(srcreg));
else
xMOVZX(xRegister32(reg), xRegister16(srcreg));
clearNeeded(srcreg);
}
}; };

View File

@ -611,11 +611,12 @@ mVUop(mVU_FCAND)
pass1 { mVUanalyzeCflag(mVU, 1); } pass1 { mVUanalyzeCflag(mVU, 1); }
pass2 pass2
{ {
mVUallocCFLAGa(mVU, gprT1, cFLAG.read); const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
xAND(gprT1, _Imm24_); mVUallocCFLAGa(mVU, dst, cFLAG.read);
xADD(gprT1, 0xffffff); xAND(dst, _Imm24_);
xSHR(gprT1, 24); xADD(dst, 0xffffff);
mVUallocVIb(mVU, gprT1, 1); xSHR(dst, 24);
mVU.regAlloc->clearNeeded(dst);
mVU.profiler.EmitOp(opFCAND); mVU.profiler.EmitOp(opFCAND);
} }
pass3 { mVUlog("FCAND vi01, $%x", _Imm24_); } pass3 { mVUlog("FCAND vi01, $%x", _Imm24_); }
@ -627,11 +628,12 @@ mVUop(mVU_FCEQ)
pass1 { mVUanalyzeCflag(mVU, 1); } pass1 { mVUanalyzeCflag(mVU, 1); }
pass2 pass2
{ {
mVUallocCFLAGa(mVU, gprT1, cFLAG.read); const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
xXOR(gprT1, _Imm24_); mVUallocCFLAGa(mVU, dst, cFLAG.read);
xSUB(gprT1, 1); xXOR(dst, _Imm24_);
xSHR(gprT1, 31); xSUB(dst, 1);
mVUallocVIb(mVU, gprT1, 1); xSHR(dst, 31);
mVU.regAlloc->clearNeeded(dst);
mVU.profiler.EmitOp(opFCEQ); mVU.profiler.EmitOp(opFCEQ);
} }
pass3 { mVUlog("FCEQ vi01, $%x", _Imm24_); } pass3 { mVUlog("FCEQ vi01, $%x", _Imm24_); }
@ -643,9 +645,10 @@ mVUop(mVU_FCGET)
pass1 { mVUanalyzeCflag(mVU, _It_); } pass1 { mVUanalyzeCflag(mVU, _It_); }
pass2 pass2
{ {
mVUallocCFLAGa(mVU, gprT1, cFLAG.read); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xAND(gprT1, 0xfff); mVUallocCFLAGa(mVU, regT, cFLAG.read);
mVUallocVIb(mVU, gprT1, _It_); xAND(regT, 0xfff);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opFCGET); mVU.profiler.EmitOp(opFCGET);
} }
pass3 { mVUlog("FCGET vi%02d", _Ft_); } pass3 { mVUlog("FCGET vi%02d", _Ft_); }
@ -657,11 +660,12 @@ mVUop(mVU_FCOR)
pass1 { mVUanalyzeCflag(mVU, 1); } pass1 { mVUanalyzeCflag(mVU, 1); }
pass2 pass2
{ {
mVUallocCFLAGa(mVU, gprT1, cFLAG.read); const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
xOR(gprT1, _Imm24_); mVUallocCFLAGa(mVU, dst, cFLAG.read);
xADD(gprT1, 1); // If 24 1's will make 25th bit 1, else 0 xOR(dst, _Imm24_);
xSHR(gprT1, 24); // Get the 25th bit (also clears the rest of the garbage in the reg) xADD(dst, 1); // If 24 1's will make 25th bit 1, else 0
mVUallocVIb(mVU, gprT1, 1); xSHR(dst, 24); // Get the 25th bit (also clears the rest of the garbage in the reg)
mVU.regAlloc->clearNeeded(dst);
mVU.profiler.EmitOp(opFCOR); mVU.profiler.EmitOp(opFCOR);
} }
pass3 { mVUlog("FCOR vi01, $%x", _Imm24_); } pass3 { mVUlog("FCOR vi01, $%x", _Imm24_); }
@ -690,9 +694,9 @@ mVUop(mVU_FMAND)
pass2 pass2
{ {
mVUallocMFLAGa(mVU, gprT1, mFLAG.read); mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_); const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
xAND(gprT1b, gprT2b); xAND(regT, gprT1);
mVUallocVIb(mVU, gprT1, _It_); mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opFMAND); mVU.profiler.EmitOp(opFMAND);
} }
pass3 { mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_); } pass3 { mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_); }
@ -705,11 +709,11 @@ mVUop(mVU_FMEQ)
pass2 pass2
{ {
mVUallocMFLAGa(mVU, gprT1, mFLAG.read); mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_); const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
xXOR(gprT1, gprT2); xXOR(regT, gprT1);
xSUB(gprT1, 1); xSUB(regT, 1);
xSHR(gprT1, 31); xSHR(regT, 31);
mVUallocVIb(mVU, gprT1, _It_); mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opFMEQ); mVU.profiler.EmitOp(opFMEQ);
} }
pass3 { mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_); } pass3 { mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_); }
@ -722,9 +726,9 @@ mVUop(mVU_FMOR)
pass2 pass2
{ {
mVUallocMFLAGa(mVU, gprT1, mFLAG.read); mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_); const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
xOR(gprT1b, gprT2b); xOR(regT, gprT1);
mVUallocVIb(mVU, gprT1, _It_); mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opFMOR); mVU.profiler.EmitOp(opFMOR);
} }
pass3 { mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_); } pass3 { mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_); }
@ -742,9 +746,10 @@ mVUop(mVU_FSAND)
{ {
if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking I/D/IS/DS Flags"); if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking I/D/IS/DS Flags");
if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking U/O/US/OS Flags"); if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking U/O/US/OS Flags");
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read); const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xAND(gprT1, _Imm12_); mVUallocSFLAGc(reg, gprT1, sFLAG.read);
mVUallocVIb(mVU, gprT1, _It_); xAND(reg, _Imm12_);
mVU.regAlloc->clearNeeded(reg);
mVU.profiler.EmitOp(opFSAND); mVU.profiler.EmitOp(opFSAND);
} }
pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); } pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
@ -756,9 +761,10 @@ mVUop(mVU_FSOR)
pass1 { mVUanalyzeSflag(mVU, _It_); } pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2 pass2
{ {
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read); const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xOR(gprT1, _Imm12_); mVUallocSFLAGc(reg, gprT2, sFLAG.read);
mVUallocVIb(mVU, gprT1, _It_); xOR(reg, _Imm12_);
mVU.regAlloc->clearNeeded(reg);
mVU.profiler.EmitOp(opFSOR); mVU.profiler.EmitOp(opFSOR);
} }
pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); } pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
@ -786,15 +792,16 @@ mVUop(mVU_FSEQ)
if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
mVUallocSFLAGa(gprT1, sFLAG.read); const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
setBitFSEQ(gprT1, 0x0f00); // Z bit mVUallocSFLAGa(reg, sFLAG.read);
setBitFSEQ(gprT1, 0xf000); // S bit setBitFSEQ(reg, 0x0f00); // Z bit
setBitFSEQ(gprT1, 0x000f); // ZS bit setBitFSEQ(reg, 0xf000); // S bit
setBitFSEQ(gprT1, 0x00f0); // SS bit setBitFSEQ(reg, 0x000f); // ZS bit
xXOR(gprT1, imm); setBitFSEQ(reg, 0x00f0); // SS bit
xSUB(gprT1, 1); xXOR(reg, imm);
xSHR(gprT1, 31); xSUB(reg, 1);
mVUallocVIb(mVU, gprT1, _It_); xSHR(reg, 31);
mVU.regAlloc->clearNeeded(reg);
mVU.profiler.EmitOp(opFSEQ); mVU.profiler.EmitOp(opFSEQ);
} }
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); } pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
@ -834,15 +841,11 @@ mVUop(mVU_IADD)
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); } pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
pass2 pass2
{ {
mVUallocVIa(mVU, gprT1, _Is_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
if (_It_ != _Is_) const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
{ xADD(regS, regT);
mVUallocVIa(mVU, gprT2, _It_); mVU.regAlloc->clearNeeded(regS);
xADD(gprT1b, gprT2b); mVU.regAlloc->clearNeeded(regT);
}
else
xADD(gprT1b, gprT1b);
mVUallocVIb(mVU, gprT1, _Id_);
mVU.profiler.EmitOp(opIADD); mVU.profiler.EmitOp(opIADD);
} }
pass3 { mVUlog("IADD vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); } pass3 { mVUlog("IADD vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
@ -853,10 +856,10 @@ mVUop(mVU_IADDI)
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_); } pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_); }
pass2 pass2
{ {
mVUallocVIa(mVU, gprT1, _Is_); const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
if (_Imm5_ != 0) if (_Imm5_ != 0)
xADD(gprT1b, _Imm5_); xADD(regS, _Imm5_);
mVUallocVIb(mVU, gprT1, _It_); mVU.regAlloc->clearNeeded(regS);
mVU.profiler.EmitOp(opIADDI); mVU.profiler.EmitOp(opIADDI);
} }
pass3 { mVUlog("IADDI vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm5_); } pass3 { mVUlog("IADDI vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm5_); }
@ -867,10 +870,10 @@ mVUop(mVU_IADDIU)
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); } pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); }
pass2 pass2
{ {
mVUallocVIa(mVU, gprT1, _Is_); const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
if (_Imm15_ != 0) if (_Imm15_ != 0)
xADD(gprT1b, _Imm15_); xADD(regS, _Imm15_);
mVUallocVIb(mVU, gprT1, _It_); mVU.regAlloc->clearNeeded(regS);
mVU.profiler.EmitOp(opIADDIU); mVU.profiler.EmitOp(opIADDIU);
} }
pass3 { mVUlog("IADDIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); } pass3 { mVUlog("IADDIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
@ -881,13 +884,12 @@ mVUop(mVU_IAND)
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); } pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
pass2 pass2
{ {
mVUallocVIa(mVU, gprT1, _Is_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
if (_It_ != _Is_) if (_It_ != _Is_)
{ xAND(regS, regT);
mVUallocVIa(mVU, gprT2, _It_); mVU.regAlloc->clearNeeded(regS);
xAND(gprT1, gprT2); mVU.regAlloc->clearNeeded(regT);
}
mVUallocVIb(mVU, gprT1, _Id_);
mVU.profiler.EmitOp(opIAND); mVU.profiler.EmitOp(opIAND);
} }
pass3 { mVUlog("IAND vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); } pass3 { mVUlog("IAND vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
@ -898,13 +900,12 @@ mVUop(mVU_IOR)
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); } pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
pass2 pass2
{ {
mVUallocVIa(mVU, gprT1, _Is_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
if (_It_ != _Is_) if (_It_ != _Is_)
{ xOR(regS, regT);
mVUallocVIa(mVU, gprT2, _It_); mVU.regAlloc->clearNeeded(regS);
xOR(gprT1, gprT2); mVU.regAlloc->clearNeeded(regT);
}
mVUallocVIb(mVU, gprT1, _Id_);
mVU.profiler.EmitOp(opIOR); mVU.profiler.EmitOp(opIOR);
} }
pass3 { mVUlog("IOR vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); } pass3 { mVUlog("IOR vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
@ -917,15 +918,17 @@ mVUop(mVU_ISUB)
{ {
if (_It_ != _Is_) if (_It_ != _Is_)
{ {
mVUallocVIa(mVU, gprT1, _Is_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
mVUallocVIa(mVU, gprT2, _It_); const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
xSUB(gprT1b, gprT2b); xSUB(regS, regT);
mVUallocVIb(mVU, gprT1, _Id_); mVU.regAlloc->clearNeeded(regS);
mVU.regAlloc->clearNeeded(regT);
} }
else else
{ {
xXOR(gprT1, gprT1); const xRegister32& regD = mVU.regAlloc->allocGPR(-1, _Id_, mVUlow.backupVI);
mVUallocVIb(mVU, gprT1, _Id_); xXOR(regD, regD);
mVU.regAlloc->clearNeeded(regD);
} }
mVU.profiler.EmitOp(opISUB); mVU.profiler.EmitOp(opISUB);
} }
@ -937,10 +940,10 @@ mVUop(mVU_ISUBIU)
pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); } pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
pass2 pass2
{ {
mVUallocVIa(mVU, gprT1, _Is_); const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
if (_Imm15_ != 0) if (_Imm15_ != 0)
xSUB(gprT1b, _Imm15_); xSUB(regS, _Imm15_);
mVUallocVIb(mVU, gprT1, _It_); mVU.regAlloc->clearNeeded(regS);
mVU.profiler.EmitOp(opISUBIU); mVU.profiler.EmitOp(opISUBIU);
} }
pass3 { mVUlog("ISUBIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); } pass3 { mVUlog("ISUBIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
@ -964,10 +967,20 @@ mVUop(mVU_MFIR)
pass2 pass2
{ {
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUallocVIa(mVU, gprT1, _Is_, true); if (_Is_ != 0)
xMOVDZX(Ft, gprT1); {
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, -1);
xMOVSX(xRegister32(regS), xRegister16(regS));
// TODO: Broadcast instead
xMOVDZX(Ft, regS);
if (!_XYZW_SS) if (!_XYZW_SS)
mVUunpack_xyzw(Ft, Ft, 0); mVUunpack_xyzw(Ft, Ft, 0);
mVU.regAlloc->clearNeeded(regS);
}
else
{
xPXOR(Ft, Ft);
}
mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Ft);
mVU.profiler.EmitOp(opMFIR); mVU.profiler.EmitOp(opMFIR);
} }
@ -1038,8 +1051,9 @@ mVUop(mVU_MTIR)
pass2 pass2
{ {
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
xMOVD(gprT1, Fs); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
mVUallocVIb(mVU, gprT1, _It_); xMOVD(regT, Fs);
mVU.regAlloc->clearNeeded(regT);
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opMTIR); mVU.profiler.EmitOp(opMTIR);
} }
@ -1064,14 +1078,14 @@ mVUop(mVU_ILW)
{ {
void* ptr = mVU.regs().Mem + offsetSS; void* ptr = mVU.regs().Mem + offsetSS;
mVUallocVIa(mVU, gprT2, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
if (_Imm11_ != 0) if (_Imm11_ != 0)
xADD(gprT2, _Imm11_); xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
mVUallocVIb(mVU, gprT1, _It_); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opILW); mVU.profiler.EmitOp(opILW);
} }
pass3 { mVUlog("ILW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); } pass3 { mVUlog("ILW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
@ -1092,15 +1106,19 @@ mVUop(mVU_ILWR)
void* ptr = mVU.regs().Mem + offsetSS; void* ptr = mVU.regs().Mem + offsetSS;
if (_Is_) if (_Is_)
{ {
mVUallocVIa(mVU, gprT2, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
mVUaddrFix (mVU, gprT2q); mVUaddrFix (mVU, gprT1q);
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
mVU.regAlloc->clearNeeded(regT);
} }
else else
{ {
xMOVZX(gprT1, ptr16[ptr]); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[ptr]);
mVU.regAlloc->clearNeeded(regT);
} }
mVUallocVIb(mVU, gprT1, _It_);
mVU.profiler.EmitOp(opILWR); mVU.profiler.EmitOp(opILWR);
} }
pass3 { mVUlog("ILWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); } pass3 { mVUlog("ILWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
@ -1110,7 +1128,7 @@ mVUop(mVU_ILWR)
// ISW/ISWR // ISW/ISWR
//------------------------------------------------------------------ //------------------------------------------------------------------
static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg) static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg, const xRegister32& val)
{ {
if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr) if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr)
{ {
@ -1118,10 +1136,10 @@ static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg)
auto writeBackAt = [&](int offset) { auto writeBackAt = [&](int offset) {
if (register_offset == -1) if (register_offset == -1)
{ {
xLEA(gprT3q, ptr[(void*)((sptr)base_ptr + offset)]); xLEA(gprT2q, ptr[(void*)((sptr)base_ptr + offset)]);
register_offset = offset; register_offset = offset;
} }
xMOV(ptr32[gprT3q + reg + (offset - register_offset)], gprT1); xMOV(ptr32[gprT2q + reg + (offset - register_offset)], val);
}; };
if (_X) writeBackAt(0); if (_X) writeBackAt(0);
if (_Y) writeBackAt(4); if (_Y) writeBackAt(4);
@ -1130,17 +1148,17 @@ static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg)
} }
else if (reg.IsEmpty()) else if (reg.IsEmpty())
{ {
if (_X) xMOV(ptr32[(void*)((uptr)base_ptr )], gprT1); if (_X) xMOV(ptr32[(void*)((uptr)base_ptr )], val);
if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr + 4)], gprT1); if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr + 4)], val);
if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr + 8)], gprT1); if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr + 8)], val);
if (_W) xMOV(ptr32[(void*)((uptr)base_ptr + 12)], gprT1); if (_W) xMOV(ptr32[(void*)((uptr)base_ptr + 12)], val);
} }
else else
{ {
if (_X) xMOV(ptr32[base_ptr+reg ], gprT1); if (_X) xMOV(ptr32[base_ptr+reg ], val);
if (_Y) xMOV(ptr32[base_ptr+reg + 4], gprT1); if (_Y) xMOV(ptr32[base_ptr+reg + 4], val);
if (_Z) xMOV(ptr32[base_ptr+reg + 8], gprT1); if (_Z) xMOV(ptr32[base_ptr+reg + 8], val);
if (_W) xMOV(ptr32[base_ptr+reg + 12], gprT1); if (_W) xMOV(ptr32[base_ptr+reg + 12], val);
} }
} }
@ -1156,15 +1174,15 @@ mVUop(mVU_ISW)
{ {
void* ptr = mVU.regs().Mem; void* ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
if (_Imm11_ != 0) if (_Imm11_ != 0)
xADD(gprT2, _Imm11_); xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
mVUallocVIa(mVU, gprT1, _It_); // If regT is dirty, the high bits might not be zero.
writeBackISW(mVU, ptr, gprT2q); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
writeBackISW(mVU, ptr, gprT1q, regT);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opISW); mVU.profiler.EmitOp(opISW);
} }
pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); } pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
@ -1184,12 +1202,13 @@ mVUop(mVU_ISWR)
xAddressReg is = xEmptyReg; xAddressReg is = xEmptyReg;
if (_Is_) if (_Is_)
{ {
mVUallocVIa(mVU, gprT2, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
mVUaddrFix(mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
is = gprT2q; is = gprT1q;
} }
mVUallocVIa(mVU, gprT1, _It_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
writeBackISW(mVU, ptr, is); writeBackISW(mVU, ptr, is, regT);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opISWR); mVU.profiler.EmitOp(opISWR);
} }
@ -1206,15 +1225,13 @@ mVUop(mVU_LQ)
pass2 pass2
{ {
void* ptr = mVU.regs().Mem; void* ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
if (_Imm11_ != 0) if (_Imm11_ != 0)
xADD(gprT2, _Imm11_); xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W); mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W);
mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Ft);
mVU.profiler.EmitOp(opLQ); mVU.profiler.EmitOp(opLQ);
} }
@ -1230,12 +1247,12 @@ mVUop(mVU_LQD)
xAddressReg is = xEmptyReg; xAddressReg is = xEmptyReg;
if (_Is_ || isVU0) // Access VU1 regs mem-map in !_Is_ case if (_Is_ || isVU0) // Access VU1 regs mem-map in !_Is_ case
{ {
mVUallocVIa(mVU, gprT2, _Is_); const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Is_, mVUlow.backupVI);
xSUB(gprT2b, 1); xDEC(regS);
if (_Is_) xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
mVUallocVIb(mVU, gprT2, _Is_); mVU.regAlloc->clearNeeded(regS);
mVUaddrFix(mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
is = gprT2q; is = gprT1q;
} }
else else
{ {
@ -1250,7 +1267,7 @@ mVUop(mVU_LQD)
} }
else else
{ {
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W); mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, is), _X_Y_Z_W);
} }
mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Ft);
} }
@ -1268,12 +1285,12 @@ mVUop(mVU_LQI)
xAddressReg is = xEmptyReg; xAddressReg is = xEmptyReg;
if (_Is_) if (_Is_)
{ {
mVUallocVIa(mVU, gprT1, _Is_); const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Is_, mVUlow.backupVI);
xMOV(gprT2, gprT1); xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
xADD(gprT1b, 1); xINC(regS);
mVUallocVIb(mVU, gprT1, _Is_); mVU.regAlloc->clearNeeded(regS);
mVUaddrFix (mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
is = gprT2q; is = gprT1q;
} }
if (!mVUlow.noWriteVF) if (!mVUlow.noWriteVF)
{ {
@ -1281,7 +1298,7 @@ mVUop(mVU_LQI)
if (is.IsEmpty()) if (is.IsEmpty())
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W); mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
else else
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W); mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, is), _X_Y_Z_W);
mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Ft);
} }
mVU.profiler.EmitOp(opLQI); mVU.profiler.EmitOp(opLQI);
@ -1300,15 +1317,13 @@ mVUop(mVU_SQ)
{ {
void* ptr = mVU.regs().Mem; void* ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _It_); mVU.regAlloc->moveVIToGPR(gprT1, _It_);
if (!_It_)
xXOR(gprT2, gprT2);
if (_Imm11_ != 0) if (_Imm11_ != 0)
xADD(gprT2, _Imm11_); xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1); mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQ); mVU.profiler.EmitOp(opSQ);
} }
@ -1324,12 +1339,12 @@ mVUop(mVU_SQD)
xAddressReg it = xEmptyReg; xAddressReg it = xEmptyReg;
if (_It_ || isVU0) // Access VU1 regs mem-map in !_It_ case if (_It_ || isVU0) // Access VU1 regs mem-map in !_It_ case
{ {
mVUallocVIa(mVU, gprT2, _It_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, _It_, mVUlow.backupVI);
xSUB(gprT2b, 1); xDEC(regT);
if (_It_) xMOVSX(gprT1, xRegister16(regT)); // TODO: Confirm
mVUallocVIb(mVU, gprT2, _It_); mVU.regAlloc->clearNeeded(regT);
mVUaddrFix(mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
it = gprT2q; it = gprT1q;
} }
else else
{ {
@ -1339,7 +1354,7 @@ mVUop(mVU_SQD)
if (it.IsEmpty()) if (it.IsEmpty())
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1); mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
else else
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, it), _X_Y_Z_W, 1); mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, it), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQD); mVU.profiler.EmitOp(opSQD);
} }
@ -1354,15 +1369,15 @@ mVUop(mVU_SQI)
void* ptr = mVU.regs().Mem; void* ptr = mVU.regs().Mem;
if (_It_) if (_It_)
{ {
mVUallocVIa(mVU, gprT1, _It_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, _It_, mVUlow.backupVI);
xMOV(gprT2, gprT1); xMOVSX(gprT1, xRegister16(regT)); // TODO: Confirm
xADD(gprT1b, 1); xINC(regT);
mVUallocVIb(mVU, gprT1, _It_); mVU.regAlloc->clearNeeded(regT);
mVUaddrFix(mVU, gprT2q); mVUaddrFix(mVU, gprT1q);
} }
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
if (_It_) if (_It_)
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1); mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W, 1);
else else
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1); mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
@ -1426,22 +1441,24 @@ mVUop(mVU_RNEXT)
pass2 pass2
{ {
// algorithm from www.project-fao.org // algorithm from www.project-fao.org
xMOV(gprT3, ptr32[Rmem]); const xRegister32& temp3 = mVU.regAlloc->allocGPR();
xMOV(gprT1, gprT3); xMOV(temp3, ptr32[Rmem]);
xMOV(gprT1, temp3);
xSHR(gprT1, 4); xSHR(gprT1, 4);
xAND(gprT1, 1); xAND(gprT1, 1);
xMOV(gprT2, gprT3); xMOV(gprT2, temp3);
xSHR(gprT2, 22); xSHR(gprT2, 22);
xAND(gprT2, 1); xAND(gprT2, 1);
xSHL(gprT3, 1); xSHL(temp3, 1);
xXOR(gprT1, gprT2); xXOR(gprT1, gprT2);
xXOR(gprT3, gprT1); xXOR(temp3, gprT1);
xAND(gprT3, 0x007fffff); xAND(temp3, 0x007fffff);
xOR (gprT3, 0x3f800000); xOR (temp3, 0x3f800000);
xMOV(ptr32[Rmem], gprT3); xMOV(ptr32[Rmem], temp3);
mVU_RGET_(mVU, gprT3); mVU_RGET_(mVU, temp3);
mVU.regAlloc->clearNeeded(temp3);
mVU.profiler.EmitOp(opRNEXT); mVU.profiler.EmitOp(opRNEXT);
} }
pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); } pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); }
@ -1512,8 +1529,9 @@ mVUop(mVU_XTOP)
} }
pass2 pass2
{ {
xMOVZX(gprT1, ptr16[&mVU.getVifRegs().top]); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
mVUallocVIb(mVU, gprT1, _It_); xMOVZX(regT, ptr16[&mVU.getVifRegs().top]);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opXTOP); mVU.profiler.EmitOp(opXTOP);
} }
pass3 { mVUlog("XTOP vi%02d", _Ft_); } pass3 { mVUlog("XTOP vi%02d", _Ft_); }
@ -1530,9 +1548,10 @@ mVUop(mVU_XITOP)
} }
pass2 pass2
{ {
xMOVZX(gprT1, ptr16[&mVU.getVifRegs().itop]); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xAND(gprT1, isVU1 ? 0x3ff : 0xff); xMOVZX(regT, ptr16[&mVU.getVifRegs().itop]);
mVUallocVIb(mVU, gprT1, _It_); xAND(regT, isVU1 ? 0x3ff : 0xff);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opXITOP); mVU.profiler.EmitOp(opXITOP);
} }
pass3 { mVUlog("XITOP vi%02d", _Ft_); } pass3 { mVUlog("XITOP vi%02d", _Ft_); }
@ -1634,6 +1653,8 @@ void _vuXGKICKTransfermVU(bool flush)
static __fi void mVU_XGKICK_SYNC(mV, bool flush) static __fi void mVU_XGKICK_SYNC(mV, bool flush)
{ {
mVU.regAlloc->flushCallerSavedRegisters();
// Add the single cycle remainder after this instruction, some games do the store // Add the single cycle remainder after this instruction, some games do the store
// on the second instruction after the kick and that needs to go through first // on the second instruction after the kick and that needs to go through first
// but that's VERY close.. // but that's VERY close..
@ -1652,14 +1673,16 @@ static __fi void mVU_XGKICK_SYNC(mV, bool flush)
static __fi void mVU_XGKICK_DELAY(mV) static __fi void mVU_XGKICK_DELAY(mV)
{ {
mVUbackupRegs(mVU); mVU.regAlloc->flushCallerSavedRegisters();
mVUbackupRegs(mVU, true, true);
#if 0 // XGkick Break - ToDo: Change "SomeGifPathValue" to w/e needs to be tested #if 0 // XGkick Break - ToDo: Change "SomeGifPathValue" to w/e needs to be tested
xTEST (ptr32[&SomeGifPathValue], 1); // If '1', breaks execution xTEST (ptr32[&SomeGifPathValue], 1); // If '1', breaks execution
xMOV (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6); xMOV (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6);
xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6)); xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6));
#endif #endif
xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]); xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]);
mVUrestoreRegs(mVU); mVUrestoreRegs(mVU, true, true);
} }
mVUop(mVU_XGKICK) mVUop(mVU_XGKICK)
@ -1687,10 +1710,10 @@ mVUop(mVU_XGKICK)
mVUinfo.doXGKICK = false; mVUinfo.doXGKICK = false;
} }
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, -1);
if (!CHECK_XGKICKHACK) if (!CHECK_XGKICKHACK)
{ {
mVUallocVIa(mVU, gprT1, _Is_); xMOV(ptr32[&mVU.VIxgkick], regS);
xMOV(ptr32[&mVU.VIxgkick], gprT1);
} }
else else
{ {
@ -1702,11 +1725,12 @@ mVUop(mVU_XGKICK)
xSUB(gprT2, ptr32[&mVU.cycles]); xSUB(gprT2, ptr32[&mVU.cycles]);
xADD(gprT2, ptr32[&VU1.cycle]); xADD(gprT2, ptr32[&VU1.cycle]);
xMOV(ptr32[&VU1.xgkicklastcycle], gprT2); xMOV(ptr32[&VU1.xgkicklastcycle], gprT2);
mVUallocVIa(mVU, gprT1, _Is_); xMOV(gprT1, regS);
xAND(gprT1, 0x3FF); xAND(gprT1, 0x3FF);
xSHL(gprT1, 4); xSHL(gprT1, 4);
xMOV(ptr32[&VU1.xgkickaddr], gprT1); xMOV(ptr32[&VU1.xgkickaddr], gprT1);
} }
mVU.regAlloc->clearNeeded(regS);
mVU.profiler.EmitOp(opXGKICK); mVU.profiler.EmitOp(opXGKICK);
} }
pass3 { mVUlog("XGKICK vi%02d", _Fs_); } pass3 { mVUlog("XGKICK vi%02d", _Fs_); }
@ -1803,22 +1827,25 @@ mVUop(mVU_BAL)
{ {
if (!mVUlow.evilBranch) if (!mVUlow.evilBranch)
{ {
xMOV(gprT1, bSaveAddr); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
mVUallocVIb(mVU, gprT1, _It_); xMOV(regT, bSaveAddr);
mVU.regAlloc->clearNeeded(regT);
} }
else else
{ {
incPC(-2); incPC(-2);
DevCon.Warning("Linking BAL from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]); DevCon.Warning("Linking BAL from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
incPC(2); incPC(2);
if (isEvilBlock)
xMOV(gprT1, ptr32[&mVU.evilBranch]);
else
xMOV(gprT1, ptr32[&mVU.badBranch]);
xADD(gprT1, 8); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xSHR(gprT1, 3); if (isEvilBlock)
mVUallocVIb(mVU, gprT1, _It_); xMOV(regT, ptr32[&mVU.evilBranch]);
else
xMOV(regT, ptr32[&mVU.badBranch]);
xADD(regT, 8);
xSHR(regT, 3);
mVU.regAlloc->clearNeeded(regT);
} }
if (mVUlow.badBranch) { xMOV(ptr32[&mVU.badBranch], branchAddr(mVU)); } if (mVUlow.badBranch) { xMOV(ptr32[&mVU.badBranch], branchAddr(mVU)); }
@ -1837,14 +1864,15 @@ mVUop(mVU_IBEQ)
if (mVUlow.memReadIs) if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]); xMOV(gprT1, ptr32[&mVU.VIbackup]);
else else
mVUallocVIa(mVU, gprT1, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (mVUlow.memReadIt) if (mVUlow.memReadIt)
xXOR(gprT1, ptr32[&mVU.VIbackup]); xXOR(gprT1, ptr32[&mVU.VIbackup]);
else else
{ {
mVUallocVIa(mVU, gprT2, _It_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_);
xXOR(gprT1, gprT2); xXOR(gprT1, regT);
mVU.regAlloc->clearNeeded(regT);
} }
if (!(isBadOrEvil)) if (!(isBadOrEvil))
@ -1865,7 +1893,7 @@ mVUop(mVU_IBGEZ)
if (mVUlow.memReadIs) if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]); xMOV(gprT1, ptr32[&mVU.VIbackup]);
else else
mVUallocVIa(mVU, gprT1, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!(isBadOrEvil)) if (!(isBadOrEvil))
xMOV(ptr32[&mVU.branch], gprT1); xMOV(ptr32[&mVU.branch], gprT1);
else else
@ -1884,7 +1912,7 @@ mVUop(mVU_IBGTZ)
if (mVUlow.memReadIs) if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]); xMOV(gprT1, ptr32[&mVU.VIbackup]);
else else
mVUallocVIa(mVU, gprT1, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!(isBadOrEvil)) if (!(isBadOrEvil))
xMOV(ptr32[&mVU.branch], gprT1); xMOV(ptr32[&mVU.branch], gprT1);
else else
@ -1903,7 +1931,7 @@ mVUop(mVU_IBLEZ)
if (mVUlow.memReadIs) if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]); xMOV(gprT1, ptr32[&mVU.VIbackup]);
else else
mVUallocVIa(mVU, gprT1, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!(isBadOrEvil)) if (!(isBadOrEvil))
xMOV(ptr32[&mVU.branch], gprT1); xMOV(ptr32[&mVU.branch], gprT1);
else else
@ -1922,7 +1950,7 @@ mVUop(mVU_IBLTZ)
if (mVUlow.memReadIs) if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]); xMOV(gprT1, ptr32[&mVU.VIbackup]);
else else
mVUallocVIa(mVU, gprT1, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!(isBadOrEvil)) if (!(isBadOrEvil))
xMOV(ptr32[&mVU.branch], gprT1); xMOV(ptr32[&mVU.branch], gprT1);
else else
@ -1941,14 +1969,15 @@ mVUop(mVU_IBNE)
if (mVUlow.memReadIs) if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]); xMOV(gprT1, ptr32[&mVU.VIbackup]);
else else
mVUallocVIa(mVU, gprT1, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (mVUlow.memReadIt) if (mVUlow.memReadIt)
xXOR(gprT1, ptr32[&mVU.VIbackup]); xXOR(gprT1, ptr32[&mVU.VIbackup]);
else else
{ {
mVUallocVIa(mVU, gprT2, _It_); const xRegister32& regT = mVU.regAlloc->allocGPR(_It_);
xXOR(gprT1, gprT2); xXOR(gprT1, regT);
mVU.regAlloc->clearNeeded(regT);
} }
if (!(isBadOrEvil)) if (!(isBadOrEvil))
@ -1964,7 +1993,7 @@ void normJumpPass2(mV)
{ {
if (!mVUlow.constJump.isValid || mVUlow.evilBranch) if (!mVUlow.constJump.isValid || mVUlow.evilBranch)
{ {
mVUallocVIa(mVU, gprT1, _Is_); mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
xSHL(gprT1, 3); xSHL(gprT1, 3);
xAND(gprT1, mVU.microMemSize - 8); xAND(gprT1, mVU.microMemSize - 8);
@ -2008,17 +2037,18 @@ mVUop(mVU_JALR)
normJumpPass2(mVU); normJumpPass2(mVU);
if (!mVUlow.evilBranch) if (!mVUlow.evilBranch)
{ {
xMOV(gprT1, bSaveAddr); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
mVUallocVIb(mVU, gprT1, _It_); xMOV(regT, bSaveAddr);
mVU.regAlloc->clearNeeded(regT);
} }
if (mVUlow.evilBranch) if (mVUlow.evilBranch)
{ {
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
if (isEvilBlock) if (isEvilBlock)
{ {
xMOV(gprT1, ptr32[&mVU.evilBranch]); xMOV(regT, ptr32[&mVU.evilBranch]);
xADD(gprT1, 8); xADD(regT, 8);
xSHR(gprT1, 3); xSHR(regT, 3);
mVUallocVIb(mVU, gprT1, _It_);
} }
else else
{ {
@ -2026,11 +2056,11 @@ mVUop(mVU_JALR)
DevCon.Warning("Linking JALR from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]); DevCon.Warning("Linking JALR from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
incPC(2); incPC(2);
xMOV(gprT1, ptr32[&mVU.badBranch]); xMOV(regT, ptr32[&mVU.badBranch]);
xADD(gprT1, 8); xADD(regT, 8);
xSHR(gprT1, 3); xSHR(regT, 3);
mVUallocVIb(mVU, gprT1, _It_);
} }
mVU.regAlloc->clearNeeded(regT);
} }
mVU.profiler.EmitOp(opJALR); mVU.profiler.EmitOp(opJALR);

View File

@ -37,13 +37,6 @@ void setupMacroOp(int mode, const char* opName)
// Set up reg allocation // Set up reg allocation
microVU0.regAlloc->reset(true); microVU0.regAlloc->reset(true);
if (mode & 0x110) // X86 regs are modified, or flags modified
{
_freeX86reg(eax);
_freeX86reg(ecx);
_freeX86reg(edx);
}
if (mode & 0x03) // Q will be read/written if (mode & 0x03) // Q will be read/written
_freeXMMreg(xmmPQ.Id); _freeXMMreg(xmmPQ.Id);
@ -127,6 +120,17 @@ void mVUFreeCOP2XMMreg(int hostreg)
microVU0.regAlloc->clearRegCOP2(hostreg); microVU0.regAlloc->clearRegCOP2(hostreg);
} }
void mVUFreeCOP2GPR(int hostreg)
{
microVU0.regAlloc->clearGPRCOP2(hostreg);
}
bool mVUIsReservedCOP2(int hostreg)
{
// gprF1 through 3 is not correctly used in COP2 mode.
return (hostreg == gprT1.GetId() || hostreg == gprT2.GetId() || hostreg == gprF0.GetId());
}
#define REC_COP2_mVU0(f, opName, mode) \ #define REC_COP2_mVU0(f, opName, mode) \
void recV##f() \ void recV##f() \
{ \ { \
@ -429,11 +433,22 @@ static void recCFC2()
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE); const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
pxAssert(!GPR_IS_CONST1(_Rt_)); pxAssert(!GPR_IS_CONST1(_Rt_));
// FixMe: Should R-Reg have upper 9 bits 0? if (_Rd_ == 0) // why would you read vi00?
if (_Rd_ >= REG_STATUS_FLAG) {
xXOR(xRegister32(regt), xRegister32(regt));
}
else if (_Rd_ >= REG_STATUS_FLAG) // FixMe: Should R-Reg have upper 9 bits 0?
{
xMOVSX(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]); xMOVSX(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
}
else else
xMOV(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]); {
const int vireg = _allocIfUsedVItoX86(_Rd_, MODE_READ);
if (vireg >= 0)
xMOVZX(xRegister32(regt), xRegister16(vireg));
else
xMOVZX(xRegister32(regt), ptr16[&vu0Regs.VI[_Rd_].UL]);
}
} }
static void recCTC2() static void recCTC2()
@ -532,10 +547,63 @@ static void recCTC2()
_freeXMMregWithoutWriteback(xmmreg); _freeXMMregWithoutWriteback(xmmreg);
} }
// Need to expand this out, because we want to write as 16 bits. // Little bit nasty, but optimal codegen.
const int gprreg = _allocIfUsedGPRtoX86(_Rt_, MODE_READ);
const int vireg = _allocIfUsedVItoX86(_Rd_, MODE_WRITE);
if (vireg >= 0)
{
if (gprreg >= 0)
{
xMOVZX(xRegister32(vireg), xRegister16(gprreg));
}
else
{
// it could be in an xmm..
const int gprxmmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
if (gprxmmreg >= 0)
{
xMOVD(xRegister32(vireg), xRegisterSSE(gprxmmreg));
xMOVZX(xRegister32(vireg), xRegister16(vireg));
}
else if (GPR_IS_CONST1(_Rt_))
{
if (_Rt_ != 0)
xMOV(xRegister32(vireg), (g_cpuConstRegs[_Rt_].UL[0] & 0xFFFFu));
else
xXOR(xRegister32(vireg), xRegister32(vireg));
}
else
{
xMOVZX(xRegister32(vireg), ptr16[&cpuRegs.GPR.r[_Rt_].US[0]]);
}
}
}
else
{
if (gprreg >= 0)
{
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], xRegister16(gprreg));
}
else
{
const int gprxmmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
if (gprxmmreg >= 0)
{
xMOVD(eax, xRegisterSSE(gprxmmreg));
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
}
else if (GPR_IS_CONST1(_Rt_))
{
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], (g_cpuConstRegs[_Rt_].UL[0] & 0xFFFFu));
}
else
{
_eeMoveGPRtoR(eax, _Rt_); _eeMoveGPRtoR(eax, _Rt_);
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax); xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
} }
}
}
}
else else
{ {
_eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_); _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_);
@ -562,7 +630,7 @@ static void recQMFC2()
mVUFinishVU0(); mVUFinishVU0();
} }
const bool vf_used = COP2INST_USEDTEST(_Rd_); const bool vf_used = EEINST_VFUSEDTEST(_Rd_);
const int ftreg = _allocVFtoXMMreg(_Rd_, MODE_READ); const int ftreg = _allocVFtoXMMreg(_Rd_, MODE_READ);
_deleteEEreg128(_Rt_); _deleteEEreg128(_Rt_);
@ -607,7 +675,7 @@ static void recQMTC2()
if (_Rt_) if (_Rt_)
{ {
// if we have to flush to memory anyway (has a constant or is x86), force load. // if we have to flush to memory anyway (has a constant or is x86), force load.
[[maybe_unused]] const bool vf_used = COP2INST_USEDTEST(_Rd_); [[maybe_unused]] const bool vf_used = EEINST_VFUSEDTEST(_Rd_);
const bool can_rename = EEINST_RENAMETEST(_Rt_); const bool can_rename = EEINST_RENAMETEST(_Rt_);
const int rtreg = (GPR_IS_DIRTY_CONST(_Rt_) || _hasX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE)) ? const int rtreg = (GPR_IS_DIRTY_CONST(_Rt_) || _hasX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE)) ?
_allocGPRtoXMMreg(_Rt_, MODE_READ) : _allocGPRtoXMMreg(_Rt_, MODE_READ) :

View File

@ -154,13 +154,10 @@ static const char branchSTR[16][8] = {
#define gprT1 eax // eax - Temp Reg #define gprT1 eax // eax - Temp Reg
#define gprT2 ecx // ecx - Temp Reg #define gprT2 ecx // ecx - Temp Reg
#define gprT3 edx // edx - Temp Reg
#define gprT1q rax // eax - Temp Reg #define gprT1q rax // eax - Temp Reg
#define gprT2q rcx // ecx - Temp Reg #define gprT2q rcx // ecx - Temp Reg
#define gprT3q rdx // edx - Temp Reg
#define gprT1b ax // Low 16-bit of gprT1 (eax) #define gprT1b ax // Low 16-bit of gprT1 (eax)
#define gprT2b cx // Low 16-bit of gprT2 (ecx) #define gprT2b cx // Low 16-bit of gprT2 (ecx)
#define gprT3b dx // Low 16-bit of gprT3 (edx)
#define gprF0 ebx // Status Flag 0 #define gprF0 ebx // Status Flag 0
#define gprF1 r12d // Status Flag 1 #define gprF1 r12d // Status Flag 1

View File

@ -14,6 +14,7 @@
*/ */
#pragma once #pragma once
#include <bitset>
//------------------------------------------------------------------ //------------------------------------------------------------------
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging... // Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
@ -149,14 +150,57 @@ __fi void mVUbackupRegs(microVU& mVU, bool toMemory = false, bool onlyNeeded = f
{ {
if (toMemory) if (toMemory)
{ {
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++) int num_xmms = 0, num_gprs = 0;
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
{ {
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
continue;
if (!onlyNeeded || mVU.regAlloc->checkCachedGPR(i))
{
num_gprs++;
xPUSH(xRegister64(i));
}
}
std::bitset<iREGCNT_XMM> save_xmms;
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i) if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i)
xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i)); {
save_xmms[i] = true;
num_xmms++;
}
}
// we need 16 byte alignment on the stack
#ifdef _WIN32
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64)) + 32;
int stack_offset = 32;
#else
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64));
int stack_offset = 0;
#endif
if (stack_size > 0)
{
xSUB(rsp, stack_size);
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (save_xmms[i])
{
xMOVAPS(ptr128[rsp + stack_offset], xRegisterSSE(i));
stack_offset += sizeof(u128);
}
}
} }
} }
else else
{ {
// TODO(Stenzek): get rid of xmmbackup
mVU.regAlloc->flushAll(); // Flush Regalloc mVU.regAlloc->flushAll(); // Flush Regalloc
xMOVAPS(ptr128[&mVU.xmmBackup[xmmPQ.Id][0]], xmmPQ); xMOVAPS(ptr128[&mVU.xmmBackup[xmmPQ.Id][0]], xmmPQ);
} }
@ -167,47 +211,64 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false, bool onlyNeeded
{ {
if (fromMemory) if (fromMemory)
{ {
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++) int num_xmms = 0, num_gprs = 0;
std::bitset<iREGCNT_GPR> save_gprs;
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
{ {
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
continue;
if (!onlyNeeded || mVU.regAlloc->checkCachedGPR(i))
{
save_gprs[i] = true;
num_gprs++;
}
}
std::bitset<iREGCNT_XMM> save_xmms;
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i) if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i)
xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]); {
save_xmms[i] = true;
num_xmms++;
}
}
#ifdef _WIN32
const int stack_extra = 32;
#else
const int stack_extra = 0;
#endif
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64)) + stack_extra;
if (num_xmms > 0)
{
int stack_offset = (num_xmms - 1) * sizeof(u128) + stack_extra;
for (int i = static_cast<int>(iREGCNT_XMM - 1); i >= 0; i--)
{
if (!save_xmms[i])
continue;
xMOVAPS(xRegisterSSE(i), ptr128[rsp + stack_offset]);
stack_offset -= sizeof(u128);
}
}
if (stack_size > 0)
xADD(rsp, stack_size);
for (int i = static_cast<int>(iREGCNT_GPR - 1); i >= 0; i--)
{
if (save_gprs[i])
xPOP(xRegister64(i));
} }
} }
else else
{
xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]); xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
}
// Scoped guard that brackets a region of recompiler output with a register
// backup and the matching restore: the constructor calls mVUbackupRegs() and
// the destructor calls mVUrestoreRegs(), both with the same mode flag.
class mVUScopedXMMBackup
{
	microVU& mVU;
	bool fromMemory;

public:
	// mVU        - microVU instance handed to the backup/restore helpers.
	// fromMemory - forwarded unchanged to both helpers (it is the `toMemory`
	//              argument of mVUbackupRegs and the `fromMemory` argument of
	//              mVUrestoreRegs), so the two sides always agree on the mode.
	mVUScopedXMMBackup(microVU& mVU, bool fromMemory)
		: mVU(mVU)
		, fromMemory(fromMemory)
	{
		mVUbackupRegs(mVU, fromMemory);
	}

	// A copy would run the destructor a second time and issue a restore with
	// no matching backup, so the guard is neither copyable nor assignable.
	mVUScopedXMMBackup(const mVUScopedXMMBackup&) = delete;
	mVUScopedXMMBackup& operator=(const mVUScopedXMMBackup&) = delete;

	~mVUScopedXMMBackup()
	{
		mVUrestoreRegs(mVU, fromMemory);
	}
};
_mVUt void mVUprintRegs()
{
microVU& mVU = mVUx;
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
{
Console.WriteLn("xmm%d = [0x%08x,0x%08x,0x%08x,0x%08x]", i,
mVU.xmmBackup[i][0], mVU.xmmBackup[i][1],
mVU.xmmBackup[i][2], mVU.xmmBackup[i][3]);
}
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
{
Console.WriteLn("xmm%d = [%f,%f,%f,%f]", i,
(float&)mVU.xmmBackup[i][0], (float&)mVU.xmmBackup[i][1],
(float&)mVU.xmmBackup[i][2], (float&)mVU.xmmBackup[i][3]);
} }
} }
@ -259,17 +320,15 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
jmpA.SetTarget(); jmpA.SetTarget();
if (THREAD_VU1) if (THREAD_VU1)
{ {
{ #if 0
mVUScopedXMMBackup mVUSave(mVU, true);
xScopedSavedRegisters save{gprT1q, gprT2q, gprT3q};
if (IsDevBuild && !isCOP2) // Lets see which games do this! if (IsDevBuild && !isCOP2) // Lets see which games do this!
{ {
xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1! xMOV(gprT1, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(arg2regd, xPC); // So we don't spam console, we'll only check micro-mode... xMOV(gprT2, xPC); // So we don't spam console, we'll only check micro-mode...
xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd); xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd);
} }
xFastCall((void*)mVUwaitMTVU); #endif
} xFastCall((void*)mVU.waitMTVU);
} }
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem); xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);