x86/microVU: Add VI caching

Stenzek 2022-12-25 22:14:15 +10:00 committed by refractionpcsx2
parent 08faba5455
commit 00d768a6bf
14 changed files with 997 additions and 307 deletions

View File

@ -34,6 +34,12 @@ extern thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM];
namespace x86Emitter
{
// Win32 requires 32 bytes of shadow stack in the caller's frame.
#ifdef _WIN32
static constexpr int SHADOW_STACK_SIZE = 32;
#else
static constexpr int SHADOW_STACK_SIZE = 0;
#endif
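// A hedged sketch of the intended use (mirrors mvuGenerateWaitMTVU below);
// frame_size and some_function are placeholders:
//   xSUB(rsp, frame_size + SHADOW_STACK_SIZE); // saved regs live above the shadow area
//   xFastCall((void*)some_function);           // Win32 callee may spill args to [rsp..rsp+31]
//   xADD(rsp, frame_size + SHADOW_STACK_SIZE);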
extern void xWrite8(u8 val);
extern void xWrite16(u16 val);
@ -401,6 +407,8 @@ namespace x86Emitter
pxAssertDev(other.canMapIDTo(4), "Mapping h registers to higher registers can produce unexpected values");
}
static const inline xRegister32& GetInstance(uint id);
bool operator==(const xRegister32& src) const { return this->Id == src.Id; }
bool operator!=(const xRegister32& src) const { return this->Id != src.Id; }
};
@ -421,6 +429,8 @@ namespace x86Emitter
pxAssertDev(other.canMapIDTo(8), "Mapping h registers to higher registers can produce unexpected values");
}
static const inline xRegister64& GetInstance(uint id);
bool operator==(const xRegister64& src) const { return this->Id == src.Id; }
bool operator!=(const xRegister64& src) const { return this->Id != src.Id; }
};
@ -664,6 +674,34 @@ extern const xRegister32
#endif
}
const xRegister32& xRegister32::GetInstance(uint id)
{
static const xRegister32* const m_tbl_x86Regs[] =
{
&eax, &ecx, &edx, &ebx,
&esp, &ebp, &esi, &edi,
&r8d, &r9d, &r10d, &r11d,
&r12d, &r13d, &r14d, &r15d,
};
pxAssert(id < iREGCNT_GPR);
return *m_tbl_x86Regs[id];
}
const xRegister64& xRegister64::GetInstance(uint id)
{
static const xRegister64* const m_tbl_x86Regs[] =
{
&rax, &rcx, &rdx, &rbx,
&rsp, &rbp, &rsi, &rdi,
&r8, &r9, &r10, &r11,
&r12, &r13, &r14, &r15
};
pxAssert(id < iREGCNT_GPR);
return *m_tbl_x86Regs[id];
}
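// Sketch of why these tables exist: they map an allocator index back to a
// register reference, e.g. xRegister32::GetInstance(1) yields ecx, which the
// microVU allocGPR() below relies on when returning cached host registers.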
bool xRegisterSSE::IsCallerSaved(uint id)
{
#ifdef _WIN32

View File

@ -144,7 +144,7 @@ int _getFreeXMMreg(u32 maxreg)
case XMMTYPE_VFREG:
{
if (COP2INST_USEDTEST(xmmregs[i].reg))
if (EEINST_VFUSEDTEST(xmmregs[i].reg))
continue;
}
break;
@ -875,6 +875,16 @@ int _allocIfUsedGPRtoX86(int gprreg, int mode)
return EEINST_USEDTEST(gprreg) ? _allocX86reg(X86TYPE_GPR, gprreg, mode) : -1;
}
int _allocIfUsedVItoX86(int vireg, int mode)
{
const int x86reg = _checkX86reg(X86TYPE_VIREG, vireg, mode);
if (x86reg >= 0)
return x86reg;
// Prefer not to stomp on COP2 reserved registers here.
return EEINST_VIUSEDTEST(vireg) ? _allocX86reg(X86TYPE_VIREG, vireg, mode | MODE_COP2) : -1;
}
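// Typical caller pattern (sketch; matches the recCFC2 change below): use the
// host register when the VI reg is cached or worth caching, else fall back to
// a memory operand.
//   const int vireg = _allocIfUsedVItoX86(_Rd_, MODE_READ);
//   if (vireg >= 0)
//       xMOVZX(xRegister32(regt), xRegister16(vireg));
//   else
//       xMOVZX(xRegister32(regt), ptr16[&vu0Regs.VI[_Rd_].UL]);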
int _allocIfUsedGPRtoXMM(int gprreg, int mode)
{
const int mmreg = _checkXMMreg(XMMTYPE_GPRREG, gprreg, mode);

View File

@ -30,6 +30,7 @@
#define MODE_READ 1
#define MODE_WRITE 2
#define MODE_CALLEESAVED 0x20 // can't flush reg to mem
#define MODE_COP2 0x40 // don't allow using reserved VU registers
#define PROCESS_EE_XMM 0x02
@ -119,6 +120,9 @@ void _flushConstReg(int reg);
void _validateRegs();
void _writebackX86Reg(int x86reg);
void mVUFreeCOP2GPR(int hostreg);
bool mVUIsReservedCOP2(int hostreg);
////////////////////////////////////////////////////////////////////////////////
// XMM (128-bit) Register Allocation Tools
@ -247,11 +251,17 @@ static __fi bool EEINST_XMMUSEDTEST(u32 reg)
}
/// Returns true if the specified VF register is used later in the block.
static __fi bool COP2INST_USEDTEST(u32 reg)
static __fi bool EEINST_VFUSEDTEST(u32 reg)
{
return (g_pCurInstInfo->vfregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
}
/// Returns true if the specified VI register is used later in the block.
static __fi bool EEINST_VIUSEDTEST(u32 reg)
{
return (g_pCurInstInfo->viregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
}
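// (USED set while LASTUSE is clear: the VI reg is read again after this
// instruction, so caching it in a host register can pay off; on its last use
// there is nothing to gain.)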
/// Returns true if the value should be computed/written back.
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
static __fi bool EEINST_LIVETEST(u32 reg)
@ -297,6 +307,7 @@ extern u16 g_xmmAllocCounter;
// allocates only if later insts use this register
int _allocIfUsedGPRtoX86(int gprreg, int mode);
int _allocIfUsedVItoX86(int vireg, int mode);
int _allocIfUsedGPRtoXMM(int gprreg, int mode);
int _allocIfUsedFPUtoXMM(int fpureg, int mode);

View File

@ -55,6 +55,9 @@ int _getFreeX86reg(int mode)
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(reg))
continue;
if ((mode & MODE_COP2) && mVUIsReservedCOP2(reg))
continue;
if (x86regs[reg].inuse == 0)
{
g_x86checknext = (reg + 1) % iREGCNT_GPR;
@ -70,6 +73,9 @@ int _getFreeX86reg(int mode)
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(i))
continue;
if ((mode & MODE_COP2) && mVUIsReservedCOP2(i))
continue;
// should have checked inuse in the previous loop.
pxAssert(x86regs[i].inuse);
@ -373,6 +379,13 @@ int _allocX86reg(int type, int reg, int mode)
}
break;
case X86TYPE_VIREG:
{
RALOG("Loading guest VI reg %d to GPR %d", reg, regnum);
xMOVZX(xRegister32(regnum), ptr16[&VU0.VI[reg].US[0]]);
}
break;
default:
abort();
break;
@ -536,8 +549,7 @@ void _freeX86regWithoutWriteback(int x86reg)
if (x86regs[x86reg].type == X86TYPE_VIREG)
{
RALOG("Freeing VI reg %d in host GPR %d\n", x86regs[x86reg].reg, x86reg);
//mVUFreeCOP2GPR(x86reg);
abort();
mVUFreeCOP2GPR(x86reg);
}
else if (x86regs[x86reg].inuse && x86regs[x86reg].type == X86TYPE_GPR)
{

View File

@ -89,6 +89,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
x86SetPtr(mVU.dispCache);
mVUdispatcherAB(mVU);
mVUdispatcherCD(mVU);
mvuGenerateWaitMTVU(mVU);
mVUemitSearch();
mVU.regs().nextBlockCycles = 0;

View File

@ -251,6 +251,7 @@ struct microVU
u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit)
u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume)
u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit)
u8* waitMTVU; // Ptr to function to save registers/sync VU1 thread
u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick
u32 code; // Contains the current Instruction
u32 divFlag; // 1 instance of I/D flags

View File

@ -116,32 +116,10 @@ __fi void mVUallocCFLAGb(mV, const x32& reg, int fInstance)
// VI Reg Allocators
//------------------------------------------------------------------
__ri void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false)
void microRegAlloc::writeVIBackup(const xRegisterInt& reg)
{
if (!_reg_)
xXOR(GPRreg, GPRreg);
else if (signext)
xMOVSX(GPRreg, ptr16[&mVU.regs().VI[_reg_].SL]);
else
xMOVZX(GPRreg, ptr16[&mVU.regs().VI[_reg_].UL]);
}
__ri void mVUallocVIb(mV, const x32& GPRreg, int _reg_)
{
if (mVUlow.backupVI) // Backs up reg to memory (used when VI is modified before a branch)
{
xMOVZX(gprT3, ptr16[&mVU.regs().VI[_reg_].UL]);
xMOV (ptr32[&mVU.VIbackup], gprT3);
}
if (_reg_ == 0)
{
return;
}
else if (_reg_ < 16)
{
xMOV(ptr16[&mVU.regs().VI[_reg_].UL], xRegister16(GPRreg.Id));
}
microVU& mVU = index ? microVU1 : microVU0;
xMOV(ptr32[&mVU.VIbackup], xRegister32(reg));
}
//------------------------------------------------------------------

View File

@ -123,6 +123,81 @@ void mVUdispatcherCD(mV)
"microVU: Dispatcher generation exceeded reserved cache area!");
}
void mvuGenerateWaitMTVU(mV)
{
mVU.waitMTVU = x86Ptr;
int num_xmms = 0, num_gprs = 0;
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
{
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
continue;
// no need to save temps
if (i == gprT1.GetId() || i == gprT2.GetId())
continue;
xPUSH(xRegister64(i));
num_gprs++;
}
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
num_xmms++;
}
// We need 16 byte alignment on the stack.
// The stack is misaligned by 8 at entry (we got here via CALL), so the extra
// 8-byte pad is needed when the number of pushed GPRs is even, not odd.
const int stack_size = (num_xmms * sizeof(u128)) + ((~num_gprs & 1) * sizeof(u64)) + SHADOW_STACK_SIZE;
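// Worked example (sketch): entered via CALL, rsp starts at 8 (mod 16). With
// num_gprs = 4 the pushes leave rsp at 8 (mod 16) again, so ~num_gprs & 1
// contributes one u64 pad; with num_xmms = 6 on Win32 that gives
// stack_size = 6*16 + 8 + 32 = 136, and 8 + 4*8 + 136 = 0 (mod 16), which the
// aligned xMOVAPS stores below require.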
int stack_offset = SHADOW_STACK_SIZE;
if (stack_size > 0)
{
xSUB(rsp, stack_size);
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
xMOVAPS(ptr128[rsp + stack_offset], xRegisterSSE(i));
stack_offset += sizeof(u128);
}
}
xFastCall((void*)mVUwaitMTVU);
stack_offset = (num_xmms - 1) * sizeof(u128) + SHADOW_STACK_SIZE;
for (int i = static_cast<int>(iREGCNT_XMM - 1); i >= 0; i--)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
xMOVAPS(xRegisterSSE(i), ptr128[rsp + stack_offset]);
stack_offset -= sizeof(u128);
}
xADD(rsp, stack_size);
for (int i = static_cast<int>(iREGCNT_GPR - 1); i >= 0; i--)
{
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
continue;
if (i == gprT1.GetId() || i == gprT2.GetId())
continue;
xPOP(xRegister64(i));
}
xRET();
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
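// Recompiled code reaches this thunk via xFastCall((void*)mVU.waitMTVU) (see
// mVUaddrFix), replacing the old per-site mVUwaitMTVU call that needed a full
// XMM backup around it.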
//------------------------------------------------------------------
// Execution Functions
//------------------------------------------------------------------

View File

@ -313,13 +313,15 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC)
}
else
{
const xRegister32& temp3 = mVU.regAlloc->allocGPR();
xMOV(gprT1, getFlagReg(bStatus[0]));
xMOV(gprT2, getFlagReg(bStatus[1]));
xMOV(gprT3, getFlagReg(bStatus[2]));
xMOV(temp3, getFlagReg(bStatus[2]));
xMOV(gprF3, getFlagReg(bStatus[3]));
xMOV(gprF0, gprT1);
xMOV(gprF1, gprT2);
xMOV(gprF2, gprT3);
xMOV(gprF2, temp3);
mVU.regAlloc->clearNeeded(temp3);
}
}

View File

@ -228,11 +228,25 @@ struct microMapXMM
bool isZero; // Register was loaded from VF00 and doesn't need clamping
};
struct microMapGPR
{
int VIreg; // Guest VI reg held in this host reg (-1 = none/temp)
int count; // Allocation counter, for least-recently-allocated eviction
bool isNeeded; // Pinned by the instruction currently being compiled
bool dirty; // Host value modified; must be written back to the VI reg
bool isZeroExtended; // Upper bits of the host reg are known to be zero
bool usable; // May be allocated (not a reserved temp/flag reg or rsp)
};
class microRegAlloc
{
protected:
static const int xmmTotal = 15; // PQ register is reserved
static const int xmmTotal = iREGCNT_XMM - 1; // PQ register is reserved
static const int gprTotal = iREGCNT_GPR;
microMapXMM xmmMap[xmmTotal];
microMapGPR gprMap[gprTotal];
int counter; // Current allocation count
int index; // VU0 or VU1
@ -251,6 +265,18 @@ protected:
__ri void loadIreg(const xmm& reg, int xyzw)
{
for (int i = 0; i < gprTotal; i++)
{
if (gprMap[i].VIreg == REG_I)
{
xMOVDZX(reg, xRegister32(i));
if (!_XYZWss(xyzw))
xSHUF.PS(reg, reg, 0);
return;
}
}
xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
if (!_XYZWss(xyzw))
xSHUF.PS(reg, reg, 0);
@ -290,10 +316,59 @@ protected:
return x;
}
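// Recursive helper (mirrors the XMM search above): among usable, not-needed
// host GPRs, picks the one with the lowest allocation count, i.e. the least
// recently allocated candidate.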
int findFreeGPRRec(int startIdx)
{
for (int i = startIdx; i < gprTotal; i++)
{
if (gprMap[i].usable && !gprMap[i].isNeeded)
{
int x = findFreeGPRRec(i + 1);
if (x == -1)
return i;
return ((gprMap[i].count < gprMap[x].count) ? i : x);
}
}
return -1;
}
int findFreeGPR(int vireg)
{
if (regAllocCOP2)
return _allocX86reg(X86TYPE_VIREG, vireg, MODE_COP2);
for (int i = 0; i < gprTotal; i++)
{
if (gprMap[i].usable && !gprMap[i].isNeeded && (gprMap[i].VIreg < 0))
{
return i; // Reg is not needed and was a temp reg
}
}
int x = findFreeGPRRec(0);
pxAssertDev(x >= 0, "microVU register allocation failure!");
return x;
}
void writeVIBackup(const xRegisterInt& reg);
public:
microRegAlloc(int _index)
{
index = _index;
// mark gpr registers as usable
std::memset(gprMap, 0, sizeof(gprMap));
for (int i = 0; i < gprTotal; i++)
{
if (i == gprT1.GetId() || i == gprT2.GetId() ||
i == gprF0.GetId() || i == gprF1.GetId() || i == gprF2.GetId() || i == gprF3.GetId() ||
i == rsp.GetId())
{
continue;
}
gprMap[i].usable = true;
}
reset(false);
}
@ -304,9 +379,10 @@ public:
regAllocCOP2 = false;
for (int i = 0; i < xmmTotal; i++)
{
clearReg(i);
}
for (int i = 0; i < gprTotal; i++)
clearGPR(i);
counter = 0;
regAllocCOP2 = cop2mode;
pxmmregs = cop2mode ? xmmregs : nullptr;
@ -331,13 +407,37 @@ public:
xmmMap[i].xyzw = ((pxmmregs[i].mode & MODE_WRITE) != 0) ? 0xf : 0x0;
}
}
for (int i = 0; i < gprTotal; i++)
{
if (!x86regs[i].inuse || x86regs[i].type != X86TYPE_VIREG)
continue;
// pxAssertRel(x86regs[i].reg >= 0, "Valid full register preserved");
if (x86regs[i].reg >= 0)
{
MVURALOG("Preserving VI reg %d in host reg %d across instruction\n", x86regs[i].reg, i);
x86regs[i].needed = false;
gprMap[i].isNeeded = false;
gprMap[i].isZeroExtended = false;
gprMap[i].VIreg = x86regs[i].reg;
gprMap[i].dirty = ((x86regs[i].mode & MODE_WRITE) != 0);
}
}
}
gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM;
}
int getXmmCount()
{
return xmmTotal + 1;
}
int getGPRCount()
{
return gprTotal;
}
// Flushes all allocated registers (i.e. writes-back to memory all modified registers).
// If clearState is 0, then it keeps cached reg data valid
// If clearState is 1, then it invalidates all cached reg data after write-back
@ -349,6 +449,36 @@ public:
if (clearState)
clearReg(i);
}
for (int i = 0; i < gprTotal; i++)
{
writeBackReg(xRegister32(i), true);
if (clearState)
clearGPR(i);
}
}
void flushCallerSavedRegisters(bool clearNeeded = false)
{
for (int i = 0; i < xmmTotal; i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
writeBackReg(xmm(i));
if (clearNeeded || !xmmMap[i].isNeeded)
clearReg(i);
}
for (int i = 0; i < gprTotal; i++)
{
if (!xRegister32::IsCallerSaved(i))
continue;
writeBackReg(xRegister32(i), true);
if (clearNeeded || !gprMap[i].isNeeded)
clearGPR(i);
}
}
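// flushCallerSavedRegisters() is emitted before host-function calls (e.g. the
// XGKICK sync/delay paths below), since the callee is free to clobber any
// caller-saved host register.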
void flushPartialForCOP2()
@ -378,10 +508,19 @@ public:
clear.isNeeded = 0;
clear.isZero = 0;
}
for (int i = 0; i < gprTotal; i++)
{
microMapGPR& clear = gprMap[i];
if (clear.VIreg < 0)
clearGPR(i);
}
}
void TDwritebackAll(bool clearState = false)
void TDwritebackAll()
{
// NOTE: We don't clear state here, this happens in an optional branch
for (int i = 0; i < xmmTotal; i++)
{
microMapXMM& mapX = xmmMap[xmm(i).Id];
@ -396,6 +535,9 @@ public:
mVUsaveReg(xmm(i), ptr[&getVF(mapX.VFreg)], mapX.xyzw, 1);
}
}
for (int i = 0; i < gprTotal; i++)
writeBackReg(xRegister32(i), false);
}
bool checkVFClamp(int regId)
@ -414,11 +556,19 @@ public:
return false;
}
bool checkCachedGPR(int regId)
{
if (regId < gprTotal)
return gprMap[regId].VIreg >= 0 || gprMap[regId].isNeeded;
else
return false;
}
void clearReg(const xmm& reg) { clearReg(reg.Id); }
void clearReg(int regId)
{
microMapXMM& clear = xmmMap[regId];
if (regAllocCOP2)
if (regAllocCOP2 && (clear.isNeeded || clear.VFreg >= 0))
{
pxAssert(pxmmregs[regId].type == XMMTYPE_VFREG);
pxmmregs[regId].inuse = false;
@ -668,4 +818,262 @@ public:
updateCOP2AllocState(x);
return xmmX;
}
void clearGPR(const xRegisterInt& reg) { clearGPR(reg.GetId()); }
void clearGPR(int regId)
{
microMapGPR& clear = gprMap[regId];
if (regAllocCOP2)
{
if (x86regs[regId].inuse && x86regs[regId].type == X86TYPE_VIREG)
{
pxAssert(x86regs[regId].reg == static_cast<u8>(clear.VIreg));
_freeX86regWithoutWriteback(regId);
}
}
clear.VIreg = -1;
clear.count = 0;
clear.isNeeded = 0;
clear.dirty = false;
clear.isZeroExtended = false;
}
void clearGPRCOP2(int regId)
{
if (regAllocCOP2)
clearGPR(regId);
}
void updateCOP2AllocState(const xRegisterInt& reg)
{
if (!regAllocCOP2)
return;
const u32 rn = reg.GetId();
const bool dirty = (gprMap[rn].VIreg >= 0 && gprMap[rn].dirty);
pxAssert(x86regs[rn].type == X86TYPE_VIREG);
x86regs[rn].reg = gprMap[rn].VIreg;
x86regs[rn].counter = gprMap[rn].count;
x86regs[rn].mode = dirty ? (MODE_READ | MODE_WRITE) : MODE_READ;
x86regs[rn].needed = gprMap[rn].isNeeded;
}
void writeBackReg(const xRegisterInt& reg, bool clearDirty)
{
microMapGPR& mapX = gprMap[reg.GetId()];
pxAssert(mapX.usable || !mapX.dirty);
if (mapX.dirty)
{
pxAssert(mapX.VIreg > 0);
if (mapX.VIreg < 16)
xMOV(ptr16[&getVI(mapX.VIreg)], xRegister16(reg));
if (clearDirty)
{
mapX.dirty = false;
updateCOP2AllocState(reg);
}
}
}
void clearNeeded(const xRegisterInt& reg)
{
pxAssert(reg.GetId() < gprTotal);
microMapGPR& clear = gprMap[reg.GetId()];
clear.isNeeded = false;
if (regAllocCOP2)
x86regs[reg.GetId()].needed = false;
}
void unbindAnyVIAllocations(int reg, bool& backup)
{
for (int i = 0; i < gprTotal; i++)
{
microMapGPR& mapI = gprMap[i];
if (mapI.VIreg == reg)
{
if (backup)
{
writeVIBackup(xRegister32(i));
backup = false;
}
// if it's needed, we just unbind the allocation and preserve it, otherwise clear
if (mapI.isNeeded)
{
MVURALOG(" unbind %d to %d for write\n", i, reg);
if (regAllocCOP2)
{
pxAssert(x86regs[i].type == X86TYPE_VIREG && x86regs[i].reg == static_cast<u8>(mapI.VIreg));
x86regs[i].reg = -1;
}
mapI.VIreg = -1;
mapI.dirty = false;
mapI.isZeroExtended = false;
}
else
{
MVURALOG(" clear %d to %d for write\n", i, reg);
clearGPR(i);
}
// shouldn't be any others...
for (int j = i + 1; j < gprTotal; j++)
{
pxAssert(gprMap[j].VIreg != reg);
}
break;
}
}
}
const xRegister32& allocGPR(int viLoadReg = -1, int viWriteReg = -1, bool backup = false, bool zext_if_dirty = false)
{
// TODO: When load != write, we should check whether load is used later, and if so, copy it.
//DevCon.WriteLn("viLoadReg = %02d, viWriteReg = %02d, backup = %d",viLoadReg,viWriteReg,(int)backup);
const int this_counter = regAllocCOP2 ? (g_x86AllocCounter++) : (counter++);
if (viLoadReg == 0 || viWriteReg == 0)
{
// write zero register as temp and discard later
if (viWriteReg == 0)
{
int x = findFreeGPR(-1);
const xRegister32& gprX = xRegister32::GetInstance(x);
writeBackReg(gprX, true);
xXOR(gprX, gprX);
gprMap[x].VIreg = -1;
gprMap[x].dirty = false;
gprMap[x].count = this_counter;
gprMap[x].isNeeded = true;
gprMap[x].isZeroExtended = true;
MVURALOG(" alloc zero to scratch %d\n", x);
return gprX;
}
}
if (viLoadReg >= 0) // Search For Cached Regs
{
for (int i = 0; i < gprTotal; i++)
{
microMapGPR& mapI = gprMap[i];
if (mapI.VIreg == viLoadReg)
{
if (viWriteReg >= 0) // Reg will be modified
{
if (viLoadReg != viWriteReg)
{
// kill any allocations of viWriteReg
unbindAnyVIAllocations(viWriteReg, backup);
// allocate a new register for writing to
int x = findFreeGPR(viWriteReg);
const xRegister32& gprX = xRegister32::GetInstance(x);
writeBackReg(gprX, true);
if (zext_if_dirty)
xMOVZX(gprX, xRegister16(i));
else
xMOV(gprX, xRegister32(i));
gprMap[x].isZeroExtended = zext_if_dirty;
MVURALOG(" clone write %d in %d to %d for %d\n", viLoadReg, i, x, viWriteReg);
std::swap(x, i);
}
else
{
// writing to it, no longer zero extended
gprMap[i].isZeroExtended = false;
}
gprMap[i].VIreg = viWriteReg;
gprMap[i].dirty = true;
}
else if (zext_if_dirty && !gprMap[i].isZeroExtended)
{
xMOVZX(xRegister32(i), xRegister16(i));
gprMap[i].isZeroExtended = true;
}
gprMap[i].count = this_counter;
gprMap[i].isNeeded = true;
if (backup)
writeVIBackup(xRegister32(i));
if (regAllocCOP2)
{
pxAssert(x86regs[i].inuse && x86regs[i].type == X86TYPE_VIREG);
x86regs[i].reg = gprMap[i].VIreg;
x86regs[i].mode = gprMap[i].dirty ? (MODE_WRITE | MODE_READ) : (MODE_READ);
}
MVURALOG(" returning cached in %d\n", i);
return xRegister32::GetInstance(i);
}
}
}
if (viWriteReg >= 0) // Writing a new value, make sure this register isn't cached already
unbindAnyVIAllocations(viWriteReg, backup);
int x = findFreeGPR(viLoadReg);
const xRegister32& gprX = xRegister32::GetInstance(x);
writeBackReg(gprX, true);
if (viLoadReg > 0)
xMOVZX(gprX, ptr16[&getVI(viLoadReg)]);
else if (viLoadReg == 0)
xXOR(gprX, gprX);
gprMap[x].VIreg = viLoadReg;
gprMap[x].isZeroExtended = true;
if (viWriteReg >= 0)
{
gprMap[x].VIreg = viWriteReg;
gprMap[x].dirty = true;
gprMap[x].isZeroExtended = false;
if (backup)
{
if (viLoadReg < 0 && viWriteReg > 0)
xMOVZX(gprX, ptr16[&getVI(viWriteReg)]);
writeVIBackup(gprX);
}
}
gprMap[x].count = this_counter;
gprMap[x].isNeeded = true;
if (regAllocCOP2)
{
pxAssert(x86regs[x].inuse && x86regs[x].type == X86TYPE_VIREG);
x86regs[x].reg = gprMap[x].VIreg;
x86regs[x].mode = gprMap[x].dirty ? (MODE_WRITE | MODE_READ) : (MODE_READ);
}
MVURALOG(" returning new %d\n", x);
return gprX;
}
void moveVIToGPR(const xRegisterInt& reg, int vi, bool signext = false)
{
pxAssert(vi >= 0);
if (vi == 0)
{
xXOR(xRegister32(reg), xRegister32(reg));
return;
}
// TODO: Check liveness/usedness before allocating.
// TODO: Check whether zero-extend is needed everywhere here. Loadstores are.
const xRegister32& srcreg = allocGPR(vi);
if (signext)
xMOVSX(xRegister32(reg), xRegister16(srcreg));
else
xMOVZX(xRegister32(reg), xRegister16(srcreg));
clearNeeded(srcreg);
}
};
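// Typical pass-2 usage of the new allocator (sketch; mirrors the IADD change
// below): allocate sources/destination, emit the op, then release the pins.
//   const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
//   const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
//   xADD(regS, regT);
//   mVU.regAlloc->clearNeeded(regS);
//   mVU.regAlloc->clearNeeded(regT);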

View File

@ -611,11 +611,12 @@ mVUop(mVU_FCAND)
pass1 { mVUanalyzeCflag(mVU, 1); }
pass2
{
mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
xAND(gprT1, _Imm24_);
xADD(gprT1, 0xffffff);
xSHR(gprT1, 24);
mVUallocVIb(mVU, gprT1, 1);
const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
mVUallocCFLAGa(mVU, dst, cFLAG.read);
xAND(dst, _Imm24_);
xADD(dst, 0xffffff);
xSHR(dst, 24);
mVU.regAlloc->clearNeeded(dst);
mVU.profiler.EmitOp(opFCAND);
}
pass3 { mVUlog("FCAND vi01, $%x", _Imm24_); }
@ -627,11 +628,12 @@ mVUop(mVU_FCEQ)
pass1 { mVUanalyzeCflag(mVU, 1); }
pass2
{
mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
xXOR(gprT1, _Imm24_);
xSUB(gprT1, 1);
xSHR(gprT1, 31);
mVUallocVIb(mVU, gprT1, 1);
const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
mVUallocCFLAGa(mVU, dst, cFLAG.read);
xXOR(dst, _Imm24_);
xSUB(dst, 1);
xSHR(dst, 31);
mVU.regAlloc->clearNeeded(dst);
mVU.profiler.EmitOp(opFCEQ);
}
pass3 { mVUlog("FCEQ vi01, $%x", _Imm24_); }
@ -643,9 +645,10 @@ mVUop(mVU_FCGET)
pass1 { mVUanalyzeCflag(mVU, _It_); }
pass2
{
mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
xAND(gprT1, 0xfff);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
mVUallocCFLAGa(mVU, regT, cFLAG.read);
xAND(regT, 0xfff);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opFCGET);
}
pass3 { mVUlog("FCGET vi%02d", _Ft_); }
@ -657,11 +660,12 @@ mVUop(mVU_FCOR)
pass1 { mVUanalyzeCflag(mVU, 1); }
pass2
{
mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
xOR(gprT1, _Imm24_);
xADD(gprT1, 1); // If all 24 bits are set, +1 carries into the 25th bit; else it's 0
xSHR(gprT1, 24); // Get the 25th bit (also clears the rest of the garbage in the reg)
mVUallocVIb(mVU, gprT1, 1);
const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
mVUallocCFLAGa(mVU, dst, cFLAG.read);
xOR(dst, _Imm24_);
xADD(dst, 1); // If all 24 bits are set, +1 carries into the 25th bit; else it's 0
xSHR(dst, 24); // Get the 25th bit (also clears the rest of the garbage in the reg)
mVU.regAlloc->clearNeeded(dst);
mVU.profiler.EmitOp(opFCOR);
}
pass3 { mVUlog("FCOR vi01, $%x", _Imm24_); }
@ -690,9 +694,9 @@ mVUop(mVU_FMAND)
pass2
{
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_);
xAND(gprT1b, gprT2b);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
xAND(regT, gprT1);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opFMAND);
}
pass3 { mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_); }
@ -705,11 +709,11 @@ mVUop(mVU_FMEQ)
pass2
{
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_);
xXOR(gprT1, gprT2);
xSUB(gprT1, 1);
xSHR(gprT1, 31);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
xXOR(regT, gprT1);
xSUB(regT, 1);
xSHR(regT, 31);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opFMEQ);
}
pass3 { mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_); }
@ -722,9 +726,9 @@ mVUop(mVU_FMOR)
pass2
{
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
mVUallocVIa(mVU, gprT2, _Is_);
xOR(gprT1b, gprT2b);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
xOR(regT, gprT1);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opFMOR);
}
pass3 { mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_); }
@ -742,9 +746,10 @@ mVUop(mVU_FSAND)
{
if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking I/D/IS/DS Flags");
if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking U/O/US/OS Flags");
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
xAND(gprT1, _Imm12_);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
mVUallocSFLAGc(reg, gprT1, sFLAG.read);
xAND(reg, _Imm12_);
mVU.regAlloc->clearNeeded(reg);
mVU.profiler.EmitOp(opFSAND);
}
pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
@ -756,9 +761,10 @@ mVUop(mVU_FSOR)
pass1 { mVUanalyzeSflag(mVU, _It_); }
pass2
{
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
xOR(gprT1, _Imm12_);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
mVUallocSFLAGc(reg, gprT2, sFLAG.read);
xOR(reg, _Imm12_);
mVU.regAlloc->clearNeeded(reg);
mVU.profiler.EmitOp(opFSOR);
}
pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
@ -786,15 +792,16 @@ mVUop(mVU_FSEQ)
if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
mVUallocSFLAGa(gprT1, sFLAG.read);
setBitFSEQ(gprT1, 0x0f00); // Z bit
setBitFSEQ(gprT1, 0xf000); // S bit
setBitFSEQ(gprT1, 0x000f); // ZS bit
setBitFSEQ(gprT1, 0x00f0); // SS bit
xXOR(gprT1, imm);
xSUB(gprT1, 1);
xSHR(gprT1, 31);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
mVUallocSFLAGa(reg, sFLAG.read);
setBitFSEQ(reg, 0x0f00); // Z bit
setBitFSEQ(reg, 0xf000); // S bit
setBitFSEQ(reg, 0x000f); // ZS bit
setBitFSEQ(reg, 0x00f0); // SS bit
xXOR(reg, imm);
xSUB(reg, 1);
xSHR(reg, 31);
mVU.regAlloc->clearNeeded(reg);
mVU.profiler.EmitOp(opFSEQ);
}
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
@ -834,15 +841,11 @@ mVUop(mVU_IADD)
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
pass2
{
mVUallocVIa(mVU, gprT1, _Is_);
if (_It_ != _Is_)
{
mVUallocVIa(mVU, gprT2, _It_);
xADD(gprT1b, gprT2b);
}
else
xADD(gprT1b, gprT1b);
mVUallocVIb(mVU, gprT1, _Id_);
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
xADD(regS, regT);
mVU.regAlloc->clearNeeded(regS);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opIADD);
}
pass3 { mVUlog("IADD vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
@ -853,10 +856,10 @@ mVUop(mVU_IADDI)
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_); }
pass2
{
mVUallocVIa(mVU, gprT1, _Is_);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
if (_Imm5_ != 0)
xADD(gprT1b, _Imm5_);
mVUallocVIb(mVU, gprT1, _It_);
xADD(regS, _Imm5_);
mVU.regAlloc->clearNeeded(regS);
mVU.profiler.EmitOp(opIADDI);
}
pass3 { mVUlog("IADDI vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm5_); }
@ -867,10 +870,10 @@ mVUop(mVU_IADDIU)
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); }
pass2
{
mVUallocVIa(mVU, gprT1, _Is_);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
if (_Imm15_ != 0)
xADD(gprT1b, _Imm15_);
mVUallocVIb(mVU, gprT1, _It_);
xADD(regS, _Imm15_);
mVU.regAlloc->clearNeeded(regS);
mVU.profiler.EmitOp(opIADDIU);
}
pass3 { mVUlog("IADDIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
@ -881,13 +884,12 @@ mVUop(mVU_IAND)
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
pass2
{
mVUallocVIa(mVU, gprT1, _Is_);
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
if (_It_ != _Is_)
{
mVUallocVIa(mVU, gprT2, _It_);
xAND(gprT1, gprT2);
}
mVUallocVIb(mVU, gprT1, _Id_);
xAND(regS, regT);
mVU.regAlloc->clearNeeded(regS);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opIAND);
}
pass3 { mVUlog("IAND vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
@ -898,13 +900,12 @@ mVUop(mVU_IOR)
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
pass2
{
mVUallocVIa(mVU, gprT1, _Is_);
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
if (_It_ != _Is_)
{
mVUallocVIa(mVU, gprT2, _It_);
xOR(gprT1, gprT2);
}
mVUallocVIb(mVU, gprT1, _Id_);
xOR(regS, regT);
mVU.regAlloc->clearNeeded(regS);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opIOR);
}
pass3 { mVUlog("IOR vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
@ -917,15 +918,17 @@ mVUop(mVU_ISUB)
{
if (_It_ != _Is_)
{
mVUallocVIa(mVU, gprT1, _Is_);
mVUallocVIa(mVU, gprT2, _It_);
xSUB(gprT1b, gprT2b);
mVUallocVIb(mVU, gprT1, _Id_);
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
xSUB(regS, regT);
mVU.regAlloc->clearNeeded(regS);
mVU.regAlloc->clearNeeded(regT);
}
else
{
xXOR(gprT1, gprT1);
mVUallocVIb(mVU, gprT1, _Id_);
const xRegister32& regD = mVU.regAlloc->allocGPR(-1, _Id_, mVUlow.backupVI);
xXOR(regD, regD);
mVU.regAlloc->clearNeeded(regD);
}
mVU.profiler.EmitOp(opISUB);
}
@ -937,10 +940,10 @@ mVUop(mVU_ISUBIU)
pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
pass2
{
mVUallocVIa(mVU, gprT1, _Is_);
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
if (_Imm15_ != 0)
xSUB(gprT1b, _Imm15_);
mVUallocVIb(mVU, gprT1, _It_);
xSUB(regS, _Imm15_);
mVU.regAlloc->clearNeeded(regS);
mVU.profiler.EmitOp(opISUBIU);
}
pass3 { mVUlog("ISUBIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
@ -964,10 +967,20 @@ mVUop(mVU_MFIR)
pass2
{
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUallocVIa(mVU, gprT1, _Is_, true);
xMOVDZX(Ft, gprT1);
if (_Is_ != 0)
{
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, -1);
xMOVSX(xRegister32(regS), xRegister16(regS));
// TODO: Broadcast instead
xMOVDZX(Ft, regS);
if (!_XYZW_SS)
mVUunpack_xyzw(Ft, Ft, 0);
mVU.regAlloc->clearNeeded(regS);
}
else
{
xPXOR(Ft, Ft);
}
mVU.regAlloc->clearNeeded(Ft);
mVU.profiler.EmitOp(opMFIR);
}
@ -1038,8 +1051,9 @@ mVUop(mVU_MTIR)
pass2
{
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
xMOVD(gprT1, Fs);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVD(regT, Fs);
mVU.regAlloc->clearNeeded(regT);
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opMTIR);
}
@ -1064,14 +1078,14 @@ mVUop(mVU_ILW)
{
void* ptr = mVU.regs().Mem + offsetSS;
mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (_Imm11_ != 0)
xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2q);
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
mVUallocVIb(mVU, gprT1, _It_);
xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT1q);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opILW);
}
pass3 { mVUlog("ILW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
@ -1092,15 +1106,19 @@ mVUop(mVU_ILWR)
void* ptr = mVU.regs().Mem + offsetSS;
if (_Is_)
{
mVUallocVIa(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2q);
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
mVUaddrFix (mVU, gprT1q);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
mVU.regAlloc->clearNeeded(regT);
}
else
{
xMOVZX(gprT1, ptr16[ptr]);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[ptr]);
mVU.regAlloc->clearNeeded(regT);
}
mVUallocVIb(mVU, gprT1, _It_);
mVU.profiler.EmitOp(opILWR);
}
pass3 { mVUlog("ILWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
@ -1110,7 +1128,7 @@ mVUop(mVU_ILWR)
// ISW/ISWR
//------------------------------------------------------------------
static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg)
static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg, const xRegister32& val)
{
if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr)
{
@ -1118,10 +1136,10 @@ static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg)
auto writeBackAt = [&](int offset) {
if (register_offset == -1)
{
xLEA(gprT3q, ptr[(void*)((sptr)base_ptr + offset)]);
xLEA(gprT2q, ptr[(void*)((sptr)base_ptr + offset)]);
register_offset = offset;
}
xMOV(ptr32[gprT3q + reg + (offset - register_offset)], gprT1);
xMOV(ptr32[gprT2q + reg + (offset - register_offset)], val);
};
if (_X) writeBackAt(0);
if (_Y) writeBackAt(4);
@ -1130,17 +1148,17 @@ static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg)
}
else if (reg.IsEmpty())
{
if (_X) xMOV(ptr32[(void*)((uptr)base_ptr )], gprT1);
if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr + 4)], gprT1);
if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr + 8)], gprT1);
if (_W) xMOV(ptr32[(void*)((uptr)base_ptr + 12)], gprT1);
if (_X) xMOV(ptr32[(void*)((uptr)base_ptr )], val);
if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr + 4)], val);
if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr + 8)], val);
if (_W) xMOV(ptr32[(void*)((uptr)base_ptr + 12)], val);
}
else
{
if (_X) xMOV(ptr32[base_ptr+reg ], gprT1);
if (_Y) xMOV(ptr32[base_ptr+reg + 4], gprT1);
if (_Z) xMOV(ptr32[base_ptr+reg + 8], gprT1);
if (_W) xMOV(ptr32[base_ptr+reg + 12], gprT1);
if (_X) xMOV(ptr32[base_ptr+reg ], val);
if (_Y) xMOV(ptr32[base_ptr+reg + 4], val);
if (_Z) xMOV(ptr32[base_ptr+reg + 8], val);
if (_W) xMOV(ptr32[base_ptr+reg + 12], val);
}
}
@ -1156,15 +1174,15 @@ mVUop(mVU_ISW)
{
void* ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (_Imm11_ != 0)
xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2q);
xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT1q);
mVUallocVIa(mVU, gprT1, _It_);
writeBackISW(mVU, ptr, gprT2q);
// If regT is dirty, the high bits might not be zero.
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
writeBackISW(mVU, ptr, gprT1q, regT);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opISW);
}
pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
@ -1184,12 +1202,13 @@ mVUop(mVU_ISWR)
xAddressReg is = xEmptyReg;
if (_Is_)
{
mVUallocVIa(mVU, gprT2, _Is_);
mVUaddrFix(mVU, gprT2q);
is = gprT2q;
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
mVUaddrFix(mVU, gprT1q);
is = gprT1q;
}
mVUallocVIa(mVU, gprT1, _It_);
writeBackISW(mVU, ptr, is);
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
writeBackISW(mVU, ptr, is, regT);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opISWR);
}
@ -1206,15 +1225,13 @@ mVUop(mVU_LQ)
pass2
{
void* ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (_Imm11_ != 0)
xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2q);
xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT1q);
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W);
mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W);
mVU.regAlloc->clearNeeded(Ft);
mVU.profiler.EmitOp(opLQ);
}
@ -1230,12 +1247,12 @@ mVUop(mVU_LQD)
xAddressReg is = xEmptyReg;
if (_Is_ || isVU0) // Access VU1 regs mem-map in !_Is_ case
{
mVUallocVIa(mVU, gprT2, _Is_);
xSUB(gprT2b, 1);
if (_Is_)
mVUallocVIb(mVU, gprT2, _Is_);
mVUaddrFix(mVU, gprT2q);
is = gprT2q;
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Is_, mVUlow.backupVI);
xDEC(regS);
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
mVU.regAlloc->clearNeeded(regS);
mVUaddrFix(mVU, gprT1q);
is = gprT1q;
}
else
{
@ -1250,7 +1267,7 @@ mVUop(mVU_LQD)
}
else
{
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, is), _X_Y_Z_W);
}
mVU.regAlloc->clearNeeded(Ft);
}
@ -1268,12 +1285,12 @@ mVUop(mVU_LQI)
xAddressReg is = xEmptyReg;
if (_Is_)
{
mVUallocVIa(mVU, gprT1, _Is_);
xMOV(gprT2, gprT1);
xADD(gprT1b, 1);
mVUallocVIb(mVU, gprT1, _Is_);
mVUaddrFix (mVU, gprT2q);
is = gprT2q;
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Is_, mVUlow.backupVI);
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
xINC(regS);
mVU.regAlloc->clearNeeded(regS);
mVUaddrFix(mVU, gprT1q);
is = gprT1q;
}
if (!mVUlow.noWriteVF)
{
@ -1281,7 +1298,7 @@ mVUop(mVU_LQI)
if (is.IsEmpty())
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
else
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, is), _X_Y_Z_W);
mVU.regAlloc->clearNeeded(Ft);
}
mVU.profiler.EmitOp(opLQI);
@ -1300,15 +1317,13 @@ mVUop(mVU_SQ)
{
void* ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _It_);
if (!_It_)
xXOR(gprT2, gprT2);
mVU.regAlloc->moveVIToGPR(gprT1, _It_);
if (_Imm11_ != 0)
xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2q);
xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT1q);
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQ);
}
@ -1324,12 +1339,12 @@ mVUop(mVU_SQD)
xAddressReg it = xEmptyReg;
if (_It_ || isVU0) // Access VU1 regs mem-map in !_It_ case
{
mVUallocVIa(mVU, gprT2, _It_);
xSUB(gprT2b, 1);
if (_It_)
mVUallocVIb(mVU, gprT2, _It_);
mVUaddrFix(mVU, gprT2q);
it = gprT2q;
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, _It_, mVUlow.backupVI);
xDEC(regT);
xMOVSX(gprT1, xRegister16(regT)); // TODO: Confirm
mVU.regAlloc->clearNeeded(regT);
mVUaddrFix(mVU, gprT1q);
it = gprT1q;
}
else
{
@ -1339,7 +1354,7 @@ mVUop(mVU_SQD)
if (it.IsEmpty())
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
else
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, it), _X_Y_Z_W, 1);
mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, it), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQD);
}
@ -1354,15 +1369,15 @@ mVUop(mVU_SQI)
void* ptr = mVU.regs().Mem;
if (_It_)
{
mVUallocVIa(mVU, gprT1, _It_);
xMOV(gprT2, gprT1);
xADD(gprT1b, 1);
mVUallocVIb(mVU, gprT1, _It_);
mVUaddrFix(mVU, gprT2q);
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, _It_, mVUlow.backupVI);
xMOVSX(gprT1, xRegister16(regT)); // TODO: Confirm
xINC(regT);
mVU.regAlloc->clearNeeded(regT);
mVUaddrFix(mVU, gprT1q);
}
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
if (_It_)
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W, 1);
else
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs);
@ -1426,22 +1441,24 @@ mVUop(mVU_RNEXT)
pass2
{
// algorithm from www.project-fao.org
xMOV(gprT3, ptr32[Rmem]);
xMOV(gprT1, gprT3);
const xRegister32& temp3 = mVU.regAlloc->allocGPR();
xMOV(temp3, ptr32[Rmem]);
xMOV(gprT1, temp3);
xSHR(gprT1, 4);
xAND(gprT1, 1);
xMOV(gprT2, gprT3);
xMOV(gprT2, temp3);
xSHR(gprT2, 22);
xAND(gprT2, 1);
xSHL(gprT3, 1);
xSHL(temp3, 1);
xXOR(gprT1, gprT2);
xXOR(gprT3, gprT1);
xAND(gprT3, 0x007fffff);
xOR (gprT3, 0x3f800000);
xMOV(ptr32[Rmem], gprT3);
mVU_RGET_(mVU, gprT3);
xXOR(temp3, gprT1);
xAND(temp3, 0x007fffff);
xOR (temp3, 0x3f800000);
xMOV(ptr32[Rmem], temp3);
mVU_RGET_(mVU, temp3);
mVU.regAlloc->clearNeeded(temp3);
mVU.profiler.EmitOp(opRNEXT);
}
pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); }
@ -1512,8 +1529,9 @@ mVUop(mVU_XTOP)
}
pass2
{
xMOVZX(gprT1, ptr16[&mVU.getVifRegs().top]);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[&mVU.getVifRegs().top]);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opXTOP);
}
pass3 { mVUlog("XTOP vi%02d", _Ft_); }
@ -1530,9 +1548,10 @@ mVUop(mVU_XITOP)
}
pass2
{
xMOVZX(gprT1, ptr16[&mVU.getVifRegs().itop]);
xAND(gprT1, isVU1 ? 0x3ff : 0xff);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[&mVU.getVifRegs().itop]);
xAND(regT, isVU1 ? 0x3ff : 0xff);
mVU.regAlloc->clearNeeded(regT);
mVU.profiler.EmitOp(opXITOP);
}
pass3 { mVUlog("XITOP vi%02d", _Ft_); }
@ -1634,6 +1653,8 @@ void _vuXGKICKTransfermVU(bool flush)
static __fi void mVU_XGKICK_SYNC(mV, bool flush)
{
mVU.regAlloc->flushCallerSavedRegisters();
// Add the single-cycle remainder after this instruction: some games do the
// store on the second instruction after the kick, and that needs to go
// through first, but that's VERY close.
@ -1652,14 +1673,16 @@ static __fi void mVU_XGKICK_SYNC(mV, bool flush)
static __fi void mVU_XGKICK_DELAY(mV)
{
mVUbackupRegs(mVU);
mVU.regAlloc->flushCallerSavedRegisters();
mVUbackupRegs(mVU, true, true);
#if 0 // XGkick Break - ToDo: Change "SomeGifPathValue" to w/e needs to be tested
xTEST (ptr32[&SomeGifPathValue], 1); // If '1', breaks execution
xMOV (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6);
xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6));
#endif
xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]);
mVUrestoreRegs(mVU);
mVUrestoreRegs(mVU, true, true);
}
mVUop(mVU_XGKICK)
@ -1687,10 +1710,10 @@ mVUop(mVU_XGKICK)
mVUinfo.doXGKICK = false;
}
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, -1);
if (!CHECK_XGKICKHACK)
{
mVUallocVIa(mVU, gprT1, _Is_);
xMOV(ptr32[&mVU.VIxgkick], gprT1);
xMOV(ptr32[&mVU.VIxgkick], regS);
}
else
{
@ -1702,11 +1725,12 @@ mVUop(mVU_XGKICK)
xSUB(gprT2, ptr32[&mVU.cycles]);
xADD(gprT2, ptr32[&VU1.cycle]);
xMOV(ptr32[&VU1.xgkicklastcycle], gprT2);
mVUallocVIa(mVU, gprT1, _Is_);
xMOV(gprT1, regS);
xAND(gprT1, 0x3FF);
xSHL(gprT1, 4);
xMOV(ptr32[&VU1.xgkickaddr], gprT1);
}
mVU.regAlloc->clearNeeded(regS);
mVU.profiler.EmitOp(opXGKICK);
}
pass3 { mVUlog("XGKICK vi%02d", _Fs_); }
@ -1803,22 +1827,25 @@ mVUop(mVU_BAL)
{
if (!mVUlow.evilBranch)
{
xMOV(gprT1, bSaveAddr);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOV(regT, bSaveAddr);
mVU.regAlloc->clearNeeded(regT);
}
else
{
incPC(-2);
DevCon.Warning("Linking BAL from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
incPC(2);
if (isEvilBlock)
xMOV(gprT1, ptr32[&mVU.evilBranch]);
else
xMOV(gprT1, ptr32[&mVU.badBranch]);
xADD(gprT1, 8);
xSHR(gprT1, 3);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
if (isEvilBlock)
xMOV(regT, ptr32[&mVU.evilBranch]);
else
xMOV(regT, ptr32[&mVU.badBranch]);
xADD(regT, 8);
xSHR(regT, 3);
mVU.regAlloc->clearNeeded(regT);
}
if (mVUlow.badBranch) { xMOV(ptr32[&mVU.badBranch], branchAddr(mVU)); }
@ -1837,14 +1864,15 @@ mVUop(mVU_IBEQ)
if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]);
else
mVUallocVIa(mVU, gprT1, _Is_);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (mVUlow.memReadIt)
xXOR(gprT1, ptr32[&mVU.VIbackup]);
else
{
mVUallocVIa(mVU, gprT2, _It_);
xXOR(gprT1, gprT2);
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_);
xXOR(gprT1, regT);
mVU.regAlloc->clearNeeded(regT);
}
if (!(isBadOrEvil))
@ -1865,7 +1893,7 @@ mVUop(mVU_IBGEZ)
if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]);
else
mVUallocVIa(mVU, gprT1, _Is_);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!(isBadOrEvil))
xMOV(ptr32[&mVU.branch], gprT1);
else
@ -1884,7 +1912,7 @@ mVUop(mVU_IBGTZ)
if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]);
else
mVUallocVIa(mVU, gprT1, _Is_);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!(isBadOrEvil))
xMOV(ptr32[&mVU.branch], gprT1);
else
@ -1903,7 +1931,7 @@ mVUop(mVU_IBLEZ)
if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]);
else
mVUallocVIa(mVU, gprT1, _Is_);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!(isBadOrEvil))
xMOV(ptr32[&mVU.branch], gprT1);
else
@ -1922,7 +1950,7 @@ mVUop(mVU_IBLTZ)
if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]);
else
mVUallocVIa(mVU, gprT1, _Is_);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (!(isBadOrEvil))
xMOV(ptr32[&mVU.branch], gprT1);
else
@ -1941,14 +1969,15 @@ mVUop(mVU_IBNE)
if (mVUlow.memReadIs)
xMOV(gprT1, ptr32[&mVU.VIbackup]);
else
mVUallocVIa(mVU, gprT1, _Is_);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (mVUlow.memReadIt)
xXOR(gprT1, ptr32[&mVU.VIbackup]);
else
{
mVUallocVIa(mVU, gprT2, _It_);
xXOR(gprT1, gprT2);
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_);
xXOR(gprT1, regT);
mVU.regAlloc->clearNeeded(regT);
}
if (!(isBadOrEvil))
@ -1964,7 +1993,7 @@ void normJumpPass2(mV)
{
if (!mVUlow.constJump.isValid || mVUlow.evilBranch)
{
mVUallocVIa(mVU, gprT1, _Is_);
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
xSHL(gprT1, 3);
xAND(gprT1, mVU.microMemSize - 8);
@ -2008,17 +2037,18 @@ mVUop(mVU_JALR)
normJumpPass2(mVU);
if (!mVUlow.evilBranch)
{
xMOV(gprT1, bSaveAddr);
mVUallocVIb(mVU, gprT1, _It_);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOV(regT, bSaveAddr);
mVU.regAlloc->clearNeeded(regT);
}
if (mVUlow.evilBranch)
{
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
if (isEvilBlock)
{
xMOV(gprT1, ptr32[&mVU.evilBranch]);
xADD(gprT1, 8);
xSHR(gprT1, 3);
mVUallocVIb(mVU, gprT1, _It_);
xMOV(regT, ptr32[&mVU.evilBranch]);
xADD(regT, 8);
xSHR(regT, 3);
}
else
{
@ -2026,11 +2056,11 @@ mVUop(mVU_JALR)
DevCon.Warning("Linking JALR from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
incPC(2);
xMOV(gprT1, ptr32[&mVU.badBranch]);
xADD(gprT1, 8);
xSHR(gprT1, 3);
mVUallocVIb(mVU, gprT1, _It_);
xMOV(regT, ptr32[&mVU.badBranch]);
xADD(regT, 8);
xSHR(regT, 3);
}
mVU.regAlloc->clearNeeded(regT);
}
mVU.profiler.EmitOp(opJALR);

View File

@ -37,13 +37,6 @@ void setupMacroOp(int mode, const char* opName)
// Set up reg allocation
microVU0.regAlloc->reset(true);
if (mode & 0x110) // X86 regs are modified, or flags modified
{
_freeX86reg(eax);
_freeX86reg(ecx);
_freeX86reg(edx);
}
if (mode & 0x03) // Q will be read/written
_freeXMMreg(xmmPQ.Id);
@ -127,6 +120,17 @@ void mVUFreeCOP2XMMreg(int hostreg)
microVU0.regAlloc->clearRegCOP2(hostreg);
}
void mVUFreeCOP2GPR(int hostreg)
{
microVU0.regAlloc->clearGPRCOP2(hostreg);
}
bool mVUIsReservedCOP2(int hostreg)
{
// gprF1 through gprF3 are not correctly used in COP2 mode.
return (hostreg == gprT1.GetId() || hostreg == gprT2.GetId() || hostreg == gprF0.GetId());
}
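// mVUIsReservedCOP2() is consulted by _getFreeX86reg() above whenever
// MODE_COP2 is set, so the EE register allocator never hands these out while
// recompiling COP2/microVU code.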
#define REC_COP2_mVU0(f, opName, mode) \
void recV##f() \
{ \
@ -429,11 +433,22 @@ static void recCFC2()
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
pxAssert(!GPR_IS_CONST1(_Rt_));
// FixMe: Should R-Reg have upper 9 bits 0?
if (_Rd_ >= REG_STATUS_FLAG)
if (_Rd_ == 0) // why would you read vi00?
{
xXOR(xRegister32(regt), xRegister32(regt));
}
else if (_Rd_ >= REG_STATUS_FLAG) // FixMe: Should R-Reg have upper 9 bits 0?
{
xMOVSX(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
}
else
xMOV(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
{
const int vireg = _allocIfUsedVItoX86(_Rd_, MODE_READ);
if (vireg >= 0)
xMOVZX(xRegister32(regt), xRegister16(vireg));
else
xMOVZX(xRegister32(regt), ptr16[&vu0Regs.VI[_Rd_].UL]);
}
}
static void recCTC2()
@ -532,10 +547,63 @@ static void recCTC2()
_freeXMMregWithoutWriteback(xmmreg);
}
// Need to expand this out, because we want to write as 16 bits.
// Little bit nasty, but optimal codegen.
const int gprreg = _allocIfUsedGPRtoX86(_Rt_, MODE_READ);
const int vireg = _allocIfUsedVItoX86(_Rd_, MODE_WRITE);
if (vireg >= 0)
{
if (gprreg >= 0)
{
xMOVZX(xRegister32(vireg), xRegister16(gprreg));
}
else
{
// it could be in an xmm..
const int gprxmmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
if (gprxmmreg >= 0)
{
xMOVD(xRegister32(vireg), xRegisterSSE(gprxmmreg));
xMOVZX(xRegister32(vireg), xRegister16(vireg));
}
else if (GPR_IS_CONST1(_Rt_))
{
if (_Rt_ != 0)
xMOV(xRegister32(vireg), (g_cpuConstRegs[_Rt_].UL[0] & 0xFFFFu));
else
xXOR(xRegister32(vireg), xRegister32(vireg));
}
else
{
xMOVZX(xRegister32(vireg), ptr16[&cpuRegs.GPR.r[_Rt_].US[0]]);
}
}
}
else
{
if (gprreg >= 0)
{
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], xRegister16(gprreg));
}
else
{
const int gprxmmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
if (gprxmmreg >= 0)
{
xMOVD(eax, xRegisterSSE(gprxmmreg));
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
}
else if (GPR_IS_CONST1(_Rt_))
{
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], (g_cpuConstRegs[_Rt_].UL[0] & 0xFFFFu));
}
else
{
_eeMoveGPRtoR(eax, _Rt_);
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
}
}
}
}
else
{
_eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_);
@ -562,7 +630,7 @@ static void recQMFC2()
mVUFinishVU0();
}
const bool vf_used = COP2INST_USEDTEST(_Rd_);
const bool vf_used = EEINST_VFUSEDTEST(_Rd_);
const int ftreg = _allocVFtoXMMreg(_Rd_, MODE_READ);
_deleteEEreg128(_Rt_);
@ -607,7 +675,7 @@ static void recQMTC2()
if (_Rt_)
{
// if we have to flush to memory anyway (has a constant or is x86), force load.
[[maybe_unused]] const bool vf_used = COP2INST_USEDTEST(_Rd_);
[[maybe_unused]] const bool vf_used = EEINST_VFUSEDTEST(_Rd_);
const bool can_rename = EEINST_RENAMETEST(_Rt_);
const int rtreg = (GPR_IS_DIRTY_CONST(_Rt_) || _hasX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE)) ?
_allocGPRtoXMMreg(_Rt_, MODE_READ) :

View File

@ -154,13 +154,10 @@ static const char branchSTR[16][8] = {
#define gprT1 eax // eax - Temp Reg
#define gprT2 ecx // ecx - Temp Reg
#define gprT3 edx // edx - Temp Reg
#define gprT1q rax // eax - Temp Reg
#define gprT2q rcx // ecx - Temp Reg
#define gprT3q rdx // edx - Temp Reg
#define gprT1b ax // Low 16-bit of gprT1 (eax)
#define gprT2b cx // Low 16-bit of gprT2 (ecx)
#define gprT3b dx // Low 16-bit of gprT3 (edx)
#define gprF0 ebx // Status Flag 0
#define gprF1 r12d // Status Flag 1

View File

@ -14,6 +14,7 @@
*/
#pragma once
#include <bitset>
//------------------------------------------------------------------
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
@ -149,14 +150,57 @@ __fi void mVUbackupRegs(microVU& mVU, bool toMemory = false, bool onlyNeeded = f
{
if (toMemory)
{
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
int num_xmms = 0, num_gprs = 0;
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
{
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
continue;
if (!onlyNeeded || mVU.regAlloc->checkCachedGPR(i))
{
num_gprs++;
xPUSH(xRegister64(i));
}
}
std::bitset<iREGCNT_XMM> save_xmms;
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i)
xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i));
{
save_xmms[i] = true;
num_xmms++;
}
}
// we need 16 byte alignment on the stack
#ifdef _WIN32
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64)) + 32;
int stack_offset = 32;
#else
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64));
int stack_offset = 0;
#endif
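// Note the parity flip versus mvuGenerateWaitMTVU: this sequence runs inline
// in recompiled code where rsp is assumed to be 16-byte aligned already, so
// the u64 pad is needed for an odd GPR push count rather than an even one.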
if (stack_size > 0)
{
xSUB(rsp, stack_size);
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (save_xmms[i])
{
xMOVAPS(ptr128[rsp + stack_offset], xRegisterSSE(i));
stack_offset += sizeof(u128);
}
}
}
}
else
{
// TODO(Stenzek): get rid of xmmbackup
mVU.regAlloc->flushAll(); // Flush Regalloc
xMOVAPS(ptr128[&mVU.xmmBackup[xmmPQ.Id][0]], xmmPQ);
}
@ -167,48 +211,65 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false, bool onlyNeeded
{
if (fromMemory)
{
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
int num_xmms = 0, num_gprs = 0;
std::bitset<iREGCNT_GPR> save_gprs;
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
{
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
continue;
if (!onlyNeeded || mVU.regAlloc->checkCachedGPR(i))
{
save_gprs[i] = true;
num_gprs++;
}
}
std::bitset<iREGCNT_XMM> save_xmms;
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
{
if (!xRegisterSSE::IsCallerSaved(i))
continue;
if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i)
xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]);
{
save_xmms[i] = true;
num_xmms++;
}
}
#ifdef _WIN32
const int stack_extra = 32;
#else
const int stack_extra = 0;
#endif
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64)) + stack_extra;
if (num_xmms > 0)
{
int stack_offset = (num_xmms - 1) * sizeof(u128) + stack_extra;
for (int i = static_cast<int>(iREGCNT_XMM - 1); i >= 0; i--)
{
if (!save_xmms[i])
continue;
xMOVAPS(xRegisterSSE(i), ptr128[rsp + stack_offset]);
stack_offset -= sizeof(u128);
}
}
if (stack_size > 0)
xADD(rsp, stack_size);
for (int i = static_cast<int>(iREGCNT_GPR - 1); i >= 0; i--)
{
if (save_gprs[i])
xPOP(xRegister64(i));
}
}
else
{
xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
}
class mVUScopedXMMBackup
{
microVU& mVU;
bool fromMemory;
public:
mVUScopedXMMBackup(microVU& mVU, bool fromMemory)
: mVU(mVU) , fromMemory(fromMemory)
{
mVUbackupRegs(mVU, fromMemory);
}
~mVUScopedXMMBackup()
{
mVUrestoreRegs(mVU, fromMemory);
}
};
_mVUt void mVUprintRegs()
{
microVU& mVU = mVUx;
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
{
Console.WriteLn("xmm%d = [0x%08x,0x%08x,0x%08x,0x%08x]", i,
mVU.xmmBackup[i][0], mVU.xmmBackup[i][1],
mVU.xmmBackup[i][2], mVU.xmmBackup[i][3]);
}
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
{
Console.WriteLn("xmm%d = [%f,%f,%f,%f]", i,
(float&)mVU.xmmBackup[i][0], (float&)mVU.xmmBackup[i][1],
(float&)mVU.xmmBackup[i][2], (float&)mVU.xmmBackup[i][3]);
}
}
// Gets called by mVUaddrFix at execution-time
@ -259,17 +320,15 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
jmpA.SetTarget();
if (THREAD_VU1)
{
{
mVUScopedXMMBackup mVUSave(mVU, true);
xScopedSavedRegisters save{gprT1q, gprT2q, gprT3q};
#if 0
if (IsDevBuild && !isCOP2) // Let's see which games do this!
{
xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(arg2regd, xPC); // So we don't spam console, we'll only check micro-mode...
xMOV(gprT1, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(gprT2, xPC); // So we don't spam console, we'll only check micro-mode...
xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd);
}
xFastCall((void*)mVUwaitMTVU);
}
#endif
xFastCall((void*)mVU.waitMTVU);
}
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: there's a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);