mirror of https://github.com/PCSX2/pcsx2.git
x86/microVU: Add VI caching
This commit is contained in:
parent
08faba5455
commit
00d768a6bf
|
@ -34,6 +34,12 @@ extern thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM];
|
||||||
|
|
||||||
namespace x86Emitter
|
namespace x86Emitter
|
||||||
{
|
{
|
||||||
|
// Stack bytes a caller has to reserve as shadow space in its own frame before
// making a call: 32 when building for Win32, nothing on every other target.
static constexpr int SHADOW_STACK_SIZE =
#if defined(_WIN32)
	32;
#else
	0;
#endif
|
||||||
|
|
||||||
extern void xWrite8(u8 val);
|
extern void xWrite8(u8 val);
|
||||||
extern void xWrite16(u16 val);
|
extern void xWrite16(u16 val);
|
||||||
|
@ -401,6 +407,8 @@ namespace x86Emitter
|
||||||
pxAssertDev(other.canMapIDTo(4), "Mapping h registers to higher registers can produce unexpected values");
|
pxAssertDev(other.canMapIDTo(4), "Mapping h registers to higher registers can produce unexpected values");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const inline xRegister32& GetInstance(uint id);
|
||||||
|
|
||||||
bool operator==(const xRegister32& src) const { return this->Id == src.Id; }
|
bool operator==(const xRegister32& src) const { return this->Id == src.Id; }
|
||||||
bool operator!=(const xRegister32& src) const { return this->Id != src.Id; }
|
bool operator!=(const xRegister32& src) const { return this->Id != src.Id; }
|
||||||
};
|
};
|
||||||
|
@ -421,6 +429,8 @@ namespace x86Emitter
|
||||||
pxAssertDev(other.canMapIDTo(8), "Mapping h registers to higher registers can produce unexpected values");
|
pxAssertDev(other.canMapIDTo(8), "Mapping h registers to higher registers can produce unexpected values");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const inline xRegister64& GetInstance(uint id);
|
||||||
|
|
||||||
bool operator==(const xRegister64& src) const { return this->Id == src.Id; }
|
bool operator==(const xRegister64& src) const { return this->Id == src.Id; }
|
||||||
bool operator!=(const xRegister64& src) const { return this->Id != src.Id; }
|
bool operator!=(const xRegister64& src) const { return this->Id != src.Id; }
|
||||||
};
|
};
|
||||||
|
@ -664,6 +674,34 @@ extern const xRegister32
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the shared 32-bit register object whose id is `id`.
/// `id` must be below iREGCNT_GPR (checked with pxAssert only).
const xRegister32& xRegister32::GetInstance(uint id)
{
	// Position in this table is the register id.
	static const xRegister32* const s_byId[] = {
		&eax, &ecx, &edx, &ebx,
		&esp, &ebp, &esi, &edi,
		&r8d, &r9d, &r10d, &r11d,
		&r12d, &r13d, &r14d, &r15d,
	};

	pxAssert(id < iREGCNT_GPR);

	const xRegister32* const instance = s_byId[id];
	return *instance;
}
|
||||||
|
|
||||||
|
/// Returns the shared 64-bit register object whose id is `id`.
/// `id` must be below iREGCNT_GPR (checked with pxAssert only).
const xRegister64& xRegister64::GetInstance(uint id)
{
	// Position in this table is the register id.
	static const xRegister64* const s_byId[] = {
		&rax, &rcx, &rdx, &rbx,
		&rsp, &rbp, &rsi, &rdi,
		&r8, &r9, &r10, &r11,
		&r12, &r13, &r14, &r15,
	};

	pxAssert(id < iREGCNT_GPR);

	const xRegister64* const instance = s_byId[id];
	return *instance;
}
|
||||||
|
|
||||||
bool xRegisterSSE::IsCallerSaved(uint id)
|
bool xRegisterSSE::IsCallerSaved(uint id)
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
|
@ -144,7 +144,7 @@ int _getFreeXMMreg(u32 maxreg)
|
||||||
|
|
||||||
case XMMTYPE_VFREG:
|
case XMMTYPE_VFREG:
|
||||||
{
|
{
|
||||||
if (COP2INST_USEDTEST(xmmregs[i].reg))
|
if (EEINST_VFUSEDTEST(xmmregs[i].reg))
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -875,6 +875,16 @@ int _allocIfUsedGPRtoX86(int gprreg, int mode)
|
||||||
return EEINST_USEDTEST(gprreg) ? _allocX86reg(X86TYPE_GPR, gprreg, mode) : -1;
|
return EEINST_USEDTEST(gprreg) ? _allocX86reg(X86TYPE_GPR, gprreg, mode) : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int _allocIfUsedVItoX86(int vireg, int mode)
|
||||||
|
{
|
||||||
|
const int x86reg = _checkX86reg(X86TYPE_VIREG, vireg, mode);
|
||||||
|
if (x86reg >= 0)
|
||||||
|
return x86reg;
|
||||||
|
|
||||||
|
// Prefer not to stop on COP2 reserved registers here.
|
||||||
|
return EEINST_VIUSEDTEST(vireg) ? _allocX86reg(X86TYPE_VIREG, vireg, mode | MODE_COP2) : -1;
|
||||||
|
}
|
||||||
|
|
||||||
int _allocIfUsedGPRtoXMM(int gprreg, int mode)
|
int _allocIfUsedGPRtoXMM(int gprreg, int mode)
|
||||||
{
|
{
|
||||||
const int mmreg = _checkXMMreg(XMMTYPE_GPRREG, gprreg, mode);
|
const int mmreg = _checkXMMreg(XMMTYPE_GPRREG, gprreg, mode);
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#define MODE_READ 1
|
#define MODE_READ 1
|
||||||
#define MODE_WRITE 2
|
#define MODE_WRITE 2
|
||||||
#define MODE_CALLEESAVED 0x20 // can't flush reg to mem
|
#define MODE_CALLEESAVED 0x20 // can't flush reg to mem
|
||||||
|
#define MODE_COP2 0x40 // don't allow using reserved VU registers
|
||||||
|
|
||||||
#define PROCESS_EE_XMM 0x02
|
#define PROCESS_EE_XMM 0x02
|
||||||
|
|
||||||
|
@ -119,6 +120,9 @@ void _flushConstReg(int reg);
|
||||||
void _validateRegs();
|
void _validateRegs();
|
||||||
void _writebackX86Reg(int x86reg);
|
void _writebackX86Reg(int x86reg);
|
||||||
|
|
||||||
|
void mVUFreeCOP2GPR(int hostreg);
|
||||||
|
bool mVUIsReservedCOP2(int hostreg);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// XMM (128-bit) Register Allocation Tools
|
// XMM (128-bit) Register Allocation Tools
|
||||||
|
|
||||||
|
@ -247,11 +251,17 @@ static __fi bool EEINST_XMMUSEDTEST(u32 reg)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if the specified VF register is used later in the block.
|
/// Returns true if the specified VF register is used later in the block.
|
||||||
static __fi bool COP2INST_USEDTEST(u32 reg)
|
static __fi bool EEINST_VFUSEDTEST(u32 reg)
|
||||||
{
|
{
|
||||||
return (g_pCurInstInfo->vfregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
|
return (g_pCurInstInfo->vfregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the specified VI register is used later in the block.
|
||||||
|
static __fi bool EEINST_VIUSEDTEST(u32 reg)
|
||||||
|
{
|
||||||
|
return (g_pCurInstInfo->viregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if the value should be computed/written back.
|
/// Returns true if the value should be computed/written back.
|
||||||
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
|
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
|
||||||
static __fi bool EEINST_LIVETEST(u32 reg)
|
static __fi bool EEINST_LIVETEST(u32 reg)
|
||||||
|
@ -297,6 +307,7 @@ extern u16 g_xmmAllocCounter;
|
||||||
|
|
||||||
// allocates only if later insts use this register
|
// allocates only if later insts use this register
|
||||||
int _allocIfUsedGPRtoX86(int gprreg, int mode);
|
int _allocIfUsedGPRtoX86(int gprreg, int mode);
|
||||||
|
int _allocIfUsedVItoX86(int vireg, int mode);
|
||||||
int _allocIfUsedGPRtoXMM(int gprreg, int mode);
|
int _allocIfUsedGPRtoXMM(int gprreg, int mode);
|
||||||
int _allocIfUsedFPUtoXMM(int fpureg, int mode);
|
int _allocIfUsedFPUtoXMM(int fpureg, int mode);
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,9 @@ int _getFreeX86reg(int mode)
|
||||||
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(reg))
|
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(reg))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if ((mode & MODE_COP2) && mVUIsReservedCOP2(reg))
|
||||||
|
continue;
|
||||||
|
|
||||||
if (x86regs[reg].inuse == 0)
|
if (x86regs[reg].inuse == 0)
|
||||||
{
|
{
|
||||||
g_x86checknext = (reg + 1) % iREGCNT_GPR;
|
g_x86checknext = (reg + 1) % iREGCNT_GPR;
|
||||||
|
@ -70,6 +73,9 @@ int _getFreeX86reg(int mode)
|
||||||
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(i))
|
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(i))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if ((mode & MODE_COP2) && mVUIsReservedCOP2(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
// should have checked inuse in the previous loop.
|
// should have checked inuse in the previous loop.
|
||||||
pxAssert(x86regs[i].inuse);
|
pxAssert(x86regs[i].inuse);
|
||||||
|
|
||||||
|
@ -373,6 +379,13 @@ int _allocX86reg(int type, int reg, int mode)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case X86TYPE_VIREG:
|
||||||
|
{
|
||||||
|
RALOG("Loading guest VI reg %d to GPR %d", reg, regnum);
|
||||||
|
xMOVZX(xRegister32(regnum), ptr16[&VU0.VI[reg].US[0]]);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
abort();
|
abort();
|
||||||
break;
|
break;
|
||||||
|
@ -536,8 +549,7 @@ void _freeX86regWithoutWriteback(int x86reg)
|
||||||
if (x86regs[x86reg].type == X86TYPE_VIREG)
|
if (x86regs[x86reg].type == X86TYPE_VIREG)
|
||||||
{
|
{
|
||||||
RALOG("Freeing VI reg %d in host GPR %d\n", x86regs[x86reg].reg, x86reg);
|
RALOG("Freeing VI reg %d in host GPR %d\n", x86regs[x86reg].reg, x86reg);
|
||||||
//mVUFreeCOP2GPR(x86reg);
|
mVUFreeCOP2GPR(x86reg);
|
||||||
abort();
|
|
||||||
}
|
}
|
||||||
else if (x86regs[x86reg].inuse && x86regs[x86reg].type == X86TYPE_GPR)
|
else if (x86regs[x86reg].inuse && x86regs[x86reg].type == X86TYPE_GPR)
|
||||||
{
|
{
|
||||||
|
|
|
@ -89,6 +89,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
|
||||||
x86SetPtr(mVU.dispCache);
|
x86SetPtr(mVU.dispCache);
|
||||||
mVUdispatcherAB(mVU);
|
mVUdispatcherAB(mVU);
|
||||||
mVUdispatcherCD(mVU);
|
mVUdispatcherCD(mVU);
|
||||||
|
mvuGenerateWaitMTVU(mVU);
|
||||||
mVUemitSearch();
|
mVUemitSearch();
|
||||||
|
|
||||||
mVU.regs().nextBlockCycles = 0;
|
mVU.regs().nextBlockCycles = 0;
|
||||||
|
|
|
@ -251,6 +251,7 @@ struct microVU
|
||||||
u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit)
|
u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit)
|
||||||
u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume)
|
u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume)
|
||||||
u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit)
|
u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit)
|
||||||
|
u8* waitMTVU; // Ptr to function to save registers/sync VU1 thread
|
||||||
u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick
|
u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick
|
||||||
u32 code; // Contains the current Instruction
|
u32 code; // Contains the current Instruction
|
||||||
u32 divFlag; // 1 instance of I/D flags
|
u32 divFlag; // 1 instance of I/D flags
|
||||||
|
|
|
@ -116,32 +116,10 @@ __fi void mVUallocCFLAGb(mV, const x32& reg, int fInstance)
|
||||||
// VI Reg Allocators
|
// VI Reg Allocators
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
__ri void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false)
|
void microRegAlloc::writeVIBackup(const xRegisterInt& reg)
|
||||||
{
|
{
|
||||||
if (!_reg_)
|
microVU& mVU = index ? microVU1 : microVU0;
|
||||||
xXOR(GPRreg, GPRreg);
|
xMOV(ptr32[&mVU.VIbackup], xRegister32(reg));
|
||||||
else if (signext)
|
|
||||||
xMOVSX(GPRreg, ptr16[&mVU.regs().VI[_reg_].SL]);
|
|
||||||
else
|
|
||||||
xMOVZX(GPRreg, ptr16[&mVU.regs().VI[_reg_].UL]);
|
|
||||||
}
|
|
||||||
|
|
||||||
__ri void mVUallocVIb(mV, const x32& GPRreg, int _reg_)
|
|
||||||
{
|
|
||||||
if (mVUlow.backupVI) // Backs up reg to memory (used when VI is modified b4 a branch)
|
|
||||||
{
|
|
||||||
xMOVZX(gprT3, ptr16[&mVU.regs().VI[_reg_].UL]);
|
|
||||||
xMOV (ptr32[&mVU.VIbackup], gprT3);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_reg_ == 0)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else if (_reg_ < 16)
|
|
||||||
{
|
|
||||||
xMOV(ptr16[&mVU.regs().VI[_reg_].UL], xRegister16(GPRreg.Id));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
|
@ -123,6 +123,81 @@ void mVUdispatcherCD(mV)
|
||||||
"microVU: Dispatcher generation exceeded reserved cache area!");
|
"microVU: Dispatcher generation exceeded reserved cache area!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the waitMTVU stub at the current emitter position and records its
// address in mVU.waitMTVU. The stub saves the caller-saved GPRs (except rsp and
// the gprT1/gprT2 temps) and the caller-saved XMM registers, calls
// mVUwaitMTVU, restores everything in reverse order and returns.
void mvuGenerateWaitMTVU(mV)
{
	mVU.waitMTVU = x86Ptr;

	// One predicate for both the push and the pop loop so they cannot drift apart.
	const auto mustPreserveGPR = [](int gpr) {
		if (!xRegister32::IsCallerSaved(gpr) || gpr == rsp.GetId())
			return false;

		// no need to save temps
		return !(gpr == gprT1.GetId() || gpr == gprT2.GetId());
	};

	const int gprLimit = static_cast<int>(iREGCNT_GPR);
	const int xmmLimit = static_cast<int>(iREGCNT_XMM);

	// Push GPRs in ascending id order.
	int num_gprs = 0;
	for (int gpr = 0; gpr < gprLimit; gpr++)
	{
		if (mustPreserveGPR(gpr))
		{
			xPUSH(xRegister64(gpr));
			num_gprs++;
		}
	}

	int num_xmms = 0;
	for (int x = 0; x < xmmLimit; x++)
	{
		if (xRegisterSSE::IsCallerSaved(x))
			num_xmms++;
	}

	// We need 16 byte alignment on the stack.
	// Since the stack is unaligned at entry to this function, we add 8 when it's even, not odd.
	const int stack_size = (num_xmms * sizeof(u128)) + ((~num_gprs & 1) * sizeof(u64)) + SHADOW_STACK_SIZE;

	if (stack_size > 0)
	{
		xSUB(rsp, stack_size);

		// XMM slots start right above the shadow space and grow upwards.
		int save_offset = SHADOW_STACK_SIZE;
		for (int x = 0; x < xmmLimit; x++)
		{
			if (!xRegisterSSE::IsCallerSaved(x))
				continue;

			xMOVAPS(ptr128[rsp + save_offset], xRegisterSSE(x));
			save_offset += sizeof(u128);
		}
	}

	xFastCall((void*)mVUwaitMTVU);

	// Reload XMM registers from the highest slot downwards.
	int load_offset = (num_xmms - 1) * sizeof(u128) + SHADOW_STACK_SIZE;
	for (int x = xmmLimit - 1; x >= 0; x--)
	{
		if (!xRegisterSSE::IsCallerSaved(x))
			continue;

		xMOVAPS(xRegisterSSE(x), ptr128[rsp + load_offset]);
		load_offset -= sizeof(u128);
	}
	xADD(rsp, stack_size);

	// Pop GPRs in descending id order, mirroring the pushes above.
	for (int gpr = gprLimit - 1; gpr >= 0; gpr--)
	{
		if (mustPreserveGPR(gpr))
			xPOP(xRegister64(gpr));
	}

	xRET();

	pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
		"microVU: Dispatcher generation exceeded reserved cache area!");
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Execution Functions
|
// Execution Functions
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
|
@ -313,13 +313,15 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
const xRegister32& temp3 = mVU.regAlloc->allocGPR();
|
||||||
xMOV(gprT1, getFlagReg(bStatus[0]));
|
xMOV(gprT1, getFlagReg(bStatus[0]));
|
||||||
xMOV(gprT2, getFlagReg(bStatus[1]));
|
xMOV(gprT2, getFlagReg(bStatus[1]));
|
||||||
xMOV(gprT3, getFlagReg(bStatus[2]));
|
xMOV(temp3, getFlagReg(bStatus[2]));
|
||||||
xMOV(gprF3, getFlagReg(bStatus[3]));
|
xMOV(gprF3, getFlagReg(bStatus[3]));
|
||||||
xMOV(gprF0, gprT1);
|
xMOV(gprF0, gprT1);
|
||||||
xMOV(gprF1, gprT2);
|
xMOV(gprF1, gprT2);
|
||||||
xMOV(gprF2, gprT3);
|
xMOV(gprF2, temp3);
|
||||||
|
mVU.regAlloc->clearNeeded(temp3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -228,11 +228,25 @@ struct microMapXMM
|
||||||
bool isZero; // Register was loaded from VF00 and doesn't need clamping
|
bool isZero; // Register was loaded from VF00 and doesn't need clamping
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Per-host-GPR bookkeeping for the microVU VI register cache.
struct microMapGPR
{
	int VIreg;           // VI register currently held, or -1 for none / temp value
	int count;           // allocation counter value from the most recent allocation
	bool isNeeded;       // set while an allocation is outstanding; cleared by clearNeeded()
	bool dirty;          // cached value has not been written back to the VI register yet
	bool isZeroExtended; // cached value is known to be zero-extended
	bool usable;         // host register may be handed out by the allocator
};
|
||||||
|
|
||||||
class microRegAlloc
|
class microRegAlloc
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
static const int xmmTotal = 15; // PQ register is reserved
|
static const int xmmTotal = iREGCNT_XMM - 1; // PQ register is reserved
|
||||||
|
static const int gprTotal = iREGCNT_GPR;
|
||||||
|
|
||||||
microMapXMM xmmMap[xmmTotal];
|
microMapXMM xmmMap[xmmTotal];
|
||||||
|
microMapGPR gprMap[gprTotal];
|
||||||
|
|
||||||
int counter; // Current allocation count
|
int counter; // Current allocation count
|
||||||
int index; // VU0 or VU1
|
int index; // VU0 or VU1
|
||||||
|
|
||||||
|
@ -251,6 +265,18 @@ protected:
|
||||||
|
|
||||||
__ri void loadIreg(const xmm& reg, int xyzw)
|
__ri void loadIreg(const xmm& reg, int xyzw)
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
if (gprMap[i].VIreg == REG_I)
|
||||||
|
{
|
||||||
|
xMOVDZX(reg, xRegister32(i));
|
||||||
|
if (!_XYZWss(xyzw))
|
||||||
|
xSHUF.PS(reg, reg, 0);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
|
xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
|
||||||
if (!_XYZWss(xyzw))
|
if (!_XYZWss(xyzw))
|
||||||
xSHUF.PS(reg, reg, 0);
|
xSHUF.PS(reg, reg, 0);
|
||||||
|
@ -290,10 +316,59 @@ protected:
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int findFreeGPRRec(int startIdx)
|
||||||
|
{
|
||||||
|
for (int i = startIdx; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
if (gprMap[i].usable && !gprMap[i].isNeeded)
|
||||||
|
{
|
||||||
|
int x = findFreeGPRRec(i + 1);
|
||||||
|
if (x == -1)
|
||||||
|
return i;
|
||||||
|
return ((gprMap[i].count < gprMap[x].count) ? i : x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int findFreeGPR(int vireg)
|
||||||
|
{
|
||||||
|
if (regAllocCOP2)
|
||||||
|
return _allocX86reg(X86TYPE_VIREG, vireg, MODE_COP2);
|
||||||
|
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
if (gprMap[i].usable && !gprMap[i].isNeeded && (gprMap[i].VIreg < 0))
|
||||||
|
{
|
||||||
|
return i; // Reg is not needed and was a temp reg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int x = findFreeGPRRec(0);
|
||||||
|
pxAssertDev(x >= 0, "microVU register allocation failure!");
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeVIBackup(const xRegisterInt& reg);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
microRegAlloc(int _index)
|
microRegAlloc(int _index)
|
||||||
{
|
{
|
||||||
index = _index;
|
index = _index;
|
||||||
|
|
||||||
|
// mark gpr registers as usable
|
||||||
|
std::memset(gprMap, 0, sizeof(gprMap));
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
if (i == gprT1.GetId() || i == gprT2.GetId() ||
|
||||||
|
i == gprF0.GetId() || i == gprF1.GetId() || i == gprF2.GetId() || i == gprF3.GetId() ||
|
||||||
|
i == rsp.GetId())
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
gprMap[i].usable = true;
|
||||||
|
}
|
||||||
|
|
||||||
reset(false);
|
reset(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,9 +379,10 @@ public:
|
||||||
regAllocCOP2 = false;
|
regAllocCOP2 = false;
|
||||||
|
|
||||||
for (int i = 0; i < xmmTotal; i++)
|
for (int i = 0; i < xmmTotal; i++)
|
||||||
{
|
|
||||||
clearReg(i);
|
clearReg(i);
|
||||||
}
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
clearGPR(i);
|
||||||
|
|
||||||
counter = 0;
|
counter = 0;
|
||||||
regAllocCOP2 = cop2mode;
|
regAllocCOP2 = cop2mode;
|
||||||
pxmmregs = cop2mode ? xmmregs : nullptr;
|
pxmmregs = cop2mode ? xmmregs : nullptr;
|
||||||
|
@ -331,13 +407,37 @@ public:
|
||||||
xmmMap[i].xyzw = ((pxmmregs[i].mode & MODE_WRITE) != 0) ? 0xf : 0x0;
|
xmmMap[i].xyzw = ((pxmmregs[i].mode & MODE_WRITE) != 0) ? 0xf : 0x0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
if (!x86regs[i].inuse || x86regs[i].type != X86TYPE_VIREG)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// pxAssertRel(armregs[i].reg >= 0, "Valid full register preserved");
|
||||||
|
if (x86regs[i].reg >= 0)
|
||||||
|
{
|
||||||
|
MVURALOG("Preserving VI reg %d in host reg %d across instruction\n", x86regs[i].reg, i);
|
||||||
|
x86regs[i].needed = false;
|
||||||
|
gprMap[i].isNeeded = false;
|
||||||
|
gprMap[i].isZeroExtended = false;
|
||||||
|
gprMap[i].VIreg = x86regs[i].reg;
|
||||||
|
gprMap[i].dirty = ((x86regs[i].mode & MODE_WRITE) != 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getXmmCount()
|
int getXmmCount()
|
||||||
{
|
{
|
||||||
return xmmTotal + 1;
|
return xmmTotal + 1;
|
||||||
}
|
}
|
||||||
|
int getGPRCount()
|
||||||
|
{
|
||||||
|
return gprTotal;
|
||||||
|
}
|
||||||
|
|
||||||
// Flushes all allocated registers (i.e. writes-back to memory all modified registers).
|
// Flushes all allocated registers (i.e. writes-back to memory all modified registers).
|
||||||
// If clearState is 0, then it keeps cached reg data valid
|
// If clearState is 0, then it keeps cached reg data valid
|
||||||
// If clearState is 1, then it invalidates all cached reg data after write-back
|
// If clearState is 1, then it invalidates all cached reg data after write-back
|
||||||
|
@ -349,6 +449,36 @@ public:
|
||||||
if (clearState)
|
if (clearState)
|
||||||
clearReg(i);
|
clearReg(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
writeBackReg(xRegister32(i), true);
|
||||||
|
if (clearState)
|
||||||
|
clearGPR(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void flushCallerSavedRegisters(bool clearNeeded = false)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < xmmTotal; i++)
|
||||||
|
{
|
||||||
|
if (!xRegisterSSE::IsCallerSaved(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
writeBackReg(xmm(i));
|
||||||
|
if (clearNeeded || !xmmMap[i].isNeeded)
|
||||||
|
clearReg(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
if (!xRegister32::IsCallerSaved(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
writeBackReg(xRegister32(i), true);
|
||||||
|
if (clearNeeded || !gprMap[i].isNeeded)
|
||||||
|
clearGPR(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void flushPartialForCOP2()
|
void flushPartialForCOP2()
|
||||||
|
@ -378,10 +508,19 @@ public:
|
||||||
clear.isNeeded = 0;
|
clear.isNeeded = 0;
|
||||||
clear.isZero = 0;
|
clear.isZero = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
microMapGPR& clear = gprMap[i];
|
||||||
|
if (clear.VIreg < 0)
|
||||||
|
clearGPR(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TDwritebackAll(bool clearState = false)
|
void TDwritebackAll()
|
||||||
{
|
{
|
||||||
|
// NOTE: We don't clear state here, this happens in an optional branch
|
||||||
|
|
||||||
for (int i = 0; i < xmmTotal; i++)
|
for (int i = 0; i < xmmTotal; i++)
|
||||||
{
|
{
|
||||||
microMapXMM& mapX = xmmMap[xmm(i).Id];
|
microMapXMM& mapX = xmmMap[xmm(i).Id];
|
||||||
|
@ -396,6 +535,9 @@ public:
|
||||||
mVUsaveReg(xmm(i), ptr[&getVF(mapX.VFreg)], mapX.xyzw, 1);
|
mVUsaveReg(xmm(i), ptr[&getVF(mapX.VFreg)], mapX.xyzw, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
writeBackReg(xRegister32(i), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool checkVFClamp(int regId)
|
bool checkVFClamp(int regId)
|
||||||
|
@ -414,11 +556,19 @@ public:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool checkCachedGPR(int regId)
|
||||||
|
{
|
||||||
|
if (regId < gprTotal)
|
||||||
|
return gprMap[regId].VIreg >= 0 || gprMap[regId].isNeeded;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void clearReg(const xmm& reg) { clearReg(reg.Id); }
|
void clearReg(const xmm& reg) { clearReg(reg.Id); }
|
||||||
void clearReg(int regId)
|
void clearReg(int regId)
|
||||||
{
|
{
|
||||||
microMapXMM& clear = xmmMap[regId];
|
microMapXMM& clear = xmmMap[regId];
|
||||||
if (regAllocCOP2)
|
if (regAllocCOP2 && (clear.isNeeded || clear.VFreg >= 0))
|
||||||
{
|
{
|
||||||
pxAssert(pxmmregs[regId].type == XMMTYPE_VFREG);
|
pxAssert(pxmmregs[regId].type == XMMTYPE_VFREG);
|
||||||
pxmmregs[regId].inuse = false;
|
pxmmregs[regId].inuse = false;
|
||||||
|
@ -668,4 +818,262 @@ public:
|
||||||
updateCOP2AllocState(x);
|
updateCOP2AllocState(x);
|
||||||
return xmmX;
|
return xmmX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void clearGPR(const xRegisterInt& reg) { clearGPR(reg.GetId()); }
|
||||||
|
|
||||||
|
void clearGPR(int regId)
|
||||||
|
{
|
||||||
|
microMapGPR& clear = gprMap[regId];
|
||||||
|
|
||||||
|
if (regAllocCOP2)
|
||||||
|
{
|
||||||
|
if (x86regs[regId].inuse && x86regs[regId].type == X86TYPE_VIREG)
|
||||||
|
{
|
||||||
|
pxAssert(x86regs[regId].reg == static_cast<u8>(clear.VIreg));
|
||||||
|
_freeX86regWithoutWriteback(regId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clear.VIreg = -1;
|
||||||
|
clear.count = 0;
|
||||||
|
clear.isNeeded = 0;
|
||||||
|
clear.dirty = false;
|
||||||
|
clear.isZeroExtended = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void clearGPRCOP2(int regId)
|
||||||
|
{
|
||||||
|
if (regAllocCOP2)
|
||||||
|
clearGPR(regId);
|
||||||
|
}
|
||||||
|
|
||||||
|
void updateCOP2AllocState(const xRegisterInt& reg)
|
||||||
|
{
|
||||||
|
if (!regAllocCOP2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
const u32 rn = reg.GetId();
|
||||||
|
const bool dirty = (gprMap[rn].VIreg >= 0 && gprMap[rn].dirty);
|
||||||
|
pxAssert(x86regs[rn].type == X86TYPE_VIREG);
|
||||||
|
x86regs[rn].reg = gprMap[rn].VIreg;
|
||||||
|
x86regs[rn].counter = gprMap[rn].count;
|
||||||
|
x86regs[rn].mode = dirty ? (MODE_READ | MODE_WRITE) : MODE_READ;
|
||||||
|
x86regs[rn].needed = gprMap[rn].isNeeded;
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeBackReg(const xRegisterInt& reg, bool clearDirty)
|
||||||
|
{
|
||||||
|
microMapGPR& mapX = gprMap[reg.GetId()];
|
||||||
|
pxAssert(mapX.usable || !mapX.dirty);
|
||||||
|
if (mapX.dirty)
|
||||||
|
{
|
||||||
|
pxAssert(mapX.VIreg > 0);
|
||||||
|
if (mapX.VIreg < 16)
|
||||||
|
xMOV(ptr16[&getVI(mapX.VIreg)], xRegister16(reg));
|
||||||
|
if (clearDirty)
|
||||||
|
{
|
||||||
|
mapX.dirty = false;
|
||||||
|
updateCOP2AllocState(reg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void clearNeeded(const xRegisterInt& reg)
|
||||||
|
{
|
||||||
|
pxAssert(reg.GetId() < gprTotal);
|
||||||
|
microMapGPR& clear = gprMap[reg.GetId()];
|
||||||
|
clear.isNeeded = false;
|
||||||
|
if (regAllocCOP2)
|
||||||
|
x86regs[reg.GetId()].needed = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void unbindAnyVIAllocations(int reg, bool& backup)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < gprTotal; i++)
|
||||||
|
{
|
||||||
|
microMapGPR& mapI = gprMap[i];
|
||||||
|
if (mapI.VIreg == reg)
|
||||||
|
{
|
||||||
|
if (backup)
|
||||||
|
{
|
||||||
|
writeVIBackup(xRegister32(i));
|
||||||
|
backup = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if it's needed, we just unbind the allocation and preserve it, otherwise clear
|
||||||
|
if (mapI.isNeeded)
|
||||||
|
{
|
||||||
|
MVURALOG(" unbind %d to %d for write\n", i, reg);
|
||||||
|
if (regAllocCOP2)
|
||||||
|
{
|
||||||
|
pxAssert(x86regs[i].type == X86TYPE_VIREG && x86regs[i].reg == static_cast<u8>(mapI.VIreg));
|
||||||
|
x86regs[i].reg = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
mapI.VIreg = -1;
|
||||||
|
mapI.dirty = false;
|
||||||
|
mapI.isZeroExtended = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MVURALOG(" clear %d to %d for write\n", i, reg);
|
||||||
|
clearGPR(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// shouldn't be any others...
|
||||||
|
for (int j = i + 1; j < gprTotal; j++)
|
||||||
|
{
|
||||||
|
pxAssert(gprMap[j].VIreg != reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocates a host GPR for a VI register access and returns it flagged as needed.
//
//   viLoadReg     - VI register whose value must be present in the result, or -1 for none.
//   viWriteReg    - VI register the caller is going to write via the result, or -1 for none.
//                   0 yields a zeroed scratch register that is bound to no VI register.
//   backup        - when set, a value is stored through writeVIBackup() before returning
//                   (or while unbinding a previous allocation of viWriteReg).
//   zext_if_dirty - when set, a cached value is zero-extended from its low 16 bits
//                   unless it is already known to be zero-extended.
//
// Callers in this file release the result with clearNeeded() when they are done.
const xRegister32& allocGPR(int viLoadReg = -1, int viWriteReg = -1, bool backup = false, bool zext_if_dirty = false)
{
	// TODO: When load != write, we should check whether load is used later, and if so, copy it.

	const int this_counter = regAllocCOP2 ? (g_x86AllocCounter++) : (counter++);

	// write zero register as temp and discard later
	if (viWriteReg == 0)
	{
		const int scratchIdx = findFreeGPR(-1);
		const xRegister32& scratch = xRegister32::GetInstance(scratchIdx);
		writeBackReg(scratch, true);
		xXOR(scratch, scratch);

		microMapGPR& scratchMap = gprMap[scratchIdx];
		scratchMap.VIreg = -1;
		scratchMap.dirty = false;
		scratchMap.count = this_counter;
		scratchMap.isNeeded = true;
		scratchMap.isZeroExtended = true;
		MVURALOG(" alloc zero to scratch %d\n", scratchIdx);
		return scratch;
	}

	// Search For Cached Regs
	if (viLoadReg >= 0)
	{
		for (int cachedIdx = 0; cachedIdx < gprTotal; cachedIdx++)
		{
			if (gprMap[cachedIdx].VIreg != viLoadReg)
				continue;

			// Register that will be returned; becomes the clone when load != write.
			int resultIdx = cachedIdx;

			if (viWriteReg >= 0) // Reg will be modified
			{
				if (viLoadReg == viWriteReg)
				{
					// writing to it, no longer zero extended
					gprMap[resultIdx].isZeroExtended = false;
				}
				else
				{
					// kill any allocations of viWriteReg
					unbindAnyVIAllocations(viWriteReg, backup);

					// allocate a new register for writing to
					const int cloneIdx = findFreeGPR(viWriteReg);
					const xRegister32& cloneReg = xRegister32::GetInstance(cloneIdx);
					writeBackReg(cloneReg, true);
					if (zext_if_dirty)
						xMOVZX(cloneReg, xRegister16(cachedIdx));
					else
						xMOV(cloneReg, xRegister32(cachedIdx));
					gprMap[cloneIdx].isZeroExtended = zext_if_dirty;
					MVURALOG(" clone write %d in %d to %d for %d\n", viLoadReg, cachedIdx, cloneIdx, viWriteReg);
					resultIdx = cloneIdx;
				}

				gprMap[resultIdx].VIreg = viWriteReg;
				gprMap[resultIdx].dirty = true;
			}
			else if (zext_if_dirty && !gprMap[resultIdx].isZeroExtended)
			{
				xMOVZX(xRegister32(resultIdx), xRegister16(resultIdx));
				gprMap[resultIdx].isZeroExtended = true;
			}

			gprMap[resultIdx].count = this_counter;
			gprMap[resultIdx].isNeeded = true;

			if (backup)
				writeVIBackup(xRegister32(resultIdx));

			if (regAllocCOP2)
			{
				pxAssert(x86regs[resultIdx].inuse && x86regs[resultIdx].type == X86TYPE_VIREG);
				x86regs[resultIdx].reg = gprMap[resultIdx].VIreg;
				x86regs[resultIdx].mode = gprMap[resultIdx].dirty ? (MODE_WRITE | MODE_READ) : (MODE_READ);
			}

			MVURALOG(" returning cached in %d\n", resultIdx);
			return xRegister32::GetInstance(resultIdx);
		}
	}

	// Writing a new value, make sure this register isn't cached already
	if (viWriteReg >= 0)
		unbindAnyVIAllocations(viWriteReg, backup);

	const int freshIdx = findFreeGPR(viLoadReg);
	const xRegister32& fresh = xRegister32::GetInstance(freshIdx);
	writeBackReg(fresh, true);

	// Bring in the load value: from memory for a real VI register, zero for VI0.
	if (viLoadReg > 0)
		xMOVZX(fresh, ptr16[&getVI(viLoadReg)]);
	else if (viLoadReg == 0)
		xXOR(fresh, fresh);

	microMapGPR& freshMap = gprMap[freshIdx];
	freshMap.VIreg = viLoadReg;
	freshMap.isZeroExtended = true;

	if (viWriteReg >= 0)
	{
		freshMap.VIreg = viWriteReg;
		freshMap.dirty = true;
		freshMap.isZeroExtended = false;

		if (backup)
		{
			// Nothing was loaded above, so fetch the write target's value to back up.
			if (viLoadReg < 0 && viWriteReg > 0)
				xMOVZX(fresh, ptr16[&getVI(viWriteReg)]);

			writeVIBackup(fresh);
		}
	}

	freshMap.count = this_counter;
	freshMap.isNeeded = true;

	if (regAllocCOP2)
	{
		pxAssert(x86regs[freshIdx].inuse && x86regs[freshIdx].type == X86TYPE_VIREG);
		x86regs[freshIdx].reg = freshMap.VIreg;
		x86regs[freshIdx].mode = freshMap.dirty ? (MODE_WRITE | MODE_READ) : (MODE_READ);
	}

	MVURALOG(" returning new %d\n", freshIdx);
	return fresh;
}
|
||||||
|
|
||||||
|
// Emits code that copies the 16-bit value of VI register `vi` into the
// 32-bit form of the host register `reg`.
//   reg     - destination host register; always addressed as xRegister32.
//   vi      - VI register index; asserted to be non-negative.
//   signext - when true the low 16 bits are sign-extended (MOVSX),
//             otherwise they are zero-extended (MOVZX). Defaults to false.
// The source VI is obtained through allocGPR(vi) and released again with
// clearNeeded() before returning.
void moveVIToGPR(const xRegisterInt& reg, int vi, bool signext = false)
|
||||||
|
{
|
||||||
|
pxAssert(vi >= 0);
|
||||||
|
// VI index 0: no allocation is made, the destination is simply zeroed.
if (vi == 0)
|
||||||
|
{
|
||||||
|
xXOR(xRegister32(reg), xRegister32(reg));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Check liveness/usedness before allocating.
|
||||||
|
// TODO: Check whether zero-extend is needed everywhere here. Loadstores are.
|
||||||
|
// Read-only allocation: only the load register is passed to allocGPR.
const xRegister32& srcreg = allocGPR(vi);
|
||||||
|
// Only the low 16 bits of the source register are read; the extension
// mode decides how the upper 16 bits of `reg` are filled.
if (signext)
|
||||||
|
xMOVSX(xRegister32(reg), xRegister16(srcreg));
|
||||||
|
else
|
||||||
|
xMOVZX(xRegister32(reg), xRegister16(srcreg));
|
||||||
|
// Done with the source register for this instruction.
clearNeeded(srcreg);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -611,11 +611,12 @@ mVUop(mVU_FCAND)
|
||||||
pass1 { mVUanalyzeCflag(mVU, 1); }
|
pass1 { mVUanalyzeCflag(mVU, 1); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
|
const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
|
||||||
xAND(gprT1, _Imm24_);
|
mVUallocCFLAGa(mVU, dst, cFLAG.read);
|
||||||
xADD(gprT1, 0xffffff);
|
xAND(dst, _Imm24_);
|
||||||
xSHR(gprT1, 24);
|
xADD(dst, 0xffffff);
|
||||||
mVUallocVIb(mVU, gprT1, 1);
|
xSHR(dst, 24);
|
||||||
|
mVU.regAlloc->clearNeeded(dst);
|
||||||
mVU.profiler.EmitOp(opFCAND);
|
mVU.profiler.EmitOp(opFCAND);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FCAND vi01, $%x", _Imm24_); }
|
pass3 { mVUlog("FCAND vi01, $%x", _Imm24_); }
|
||||||
|
@ -627,11 +628,12 @@ mVUop(mVU_FCEQ)
|
||||||
pass1 { mVUanalyzeCflag(mVU, 1); }
|
pass1 { mVUanalyzeCflag(mVU, 1); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
|
const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
|
||||||
xXOR(gprT1, _Imm24_);
|
mVUallocCFLAGa(mVU, dst, cFLAG.read);
|
||||||
xSUB(gprT1, 1);
|
xXOR(dst, _Imm24_);
|
||||||
xSHR(gprT1, 31);
|
xSUB(dst, 1);
|
||||||
mVUallocVIb(mVU, gprT1, 1);
|
xSHR(dst, 31);
|
||||||
|
mVU.regAlloc->clearNeeded(dst);
|
||||||
mVU.profiler.EmitOp(opFCEQ);
|
mVU.profiler.EmitOp(opFCEQ);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FCEQ vi01, $%x", _Imm24_); }
|
pass3 { mVUlog("FCEQ vi01, $%x", _Imm24_); }
|
||||||
|
@ -643,9 +645,10 @@ mVUop(mVU_FCGET)
|
||||||
pass1 { mVUanalyzeCflag(mVU, _It_); }
|
pass1 { mVUanalyzeCflag(mVU, _It_); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
xAND(gprT1, 0xfff);
|
mVUallocCFLAGa(mVU, regT, cFLAG.read);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xAND(regT, 0xfff);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.profiler.EmitOp(opFCGET);
|
mVU.profiler.EmitOp(opFCGET);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FCGET vi%02d", _Ft_); }
|
pass3 { mVUlog("FCGET vi%02d", _Ft_); }
|
||||||
|
@ -657,11 +660,12 @@ mVUop(mVU_FCOR)
|
||||||
pass1 { mVUanalyzeCflag(mVU, 1); }
|
pass1 { mVUanalyzeCflag(mVU, 1); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
|
const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
|
||||||
xOR(gprT1, _Imm24_);
|
mVUallocCFLAGa(mVU, dst, cFLAG.read);
|
||||||
xADD(gprT1, 1); // If 24 1's will make 25th bit 1, else 0
|
xOR(dst, _Imm24_);
|
||||||
xSHR(gprT1, 24); // Get the 25th bit (also clears the rest of the garbage in the reg)
|
xADD(dst, 1); // If 24 1's will make 25th bit 1, else 0
|
||||||
mVUallocVIb(mVU, gprT1, 1);
|
xSHR(dst, 24); // Get the 25th bit (also clears the rest of the garbage in the reg)
|
||||||
|
mVU.regAlloc->clearNeeded(dst);
|
||||||
mVU.profiler.EmitOp(opFCOR);
|
mVU.profiler.EmitOp(opFCOR);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FCOR vi01, $%x", _Imm24_); }
|
pass3 { mVUlog("FCOR vi01, $%x", _Imm24_); }
|
||||||
|
@ -690,9 +694,9 @@ mVUop(mVU_FMAND)
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
|
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
|
||||||
xAND(gprT1b, gprT2b);
|
xAND(regT, gprT1);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.profiler.EmitOp(opFMAND);
|
mVU.profiler.EmitOp(opFMAND);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_); }
|
pass3 { mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_); }
|
||||||
|
@ -705,11 +709,11 @@ mVUop(mVU_FMEQ)
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
|
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
|
||||||
xXOR(gprT1, gprT2);
|
xXOR(regT, gprT1);
|
||||||
xSUB(gprT1, 1);
|
xSUB(regT, 1);
|
||||||
xSHR(gprT1, 31);
|
xSHR(regT, 31);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.profiler.EmitOp(opFMEQ);
|
mVU.profiler.EmitOp(opFMEQ);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_); }
|
pass3 { mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_); }
|
||||||
|
@ -722,9 +726,9 @@ mVUop(mVU_FMOR)
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
|
mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
|
||||||
xOR(gprT1b, gprT2b);
|
xOR(regT, gprT1);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.profiler.EmitOp(opFMOR);
|
mVU.profiler.EmitOp(opFMOR);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_); }
|
pass3 { mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_); }
|
||||||
|
@ -742,9 +746,10 @@ mVUop(mVU_FSAND)
|
||||||
{
|
{
|
||||||
if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking I/D/IS/DS Flags");
|
if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking I/D/IS/DS Flags");
|
||||||
if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking U/O/US/OS Flags");
|
if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking U/O/US/OS Flags");
|
||||||
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
|
const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
xAND(gprT1, _Imm12_);
|
mVUallocSFLAGc(reg, gprT1, sFLAG.read);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xAND(reg, _Imm12_);
|
||||||
|
mVU.regAlloc->clearNeeded(reg);
|
||||||
mVU.profiler.EmitOp(opFSAND);
|
mVU.profiler.EmitOp(opFSAND);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
|
pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
|
||||||
|
@ -756,9 +761,10 @@ mVUop(mVU_FSOR)
|
||||||
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
pass1 { mVUanalyzeSflag(mVU, _It_); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
|
const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
xOR(gprT1, _Imm12_);
|
mVUallocSFLAGc(reg, gprT2, sFLAG.read);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xOR(reg, _Imm12_);
|
||||||
|
mVU.regAlloc->clearNeeded(reg);
|
||||||
mVU.profiler.EmitOp(opFSOR);
|
mVU.profiler.EmitOp(opFSOR);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
|
pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
|
||||||
|
@ -786,15 +792,16 @@ mVUop(mVU_FSEQ)
|
||||||
if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
|
if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
|
||||||
if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
|
if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
|
||||||
|
|
||||||
mVUallocSFLAGa(gprT1, sFLAG.read);
|
const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
setBitFSEQ(gprT1, 0x0f00); // Z bit
|
mVUallocSFLAGa(reg, sFLAG.read);
|
||||||
setBitFSEQ(gprT1, 0xf000); // S bit
|
setBitFSEQ(reg, 0x0f00); // Z bit
|
||||||
setBitFSEQ(gprT1, 0x000f); // ZS bit
|
setBitFSEQ(reg, 0xf000); // S bit
|
||||||
setBitFSEQ(gprT1, 0x00f0); // SS bit
|
setBitFSEQ(reg, 0x000f); // ZS bit
|
||||||
xXOR(gprT1, imm);
|
setBitFSEQ(reg, 0x00f0); // SS bit
|
||||||
xSUB(gprT1, 1);
|
xXOR(reg, imm);
|
||||||
xSHR(gprT1, 31);
|
xSUB(reg, 1);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xSHR(reg, 31);
|
||||||
|
mVU.regAlloc->clearNeeded(reg);
|
||||||
mVU.profiler.EmitOp(opFSEQ);
|
mVU.profiler.EmitOp(opFSEQ);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
|
pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
|
||||||
|
@ -834,15 +841,11 @@ mVUop(mVU_IADD)
|
||||||
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
|
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
|
||||||
if (_It_ != _Is_)
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
|
||||||
{
|
xADD(regS, regT);
|
||||||
mVUallocVIa(mVU, gprT2, _It_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
xADD(gprT1b, gprT2b);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
|
||||||
else
|
|
||||||
xADD(gprT1b, gprT1b);
|
|
||||||
mVUallocVIb(mVU, gprT1, _Id_);
|
|
||||||
mVU.profiler.EmitOp(opIADD);
|
mVU.profiler.EmitOp(opIADD);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("IADD vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
|
pass3 { mVUlog("IADD vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
|
||||||
|
@ -853,10 +856,10 @@ mVUop(mVU_IADDI)
|
||||||
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_); }
|
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
|
||||||
if (_Imm5_ != 0)
|
if (_Imm5_ != 0)
|
||||||
xADD(gprT1b, _Imm5_);
|
xADD(regS, _Imm5_);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
mVU.profiler.EmitOp(opIADDI);
|
mVU.profiler.EmitOp(opIADDI);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("IADDI vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm5_); }
|
pass3 { mVUlog("IADDI vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm5_); }
|
||||||
|
@ -867,10 +870,10 @@ mVUop(mVU_IADDIU)
|
||||||
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); }
|
pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
|
||||||
if (_Imm15_ != 0)
|
if (_Imm15_ != 0)
|
||||||
xADD(gprT1b, _Imm15_);
|
xADD(regS, _Imm15_);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
mVU.profiler.EmitOp(opIADDIU);
|
mVU.profiler.EmitOp(opIADDIU);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("IADDIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
|
pass3 { mVUlog("IADDIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
|
||||||
|
@ -881,13 +884,12 @@ mVUop(mVU_IAND)
|
||||||
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
|
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
|
||||||
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
|
||||||
if (_It_ != _Is_)
|
if (_It_ != _Is_)
|
||||||
{
|
xAND(regS, regT);
|
||||||
mVUallocVIa(mVU, gprT2, _It_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
xAND(gprT1, gprT2);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
|
||||||
mVUallocVIb(mVU, gprT1, _Id_);
|
|
||||||
mVU.profiler.EmitOp(opIAND);
|
mVU.profiler.EmitOp(opIAND);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("IAND vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
|
pass3 { mVUlog("IAND vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
|
||||||
|
@ -898,13 +900,12 @@ mVUop(mVU_IOR)
|
||||||
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
|
pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
|
||||||
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
|
||||||
if (_It_ != _Is_)
|
if (_It_ != _Is_)
|
||||||
{
|
xOR(regS, regT);
|
||||||
mVUallocVIa(mVU, gprT2, _It_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
xOR(gprT1, gprT2);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
|
||||||
mVUallocVIb(mVU, gprT1, _Id_);
|
|
||||||
mVU.profiler.EmitOp(opIOR);
|
mVU.profiler.EmitOp(opIOR);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("IOR vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
|
pass3 { mVUlog("IOR vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
|
||||||
|
@ -917,15 +918,17 @@ mVUop(mVU_ISUB)
|
||||||
{
|
{
|
||||||
if (_It_ != _Is_)
|
if (_It_ != _Is_)
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
|
||||||
mVUallocVIa(mVU, gprT2, _It_);
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
|
||||||
xSUB(gprT1b, gprT2b);
|
xSUB(regS, regT);
|
||||||
mVUallocVIb(mVU, gprT1, _Id_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xXOR(gprT1, gprT1);
|
const xRegister32& regD = mVU.regAlloc->allocGPR(-1, _Id_, mVUlow.backupVI);
|
||||||
mVUallocVIb(mVU, gprT1, _Id_);
|
xXOR(regD, regD);
|
||||||
|
mVU.regAlloc->clearNeeded(regD);
|
||||||
}
|
}
|
||||||
mVU.profiler.EmitOp(opISUB);
|
mVU.profiler.EmitOp(opISUB);
|
||||||
}
|
}
|
||||||
|
@ -937,10 +940,10 @@ mVUop(mVU_ISUBIU)
|
||||||
pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
|
pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
|
||||||
if (_Imm15_ != 0)
|
if (_Imm15_ != 0)
|
||||||
xSUB(gprT1b, _Imm15_);
|
xSUB(regS, _Imm15_);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
mVU.profiler.EmitOp(opISUBIU);
|
mVU.profiler.EmitOp(opISUBIU);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ISUBIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
|
pass3 { mVUlog("ISUBIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
|
||||||
|
@ -964,10 +967,20 @@ mVUop(mVU_MFIR)
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
mVUallocVIa(mVU, gprT1, _Is_, true);
|
if (_Is_ != 0)
|
||||||
xMOVDZX(Ft, gprT1);
|
{
|
||||||
if (!_XYZW_SS)
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, -1);
|
||||||
mVUunpack_xyzw(Ft, Ft, 0);
|
xMOVSX(xRegister32(regS), xRegister16(regS));
|
||||||
|
// TODO: Broadcast instead
|
||||||
|
xMOVDZX(Ft, regS);
|
||||||
|
if (!_XYZW_SS)
|
||||||
|
mVUunpack_xyzw(Ft, Ft, 0);
|
||||||
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
xPXOR(Ft, Ft);
|
||||||
|
}
|
||||||
mVU.regAlloc->clearNeeded(Ft);
|
mVU.regAlloc->clearNeeded(Ft);
|
||||||
mVU.profiler.EmitOp(opMFIR);
|
mVU.profiler.EmitOp(opMFIR);
|
||||||
}
|
}
|
||||||
|
@ -1038,8 +1051,9 @@ mVUop(mVU_MTIR)
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xMOVD(gprT1, Fs);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xMOVD(regT, Fs);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.regAlloc->clearNeeded(Fs);
|
mVU.regAlloc->clearNeeded(Fs);
|
||||||
mVU.profiler.EmitOp(opMTIR);
|
mVU.profiler.EmitOp(opMTIR);
|
||||||
}
|
}
|
||||||
|
@ -1064,14 +1078,14 @@ mVUop(mVU_ILW)
|
||||||
{
|
{
|
||||||
void* ptr = mVU.regs().Mem + offsetSS;
|
void* ptr = mVU.regs().Mem + offsetSS;
|
||||||
|
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (!_Is_)
|
|
||||||
xXOR(gprT2, gprT2);
|
|
||||||
if (_Imm11_ != 0)
|
if (_Imm11_ != 0)
|
||||||
xADD(gprT2, _Imm11_);
|
xADD(gprT1, _Imm11_);
|
||||||
mVUaddrFix(mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
|
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
|
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.profiler.EmitOp(opILW);
|
mVU.profiler.EmitOp(opILW);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ILW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
|
pass3 { mVUlog("ILW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
|
||||||
|
@ -1092,15 +1106,19 @@ mVUop(mVU_ILWR)
|
||||||
void* ptr = mVU.regs().Mem + offsetSS;
|
void* ptr = mVU.regs().Mem + offsetSS;
|
||||||
if (_Is_)
|
if (_Is_)
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
mVUaddrFix (mVU, gprT2q);
|
mVUaddrFix (mVU, gprT1q);
|
||||||
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
|
|
||||||
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
|
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xMOVZX(gprT1, ptr16[ptr]);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
|
xMOVZX(regT, ptr16[ptr]);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
|
||||||
mVU.profiler.EmitOp(opILWR);
|
mVU.profiler.EmitOp(opILWR);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ILWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
|
pass3 { mVUlog("ILWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
|
||||||
|
@ -1110,7 +1128,7 @@ mVUop(mVU_ILWR)
|
||||||
// ISW/ISWR
|
// ISW/ISWR
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg)
|
static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg, const xRegister32& val)
|
||||||
{
|
{
|
||||||
if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr)
|
if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr)
|
||||||
{
|
{
|
||||||
|
@ -1118,10 +1136,10 @@ static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg)
|
||||||
auto writeBackAt = [&](int offset) {
|
auto writeBackAt = [&](int offset) {
|
||||||
if (register_offset == -1)
|
if (register_offset == -1)
|
||||||
{
|
{
|
||||||
xLEA(gprT3q, ptr[(void*)((sptr)base_ptr + offset)]);
|
xLEA(gprT2q, ptr[(void*)((sptr)base_ptr + offset)]);
|
||||||
register_offset = offset;
|
register_offset = offset;
|
||||||
}
|
}
|
||||||
xMOV(ptr32[gprT3q + reg + (offset - register_offset)], gprT1);
|
xMOV(ptr32[gprT2q + reg + (offset - register_offset)], val);
|
||||||
};
|
};
|
||||||
if (_X) writeBackAt(0);
|
if (_X) writeBackAt(0);
|
||||||
if (_Y) writeBackAt(4);
|
if (_Y) writeBackAt(4);
|
||||||
|
@ -1130,17 +1148,17 @@ static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg)
|
||||||
}
|
}
|
||||||
else if (reg.IsEmpty())
|
else if (reg.IsEmpty())
|
||||||
{
|
{
|
||||||
if (_X) xMOV(ptr32[(void*)((uptr)base_ptr )], gprT1);
|
if (_X) xMOV(ptr32[(void*)((uptr)base_ptr )], val);
|
||||||
if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr + 4)], gprT1);
|
if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr + 4)], val);
|
||||||
if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr + 8)], gprT1);
|
if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr + 8)], val);
|
||||||
if (_W) xMOV(ptr32[(void*)((uptr)base_ptr + 12)], gprT1);
|
if (_W) xMOV(ptr32[(void*)((uptr)base_ptr + 12)], val);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (_X) xMOV(ptr32[base_ptr+reg ], gprT1);
|
if (_X) xMOV(ptr32[base_ptr+reg ], val);
|
||||||
if (_Y) xMOV(ptr32[base_ptr+reg + 4], gprT1);
|
if (_Y) xMOV(ptr32[base_ptr+reg + 4], val);
|
||||||
if (_Z) xMOV(ptr32[base_ptr+reg + 8], gprT1);
|
if (_Z) xMOV(ptr32[base_ptr+reg + 8], val);
|
||||||
if (_W) xMOV(ptr32[base_ptr+reg + 12], gprT1);
|
if (_W) xMOV(ptr32[base_ptr+reg + 12], val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1156,15 +1174,15 @@ mVUop(mVU_ISW)
|
||||||
{
|
{
|
||||||
void* ptr = mVU.regs().Mem;
|
void* ptr = mVU.regs().Mem;
|
||||||
|
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (!_Is_)
|
|
||||||
xXOR(gprT2, gprT2);
|
|
||||||
if (_Imm11_ != 0)
|
if (_Imm11_ != 0)
|
||||||
xADD(gprT2, _Imm11_);
|
xADD(gprT1, _Imm11_);
|
||||||
mVUaddrFix(mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
|
|
||||||
mVUallocVIa(mVU, gprT1, _It_);
|
// If regT is dirty, the high bits might not be zero.
|
||||||
writeBackISW(mVU, ptr, gprT2q);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
|
||||||
|
writeBackISW(mVU, ptr, gprT1q, regT);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.profiler.EmitOp(opISW);
|
mVU.profiler.EmitOp(opISW);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
|
pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
|
||||||
|
@ -1184,12 +1202,13 @@ mVUop(mVU_ISWR)
|
||||||
xAddressReg is = xEmptyReg;
|
xAddressReg is = xEmptyReg;
|
||||||
if (_Is_)
|
if (_Is_)
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
mVUaddrFix(mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
is = gprT2q;
|
is = gprT1q;
|
||||||
}
|
}
|
||||||
mVUallocVIa(mVU, gprT1, _It_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
|
||||||
writeBackISW(mVU, ptr, is);
|
writeBackISW(mVU, ptr, is, regT);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
|
|
||||||
mVU.profiler.EmitOp(opISWR);
|
mVU.profiler.EmitOp(opISWR);
|
||||||
}
|
}
|
||||||
|
@ -1206,15 +1225,13 @@ mVUop(mVU_LQ)
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
void* ptr = mVU.regs().Mem;
|
void* ptr = mVU.regs().Mem;
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (!_Is_)
|
|
||||||
xXOR(gprT2, gprT2);
|
|
||||||
if (_Imm11_ != 0)
|
if (_Imm11_ != 0)
|
||||||
xADD(gprT2, _Imm11_);
|
xADD(gprT1, _Imm11_);
|
||||||
mVUaddrFix(mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
|
|
||||||
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W);
|
mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W);
|
||||||
mVU.regAlloc->clearNeeded(Ft);
|
mVU.regAlloc->clearNeeded(Ft);
|
||||||
mVU.profiler.EmitOp(opLQ);
|
mVU.profiler.EmitOp(opLQ);
|
||||||
}
|
}
|
||||||
|
@ -1230,12 +1247,12 @@ mVUop(mVU_LQD)
|
||||||
xAddressReg is = xEmptyReg;
|
xAddressReg is = xEmptyReg;
|
||||||
if (_Is_ || isVU0) // Access VU1 regs mem-map in !_Is_ case
|
if (_Is_ || isVU0) // Access VU1 regs mem-map in !_Is_ case
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT2, _Is_);
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Is_, mVUlow.backupVI);
|
||||||
xSUB(gprT2b, 1);
|
xDEC(regS);
|
||||||
if (_Is_)
|
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
|
||||||
mVUallocVIb(mVU, gprT2, _Is_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
mVUaddrFix(mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
is = gprT2q;
|
is = gprT1q;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1250,7 +1267,7 @@ mVUop(mVU_LQD)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
|
mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, is), _X_Y_Z_W);
|
||||||
}
|
}
|
||||||
mVU.regAlloc->clearNeeded(Ft);
|
mVU.regAlloc->clearNeeded(Ft);
|
||||||
}
|
}
|
||||||
|
@ -1268,12 +1285,12 @@ mVUop(mVU_LQI)
|
||||||
xAddressReg is = xEmptyReg;
|
xAddressReg is = xEmptyReg;
|
||||||
if (_Is_)
|
if (_Is_)
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Is_, mVUlow.backupVI);
|
||||||
xMOV(gprT2, gprT1);
|
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
|
||||||
xADD(gprT1b, 1);
|
xINC(regS);
|
||||||
mVUallocVIb(mVU, gprT1, _Is_);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
mVUaddrFix (mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
is = gprT2q;
|
is = gprT1q;
|
||||||
}
|
}
|
||||||
if (!mVUlow.noWriteVF)
|
if (!mVUlow.noWriteVF)
|
||||||
{
|
{
|
||||||
|
@ -1281,7 +1298,7 @@ mVUop(mVU_LQI)
|
||||||
if (is.IsEmpty())
|
if (is.IsEmpty())
|
||||||
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
|
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
|
||||||
else
|
else
|
||||||
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
|
mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, is), _X_Y_Z_W);
|
||||||
mVU.regAlloc->clearNeeded(Ft);
|
mVU.regAlloc->clearNeeded(Ft);
|
||||||
}
|
}
|
||||||
mVU.profiler.EmitOp(opLQI);
|
mVU.profiler.EmitOp(opLQI);
|
||||||
|
@ -1300,15 +1317,13 @@ mVUop(mVU_SQ)
|
||||||
{
|
{
|
||||||
void* ptr = mVU.regs().Mem;
|
void* ptr = mVU.regs().Mem;
|
||||||
|
|
||||||
mVUallocVIa(mVU, gprT2, _It_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _It_);
|
||||||
if (!_It_)
|
|
||||||
xXOR(gprT2, gprT2);
|
|
||||||
if (_Imm11_ != 0)
|
if (_Imm11_ != 0)
|
||||||
xADD(gprT2, _Imm11_);
|
xADD(gprT1, _Imm11_);
|
||||||
mVUaddrFix(mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
|
|
||||||
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
|
mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W, 1);
|
||||||
mVU.regAlloc->clearNeeded(Fs);
|
mVU.regAlloc->clearNeeded(Fs);
|
||||||
mVU.profiler.EmitOp(opSQ);
|
mVU.profiler.EmitOp(opSQ);
|
||||||
}
|
}
|
||||||
|
@ -1324,12 +1339,12 @@ mVUop(mVU_SQD)
|
||||||
xAddressReg it = xEmptyReg;
|
xAddressReg it = xEmptyReg;
|
||||||
if (_It_ || isVU0) // Access VU1 regs mem-map in !_It_ case
|
if (_It_ || isVU0) // Access VU1 regs mem-map in !_It_ case
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT2, _It_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, _It_, mVUlow.backupVI);
|
||||||
xSUB(gprT2b, 1);
|
xDEC(regT);
|
||||||
if (_It_)
|
xMOVSX(gprT1, xRegister16(regT)); // TODO: Confirm
|
||||||
mVUallocVIb(mVU, gprT2, _It_);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVUaddrFix(mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
it = gprT2q;
|
it = gprT1q;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1339,7 +1354,7 @@ mVUop(mVU_SQD)
|
||||||
if (it.IsEmpty())
|
if (it.IsEmpty())
|
||||||
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
|
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
|
||||||
else
|
else
|
||||||
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, it), _X_Y_Z_W, 1);
|
mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, it), _X_Y_Z_W, 1);
|
||||||
mVU.regAlloc->clearNeeded(Fs);
|
mVU.regAlloc->clearNeeded(Fs);
|
||||||
mVU.profiler.EmitOp(opSQD);
|
mVU.profiler.EmitOp(opSQD);
|
||||||
}
|
}
|
||||||
|
@ -1354,15 +1369,15 @@ mVUop(mVU_SQI)
|
||||||
void* ptr = mVU.regs().Mem;
|
void* ptr = mVU.regs().Mem;
|
||||||
if (_It_)
|
if (_It_)
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _It_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, _It_, mVUlow.backupVI);
|
||||||
xMOV(gprT2, gprT1);
|
xMOVSX(gprT1, xRegister16(regT)); // TODO: Confirm
|
||||||
xADD(gprT1b, 1);
|
xINC(regT);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVUaddrFix(mVU, gprT2q);
|
mVUaddrFix(mVU, gprT1q);
|
||||||
}
|
}
|
||||||
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
if (_It_)
|
if (_It_)
|
||||||
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
|
mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W, 1);
|
||||||
else
|
else
|
||||||
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
|
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
|
||||||
mVU.regAlloc->clearNeeded(Fs);
|
mVU.regAlloc->clearNeeded(Fs);
|
||||||
|
@ -1426,22 +1441,24 @@ mVUop(mVU_RNEXT)
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
// algorithm from www.project-fao.org
|
// algorithm from www.project-fao.org
|
||||||
xMOV(gprT3, ptr32[Rmem]);
|
const xRegister32& temp3 = mVU.regAlloc->allocGPR();
|
||||||
xMOV(gprT1, gprT3);
|
xMOV(temp3, ptr32[Rmem]);
|
||||||
|
xMOV(gprT1, temp3);
|
||||||
xSHR(gprT1, 4);
|
xSHR(gprT1, 4);
|
||||||
xAND(gprT1, 1);
|
xAND(gprT1, 1);
|
||||||
|
|
||||||
xMOV(gprT2, gprT3);
|
xMOV(gprT2, temp3);
|
||||||
xSHR(gprT2, 22);
|
xSHR(gprT2, 22);
|
||||||
xAND(gprT2, 1);
|
xAND(gprT2, 1);
|
||||||
|
|
||||||
xSHL(gprT3, 1);
|
xSHL(temp3, 1);
|
||||||
xXOR(gprT1, gprT2);
|
xXOR(gprT1, gprT2);
|
||||||
xXOR(gprT3, gprT1);
|
xXOR(temp3, gprT1);
|
||||||
xAND(gprT3, 0x007fffff);
|
xAND(temp3, 0x007fffff);
|
||||||
xOR (gprT3, 0x3f800000);
|
xOR (temp3, 0x3f800000);
|
||||||
xMOV(ptr32[Rmem], gprT3);
|
xMOV(ptr32[Rmem], temp3);
|
||||||
mVU_RGET_(mVU, gprT3);
|
mVU_RGET_(mVU, temp3);
|
||||||
|
mVU.regAlloc->clearNeeded(temp3);
|
||||||
mVU.profiler.EmitOp(opRNEXT);
|
mVU.profiler.EmitOp(opRNEXT);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); }
|
pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); }
|
||||||
|
@ -1512,8 +1529,9 @@ mVUop(mVU_XTOP)
|
||||||
}
|
}
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
xMOVZX(gprT1, ptr16[&mVU.getVifRegs().top]);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xMOVZX(regT, ptr16[&mVU.getVifRegs().top]);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.profiler.EmitOp(opXTOP);
|
mVU.profiler.EmitOp(opXTOP);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("XTOP vi%02d", _Ft_); }
|
pass3 { mVUlog("XTOP vi%02d", _Ft_); }
|
||||||
|
@ -1530,9 +1548,10 @@ mVUop(mVU_XITOP)
|
||||||
}
|
}
|
||||||
pass2
|
pass2
|
||||||
{
|
{
|
||||||
xMOVZX(gprT1, ptr16[&mVU.getVifRegs().itop]);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
xAND(gprT1, isVU1 ? 0x3ff : 0xff);
|
xMOVZX(regT, ptr16[&mVU.getVifRegs().itop]);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xAND(regT, isVU1 ? 0x3ff : 0xff);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVU.profiler.EmitOp(opXITOP);
|
mVU.profiler.EmitOp(opXITOP);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("XITOP vi%02d", _Ft_); }
|
pass3 { mVUlog("XITOP vi%02d", _Ft_); }
|
||||||
|
@ -1634,6 +1653,8 @@ void _vuXGKICKTransfermVU(bool flush)
|
||||||
|
|
||||||
static __fi void mVU_XGKICK_SYNC(mV, bool flush)
|
static __fi void mVU_XGKICK_SYNC(mV, bool flush)
|
||||||
{
|
{
|
||||||
|
mVU.regAlloc->flushCallerSavedRegisters();
|
||||||
|
|
||||||
// Add the single cycle remainder after this instruction, some games do the store
|
// Add the single cycle remainder after this instruction, some games do the store
|
||||||
// on the second instruction after the kick and that needs to go through first
|
// on the second instruction after the kick and that needs to go through first
|
||||||
// but that's VERY close..
|
// but that's VERY close..
|
||||||
|
@ -1652,14 +1673,16 @@ static __fi void mVU_XGKICK_SYNC(mV, bool flush)
|
||||||
|
|
||||||
static __fi void mVU_XGKICK_DELAY(mV)
|
static __fi void mVU_XGKICK_DELAY(mV)
|
||||||
{
|
{
|
||||||
mVUbackupRegs(mVU);
|
mVU.regAlloc->flushCallerSavedRegisters();
|
||||||
|
|
||||||
|
mVUbackupRegs(mVU, true, true);
|
||||||
#if 0 // XGkick Break - ToDo: Change "SomeGifPathValue" to w/e needs to be tested
|
#if 0 // XGkick Break - ToDo: Change "SomeGifPathValue" to w/e needs to be tested
|
||||||
xTEST (ptr32[&SomeGifPathValue], 1); // If '1', breaks execution
|
xTEST (ptr32[&SomeGifPathValue], 1); // If '1', breaks execution
|
||||||
xMOV (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6);
|
xMOV (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6);
|
||||||
xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6));
|
xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6));
|
||||||
#endif
|
#endif
|
||||||
xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]);
|
xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]);
|
||||||
mVUrestoreRegs(mVU);
|
mVUrestoreRegs(mVU, true, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_XGKICK)
|
mVUop(mVU_XGKICK)
|
||||||
|
@ -1687,10 +1710,10 @@ mVUop(mVU_XGKICK)
|
||||||
mVUinfo.doXGKICK = false;
|
mVUinfo.doXGKICK = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, -1);
|
||||||
if (!CHECK_XGKICKHACK)
|
if (!CHECK_XGKICKHACK)
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
xMOV(ptr32[&mVU.VIxgkick], regS);
|
||||||
xMOV(ptr32[&mVU.VIxgkick], gprT1);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1702,11 +1725,12 @@ mVUop(mVU_XGKICK)
|
||||||
xSUB(gprT2, ptr32[&mVU.cycles]);
|
xSUB(gprT2, ptr32[&mVU.cycles]);
|
||||||
xADD(gprT2, ptr32[&VU1.cycle]);
|
xADD(gprT2, ptr32[&VU1.cycle]);
|
||||||
xMOV(ptr32[&VU1.xgkicklastcycle], gprT2);
|
xMOV(ptr32[&VU1.xgkicklastcycle], gprT2);
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
xMOV(gprT1, regS);
|
||||||
xAND(gprT1, 0x3FF);
|
xAND(gprT1, 0x3FF);
|
||||||
xSHL(gprT1, 4);
|
xSHL(gprT1, 4);
|
||||||
xMOV(ptr32[&VU1.xgkickaddr], gprT1);
|
xMOV(ptr32[&VU1.xgkickaddr], gprT1);
|
||||||
}
|
}
|
||||||
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
mVU.profiler.EmitOp(opXGKICK);
|
mVU.profiler.EmitOp(opXGKICK);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("XGKICK vi%02d", _Fs_); }
|
pass3 { mVUlog("XGKICK vi%02d", _Fs_); }
|
||||||
|
@ -1803,22 +1827,25 @@ mVUop(mVU_BAL)
|
||||||
{
|
{
|
||||||
if (!mVUlow.evilBranch)
|
if (!mVUlow.evilBranch)
|
||||||
{
|
{
|
||||||
xMOV(gprT1, bSaveAddr);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xMOV(regT, bSaveAddr);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
incPC(-2);
|
incPC(-2);
|
||||||
DevCon.Warning("Linking BAL from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
|
DevCon.Warning("Linking BAL from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
|
||||||
incPC(2);
|
incPC(2);
|
||||||
if (isEvilBlock)
|
|
||||||
xMOV(gprT1, ptr32[&mVU.evilBranch]);
|
|
||||||
else
|
|
||||||
xMOV(gprT1, ptr32[&mVU.badBranch]);
|
|
||||||
|
|
||||||
xADD(gprT1, 8);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
xSHR(gprT1, 3);
|
if (isEvilBlock)
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xMOV(regT, ptr32[&mVU.evilBranch]);
|
||||||
|
else
|
||||||
|
xMOV(regT, ptr32[&mVU.badBranch]);
|
||||||
|
|
||||||
|
xADD(regT, 8);
|
||||||
|
xSHR(regT, 3);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mVUlow.badBranch) { xMOV(ptr32[&mVU.badBranch], branchAddr(mVU)); }
|
if (mVUlow.badBranch) { xMOV(ptr32[&mVU.badBranch], branchAddr(mVU)); }
|
||||||
|
@ -1837,14 +1864,15 @@ mVUop(mVU_IBEQ)
|
||||||
if (mVUlow.memReadIs)
|
if (mVUlow.memReadIs)
|
||||||
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
||||||
else
|
else
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
|
|
||||||
if (mVUlow.memReadIt)
|
if (mVUlow.memReadIt)
|
||||||
xXOR(gprT1, ptr32[&mVU.VIbackup]);
|
xXOR(gprT1, ptr32[&mVU.VIbackup]);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT2, _It_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_);
|
||||||
xXOR(gprT1, gprT2);
|
xXOR(gprT1, regT);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(isBadOrEvil))
|
if (!(isBadOrEvil))
|
||||||
|
@ -1865,7 +1893,7 @@ mVUop(mVU_IBGEZ)
|
||||||
if (mVUlow.memReadIs)
|
if (mVUlow.memReadIs)
|
||||||
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
||||||
else
|
else
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (!(isBadOrEvil))
|
if (!(isBadOrEvil))
|
||||||
xMOV(ptr32[&mVU.branch], gprT1);
|
xMOV(ptr32[&mVU.branch], gprT1);
|
||||||
else
|
else
|
||||||
|
@ -1884,7 +1912,7 @@ mVUop(mVU_IBGTZ)
|
||||||
if (mVUlow.memReadIs)
|
if (mVUlow.memReadIs)
|
||||||
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
||||||
else
|
else
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (!(isBadOrEvil))
|
if (!(isBadOrEvil))
|
||||||
xMOV(ptr32[&mVU.branch], gprT1);
|
xMOV(ptr32[&mVU.branch], gprT1);
|
||||||
else
|
else
|
||||||
|
@ -1903,7 +1931,7 @@ mVUop(mVU_IBLEZ)
|
||||||
if (mVUlow.memReadIs)
|
if (mVUlow.memReadIs)
|
||||||
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
||||||
else
|
else
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (!(isBadOrEvil))
|
if (!(isBadOrEvil))
|
||||||
xMOV(ptr32[&mVU.branch], gprT1);
|
xMOV(ptr32[&mVU.branch], gprT1);
|
||||||
else
|
else
|
||||||
|
@ -1922,7 +1950,7 @@ mVUop(mVU_IBLTZ)
|
||||||
if (mVUlow.memReadIs)
|
if (mVUlow.memReadIs)
|
||||||
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
||||||
else
|
else
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (!(isBadOrEvil))
|
if (!(isBadOrEvil))
|
||||||
xMOV(ptr32[&mVU.branch], gprT1);
|
xMOV(ptr32[&mVU.branch], gprT1);
|
||||||
else
|
else
|
||||||
|
@ -1941,14 +1969,15 @@ mVUop(mVU_IBNE)
|
||||||
if (mVUlow.memReadIs)
|
if (mVUlow.memReadIs)
|
||||||
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
xMOV(gprT1, ptr32[&mVU.VIbackup]);
|
||||||
else
|
else
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
|
|
||||||
if (mVUlow.memReadIt)
|
if (mVUlow.memReadIt)
|
||||||
xXOR(gprT1, ptr32[&mVU.VIbackup]);
|
xXOR(gprT1, ptr32[&mVU.VIbackup]);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT2, _It_);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_);
|
||||||
xXOR(gprT1, gprT2);
|
xXOR(gprT1, regT);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(isBadOrEvil))
|
if (!(isBadOrEvil))
|
||||||
|
@ -1964,7 +1993,7 @@ void normJumpPass2(mV)
|
||||||
{
|
{
|
||||||
if (!mVUlow.constJump.isValid || mVUlow.evilBranch)
|
if (!mVUlow.constJump.isValid || mVUlow.evilBranch)
|
||||||
{
|
{
|
||||||
mVUallocVIa(mVU, gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
xSHL(gprT1, 3);
|
xSHL(gprT1, 3);
|
||||||
xAND(gprT1, mVU.microMemSize - 8);
|
xAND(gprT1, mVU.microMemSize - 8);
|
||||||
|
|
||||||
|
@ -2008,17 +2037,18 @@ mVUop(mVU_JALR)
|
||||||
normJumpPass2(mVU);
|
normJumpPass2(mVU);
|
||||||
if (!mVUlow.evilBranch)
|
if (!mVUlow.evilBranch)
|
||||||
{
|
{
|
||||||
xMOV(gprT1, bSaveAddr);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
xMOV(regT, bSaveAddr);
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
if (mVUlow.evilBranch)
|
if (mVUlow.evilBranch)
|
||||||
{
|
{
|
||||||
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
if (isEvilBlock)
|
if (isEvilBlock)
|
||||||
{
|
{
|
||||||
xMOV(gprT1, ptr32[&mVU.evilBranch]);
|
xMOV(regT, ptr32[&mVU.evilBranch]);
|
||||||
xADD(gprT1, 8);
|
xADD(regT, 8);
|
||||||
xSHR(gprT1, 3);
|
xSHR(regT, 3);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -2026,11 +2056,11 @@ mVUop(mVU_JALR)
|
||||||
DevCon.Warning("Linking JALR from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
|
DevCon.Warning("Linking JALR from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
|
||||||
incPC(2);
|
incPC(2);
|
||||||
|
|
||||||
xMOV(gprT1, ptr32[&mVU.badBranch]);
|
xMOV(regT, ptr32[&mVU.badBranch]);
|
||||||
xADD(gprT1, 8);
|
xADD(regT, 8);
|
||||||
xSHR(gprT1, 3);
|
xSHR(regT, 3);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
|
||||||
}
|
}
|
||||||
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
}
|
}
|
||||||
|
|
||||||
mVU.profiler.EmitOp(opJALR);
|
mVU.profiler.EmitOp(opJALR);
|
||||||
|
|
|
@ -37,13 +37,6 @@ void setupMacroOp(int mode, const char* opName)
|
||||||
// Set up reg allocation
|
// Set up reg allocation
|
||||||
microVU0.regAlloc->reset(true);
|
microVU0.regAlloc->reset(true);
|
||||||
|
|
||||||
if (mode & 0x110) // X86 regs are modified, or flags modified
|
|
||||||
{
|
|
||||||
_freeX86reg(eax);
|
|
||||||
_freeX86reg(ecx);
|
|
||||||
_freeX86reg(edx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mode & 0x03) // Q will be read/written
|
if (mode & 0x03) // Q will be read/written
|
||||||
_freeXMMreg(xmmPQ.Id);
|
_freeXMMreg(xmmPQ.Id);
|
||||||
|
|
||||||
|
@ -127,6 +120,17 @@ void mVUFreeCOP2XMMreg(int hostreg)
|
||||||
microVU0.regAlloc->clearRegCOP2(hostreg);
|
microVU0.regAlloc->clearRegCOP2(hostreg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void mVUFreeCOP2GPR(int hostreg)
|
||||||
|
{
|
||||||
|
microVU0.regAlloc->clearGPRCOP2(hostreg);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool mVUIsReservedCOP2(int hostreg)
|
||||||
|
{
|
||||||
|
// gprF1 through 3 is not correctly used in COP2 mode.
|
||||||
|
return (hostreg == gprT1.GetId() || hostreg == gprT2.GetId() || hostreg == gprF0.GetId());
|
||||||
|
}
|
||||||
|
|
||||||
#define REC_COP2_mVU0(f, opName, mode) \
|
#define REC_COP2_mVU0(f, opName, mode) \
|
||||||
void recV##f() \
|
void recV##f() \
|
||||||
{ \
|
{ \
|
||||||
|
@ -429,11 +433,22 @@ static void recCFC2()
|
||||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||||
pxAssert(!GPR_IS_CONST1(_Rt_));
|
pxAssert(!GPR_IS_CONST1(_Rt_));
|
||||||
|
|
||||||
// FixMe: Should R-Reg have upper 9 bits 0?
|
if (_Rd_ == 0) // why would you read vi00?
|
||||||
if (_Rd_ >= REG_STATUS_FLAG)
|
{
|
||||||
|
xXOR(xRegister32(regt), xRegister32(regt));
|
||||||
|
}
|
||||||
|
else if (_Rd_ >= REG_STATUS_FLAG) // FixMe: Should R-Reg have upper 9 bits 0?
|
||||||
|
{
|
||||||
xMOVSX(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
|
xMOVSX(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
xMOV(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
|
{
|
||||||
|
const int vireg = _allocIfUsedVItoX86(_Rd_, MODE_READ);
|
||||||
|
if (vireg >= 0)
|
||||||
|
xMOVZX(xRegister32(regt), xRegister16(vireg));
|
||||||
|
else
|
||||||
|
xMOVZX(xRegister32(regt), ptr16[&vu0Regs.VI[_Rd_].UL]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void recCTC2()
|
static void recCTC2()
|
||||||
|
@ -532,9 +547,62 @@ static void recCTC2()
|
||||||
_freeXMMregWithoutWriteback(xmmreg);
|
_freeXMMregWithoutWriteback(xmmreg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Need to expand this out, because we want to write as 16 bits.
|
// Little bit nasty, but optimal codegen.
|
||||||
_eeMoveGPRtoR(eax, _Rt_);
|
const int gprreg = _allocIfUsedGPRtoX86(_Rt_, MODE_READ);
|
||||||
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
|
const int vireg = _allocIfUsedVItoX86(_Rd_, MODE_WRITE);
|
||||||
|
if (vireg >= 0)
|
||||||
|
{
|
||||||
|
if (gprreg >= 0)
|
||||||
|
{
|
||||||
|
xMOVZX(xRegister32(vireg), xRegister16(gprreg));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// it could be in an xmm..
|
||||||
|
const int gprxmmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
|
||||||
|
if (gprxmmreg >= 0)
|
||||||
|
{
|
||||||
|
xMOVD(xRegister32(vireg), xRegisterSSE(gprxmmreg));
|
||||||
|
xMOVZX(xRegister32(vireg), xRegister16(vireg));
|
||||||
|
}
|
||||||
|
else if (GPR_IS_CONST1(_Rt_))
|
||||||
|
{
|
||||||
|
if (_Rt_ != 0)
|
||||||
|
xMOV(xRegister32(vireg), (g_cpuConstRegs[_Rt_].UL[0] & 0xFFFFu));
|
||||||
|
else
|
||||||
|
xXOR(xRegister32(vireg), xRegister32(vireg));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
xMOVZX(xRegister32(vireg), ptr16[&cpuRegs.GPR.r[_Rt_].US[0]]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (gprreg >= 0)
|
||||||
|
{
|
||||||
|
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], xRegister16(gprreg));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int gprxmmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
|
||||||
|
if (gprxmmreg >= 0)
|
||||||
|
{
|
||||||
|
xMOVD(eax, xRegisterSSE(gprxmmreg));
|
||||||
|
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
|
||||||
|
}
|
||||||
|
else if (GPR_IS_CONST1(_Rt_))
|
||||||
|
{
|
||||||
|
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], (g_cpuConstRegs[_Rt_].UL[0] & 0xFFFFu));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_eeMoveGPRtoR(eax, _Rt_);
|
||||||
|
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -562,7 +630,7 @@ static void recQMFC2()
|
||||||
mVUFinishVU0();
|
mVUFinishVU0();
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool vf_used = COP2INST_USEDTEST(_Rd_);
|
const bool vf_used = EEINST_VFUSEDTEST(_Rd_);
|
||||||
const int ftreg = _allocVFtoXMMreg(_Rd_, MODE_READ);
|
const int ftreg = _allocVFtoXMMreg(_Rd_, MODE_READ);
|
||||||
_deleteEEreg128(_Rt_);
|
_deleteEEreg128(_Rt_);
|
||||||
|
|
||||||
|
@ -607,7 +675,7 @@ static void recQMTC2()
|
||||||
if (_Rt_)
|
if (_Rt_)
|
||||||
{
|
{
|
||||||
// if we have to flush to memory anyway (has a constant or is x86), force load.
|
// if we have to flush to memory anyway (has a constant or is x86), force load.
|
||||||
[[maybe_unused]] const bool vf_used = COP2INST_USEDTEST(_Rd_);
|
[[maybe_unused]] const bool vf_used = EEINST_VFUSEDTEST(_Rd_);
|
||||||
const bool can_rename = EEINST_RENAMETEST(_Rt_);
|
const bool can_rename = EEINST_RENAMETEST(_Rt_);
|
||||||
const int rtreg = (GPR_IS_DIRTY_CONST(_Rt_) || _hasX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE)) ?
|
const int rtreg = (GPR_IS_DIRTY_CONST(_Rt_) || _hasX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE)) ?
|
||||||
_allocGPRtoXMMreg(_Rt_, MODE_READ) :
|
_allocGPRtoXMMreg(_Rt_, MODE_READ) :
|
||||||
|
|
|
@ -154,13 +154,10 @@ static const char branchSTR[16][8] = {
|
||||||
|
|
||||||
#define gprT1 eax // eax - Temp Reg
|
#define gprT1 eax // eax - Temp Reg
|
||||||
#define gprT2 ecx // ecx - Temp Reg
|
#define gprT2 ecx // ecx - Temp Reg
|
||||||
#define gprT3 edx // edx - Temp Reg
|
|
||||||
#define gprT1q rax // eax - Temp Reg
|
#define gprT1q rax // eax - Temp Reg
|
||||||
#define gprT2q rcx // ecx - Temp Reg
|
#define gprT2q rcx // ecx - Temp Reg
|
||||||
#define gprT3q rdx // edx - Temp Reg
|
|
||||||
#define gprT1b ax // Low 16-bit of gprT1 (eax)
|
#define gprT1b ax // Low 16-bit of gprT1 (eax)
|
||||||
#define gprT2b cx // Low 16-bit of gprT2 (ecx)
|
#define gprT2b cx // Low 16-bit of gprT2 (ecx)
|
||||||
#define gprT3b dx // Low 16-bit of gprT3 (edx)
|
|
||||||
|
|
||||||
#define gprF0 ebx // Status Flag 0
|
#define gprF0 ebx // Status Flag 0
|
||||||
#define gprF1 r12d // Status Flag 1
|
#define gprF1 r12d // Status Flag 1
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include <bitset>
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
|
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
|
||||||
|
@ -149,14 +150,57 @@ __fi void mVUbackupRegs(microVU& mVU, bool toMemory = false, bool onlyNeeded = f
|
||||||
{
|
{
|
||||||
if (toMemory)
|
if (toMemory)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
|
int num_xmms = 0, num_gprs = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
|
||||||
{
|
{
|
||||||
|
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!onlyNeeded || mVU.regAlloc->checkCachedGPR(i))
|
||||||
|
{
|
||||||
|
num_gprs++;
|
||||||
|
xPUSH(xRegister64(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::bitset<iREGCNT_XMM> save_xmms;
|
||||||
|
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
|
||||||
|
{
|
||||||
|
if (!xRegisterSSE::IsCallerSaved(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i)
|
if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i)
|
||||||
xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i));
|
{
|
||||||
|
save_xmms[i] = true;
|
||||||
|
num_xmms++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we need 16 byte alignment on the stack
|
||||||
|
#ifdef _WIN32
|
||||||
|
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64)) + 32;
|
||||||
|
int stack_offset = 32;
|
||||||
|
#else
|
||||||
|
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64));
|
||||||
|
int stack_offset = 0;
|
||||||
|
#endif
|
||||||
|
if (stack_size > 0)
|
||||||
|
{
|
||||||
|
xSUB(rsp, stack_size);
|
||||||
|
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
|
||||||
|
{
|
||||||
|
if (save_xmms[i])
|
||||||
|
{
|
||||||
|
xMOVAPS(ptr128[rsp + stack_offset], xRegisterSSE(i));
|
||||||
|
stack_offset += sizeof(u128);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// TODO(Stenzek): get rid of xmmbackup
|
||||||
mVU.regAlloc->flushAll(); // Flush Regalloc
|
mVU.regAlloc->flushAll(); // Flush Regalloc
|
||||||
xMOVAPS(ptr128[&mVU.xmmBackup[xmmPQ.Id][0]], xmmPQ);
|
xMOVAPS(ptr128[&mVU.xmmBackup[xmmPQ.Id][0]], xmmPQ);
|
||||||
}
|
}
|
||||||
|
@ -167,47 +211,64 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false, bool onlyNeeded
|
||||||
{
|
{
|
||||||
if (fromMemory)
|
if (fromMemory)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
|
int num_xmms = 0, num_gprs = 0;
|
||||||
|
|
||||||
|
std::bitset<iREGCNT_GPR> save_gprs;
|
||||||
|
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
|
||||||
{
|
{
|
||||||
|
if (!xRegister32::IsCallerSaved(i) || i == rsp.GetId())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!onlyNeeded || mVU.regAlloc->checkCachedGPR(i))
|
||||||
|
{
|
||||||
|
save_gprs[i] = true;
|
||||||
|
num_gprs++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::bitset<iREGCNT_XMM> save_xmms;
|
||||||
|
for (int i = 0; i < static_cast<int>(iREGCNT_XMM); i++)
|
||||||
|
{
|
||||||
|
if (!xRegisterSSE::IsCallerSaved(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i)
|
if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i)
|
||||||
xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]);
|
{
|
||||||
|
save_xmms[i] = true;
|
||||||
|
num_xmms++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
const int stack_extra = 32;
|
||||||
|
#else
|
||||||
|
const int stack_extra = 0;
|
||||||
|
#endif
|
||||||
|
const int stack_size = (num_xmms * sizeof(u128)) + ((num_gprs & 1) * sizeof(u64)) + stack_extra;
|
||||||
|
if (num_xmms > 0)
|
||||||
|
{
|
||||||
|
int stack_offset = (num_xmms - 1) * sizeof(u128) + stack_extra;
|
||||||
|
for (int i = static_cast<int>(iREGCNT_XMM - 1); i >= 0; i--)
|
||||||
|
{
|
||||||
|
if (!save_xmms[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
xMOVAPS(xRegisterSSE(i), ptr128[rsp + stack_offset]);
|
||||||
|
stack_offset -= sizeof(u128);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (stack_size > 0)
|
||||||
|
xADD(rsp, stack_size);
|
||||||
|
|
||||||
|
for (int i = static_cast<int>(iREGCNT_GPR - 1); i >= 0; i--)
|
||||||
|
{
|
||||||
|
if (save_gprs[i])
|
||||||
|
xPOP(xRegister64(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
|
xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
|
||||||
}
|
|
||||||
|
|
||||||
class mVUScopedXMMBackup
|
|
||||||
{
|
|
||||||
microVU& mVU;
|
|
||||||
bool fromMemory;
|
|
||||||
|
|
||||||
public:
|
|
||||||
mVUScopedXMMBackup(microVU& mVU, bool fromMemory)
|
|
||||||
: mVU(mVU) , fromMemory(fromMemory)
|
|
||||||
{
|
|
||||||
mVUbackupRegs(mVU, fromMemory);
|
|
||||||
}
|
|
||||||
~mVUScopedXMMBackup()
|
|
||||||
{
|
|
||||||
mVUrestoreRegs(mVU, fromMemory);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
_mVUt void mVUprintRegs()
|
|
||||||
{
|
|
||||||
microVU& mVU = mVUx;
|
|
||||||
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
|
|
||||||
{
|
|
||||||
Console.WriteLn("xmm%d = [0x%08x,0x%08x,0x%08x,0x%08x]", i,
|
|
||||||
mVU.xmmBackup[i][0], mVU.xmmBackup[i][1],
|
|
||||||
mVU.xmmBackup[i][2], mVU.xmmBackup[i][3]);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < mVU.regAlloc->getXmmCount(); i++)
|
|
||||||
{
|
|
||||||
Console.WriteLn("xmm%d = [%f,%f,%f,%f]", i,
|
|
||||||
(float&)mVU.xmmBackup[i][0], (float&)mVU.xmmBackup[i][1],
|
|
||||||
(float&)mVU.xmmBackup[i][2], (float&)mVU.xmmBackup[i][3]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -259,17 +320,15 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
|
||||||
jmpA.SetTarget();
|
jmpA.SetTarget();
|
||||||
if (THREAD_VU1)
|
if (THREAD_VU1)
|
||||||
{
|
{
|
||||||
{
|
#if 0
|
||||||
mVUScopedXMMBackup mVUSave(mVU, true);
|
|
||||||
xScopedSavedRegisters save{gprT1q, gprT2q, gprT3q};
|
|
||||||
if (IsDevBuild && !isCOP2) // Lets see which games do this!
|
if (IsDevBuild && !isCOP2) // Lets see which games do this!
|
||||||
{
|
{
|
||||||
xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
|
xMOV(gprT1, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
|
||||||
xMOV(arg2regd, xPC); // So we don't spam console, we'll only check micro-mode...
|
xMOV(gprT2, xPC); // So we don't spam console, we'll only check micro-mode...
|
||||||
xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd);
|
xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd);
|
||||||
}
|
}
|
||||||
xFastCall((void*)mVUwaitMTVU);
|
#endif
|
||||||
}
|
xFastCall((void*)mVU.waitMTVU);
|
||||||
}
|
}
|
||||||
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
|
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
|
||||||
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
|
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
|
||||||
|
|
Loading…
Reference in New Issue