mirror of https://github.com/PCSX2/pcsx2.git
microVU: avoid making unnecessary copies or instances of xRegister classes. Cuts out overhead which admittedly shouldn't even be there except MSVC 2008's C++ optimizer can't optimize constructors to save its life.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3408 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
3649a9e226
commit
641c3b56cf
|
@ -363,8 +363,6 @@ template< typename T > void xWrite( T val );
|
||||||
bool operator==( const xRegisterSSE& src ) const { return this->Id == src.Id; }
|
bool operator==( const xRegisterSSE& src ) const { return this->Id == src.Id; }
|
||||||
bool operator!=( const xRegisterSSE& src ) const { return this->Id != src.Id; }
|
bool operator!=( const xRegisterSSE& src ) const { return this->Id != src.Id; }
|
||||||
|
|
||||||
void operator=( xRegisterSSE src ) { Id = src.Id; }
|
|
||||||
|
|
||||||
xRegisterSSE& operator++()
|
xRegisterSSE& operator++()
|
||||||
{
|
{
|
||||||
++Id &= (iREGCNT_XMM-1);
|
++Id &= (iREGCNT_XMM-1);
|
||||||
|
@ -376,6 +374,14 @@ template< typename T > void xWrite( T val );
|
||||||
--Id &= (iREGCNT_XMM-1);
|
--Id &= (iREGCNT_XMM-1);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const xRegisterSSE* const m_tbl_xmmRegs[iREGCNT_XMM];
|
||||||
|
|
||||||
|
static const xRegisterSSE& GetInstance(uint id)
|
||||||
|
{
|
||||||
|
pxAssume(id < iREGCNT_XMM);
|
||||||
|
return *m_tbl_xmmRegs[id];
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class xRegisterCL : public xRegister8
|
class xRegisterCL : public xRegister8
|
||||||
|
|
|
@ -138,6 +138,14 @@ const xRegister8
|
||||||
|
|
||||||
const xRegisterCL cl;
|
const xRegisterCL cl;
|
||||||
|
|
||||||
|
const xRegisterSSE *const xRegisterSSE::m_tbl_xmmRegs[iREGCNT_XMM] =
|
||||||
|
{
|
||||||
|
&xmm0, &xmm1,
|
||||||
|
&xmm2, &xmm3,
|
||||||
|
&xmm4, &xmm5,
|
||||||
|
&xmm6, &xmm7
|
||||||
|
};
|
||||||
|
|
||||||
const char *const x86_regnames_gpr8[8] =
|
const char *const x86_regnames_gpr8[8] =
|
||||||
{
|
{
|
||||||
"al", "cl", "dl", "bl",
|
"al", "cl", "dl", "bl",
|
||||||
|
|
|
@ -36,7 +36,7 @@ _f static x32 getFlagReg(int fInst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void setBitSFLAG(x32 reg, x32 regT, int bitTest, int bitSet)
|
_f void setBitSFLAG(const x32& reg, const x32& regT, int bitTest, int bitSet)
|
||||||
{
|
{
|
||||||
xTEST(regT, bitTest);
|
xTEST(regT, bitTest);
|
||||||
xForwardJZ8 skip;
|
xForwardJZ8 skip;
|
||||||
|
@ -44,7 +44,7 @@ _f void setBitSFLAG(x32 reg, x32 regT, int bitTest, int bitSet)
|
||||||
skip.SetTarget();
|
skip.SetTarget();
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void setBitFSEQ(x32 reg, int bitX)
|
_f void setBitFSEQ(const x32& reg, int bitX)
|
||||||
{
|
{
|
||||||
xTEST(reg, bitX);
|
xTEST(reg, bitX);
|
||||||
xForwardJump8 skip(Jcc_Zero);
|
xForwardJump8 skip(Jcc_Zero);
|
||||||
|
@ -52,18 +52,18 @@ _f void setBitFSEQ(x32 reg, int bitX)
|
||||||
skip.SetTarget();
|
skip.SetTarget();
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void mVUallocSFLAGa(x32 reg, int fInstance)
|
_f void mVUallocSFLAGa(const x32& reg, int fInstance)
|
||||||
{
|
{
|
||||||
xMOV(reg, getFlagReg(fInstance));
|
xMOV(reg, getFlagReg(fInstance));
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void mVUallocSFLAGb(x32 reg, int fInstance)
|
_f void mVUallocSFLAGb(const x32& reg, int fInstance)
|
||||||
{
|
{
|
||||||
xMOV(getFlagReg(fInstance), reg);
|
xMOV(getFlagReg(fInstance), reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize Status Flag
|
// Normalize Status Flag
|
||||||
_f void mVUallocSFLAGc(x32 reg, x32 regT, int fInstance)
|
_f void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance)
|
||||||
{
|
{
|
||||||
xXOR(reg, reg);
|
xXOR(reg, reg);
|
||||||
mVUallocSFLAGa(regT, fInstance);
|
mVUallocSFLAGa(regT, fInstance);
|
||||||
|
@ -107,25 +107,25 @@ _f void mVUallocSFLAGd(u32* memAddr, bool setAllflags) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void mVUallocMFLAGa(mV, x32 reg, int fInstance)
|
_f void mVUallocMFLAGa(mV, const x32& reg, int fInstance)
|
||||||
{
|
{
|
||||||
xMOVZX(reg, ptr16[&mVU->macFlag[fInstance]]);
|
xMOVZX(reg, ptr16[&mVU->macFlag[fInstance]]);
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void mVUallocMFLAGb(mV, x32 reg, int fInstance)
|
_f void mVUallocMFLAGb(mV, const x32& reg, int fInstance)
|
||||||
{
|
{
|
||||||
//xAND(reg, 0xffff);
|
//xAND(reg, 0xffff);
|
||||||
if (fInstance < 4) xMOV(ptr32[&mVU->macFlag[fInstance]], reg); // microVU
|
if (fInstance < 4) xMOV(ptr32[&mVU->macFlag[fInstance]], reg); // microVU
|
||||||
else xMOV(ptr32[&mVU->regs->VI[REG_MAC_FLAG].UL], reg); // macroVU
|
else xMOV(ptr32[&mVU->regs->VI[REG_MAC_FLAG].UL], reg); // macroVU
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void mVUallocCFLAGa(mV, x32 reg, int fInstance)
|
_f void mVUallocCFLAGa(mV, const x32& reg, int fInstance)
|
||||||
{
|
{
|
||||||
if (fInstance < 4) xMOV(reg, ptr32[&mVU->clipFlag[fInstance]]); // microVU
|
if (fInstance < 4) xMOV(reg, ptr32[&mVU->clipFlag[fInstance]]); // microVU
|
||||||
else xMOV(reg, ptr32[&mVU->regs->VI[REG_CLIP_FLAG].UL]); // macroVU
|
else xMOV(reg, ptr32[&mVU->regs->VI[REG_CLIP_FLAG].UL]); // macroVU
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void mVUallocCFLAGb(mV, x32 reg, int fInstance)
|
_f void mVUallocCFLAGb(mV, const x32& reg, int fInstance)
|
||||||
{
|
{
|
||||||
if (fInstance < 4) xMOV(ptr32[&mVU->clipFlag[fInstance]], reg); // microVU
|
if (fInstance < 4) xMOV(ptr32[&mVU->clipFlag[fInstance]], reg); // microVU
|
||||||
else xMOV(ptr32[&mVU->regs->VI[REG_CLIP_FLAG].UL], reg); // macroVU
|
else xMOV(ptr32[&mVU->regs->VI[REG_CLIP_FLAG].UL], reg); // macroVU
|
||||||
|
@ -135,7 +135,7 @@ _f void mVUallocCFLAGb(mV, x32 reg, int fInstance)
|
||||||
// VI Reg Allocators
|
// VI Reg Allocators
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
_f void mVUallocVIa(mV, x32 GPRreg, int _reg_, bool signext = false)
|
_f void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false)
|
||||||
{
|
{
|
||||||
if (!_reg_)
|
if (!_reg_)
|
||||||
xXOR(GPRreg, GPRreg);
|
xXOR(GPRreg, GPRreg);
|
||||||
|
@ -146,7 +146,7 @@ _f void mVUallocVIa(mV, x32 GPRreg, int _reg_, bool signext = false)
|
||||||
xMOVZX(GPRreg, ptr16[&mVU->regs->VI[_reg_].UL]);
|
xMOVZX(GPRreg, ptr16[&mVU->regs->VI[_reg_].UL]);
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void mVUallocVIb(mV, x32 GPRreg, int _reg_)
|
_f void mVUallocVIb(mV, const x32& GPRreg, int _reg_)
|
||||||
{
|
{
|
||||||
if (mVUlow.backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch)
|
if (mVUlow.backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch)
|
||||||
xMOVZX(gprT3, ptr16[&mVU->regs->VI[_reg_].UL]);
|
xMOVZX(gprT3, ptr16[&mVU->regs->VI[_reg_].UL]);
|
||||||
|
@ -160,19 +160,19 @@ _f void mVUallocVIb(mV, x32 GPRreg, int _reg_)
|
||||||
// P/Q Reg Allocators
|
// P/Q Reg Allocators
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
_f void getPreg(mV, xmm reg)
|
_f void getPreg(mV, const xmm& reg)
|
||||||
{
|
{
|
||||||
mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP));
|
mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP));
|
||||||
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/
|
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void getQreg(xmm reg, int qInstance)
|
_f void getQreg(const xmm& reg, int qInstance)
|
||||||
{
|
{
|
||||||
mVUunpack_xyzw(reg, xmmPQ, qInstance);
|
mVUunpack_xyzw(reg, xmmPQ, qInstance);
|
||||||
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 15);*/
|
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 15);*/
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void writeQreg(xmm reg, int qInstance)
|
_f void writeQreg(const xmm& reg, int qInstance)
|
||||||
{
|
{
|
||||||
if (qInstance) {
|
if (qInstance) {
|
||||||
if (!x86caps.hasStreamingSIMD4Extensions) {
|
if (!x86caps.hasStreamingSIMD4Extensions) {
|
||||||
|
|
|
@ -34,7 +34,7 @@ const __aligned16 u32 sse4_maxvals[2][4] = {
|
||||||
// gotten a NaN value, then something went wrong; and the NaN's sign
|
// gotten a NaN value, then something went wrong; and the NaN's sign
|
||||||
// is not to be trusted. Games like positive values better usually,
|
// is not to be trusted. Games like positive values better usually,
|
||||||
// and it's faster... so just always make NaNs into positive infinity.
|
// and it's faster... so just always make NaNs into positive infinity.
|
||||||
void mVUclamp1(xmm reg, xmm regT1, int xyzw, bool bClampE = 0) {
|
void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) {
|
||||||
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
|
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
|
||||||
switch (xyzw) {
|
switch (xyzw) {
|
||||||
case 1: case 2: case 4: case 8:
|
case 1: case 2: case 4: case 8:
|
||||||
|
@ -54,7 +54,7 @@ void mVUclamp1(xmm reg, xmm regT1, int xyzw, bool bClampE = 0) {
|
||||||
// Note 2: Using regalloc here seems to contaminate some regs in certain games.
|
// Note 2: Using regalloc here seems to contaminate some regs in certain games.
|
||||||
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
|
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
|
||||||
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
|
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
|
||||||
void mVUclamp2(microVU* mVU, xmm reg, xmm regT1in, int xyzw, bool bClampE = 0) {
|
void mVUclamp2(microVU* mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) {
|
||||||
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
|
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
|
||||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||||
int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1;
|
int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1;
|
||||||
|
@ -62,8 +62,8 @@ void mVUclamp2(microVU* mVU, xmm reg, xmm regT1in, int xyzw, bool bClampE = 0) {
|
||||||
xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]);
|
xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
//xmm regT1 = regT1b ? mVU->regAlloc->allocReg() : regT1in;
|
//const xmm& regT1 = regT1b ? mVU->regAlloc->allocReg() : regT1in;
|
||||||
xmm regT1 = regT1in.IsEmpty() ? xmm((reg.Id + 1) % 8) : regT1in;
|
const xmm& regT1 = regT1in.IsEmpty() ? xmm((reg.Id + 1) % 8) : regT1in;
|
||||||
if (regT1 != regT1in) xMOVAPS(ptr128[mVU->xmmCTemp], regT1);
|
if (regT1 != regT1in) xMOVAPS(ptr128[mVU->xmmCTemp], regT1);
|
||||||
switch (xyzw) {
|
switch (xyzw) {
|
||||||
case 1: case 2: case 4: case 8:
|
case 1: case 2: case 4: case 8:
|
||||||
|
@ -88,7 +88,7 @@ void mVUclamp2(microVU* mVU, xmm reg, xmm regT1in, int xyzw, bool bClampE = 0) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used for operand clamping on every SSE instruction (add/sub/mul/div)
|
// Used for operand clamping on every SSE instruction (add/sub/mul/div)
|
||||||
void mVUclamp3(microVU* mVU, xmm reg, xmm regT1, int xyzw) {
|
void mVUclamp3(microVU* mVU, const xmm& reg, const xmm& regT1, int xyzw) {
|
||||||
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
|
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,6 +98,6 @@ void mVUclamp3(microVU* mVU, xmm reg, xmm regT1, int xyzw) {
|
||||||
// emulated opcodes (causing crashes). Since we're clamping the operands
|
// emulated opcodes (causing crashes). Since we're clamping the operands
|
||||||
// with mVUclamp3, we should almost never be getting a NaN result,
|
// with mVUclamp3, we should almost never be getting a NaN result,
|
||||||
// but this clamp is kept as a just-in-case precaution.
|
// but this clamp is kept as a just-in-case precaution.
|
||||||
void mVUclamp4(xmm reg, xmm regT1, int xyzw) {
|
void mVUclamp4(const xmm& reg, const xmm& regT1, int xyzw) {
|
||||||
if (clampE && !CHECK_VU_SIGN_OVERFLOW) mVUclamp1(reg, regT1, xyzw, 1);
|
if (clampE && !CHECK_VU_SIGN_OVERFLOW) mVUclamp1(reg, regT1, xyzw, 1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,25 +134,32 @@ void doIbit(mV) {
|
||||||
void doSwapOp(mV) {
|
void doSwapOp(mV) {
|
||||||
if (mVUinfo.backupVF && !mVUlow.noWriteVF) {
|
if (mVUinfo.backupVF && !mVUlow.noWriteVF) {
|
||||||
DevCon.WriteLn(Color_Green, "microVU%d: Backing Up VF Reg [%04x]", getIndex, xPC);
|
DevCon.WriteLn(Color_Green, "microVU%d: Backing Up VF Reg [%04x]", getIndex, xPC);
|
||||||
xmm t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg);
|
|
||||||
xmm t2 = mVU->regAlloc->allocReg();
|
const xmm& t2 = mVU->regAlloc->allocReg();
|
||||||
|
|
||||||
|
{
|
||||||
|
const xmm& t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg);
|
||||||
xMOVAPS(t2, t1);
|
xMOVAPS(t2, t1);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
|
}
|
||||||
|
|
||||||
mVUopL(mVU, 1);
|
mVUopL(mVU, 1);
|
||||||
|
|
||||||
t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg, mVUlow.VF_write.reg, 0xf, 0);
|
{
|
||||||
|
const xmm& t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg, mVUlow.VF_write.reg, 0xf, 0);
|
||||||
xXOR.PS(t2, t1);
|
xXOR.PS(t2, t1);
|
||||||
xXOR.PS(t1, t2);
|
xXOR.PS(t1, t2);
|
||||||
xXOR.PS(t2, t1);
|
xXOR.PS(t2, t1);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
|
}
|
||||||
|
|
||||||
incPC(1);
|
incPC(1);
|
||||||
doUpperOp();
|
doUpperOp();
|
||||||
|
{
|
||||||
t1 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
|
const xmm& t1 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
|
||||||
xMOVAPS(t1, t2);
|
xMOVAPS(t1, t2);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
|
}
|
||||||
mVU->regAlloc->clearNeeded(t2);
|
mVU->regAlloc->clearNeeded(t2);
|
||||||
}
|
}
|
||||||
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
|
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
|
||||||
|
|
|
@ -204,18 +204,19 @@ public:
|
||||||
}
|
}
|
||||||
void reset() {
|
void reset() {
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
clearReg(xmm(i));
|
clearReg(i);
|
||||||
}
|
}
|
||||||
counter = 0;
|
counter = 0;
|
||||||
}
|
}
|
||||||
void flushAll(bool clearState = 1) {
|
void flushAll(bool clearState = 1) {
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
writeBackReg(xmm(i));
|
writeBackReg(xmm(i));
|
||||||
if (clearState) clearReg(xmm(i));
|
if (clearState) clearReg(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void clearReg(xmm reg) {
|
void clearReg(const xmm& reg) { clearReg(reg.Id); }
|
||||||
microMapXMM& clear( xmmMap[reg.Id] );
|
void clearReg(int regId) {
|
||||||
|
microMapXMM& clear( xmmMap[regId] );
|
||||||
clear.VFreg = -1;
|
clear.VFreg = -1;
|
||||||
clear.count = 0;
|
clear.count = 0;
|
||||||
clear.xyzw = 0;
|
clear.xyzw = 0;
|
||||||
|
@ -223,10 +224,10 @@ public:
|
||||||
}
|
}
|
||||||
void clearRegVF(int VFreg) {
|
void clearRegVF(int VFreg) {
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
if (xmmMap[i].VFreg == VFreg) clearReg(xmm(i));
|
if (xmmMap[i].VFreg == VFreg) clearReg(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void writeBackReg(xmm reg, bool invalidateRegs = 1) {
|
void writeBackReg(const xmm& reg, bool invalidateRegs = 1) {
|
||||||
microMapXMM& write( xmmMap[reg.Id] );
|
microMapXMM& write( xmmMap[reg.Id] );
|
||||||
|
|
||||||
if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
|
if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
|
||||||
|
@ -239,7 +240,7 @@ public:
|
||||||
if ((i == reg.Id) || imap.isNeeded) continue;
|
if ((i == reg.Id) || imap.isNeeded) continue;
|
||||||
if (imap.VFreg == write.VFreg) {
|
if (imap.VFreg == write.VFreg) {
|
||||||
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
|
if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
|
||||||
clearReg(xmm(i)); // Invalidate any Cached Regs of same vf Reg
|
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -252,7 +253,7 @@ public:
|
||||||
}
|
}
|
||||||
clearReg(reg); // Clear Reg
|
clearReg(reg); // Clear Reg
|
||||||
}
|
}
|
||||||
void clearNeeded(xmm reg)
|
void clearNeeded(const xmm& reg)
|
||||||
{
|
{
|
||||||
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
|
if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
|
||||||
|
|
||||||
|
@ -273,7 +274,7 @@ public:
|
||||||
imap.count = counter;
|
imap.count = counter;
|
||||||
mergeRegs = 2;
|
mergeRegs = 2;
|
||||||
}
|
}
|
||||||
else clearReg(xmm(i));
|
else clearReg(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
|
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
|
||||||
|
@ -282,11 +283,11 @@ public:
|
||||||
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
xmm allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
|
const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
|
||||||
counter++;
|
counter++;
|
||||||
if (vfLoadReg >= 0) { // Search For Cached Regs
|
if (vfLoadReg >= 0) { // Search For Cached Regs
|
||||||
for (int i = 0; i < xmmTotal; i++) {
|
for (int i = 0; i < xmmTotal; i++) {
|
||||||
xmm xmmi(i);
|
const xmm& xmmi(xmm::GetInstance(i));
|
||||||
microMapXMM& imap (xmmMap[i]);
|
microMapXMM& imap (xmmMap[i]);
|
||||||
if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
|
if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
|
||||||
|| (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
|| (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
||||||
|
@ -294,7 +295,7 @@ public:
|
||||||
if (vfWriteReg >= 0) { // Reg will be modified
|
if (vfWriteReg >= 0) { // Reg will be modified
|
||||||
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
||||||
z = findFreeReg();
|
z = findFreeReg();
|
||||||
xmm xmmz(z);
|
const xmm& xmmz(xmm::GetInstance(z));
|
||||||
writeBackReg(xmmz);
|
writeBackReg(xmmz);
|
||||||
if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
|
if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
|
||||||
else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
|
else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
|
||||||
|
@ -314,12 +315,12 @@ public:
|
||||||
}
|
}
|
||||||
xmmMap[z].count = counter;
|
xmmMap[z].count = counter;
|
||||||
xmmMap[z].isNeeded = 1;
|
xmmMap[z].isNeeded = 1;
|
||||||
return xmm(z);
|
return xmm::GetInstance(z);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int x = findFreeReg();
|
int x = findFreeReg();
|
||||||
xmm xmmx(x);
|
const xmm& xmmx = xmm::GetInstance(x);
|
||||||
writeBackReg(xmmx);
|
writeBackReg(xmmx);
|
||||||
|
|
||||||
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
// Test if Vector is +/- Zero
|
// Test if Vector is +/- Zero
|
||||||
_f static void testZero(xmm xmmReg, xmm xmmTemp, x32 gprTemp)
|
_f static void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprTemp)
|
||||||
{
|
{
|
||||||
xXOR.PS(xmmTemp, xmmTemp);
|
xXOR.PS(xmmTemp, xmmTemp);
|
||||||
xCMPEQ.SS(xmmTemp, xmmReg);
|
xCMPEQ.SS(xmmTemp, xmmReg);
|
||||||
|
@ -36,7 +36,7 @@ _f static void testZero(xmm xmmReg, xmm xmmTemp, x32 gprTemp)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test if Vector is Negative (Set Flags and Makes Positive)
|
// Test if Vector is Negative (Set Flags and Makes Positive)
|
||||||
_f static void testNeg(mV, xmm xmmReg, x32 gprTemp)
|
_f static void testNeg(mV, const xmm& xmmReg, const x32& gprTemp)
|
||||||
{
|
{
|
||||||
xMOVMSKPS(gprTemp, xmmReg);
|
xMOVMSKPS(gprTemp, xmmReg);
|
||||||
xTEST(gprTemp, 1);
|
xTEST(gprTemp, 1);
|
||||||
|
@ -52,8 +52,8 @@ mVUop(mVU_DIV) {
|
||||||
xmm Ft;
|
xmm Ft;
|
||||||
if (_Ftf_) Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
if (_Ftf_) Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
||||||
else Ft = mVU->regAlloc->allocReg(_Ft_);
|
else Ft = mVU->regAlloc->allocReg(_Ft_);
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xmm t1 = mVU->regAlloc->allocReg();
|
const xmm& t1 = mVU->regAlloc->allocReg();
|
||||||
|
|
||||||
testZero(Ft, t1, gprT1); // Test if Ft is zero
|
testZero(Ft, t1, gprT1); // Test if Ft is zero
|
||||||
xForwardJZ8 cjmp; // Skip if not zero
|
xForwardJZ8 cjmp; // Skip if not zero
|
||||||
|
@ -89,7 +89,7 @@ mVUop(mVU_DIV) {
|
||||||
mVUop(mVU_SQRT) {
|
mVUop(mVU_SQRT) {
|
||||||
pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); }
|
pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
||||||
|
|
||||||
xMOV(ptr32[&mVU->divFlag], 0); // Clear I/D flags
|
xMOV(ptr32[&mVU->divFlag], 0); // Clear I/D flags
|
||||||
testNeg(mVU, Ft, gprT1); // Check for negative sqrt
|
testNeg(mVU, Ft, gprT1); // Check for negative sqrt
|
||||||
|
@ -106,9 +106,9 @@ mVUop(mVU_SQRT) {
|
||||||
mVUop(mVU_RSQRT) {
|
mVUop(mVU_RSQRT) {
|
||||||
pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); }
|
pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
|
||||||
xmm t1 = mVU->regAlloc->allocReg();
|
const xmm& t1 = mVU->regAlloc->allocReg();
|
||||||
|
|
||||||
xMOV(ptr32[&mVU->divFlag], 0); // Clear I/D flags
|
xMOV(ptr32[&mVU->divFlag], 0); // Clear I/D flags
|
||||||
testNeg(mVU, Ft, gprT1); // Check for negative sqrt
|
testNeg(mVU, Ft, gprT1); // Check for negative sqrt
|
||||||
|
@ -156,7 +156,7 @@ mVUop(mVU_RSQRT) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d)
|
// ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d)
|
||||||
_f static void mVU_EATAN_(mV, xmm PQ, xmm Fs, xmm t1, xmm t2) {
|
_f static void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, const xmm& t2) {
|
||||||
xMOVSS(PQ, Fs);
|
xMOVSS(PQ, Fs);
|
||||||
xMUL.SS(PQ, ptr32[&mVUglob.T1[0]]);
|
xMUL.SS(PQ, ptr32[&mVUglob.T1[0]]);
|
||||||
xMOVAPS(t2, Fs);
|
xMOVAPS(t2, Fs);
|
||||||
|
@ -174,9 +174,9 @@ _f static void mVU_EATAN_(mV, xmm PQ, xmm Fs, xmm t1, xmm t2) {
|
||||||
mVUop(mVU_EATAN) {
|
mVUop(mVU_EATAN) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 54); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 54); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xmm t1 = mVU->regAlloc->allocReg();
|
const xmm& t1 = mVU->regAlloc->allocReg();
|
||||||
xmm t2 = mVU->regAlloc->allocReg();
|
const xmm& t2 = mVU->regAlloc->allocReg();
|
||||||
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xMOVSS (xmmPQ, Fs);
|
xMOVSS (xmmPQ, Fs);
|
||||||
xSUB.SS(Fs, ptr32[&mVUglob.one[0]]);
|
xSUB.SS(Fs, ptr32[&mVUglob.one[0]]);
|
||||||
|
@ -193,9 +193,9 @@ mVUop(mVU_EATAN) {
|
||||||
mVUop(mVU_EATANxy) {
|
mVUop(mVU_EATANxy) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
const xmm& t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
xmm Fs = mVU->regAlloc->allocReg();
|
const xmm& Fs = mVU->regAlloc->allocReg();
|
||||||
xmm t2 = mVU->regAlloc->allocReg();
|
const xmm& t2 = mVU->regAlloc->allocReg();
|
||||||
xPSHUF.D(Fs, t1, 0x01);
|
xPSHUF.D(Fs, t1, 0x01);
|
||||||
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xMOVSS (xmmPQ, Fs);
|
xMOVSS (xmmPQ, Fs);
|
||||||
|
@ -213,9 +213,9 @@ mVUop(mVU_EATANxy) {
|
||||||
mVUop(mVU_EATANxz) {
|
mVUop(mVU_EATANxz) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
const xmm& t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
xmm Fs = mVU->regAlloc->allocReg();
|
const xmm& Fs = mVU->regAlloc->allocReg();
|
||||||
xmm t2 = mVU->regAlloc->allocReg();
|
const xmm& t2 = mVU->regAlloc->allocReg();
|
||||||
xPSHUF.D(Fs, t1, 0x02);
|
xPSHUF.D(Fs, t1, 0x02);
|
||||||
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xMOVSS (xmmPQ, Fs);
|
xMOVSS (xmmPQ, Fs);
|
||||||
|
@ -240,9 +240,9 @@ mVUop(mVU_EATANxz) {
|
||||||
mVUop(mVU_EEXP) {
|
mVUop(mVU_EEXP) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xmm t1 = mVU->regAlloc->allocReg();
|
const xmm& t1 = mVU->regAlloc->allocReg();
|
||||||
xmm t2 = mVU->regAlloc->allocReg();
|
const xmm& t2 = mVU->regAlloc->allocReg();
|
||||||
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xMOVSS (xmmPQ, Fs);
|
xMOVSS (xmmPQ, Fs);
|
||||||
xMUL.SS (xmmPQ, ptr32[mVUglob.E1]);
|
xMUL.SS (xmmPQ, ptr32[mVUglob.E1]);
|
||||||
|
@ -272,7 +272,7 @@ mVUop(mVU_EEXP) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
|
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
|
||||||
_f void mVU_sumXYZ(mV, xmm PQ, xmm Fs) {
|
_f void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) {
|
||||||
if( x86caps.hasStreamingSIMD4Extensions ) {
|
if( x86caps.hasStreamingSIMD4Extensions ) {
|
||||||
xDP.PS(Fs, Fs, 0x71);
|
xDP.PS(Fs, Fs, 0x71);
|
||||||
xMOVSS(PQ, Fs);
|
xMOVSS(PQ, Fs);
|
||||||
|
@ -290,7 +290,7 @@ _f void mVU_sumXYZ(mV, xmm PQ, xmm Fs) {
|
||||||
mVUop(mVU_ELENG) {
|
mVUop(mVU_ELENG) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
||||||
xSQRT.SS (xmmPQ, xmmPQ);
|
xSQRT.SS (xmmPQ, xmmPQ);
|
||||||
|
@ -303,7 +303,7 @@ mVUop(mVU_ELENG) {
|
||||||
mVUop(mVU_ERCPR) {
|
mVUop(mVU_ERCPR) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xMOVSS (xmmPQ, Fs);
|
xMOVSS (xmmPQ, Fs);
|
||||||
xMOVSSZX (Fs, ptr32[&mVUglob.one[0]]);
|
xMOVSSZX (Fs, ptr32[&mVUglob.one[0]]);
|
||||||
|
@ -318,7 +318,7 @@ mVUop(mVU_ERCPR) {
|
||||||
mVUop(mVU_ERLENG) {
|
mVUop(mVU_ERLENG) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
||||||
xSQRT.SS (xmmPQ, xmmPQ);
|
xSQRT.SS (xmmPQ, xmmPQ);
|
||||||
|
@ -334,7 +334,7 @@ mVUop(mVU_ERLENG) {
|
||||||
mVUop(mVU_ERSADD) {
|
mVUop(mVU_ERSADD) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
||||||
xMOVSSZX (Fs, ptr32[&mVUglob.one[0]]);
|
xMOVSSZX (Fs, ptr32[&mVUglob.one[0]]);
|
||||||
|
@ -349,7 +349,7 @@ mVUop(mVU_ERSADD) {
|
||||||
mVUop(mVU_ERSQRT) {
|
mVUop(mVU_ERSQRT) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xAND.PS (Fs, ptr128[&mVUglob.absclip[0]]);
|
xAND.PS (Fs, ptr128[&mVUglob.absclip[0]]);
|
||||||
xSQRT.SS (xmmPQ, Fs);
|
xSQRT.SS (xmmPQ, Fs);
|
||||||
|
@ -365,7 +365,7 @@ mVUop(mVU_ERSQRT) {
|
||||||
mVUop(mVU_ESADD) {
|
mVUop(mVU_ESADD) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
||||||
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
|
@ -384,9 +384,9 @@ mVUop(mVU_ESADD) {
|
||||||
mVUop(mVU_ESIN) {
|
mVUop(mVU_ESIN) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xmm t1 = mVU->regAlloc->allocReg();
|
const xmm& t1 = mVU->regAlloc->allocReg();
|
||||||
xmm t2 = mVU->regAlloc->allocReg();
|
const xmm& t2 = mVU->regAlloc->allocReg();
|
||||||
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xMOVSS (xmmPQ, Fs);
|
xMOVSS (xmmPQ, Fs);
|
||||||
xMOVAPS (t1, Fs);
|
xMOVAPS (t1, Fs);
|
||||||
|
@ -412,7 +412,7 @@ mVUop(mVU_ESIN) {
|
||||||
mVUop(mVU_ESQRT) {
|
mVUop(mVU_ESQRT) {
|
||||||
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
|
pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xAND.PS (Fs, ptr128[&mVUglob.absclip[0]]);
|
xAND.PS (Fs, ptr128[&mVUglob.absclip[0]]);
|
||||||
xSQRT.SS(xmmPQ, Fs);
|
xSQRT.SS(xmmPQ, Fs);
|
||||||
|
@ -425,8 +425,8 @@ mVUop(mVU_ESQRT) {
|
||||||
mVUop(mVU_ESUM) {
|
mVUop(mVU_ESUM) {
|
||||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); }
|
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
xmm t1 = mVU->regAlloc->allocReg();
|
const xmm& t1 = mVU->regAlloc->allocReg();
|
||||||
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
xPSHUF.D (t1, Fs, 0x1b);
|
xPSHUF.D (t1, Fs, 0x1b);
|
||||||
SSE_ADDPS(mVU, Fs, t1);
|
SSE_ADDPS(mVU, Fs, t1);
|
||||||
|
@ -719,7 +719,7 @@ mVUop(mVU_ISUBIU) {
|
||||||
mVUop(mVU_MFIR) {
|
mVUop(mVU_MFIR) {
|
||||||
pass1 { if (!_Ft_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_, mVUlow.VI_read[0]); analyzeReg2(_Ft_, mVUlow.VF_write, 1); }
|
pass1 { if (!_Ft_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_, mVUlow.VI_read[0]); analyzeReg2(_Ft_, mVUlow.VF_write, 1); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
mVUallocVIa(mVU, eax, _Is_);
|
mVUallocVIa(mVU, eax, _Is_);
|
||||||
xMOVSX(eax, ax);
|
xMOVSX(eax, ax);
|
||||||
xMOVDZX(Ft, eax);
|
xMOVDZX(Ft, eax);
|
||||||
|
@ -732,7 +732,7 @@ mVUop(mVU_MFIR) {
|
||||||
mVUop(mVU_MFP) {
|
mVUop(mVU_MFP) {
|
||||||
pass1 { mVUanalyzeMFP(mVU, _Ft_); }
|
pass1 { mVUanalyzeMFP(mVU, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
getPreg(mVU, Ft);
|
getPreg(mVU, Ft);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
}
|
}
|
||||||
|
@ -742,7 +742,7 @@ mVUop(mVU_MFP) {
|
||||||
mVUop(mVU_MOVE) {
|
mVUop(mVU_MOVE) {
|
||||||
pass1 { mVUanalyzeMOVE(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeMOVE(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
}
|
}
|
||||||
pass3 { mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
|
pass3 { mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
|
||||||
|
@ -751,8 +751,8 @@ mVUop(mVU_MOVE) {
|
||||||
mVUop(mVU_MR32) {
|
mVUop(mVU_MR32) {
|
||||||
pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_);
|
||||||
xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0))));
|
if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0))));
|
||||||
else xPSHUF.D(Ft, Fs, 0x39);
|
else xPSHUF.D(Ft, Fs, 0x39);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
|
@ -764,7 +764,7 @@ mVUop(mVU_MR32) {
|
||||||
mVUop(mVU_MTIR) {
|
mVUop(mVU_MTIR) {
|
||||||
pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeReg5(_Fs_, _Fsf_, mVUlow.VF_read[0]); analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
|
pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeReg5(_Fs_, _Fsf_, mVUlow.VF_read[0]); analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xMOVD(gprT1, Fs);
|
xMOVD(gprT1, Fs);
|
||||||
mVUallocVIb(mVU, gprT1, _It_);
|
mVUallocVIb(mVU, gprT1, _It_);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -868,7 +868,7 @@ mVUop(mVU_LQ) {
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ptr += getVUmem(_Imm11_);
|
ptr += getVUmem(_Imm11_);
|
||||||
xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
mVUloadReg(Ft, ptr, _X_Y_Z_W);
|
mVUloadReg(Ft, ptr, _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
}
|
}
|
||||||
|
@ -887,7 +887,7 @@ mVUop(mVU_LQD) {
|
||||||
ptr += ecx;
|
ptr += ecx;
|
||||||
}
|
}
|
||||||
if (!mVUlow.noWriteVF) {
|
if (!mVUlow.noWriteVF) {
|
||||||
xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
mVUloadReg(Ft, ptr, _X_Y_Z_W);
|
mVUloadReg(Ft, ptr, _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
}
|
}
|
||||||
|
@ -908,7 +908,7 @@ mVUop(mVU_LQI) {
|
||||||
ptr += ecx;
|
ptr += ecx;
|
||||||
}
|
}
|
||||||
if (!mVUlow.noWriteVF) {
|
if (!mVUlow.noWriteVF) {
|
||||||
xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
mVUloadReg(Ft, ptr, _X_Y_Z_W);
|
mVUloadReg(Ft, ptr, _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
}
|
}
|
||||||
|
@ -932,7 +932,7 @@ mVUop(mVU_SQ) {
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ptr += getVUmem(_Imm11_);
|
ptr += getVUmem(_Imm11_);
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
|
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
}
|
}
|
||||||
|
@ -950,7 +950,7 @@ mVUop(mVU_SQD) {
|
||||||
mVUaddrFix(mVU, ecx);
|
mVUaddrFix(mVU, ecx);
|
||||||
ptr += ecx;
|
ptr += ecx;
|
||||||
}
|
}
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
|
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
}
|
}
|
||||||
|
@ -969,7 +969,7 @@ mVUop(mVU_SQI) {
|
||||||
mVUaddrFix(mVU, ecx);
|
mVUaddrFix(mVU, ecx);
|
||||||
ptr += ecx;
|
ptr += ecx;
|
||||||
}
|
}
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
|
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
}
|
}
|
||||||
|
@ -984,7 +984,7 @@ mVUop(mVU_RINIT) {
|
||||||
pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
|
pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (_Fs_ || (_Fsf_ == 3)) {
|
if (_Fs_ || (_Fsf_ == 3)) {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xMOVD(gprT1, Fs);
|
xMOVD(gprT1, Fs);
|
||||||
xAND(gprT1, 0x007fffff);
|
xAND(gprT1, 0x007fffff);
|
||||||
xOR (gprT1, 0x3f800000);
|
xOR (gprT1, 0x3f800000);
|
||||||
|
@ -996,9 +996,9 @@ mVUop(mVU_RINIT) {
|
||||||
pass3 { mVUlog("RINIT R, vf%02d%s", _Fs_, _Fsf_String); }
|
pass3 { mVUlog("RINIT R, vf%02d%s", _Fs_, _Fsf_String); }
|
||||||
}
|
}
|
||||||
|
|
||||||
_f void mVU_RGET_(mV, x32 Rreg) {
|
_f void mVU_RGET_(mV, const x32& Rreg) {
|
||||||
if (!mVUlow.noWriteVF) {
|
if (!mVUlow.noWriteVF) {
|
||||||
xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
xMOVDZX(Ft, Rreg);
|
xMOVDZX(Ft, Rreg);
|
||||||
if (!_XYZW_SS) mVUunpack_xyzw(Ft, Ft, 0);
|
if (!_XYZW_SS) mVUunpack_xyzw(Ft, Ft, 0);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
|
@ -1039,7 +1039,7 @@ mVUop(mVU_RXOR) {
|
||||||
pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
|
pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (_Fs_ || (_Fsf_ == 3)) {
|
if (_Fs_ || (_Fsf_ == 3)) {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
|
||||||
xMOVD(gprT1, Fs);
|
xMOVD(gprT1, Fs);
|
||||||
xAND(gprT1, 0x7fffff);
|
xAND(gprT1, 0x7fffff);
|
||||||
xXOR(ptr32[Rmem], gprT1);
|
xXOR(ptr32[Rmem], gprT1);
|
||||||
|
|
|
@ -93,21 +93,20 @@ typedef xRegister32 x32;
|
||||||
#define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12)))
|
#define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12)))
|
||||||
#define offsetReg ((_X) ? (0) : ((_Y) ? (1) : ((_Z) ? 2: 3)))
|
#define offsetReg ((_X) ? (0) : ((_Y) ? (1) : ((_Z) ? 2: 3)))
|
||||||
|
|
||||||
const xmm
|
#define xmmT1 xmm0 // Used for regAlloc
|
||||||
xmmT1 = xmm(0), // Used for regAlloc
|
#define xmmT2 xmm1 // Used for regAlloc
|
||||||
xmmT2 = xmm(1), // Used for regAlloc
|
#define xmmT3 xmm2 // Used for regAlloc
|
||||||
xmmT3 = xmm(2), // Used for regAlloc
|
#define xmmT4 xmm3 // Used for regAlloc
|
||||||
xmmT4 = xmm(3), // Used for regAlloc
|
#define xmmT5 xmm4 // Used for regAlloc
|
||||||
xmmT5 = xmm(4), // Used for regAlloc
|
#define xmmT6 xmm5 // Used for regAlloc
|
||||||
xmmT6 = xmm(5), // Used for regAlloc
|
#define xmmT7 xmm6 // Used for regAlloc
|
||||||
xmmT7 = xmm(6), // Used for regAlloc
|
#define xmmPQ xmm7 // Holds the Value and Backup Values of P and Q regs
|
||||||
xmmPQ = xmm(7); // Holds the Value and Backup Values of P and Q regs
|
|
||||||
|
|
||||||
const x32
|
#define gprT1 eax // eax - Temp Reg
|
||||||
gprT1 = x32(0), // eax - Temp Reg
|
#define gprT2 ecx // ecx - Temp Reg
|
||||||
gprT2 = x32(1), // ecx - Temp Reg
|
#define gprT3 edx // edx - Temp Reg
|
||||||
gprT3 = x32(2), // edx - Temp Reg
|
|
||||||
gprF[4] = {x32(3), x32(5), x32(6), x32(7)}; // ebx, ebp, esi, edi - Status Flags
|
const x32 gprF[4] = {x32(3), x32(5), x32(6), x32(7)}; // ebx, ebp, esi, edi - Status Flags
|
||||||
|
|
||||||
// Function Params
|
// Function Params
|
||||||
#define mP microVU* mVU, int recPass
|
#define mP microVU* mVU, int recPass
|
||||||
|
@ -297,7 +296,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
void mVUmergeRegs(xmm dest, xmm src, int xyzw, bool modXYZW=false);
|
extern void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW=false);
|
||||||
void mVUsaveReg(xmm reg, xAddressVoid ptr, int xyzw, bool modXYZW);
|
extern void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW);
|
||||||
void mVUloadReg(xmm reg, xAddressVoid ptr, int xyzw);
|
extern void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw);
|
||||||
void mVUloadIreg(xmm reg, int xyzw, VURegs* vuRegs);
|
extern void mVUloadIreg(const xmm& reg, int xyzw, VURegs* vuRegs);
|
|
@ -19,7 +19,7 @@
|
||||||
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
|
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
void mVUunpack_xyzw(xmm dstreg, xmm srcreg, int xyzw)
|
void mVUunpack_xyzw(const xmm& dstreg, const xmm& srcreg, int xyzw)
|
||||||
{
|
{
|
||||||
switch ( xyzw ) {
|
switch ( xyzw ) {
|
||||||
case 0: xPSHUF.D(dstreg, srcreg, 0x00); break; // XXXX
|
case 0: xPSHUF.D(dstreg, srcreg, 0x00); break; // XXXX
|
||||||
|
@ -29,7 +29,7 @@ void mVUunpack_xyzw(xmm dstreg, xmm srcreg, int xyzw)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void mVUloadReg(xmm reg, xAddressVoid ptr, int xyzw)
|
void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw)
|
||||||
{
|
{
|
||||||
switch( xyzw ) {
|
switch( xyzw ) {
|
||||||
case 8: xMOVSSZX(reg, ptr32[ptr]); break; // X
|
case 8: xMOVSSZX(reg, ptr32[ptr]); break; // X
|
||||||
|
@ -40,14 +40,14 @@ void mVUloadReg(xmm reg, xAddressVoid ptr, int xyzw)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void mVUloadIreg(xmm reg, int xyzw, VURegs* vuRegs)
|
void mVUloadIreg(const xmm& reg, int xyzw, VURegs* vuRegs)
|
||||||
{
|
{
|
||||||
xMOVSSZX(reg, ptr32[&vuRegs->VI[REG_I].UL]);
|
xMOVSSZX(reg, ptr32[&vuRegs->VI[REG_I].UL]);
|
||||||
if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
|
if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Modifies the Source Reg!
|
// Modifies the Source Reg!
|
||||||
void mVUsaveReg(xmm reg, xAddressVoid ptr, int xyzw, bool modXYZW)
|
void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
|
||||||
{
|
{
|
||||||
/*xMOVAPS(xmmT2, ptr128[ptr]);
|
/*xMOVAPS(xmmT2, ptr128[ptr]);
|
||||||
if (modXYZW && (xyzw == 8 || xyzw == 4 || xyzw == 2 || xyzw == 1)) {
|
if (modXYZW && (xyzw == 8 || xyzw == 4 || xyzw == 2 || xyzw == 1)) {
|
||||||
|
@ -143,7 +143,7 @@ void mVUsaveReg(xmm reg, xAddressVoid ptr, int xyzw, bool modXYZW)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Modifies the Source Reg! (ToDo: Optimize modXYZW = 1 cases)
|
// Modifies the Source Reg! (ToDo: Optimize modXYZW = 1 cases)
|
||||||
void mVUmergeRegs(xmm dest, xmm src, int xyzw, bool modXYZW)
|
void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
|
||||||
{
|
{
|
||||||
xyzw &= 0xf;
|
xyzw &= 0xf;
|
||||||
if ( (dest != src) && (xyzw != 0) ) {
|
if ( (dest != src) && (xyzw != 0) ) {
|
||||||
|
@ -214,7 +214,7 @@ void mVUmergeRegs(xmm dest, xmm src, int xyzw, bool modXYZW)
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
// Transforms the Address in gprReg to valid VU0/VU1 Address
|
// Transforms the Address in gprReg to valid VU0/VU1 Address
|
||||||
_f void mVUaddrFix(mV, x32 gprReg)
|
_f void mVUaddrFix(mV, const x32& gprReg)
|
||||||
{
|
{
|
||||||
if (isVU1) {
|
if (isVU1) {
|
||||||
xAND(gprReg, 0x3ff); // wrap around
|
xAND(gprReg, 0x3ff); // wrap around
|
||||||
|
@ -259,10 +259,10 @@ static const __aligned16 SSEMaskPair MIN_MAX =
|
||||||
|
|
||||||
|
|
||||||
// Warning: Modifies t1 and t2
|
// Warning: Modifies t1 and t2
|
||||||
void MIN_MAX_PS(microVU* mVU, xmm to, xmm from, xmm t1in, xmm t2in, bool min)
|
void MIN_MAX_PS(microVU* mVU, const xmm& to, const xmm& from, const xmm& t1in, const xmm& t2in, bool min)
|
||||||
{
|
{
|
||||||
xmm t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
|
const xmm& t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
|
||||||
xmm t2 = t2in.IsEmpty() ? mVU->regAlloc->allocReg() : t2in;
|
const xmm& t2 = t2in.IsEmpty() ? mVU->regAlloc->allocReg() : t2in;
|
||||||
// ZW
|
// ZW
|
||||||
xPSHUF.D(t1, to, 0xfa);
|
xPSHUF.D(t1, to, 0xfa);
|
||||||
xPAND (t1, ptr128[MIN_MAX.mask1]);
|
xPAND (t1, ptr128[MIN_MAX.mask1]);
|
||||||
|
@ -289,9 +289,9 @@ void MIN_MAX_PS(microVU* mVU, xmm to, xmm from, xmm t1in, xmm t2in, bool min)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warning: Modifies to's upper 3 vectors, and t1
|
// Warning: Modifies to's upper 3 vectors, and t1
|
||||||
void MIN_MAX_SS(mV, xmm to, xmm from, xmm t1in, bool min)
|
void MIN_MAX_SS(mV, const xmm& to, const xmm& from, const xmm& t1in, bool min)
|
||||||
{
|
{
|
||||||
xmm t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
|
const xmm& t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
|
||||||
xSHUF.PS(to, from, 0);
|
xSHUF.PS(to, from, 0);
|
||||||
xPAND (to, ptr128[MIN_MAX.mask1]);
|
xPAND (to, ptr128[MIN_MAX.mask1]);
|
||||||
xPOR (to, ptr128[MIN_MAX.mask2]);
|
xPOR (to, ptr128[MIN_MAX.mask2]);
|
||||||
|
@ -302,10 +302,10 @@ void MIN_MAX_SS(mV, xmm to, xmm from, xmm t1in, bool min)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warning: Modifies all vectors in 'to' and 'from', and Modifies xmmT1 and xmmT2
|
// Warning: Modifies all vectors in 'to' and 'from', and Modifies xmmT1 and xmmT2
|
||||||
void ADD_SS(microVU* mVU, xmm to, xmm from, xmm t1in, xmm t2in)
|
void ADD_SS(microVU* mVU, const xmm& to, const xmm& from, const xmm& t1in, const xmm& t2in)
|
||||||
{
|
{
|
||||||
xmm t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
|
const xmm& t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
|
||||||
xmm t2 = t2in.IsEmpty() ? mVU->regAlloc->allocReg() : t2in;
|
const xmm& t2 = t2in.IsEmpty() ? mVU->regAlloc->allocReg() : t2in;
|
||||||
|
|
||||||
xMOVAPS(t1, to);
|
xMOVAPS(t1, to);
|
||||||
xMOVAPS(t2, from);
|
xMOVAPS(t2, from);
|
||||||
|
@ -379,66 +379,66 @@ void ADD_SS(microVU* mVU, xmm to, xmm from, xmm t1in, xmm t2in)
|
||||||
mVUclamp4(to, t1, (isPS)?0xf:0x8); \
|
mVUclamp4(to, t1, (isPS)?0xf:0x8); \
|
||||||
}
|
}
|
||||||
|
|
||||||
void SSE_MAXPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_MAXPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
if (CHECK_VU_MINMAXHACK) { xMAX.PS(to, from); }
|
if (CHECK_VU_MINMAXHACK) { xMAX.PS(to, from); }
|
||||||
else { MIN_MAX_PS(mVU, to, from, t1, t2, 0); }
|
else { MIN_MAX_PS(mVU, to, from, t1, t2, 0); }
|
||||||
}
|
}
|
||||||
void SSE_MINPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_MINPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
if (CHECK_VU_MINMAXHACK) { xMIN.PS(to, from); }
|
if (CHECK_VU_MINMAXHACK) { xMIN.PS(to, from); }
|
||||||
else { MIN_MAX_PS(mVU, to, from, t1, t2, 1); }
|
else { MIN_MAX_PS(mVU, to, from, t1, t2, 1); }
|
||||||
}
|
}
|
||||||
void SSE_MAXSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_MAXSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
if (CHECK_VU_MINMAXHACK) { xMAX.SS(to, from); }
|
if (CHECK_VU_MINMAXHACK) { xMAX.SS(to, from); }
|
||||||
else { MIN_MAX_SS(mVU, to, from, t1, 0); }
|
else { MIN_MAX_SS(mVU, to, from, t1, 0); }
|
||||||
}
|
}
|
||||||
void SSE_MINSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_MINSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
if (CHECK_VU_MINMAXHACK) { xMIN.SS(to, from); }
|
if (CHECK_VU_MINMAXHACK) { xMIN.SS(to, from); }
|
||||||
else { MIN_MAX_SS(mVU, to, from, t1, 1); }
|
else { MIN_MAX_SS(mVU, to, from, t1, 1); }
|
||||||
}
|
}
|
||||||
void SSE_ADD2SS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_ADD2SS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
if (!CHECK_VUADDSUBHACK) { clampOp(xADD.SS, 0); }
|
if (!CHECK_VUADDSUBHACK) { clampOp(xADD.SS, 0); }
|
||||||
else { ADD_SS(mVU, to, from, t1, t2); }
|
else { ADD_SS(mVU, to, from, t1, t2); }
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: why do we need two identical definitions with different names?
|
// FIXME: why do we need two identical definitions with different names?
|
||||||
void SSE_ADD2PS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_ADD2PS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xADD.PS, 1);
|
clampOp(xADD.PS, 1);
|
||||||
}
|
}
|
||||||
void SSE_ADDPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_ADDPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xADD.PS, 1);
|
clampOp(xADD.PS, 1);
|
||||||
}
|
}
|
||||||
void SSE_ADDSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_ADDSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xADD.SS, 0);
|
clampOp(xADD.SS, 0);
|
||||||
}
|
}
|
||||||
void SSE_SUBPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_SUBPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xSUB.PS, 1);
|
clampOp(xSUB.PS, 1);
|
||||||
}
|
}
|
||||||
void SSE_SUBSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_SUBSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xSUB.SS, 0);
|
clampOp(xSUB.SS, 0);
|
||||||
}
|
}
|
||||||
void SSE_MULPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_MULPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xMUL.PS, 1);
|
clampOp(xMUL.PS, 1);
|
||||||
}
|
}
|
||||||
void SSE_MULSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_MULSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xMUL.SS, 0);
|
clampOp(xMUL.SS, 0);
|
||||||
}
|
}
|
||||||
void SSE_DIVPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_DIVPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xDIV.PS, 1);
|
clampOp(xDIV.PS, 1);
|
||||||
}
|
}
|
||||||
void SSE_DIVSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
|
void SSE_DIVSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
|
||||||
{
|
{
|
||||||
clampOp(xDIV.SS, 0);
|
clampOp(xDIV.SS, 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,28 +24,32 @@
|
||||||
#define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { xSHL(gprReg, ADD_XYZW); } }
|
#define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { xSHL(gprReg, ADD_XYZW); } }
|
||||||
|
|
||||||
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
|
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
|
||||||
static void mVUupdateFlags(mV, xmm reg, xmm regT1 = xEmptyReg, xmm regT2 = xEmptyReg, bool modXYZW = 1) {
|
static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, const xmm& regT2in = xEmptyReg, bool modXYZW = 1) {
|
||||||
x32 mReg = gprT1, sReg = getFlagReg(sFLAG.write);
|
const x32& mReg = gprT1;
|
||||||
bool regT1b = false, regT2b = false;
|
const x32& sReg = getFlagReg(sFLAG.write);
|
||||||
|
bool regT1b = regT1in.IsEmpty(), regT2b = false;
|
||||||
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
|
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
|
||||||
|
|
||||||
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
|
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
|
||||||
if (!sFLAG.doFlag && !mFLAG.doFlag) { return; }
|
if (!sFLAG.doFlag && !mFLAG.doFlag) { return; }
|
||||||
|
|
||||||
if (regT1.IsEmpty()) {
|
const xmm& regT1 = regT1b ? mVU->regAlloc->allocReg() : regT1in;
|
||||||
regT1 = mVU->regAlloc->allocReg();
|
|
||||||
regT1b = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW))) {
|
xmm regT2 = reg;
|
||||||
if (regT2.IsEmpty()) {
|
if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW)))
|
||||||
|
{
|
||||||
|
regT2 = regT2in;
|
||||||
|
if (regT2.IsEmpty())
|
||||||
|
{
|
||||||
regT2 = mVU->regAlloc->allocReg();
|
regT2 = mVU->regAlloc->allocReg();
|
||||||
regT2b = true;
|
regT2b = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
xPSHUF.D(regT2, reg, 0x1B); // Flip wzyx to xyzw
|
xPSHUF.D(regT2, reg, 0x1B); // Flip wzyx to xyzw
|
||||||
}
|
}
|
||||||
else regT2 = reg;
|
else regT2 = reg;
|
||||||
|
|
||||||
|
|
||||||
if (sFLAG.doFlag) {
|
if (sFLAG.doFlag) {
|
||||||
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
|
mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
|
||||||
if (sFLAG.doNonSticky) xAND(sReg, 0xfffc00ff); // Clear O,U,S,Z flags
|
if (sFLAG.doNonSticky) xAND(sReg, 0xfffc00ff); // Clear O,U,S,Z flags
|
||||||
|
@ -85,7 +89,7 @@ static void mVUupdateFlags(mV, xmm reg, xmm regT1 = xEmptyReg, xmm regT2 = xEmpt
|
||||||
// Helper Macros and Functions
|
// Helper Macros and Functions
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
static void (*SSE_PS[]) (microVU*, xmm, xmm, xmm, xmm) = {
|
static void (*const SSE_PS[]) (microVU*, const xmm&, const xmm&, const xmm&, const xmm&) = {
|
||||||
SSE_ADDPS, // 0
|
SSE_ADDPS, // 0
|
||||||
SSE_SUBPS, // 1
|
SSE_SUBPS, // 1
|
||||||
SSE_MULPS, // 2
|
SSE_MULPS, // 2
|
||||||
|
@ -94,7 +98,7 @@ static void (*SSE_PS[]) (microVU*, xmm, xmm, xmm, xmm) = {
|
||||||
SSE_ADD2PS // 5
|
SSE_ADD2PS // 5
|
||||||
};
|
};
|
||||||
|
|
||||||
static void (*SSE_SS[]) (microVU*, xmm, xmm, xmm, xmm) = {
|
static void (*const SSE_SS[]) (microVU*, const xmm&, const xmm&, const xmm&, const xmm&) = {
|
||||||
SSE_ADDSS, // 0
|
SSE_ADDSS, // 0
|
||||||
SSE_SUBSS, // 1
|
SSE_SUBSS, // 1
|
||||||
SSE_MULSS, // 2
|
SSE_MULSS, // 2
|
||||||
|
@ -131,7 +135,7 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) {
|
||||||
bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) {
|
bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) {
|
||||||
opCase1 {
|
opCase1 {
|
||||||
if ((opType == 1) && (_Ft_ == _Fs_)) {
|
if ((opType == 1) && (_Ft_ == _Fs_)) {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(-1, isACC ? 32 : _Fd_, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(-1, isACC ? 32 : _Fd_, _X_Y_Z_W);
|
||||||
xPXOR(Fs, Fs); // Set to Positive 0
|
xPXOR(Fs, Fs); // Set to Positive 0
|
||||||
mVUupdateFlags(mVU, Fs);
|
mVUupdateFlags(mVU, Fs);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -225,7 +229,7 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op
|
||||||
if (_XYZW_SS && _X_Y_Z_W != 8) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
|
if (_XYZW_SS && _X_Y_Z_W != 8) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
xmm tempACC = mVU->regAlloc->allocReg();
|
const xmm& tempACC = mVU->regAlloc->allocReg();
|
||||||
xMOVAPS(tempACC, ACC);
|
xMOVAPS(tempACC, ACC);
|
||||||
SSE_PS[opType](mVU, tempACC, Fs, tempFt, xEmptyReg);
|
SSE_PS[opType](mVU, tempACC, Fs, tempFt, xEmptyReg);
|
||||||
mVUmergeRegs(ACC, tempACC, _X_Y_Z_W);
|
mVUmergeRegs(ACC, tempACC, _X_Y_Z_W);
|
||||||
|
@ -304,7 +308,7 @@ mVUop(mVU_ABS) {
|
||||||
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (!_Ft_) return;
|
if (!_Ft_) return;
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
||||||
xAND.PS(Fs, ptr128[mVUglob.absclip]);
|
xAND.PS(Fs, ptr128[mVUglob.absclip]);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
}
|
}
|
||||||
|
@ -315,8 +319,8 @@ mVUop(mVU_ABS) {
|
||||||
mVUop(mVU_OPMULA) {
|
mVUop(mVU_OPMULA) {
|
||||||
pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W);
|
const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W);
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W);
|
||||||
|
|
||||||
xPSHUF.D(Fs, Fs, 0xC9); // WXZY
|
xPSHUF.D(Fs, Fs, 0xC9); // WXZY
|
||||||
xPSHUF.D(Ft, Ft, 0xD2); // WYXZ
|
xPSHUF.D(Ft, Ft, 0xD2); // WYXZ
|
||||||
|
@ -333,9 +337,9 @@ mVUop(mVU_OPMULA) {
|
||||||
mVUop(mVU_OPMSUB) {
|
mVUop(mVU_OPMSUB) {
|
||||||
pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf);
|
const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf);
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
xmm ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
|
const xmm& ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
|
||||||
|
|
||||||
xPSHUF.D(Fs, Fs, 0xC9); // WXZY
|
xPSHUF.D(Fs, Fs, 0xC9); // WXZY
|
||||||
xPSHUF.D(Ft, Ft, 0xD2); // WYXZ
|
xPSHUF.D(Ft, Ft, 0xD2); // WYXZ
|
||||||
|
@ -356,9 +360,9 @@ static void mVU_FTOIx(mP, const float* addr, const char* opName) {
|
||||||
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (!_Ft_) return;
|
if (!_Ft_) return;
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
||||||
xmm t1 = mVU->regAlloc->allocReg();
|
const xmm& t1 = mVU->regAlloc->allocReg();
|
||||||
xmm t2 = mVU->regAlloc->allocReg();
|
const xmm& t2 = mVU->regAlloc->allocReg();
|
||||||
|
|
||||||
// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
|
// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
|
||||||
xMOVAPS(t1, Fs);
|
xMOVAPS(t1, Fs);
|
||||||
|
@ -383,7 +387,7 @@ static void mVU_ITOFx(mP, const float* addr, const char* opName) {
|
||||||
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
if (!_Ft_) return;
|
if (!_Ft_) return;
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
||||||
|
|
||||||
xCVTDQ2PS(Fs, Fs);
|
xCVTDQ2PS(Fs, Fs);
|
||||||
if (addr) { xMUL.PS(Fs, ptr128[addr]); }
|
if (addr) { xMUL.PS(Fs, ptr128[addr]); }
|
||||||
|
@ -398,9 +402,9 @@ static void mVU_ITOFx(mP, const float* addr, const char* opName) {
|
||||||
mVUop(mVU_CLIP) {
|
mVUop(mVU_CLIP) {
|
||||||
pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1);
|
const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1);
|
||||||
xmm t1 = mVU->regAlloc->allocReg();
|
const xmm& t1 = mVU->regAlloc->allocReg();
|
||||||
|
|
||||||
mVUunpack_xyzw(Ft, Ft, 0);
|
mVUunpack_xyzw(Ft, Ft, 0);
|
||||||
mVUallocCFLAGa(mVU, gprT1, cFLAG.lastWrite);
|
mVUallocCFLAGa(mVU, gprT1, cFLAG.lastWrite);
|
||||||
|
|
|
@ -33,7 +33,7 @@ typedef void (__fastcall *nVifrecCall)(uptr dest, uptr src);
|
||||||
#include "newVif_BlockBuffer.h"
|
#include "newVif_BlockBuffer.h"
|
||||||
#include "newVif_HashBucket.h"
|
#include "newVif_HashBucket.h"
|
||||||
|
|
||||||
extern void mVUmergeRegs(xRegisterSSE dest, xRegisterSSE src, int xyzw, bool modXYZW = 0);
|
extern void mVUmergeRegs(const xRegisterSSE& dest, const xRegisterSSE& src, int xyzw, bool modXYZW = 0);
|
||||||
extern void _nVifUnpack (int idx, u8 *data, u32 size, bool isFill);
|
extern void _nVifUnpack (int idx, u8 *data, u32 size, bool isFill);
|
||||||
extern void dVifUnpack (int idx, u8 *data, u32 size, bool isFill);
|
extern void dVifUnpack (int idx, u8 *data, u32 size, bool isFill);
|
||||||
extern void dVifReset (int idx);
|
extern void dVifReset (int idx);
|
||||||
|
|
Loading…
Reference in New Issue