// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
// SPDX-License-Identifier: LGPL-3.0+

#pragma once

//------------------------------------------------------------------
// Micro VU Micromode Lower instructions
//------------------------------------------------------------------

//------------------------------------------------------------------
// DIV/SQRT/RSQRT
//------------------------------------------------------------------

// Test if Vector is +/- Zero
static __fi void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprTemp)
{
	xXOR.PS(xmmTemp, xmmTemp);
	xCMPEQ.SS(xmmTemp, xmmReg);
	xPTEST(xmmTemp, xmmTemp);
}

// Test if Vector is Negative (Sets Flags and Makes Positive)
static __fi void testNeg(mV, const xmm& xmmReg, const x32& gprTemp)
{
	xMOVMSKPS(gprTemp, xmmReg);
	xTEST(gprTemp, 1);
	xForwardJZ8 skip;
	xMOV(ptr32[&mVU.divFlag], divI);
	xAND.PS(xmmReg, ptr128[mVUglob.absclip]);
	skip.SetTarget();
}
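
// Rough C sketch of the DIV path emitted below (this mirrors the jump
// structure of mVU_DIV, offered as a reading aid, not a drop-in replacement):
//   if (Ft == 0.0f) {
//       if (Fs == 0.0f) divFlag = divI;   // 0/0  -> invalid flag
//       else            divFlag = divD;   // x/0  -> divide-by-zero flag
//       Q = sign(Fs ^ Ft) | fmax;         // result saturates to +/- max
//   } else {
//       divFlag = 0;                      // clear I/D flags
//       Q = clamp(Fs / Ft);
//   }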

mVUop(mVU_DIV)
{
	pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); }
	pass2
	{
		xmm Ft;
		if (_Ftf_) Ft = mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
		else       Ft = mVU.regAlloc->allocReg(_Ft_);
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& t1 = mVU.regAlloc->allocReg();

		testZero(Ft, t1, gprT1); // Test if Ft is zero
		xForwardJZ8 cjmp; // Skip if not zero

		testZero(Fs, t1, gprT1); // Test if Fs is zero
		xForwardJZ8 ajmp;
		xMOV(ptr32[&mVU.divFlag], divI); // Set invalid flag (0/0)
		xForwardJump8 bjmp;
		ajmp.SetTarget();
		xMOV(ptr32[&mVU.divFlag], divD); // Zero divide (only when not 0/0)
		bjmp.SetTarget();

		xXOR.PS(Fs, Ft);
		xAND.PS(Fs, ptr128[mVUglob.signbit]);
		xOR.PS (Fs, ptr128[mVUglob.maxvals]); // If division by zero, then xmmFs = +/- fmax

		xForwardJump8 djmp;
		cjmp.SetTarget();
		xMOV(ptr32[&mVU.divFlag], 0); // Clear I/D flags
		SSE_DIVSS(mVU, Fs, Ft);
		mVUclamp1(mVU, Fs, t1, 8, true);
		djmp.SetTarget();

		writeQreg(Fs, mVUinfo.writeQ);

		if (mVU.cop2)
		{
			xAND(gprF0, ~0xc0000);
			xOR(gprF0, ptr32[&mVU.divFlag]);
		}

		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(Ft);
		mVU.regAlloc->clearNeeded(t1);
		mVU.profiler.EmitOp(opDIV);
	}
	pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
}

mVUop(mVU_SQRT)
{
	pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); }
	pass2
	{
		const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));

		xMOV(ptr32[&mVU.divFlag], 0); // Clear I/D flags
		testNeg(mVU, Ft, gprT1); // Check for negative sqrt

		if (CHECK_VU_OVERFLOW(mVU.index)) // Clamp infinities (only need to do positive clamp since xmmFt is positive)
			xMIN.SS(Ft, ptr32[mVUglob.maxvals]);
		xSQRT.SS(Ft, Ft);
		writeQreg(Ft, mVUinfo.writeQ);

		if (mVU.cop2)
		{
			xAND(gprF0, ~0xc0000);
			xOR(gprF0, ptr32[&mVU.divFlag]);
		}

		mVU.regAlloc->clearNeeded(Ft);
		mVU.profiler.EmitOp(opSQRT);
	}
	pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); }
}

mVUop(mVU_RSQRT)
{
	pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); }
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& Ft = mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
		const xmm& t1 = mVU.regAlloc->allocReg();

		xMOV(ptr32[&mVU.divFlag], 0); // Clear I/D flags
		testNeg(mVU, Ft, gprT1); // Check for negative sqrt

		xSQRT.SS(Ft, Ft);
		testZero(Ft, t1, gprT1); // Test if Ft is zero
		xForwardJZ8 ajmp; // Skip if not zero

		testZero(Fs, t1, gprT1); // Test if Fs is zero
		xForwardJZ8 bjmp; // Skip if none are
		xMOV(ptr32[&mVU.divFlag], divI); // Set invalid flag (0/0)
		xForwardJump8 cjmp;
		bjmp.SetTarget();
		xMOV(ptr32[&mVU.divFlag], divD); // Zero divide flag (only when not 0/0)
		cjmp.SetTarget();

		xAND.PS(Fs, ptr128[mVUglob.signbit]);
		xOR.PS(Fs, ptr128[mVUglob.maxvals]); // xmmFs = +/-Max

		xForwardJump8 djmp;
		ajmp.SetTarget();
		SSE_DIVSS(mVU, Fs, Ft);
		mVUclamp1(mVU, Fs, t1, 8, true);
		djmp.SetTarget();

		writeQreg(Fs, mVUinfo.writeQ);

		if (mVU.cop2)
		{
			xAND(gprF0, ~0xc0000);
			xOR(gprF0, ptr32[&mVU.divFlag]);
		}

		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(Ft);
		mVU.regAlloc->clearNeeded(t1);
		mVU.profiler.EmitOp(opRSQRT);
	}
	pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
}

//------------------------------------------------------------------
// EATAN/EEXP/ELENG/ERCPR/ERLENG/ERSADD/ERSQRT/ESADD/ESIN/ESQRT/ESUM
//------------------------------------------------------------------

#define EATANhelper(addr) \
	{ \
		SSE_MULSS(mVU, t2, Fs); \
		SSE_MULSS(mVU, t2, Fs); \
		xMOVAPS(t1, t2); \
		xMUL.SS(t1, ptr32[addr]); \
		SSE_ADDSS(mVU, PQ, t1); \
	}
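
// What mVU_EATAN_ below evaluates (read off the emitted ops; a sketch, not a
// spec): with t = Fs on entry, each EATANhelper step raises the running power
// by t^2 and accumulates one series term, so the result is
//   PQ = T1*t + T2*t^3 + T3*t^5 + ... + T8*t^15 + pi/4
// where T1..T8 and Pi4 are the arctangent series constants in mVUglob.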

// ToDo: Can this be optimized further? (takes ~115 cycles + mem access time on a C2D)
static __fi void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, const xmm& t2)
{
	xMOVSS(PQ, Fs);
	xMUL.SS(PQ, ptr32[mVUglob.T1]);
	xMOVAPS(t2, Fs);
	EATANhelper(mVUglob.T2);
	EATANhelper(mVUglob.T3);
	EATANhelper(mVUglob.T4);
	EATANhelper(mVUglob.T5);
	EATANhelper(mVUglob.T6);
	EATANhelper(mVUglob.T7);
	EATANhelper(mVUglob.T8);
	xADD.SS(PQ, ptr32[mVUglob.Pi4]);
	xPSHUF.D(PQ, PQ, mVUinfo.writeP ? 0x27 : 0xC6);
}
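
// Note on the EATAN family: the series above converges on [-1, 1], so the
// ops below first map their input into that range. EATAN appears to use the
// identity atan(x) = pi/4 + atan((x - 1) / (x + 1)), while EATANxy/EATANxz
// feed in (y - x) / (y + x) (resp. z in place of y) for atan2-style results;
// the pi/4 folded into mVU_EATAN_ accounts for the shift. (Derived from the
// emitted code, stated as an aid rather than hardware documentation.)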

mVUop(mVU_EATAN)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 54);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& t1 = mVU.regAlloc->allocReg();
		const xmm& t2 = mVU.regAlloc->allocReg();
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xMOVSS (xmmPQ, Fs);
		xSUB.SS(Fs, ptr32[mVUglob.one]);
		xADD.SS(xmmPQ, ptr32[mVUglob.one]);
		SSE_DIVSS(mVU, Fs, xmmPQ);
		mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(t1);
		mVU.regAlloc->clearNeeded(t2);
		mVU.profiler.EmitOp(opEATAN);
	}
	pass3 { mVUlog("EATAN P"); }
}

mVUop(mVU_EATANxy)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU2(mVU, _Fs_, 54);
	}
	pass2
	{
		const xmm& t1 = mVU.regAlloc->allocReg(_Fs_, 0, 0xf);
		const xmm& Fs = mVU.regAlloc->allocReg();
		const xmm& t2 = mVU.regAlloc->allocReg();
		xPSHUF.D(Fs, t1, 0x01);
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xMOVSS (xmmPQ, Fs);
		SSE_SUBSS (mVU, Fs, t1); // y-x, not y-1? ><
		SSE_ADDSS (mVU, t1, xmmPQ);
		SSE_DIVSS (mVU, Fs, t1);
		mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(t1);
		mVU.regAlloc->clearNeeded(t2);
		mVU.profiler.EmitOp(opEATANxy);
	}
	pass3 { mVUlog("EATANxy P"); }
}

mVUop(mVU_EATANxz)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU2(mVU, _Fs_, 54);
	}
	pass2
	{
		const xmm& t1 = mVU.regAlloc->allocReg(_Fs_, 0, 0xf);
		const xmm& Fs = mVU.regAlloc->allocReg();
		const xmm& t2 = mVU.regAlloc->allocReg();
		xPSHUF.D(Fs, t1, 0x02);
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xMOVSS (xmmPQ, Fs);
		SSE_SUBSS (mVU, Fs, t1);
		SSE_ADDSS (mVU, t1, xmmPQ);
		SSE_DIVSS (mVU, Fs, t1);
		mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(t1);
		mVU.regAlloc->clearNeeded(t2);
		mVU.profiler.EmitOp(opEATANxz);
	}
	pass3 { mVUlog("EATANxz P"); }
}

#define eexpHelper(addr) \
	{ \
		SSE_MULSS(mVU, t2, Fs); \
		xMOVAPS(t1, t2); \
		xMUL.SS(t1, ptr32[addr]); \
		SSE_ADDSS(mVU, xmmPQ, t1); \
	}

mVUop(mVU_EEXP)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& t1 = mVU.regAlloc->allocReg();
		const xmm& t2 = mVU.regAlloc->allocReg();
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xMOVSS (xmmPQ, Fs);
		xMUL.SS (xmmPQ, ptr32[mVUglob.E1]);
		xADD.SS (xmmPQ, ptr32[mVUglob.one]);
		xMOVAPS(t1, Fs);
		SSE_MULSS(mVU, t1, Fs);
		xMOVAPS(t2, t1);
		xMUL.SS(t1, ptr32[mVUglob.E2]);
		SSE_ADDSS(mVU, xmmPQ, t1);
		eexpHelper(&mVUglob.E3);
		eexpHelper(&mVUglob.E4);
		eexpHelper(&mVUglob.E5);
		SSE_MULSS(mVU, t2, Fs);
		xMUL.SS(t2, ptr32[mVUglob.E6]);
		SSE_ADDSS(mVU, xmmPQ, t2);
		SSE_MULSS(mVU, xmmPQ, xmmPQ);
		SSE_MULSS(mVU, xmmPQ, xmmPQ);
		xMOVSSZX(t2, ptr32[mVUglob.one]);
		SSE_DIVSS(mVU, t2, xmmPQ);
		xMOVSS(xmmPQ, t2);
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(t1);
		mVU.regAlloc->clearNeeded(t2);
		mVU.profiler.EmitOp(opEEXP);
	}
	pass3 { mVUlog("EEXP P"); }
}
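
// Reading mVU_EEXP above: it evaluates p = 1 + E1*x + E2*x^2 + ... + E6*x^6,
// squares it twice (giving p^4), and returns 1/p^4 -- which appears to be a
// polynomial approximation of e^-x (E1..E6 live in mVUglob). A rough scalar
// equivalent, offered purely as an illustration:
//   float p = 1.0f + E1*x + E2*x*x + E3*x*x*x
//           + E4*x*x*x*x + E5*x*x*x*x*x + E6*x*x*x*x*x*x;
//   p = p * p;
//   p = p * p;
//   P = 1.0f / p;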

// sumXYZ(): PQ.x = x^2 + y^2 + z^2
static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs)
{
	xDP.PS(Fs, Fs, 0x71);
	xMOVSS(PQ, Fs);
}
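
// The 0x71 immediate to DPPS above decodes as: multiply lanes x, y, z only
// (mask bits 4-6 = 0111) and write the sum into lane 0 only (low nibble =
// 0001). In scalar terms: Fs.x = Fs.x*Fs.x + Fs.y*Fs.y + Fs.z*Fs.z.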

mVUop(mVU_ELENG)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU2(mVU, _Fs_, 18);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		mVU_sumXYZ(mVU, xmmPQ, Fs);
		xSQRT.SS (xmmPQ, xmmPQ);
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opELENG);
	}
	pass3 { mVUlog("ELENG P"); }
}

mVUop(mVU_ERCPR)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xMOVSS (xmmPQ, Fs);
		xMOVSSZX (Fs, ptr32[mVUglob.one]);
		SSE_DIVSS(mVU, Fs, xmmPQ);
		xMOVSS (xmmPQ, Fs);
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opERCPR);
	}
	pass3 { mVUlog("ERCPR P"); }
}

mVUop(mVU_ERLENG)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU2(mVU, _Fs_, 24);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		mVU_sumXYZ(mVU, xmmPQ, Fs);
		xSQRT.SS (xmmPQ, xmmPQ);
		xMOVSSZX (Fs, ptr32[mVUglob.one]);
		SSE_DIVSS (mVU, Fs, xmmPQ);
		xMOVSS (xmmPQ, Fs);
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opERLENG);
	}
	pass3 { mVUlog("ERLENG P"); }
}

mVUop(mVU_ERSADD)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU2(mVU, _Fs_, 18);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		mVU_sumXYZ(mVU, xmmPQ, Fs);
		xMOVSSZX (Fs, ptr32[mVUglob.one]);
		SSE_DIVSS (mVU, Fs, xmmPQ);
		xMOVSS (xmmPQ, Fs);
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opERSADD);
	}
	pass3 { mVUlog("ERSADD P"); }
}

mVUop(mVU_ERSQRT)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xAND.PS (Fs, ptr128[mVUglob.absclip]);
		xSQRT.SS (xmmPQ, Fs);
		xMOVSSZX (Fs, ptr32[mVUglob.one]);
		SSE_DIVSS(mVU, Fs, xmmPQ);
		xMOVSS (xmmPQ, Fs);
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opERSQRT);
	}
	pass3 { mVUlog("ERSQRT P"); }
}

mVUop(mVU_ESADD)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU2(mVU, _Fs_, 11);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		mVU_sumXYZ(mVU, xmmPQ, Fs);
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opESADD);
	}
	pass3 { mVUlog("ESADD P"); }
}
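
// ESIN below evaluates the odd Maclaurin series for sine, truncated at x^9:
//   sin(x) ~= x + S2*x^3 + S3*x^5 + S4*x^7 + S5*x^9
// with S2..S5 in mVUglob presumably holding -1/3!, 1/5!, -1/7!, 1/9!
// (signs folded into the constants; inferred from the term-by-term comments).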

mVUop(mVU_ESIN)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 29);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& t1 = mVU.regAlloc->allocReg();
		const xmm& t2 = mVU.regAlloc->allocReg();
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xMOVSS (xmmPQ, Fs);              // pq = X
		SSE_MULSS(mVU, Fs, Fs);          // fs = X^2
		xMOVAPS (t1, Fs);                // t1 = X^2
		SSE_MULSS(mVU, Fs, xmmPQ);       // fs = X^3
		xMOVAPS (t2, Fs);                // t2 = X^3
		xMUL.SS (Fs, ptr32[mVUglob.S2]); // fs = s2 * X^3
		SSE_ADDSS(mVU, xmmPQ, Fs);       // pq = X + s2 * X^3

		SSE_MULSS(mVU, t2, t1);          // t2 = X^3 * X^2
		xMOVAPS (Fs, t2);                // fs = X^5
		xMUL.SS (Fs, ptr32[mVUglob.S3]); // fs = s3 * X^5
		SSE_ADDSS(mVU, xmmPQ, Fs);       // pq = X + s2 * X^3 + s3 * X^5

		SSE_MULSS(mVU, t2, t1);          // t2 = X^5 * X^2
		xMOVAPS (Fs, t2);                // fs = X^7
		xMUL.SS (Fs, ptr32[mVUglob.S4]); // fs = s4 * X^7
		SSE_ADDSS(mVU, xmmPQ, Fs);       // pq = X + s2 * X^3 + s3 * X^5 + s4 * X^7

		SSE_MULSS(mVU, t2, t1);          // t2 = X^7 * X^2
		xMUL.SS (t2, ptr32[mVUglob.S5]); // t2 = s5 * X^9
		SSE_ADDSS(mVU, xmmPQ, t2);       // pq = X + s2 * X^3 + s3 * X^5 + s4 * X^7 + s5 * X^9
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(t1);
		mVU.regAlloc->clearNeeded(t2);
		mVU.profiler.EmitOp(opESIN);
	}
	pass3 { mVUlog("ESIN P"); }
}

mVUop(mVU_ESQRT)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xAND.PS (Fs, ptr128[mVUglob.absclip]);
		xSQRT.SS(xmmPQ, Fs);
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opESQRT);
	}
	pass3 { mVUlog("ESQRT P"); }
}

mVUop(mVU_ESUM)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeEFU2(mVU, _Fs_, 12);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		const xmm& t1 = mVU.regAlloc->allocReg();
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xPSHUF.D(t1, Fs, 0x1b);
		SSE_ADDPS(mVU, Fs, t1);
		xPSHUF.D(t1, Fs, 0x01);
		SSE_ADDSS(mVU, Fs, t1);
		xMOVSS(xmmPQ, Fs);
		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(t1);
		mVU.profiler.EmitOp(opESUM);
	}
	pass3 { mVUlog("ESUM P"); }
}

//------------------------------------------------------------------
// FCAND/FCEQ/FCGET/FCOR/FCSET
//------------------------------------------------------------------
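
// The clip-flag ops below share a family of branchless bit tricks on the low
// 24 bits (worked through here for reference):
//   FCAND: r = (flag & imm) + 0xffffff; r >>= 24  -> 1 iff (flag & imm) != 0,
//          since any nonzero 24-bit value carries into bit 24.
//   FCEQ:  r = (flag ^ imm) - 1;        r >>= 31  -> 1 iff flag == imm,
//          since only 0 - 1 borrows into the sign bit.
//   FCOR:  r = (flag | imm) + 1;        r >>= 24  -> 1 iff all 24 bits set.
// FMEQ further down uses the same xor/sub/shift pattern on the MAC flags.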

mVUop(mVU_FCAND)
{
	pass1 { mVUanalyzeCflag(mVU, 1); }
	pass2
	{
		const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
		mVUallocCFLAGa(mVU, dst, cFLAG.read);
		xAND(dst, _Imm24_);
		xADD(dst, 0xffffff);
		xSHR(dst, 24);
		mVU.regAlloc->clearNeeded(dst);
		mVU.profiler.EmitOp(opFCAND);
	}
	pass3 { mVUlog("FCAND vi01, $%x", _Imm24_); }
	pass4 { mVUregs.needExactMatch |= 4; }
}

mVUop(mVU_FCEQ)
{
	pass1 { mVUanalyzeCflag(mVU, 1); }
	pass2
	{
		const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
		mVUallocCFLAGa(mVU, dst, cFLAG.read);
		xXOR(dst, _Imm24_);
		xSUB(dst, 1);
		xSHR(dst, 31);
		mVU.regAlloc->clearNeeded(dst);
		mVU.profiler.EmitOp(opFCEQ);
	}
	pass3 { mVUlog("FCEQ vi01, $%x", _Imm24_); }
	pass4 { mVUregs.needExactMatch |= 4; }
}

mVUop(mVU_FCGET)
{
	pass1 { mVUanalyzeCflag(mVU, _It_); }
	pass2
	{
		const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
		mVUallocCFLAGa(mVU, regT, cFLAG.read);
		xAND(regT, 0xfff);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opFCGET);
	}
	pass3 { mVUlog("FCGET vi%02d", _Ft_); }
	pass4 { mVUregs.needExactMatch |= 4; }
}

mVUop(mVU_FCOR)
{
	pass1 { mVUanalyzeCflag(mVU, 1); }
	pass2
	{
		const xRegister32& dst = mVU.regAlloc->allocGPR(-1, 1, mVUlow.backupVI);
		mVUallocCFLAGa(mVU, dst, cFLAG.read);
		xOR(dst, _Imm24_);
		xADD(dst, 1);  // If all 24 bits are set, the +1 carries into bit 24, else it stays 0
		xSHR(dst, 24); // Get the 25th bit (also clears the rest of the garbage in the reg)
		mVU.regAlloc->clearNeeded(dst);
		mVU.profiler.EmitOp(opFCOR);
	}
	pass3 { mVUlog("FCOR vi01, $%x", _Imm24_); }
	pass4 { mVUregs.needExactMatch |= 4; }
}

mVUop(mVU_FCSET)
{
	pass1 { cFLAG.doFlag = true; }
	pass2
	{
		xMOV(gprT1, _Imm24_);
		mVUallocCFLAGb(mVU, gprT1, cFLAG.write);
		mVU.profiler.EmitOp(opFCSET);
	}
	pass3 { mVUlog("FCSET $%x", _Imm24_); }
}

//------------------------------------------------------------------
// FMAND/FMEQ/FMOR
//------------------------------------------------------------------

mVUop(mVU_FMAND)
{
	pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); }
	pass2
	{
		mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
		const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
		xAND(regT, gprT1);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opFMAND);
	}
	pass3 { mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_); }
	pass4 { mVUregs.needExactMatch |= 2; }
}

mVUop(mVU_FMEQ)
{
	pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); }
	pass2
	{
		mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
		const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
		xXOR(regT, gprT1);
		xSUB(regT, 1);
		xSHR(regT, 31);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opFMEQ);
	}
	pass3 { mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_); }
	pass4 { mVUregs.needExactMatch |= 2; }
}

mVUop(mVU_FMOR)
{
	pass1 { mVUanalyzeMflag(mVU, _Is_, _It_); }
	pass2
	{
		mVUallocMFLAGa(mVU, gprT1, mFLAG.read);
		const xRegister32& regT = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
		xOR(regT, gprT1);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opFMOR);
	}
	pass3 { mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_); }
	pass4 { mVUregs.needExactMatch |= 2; }
}

//------------------------------------------------------------------
// FSAND/FSEQ/FSOR/FSSET
//------------------------------------------------------------------

mVUop(mVU_FSAND)
{
	pass1 { mVUanalyzeSflag(mVU, _It_); }
	pass2
	{
		if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking I/D/IS/DS Flags");
		if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking U/O/US/OS Flags");
		const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
		mVUallocSFLAGc(reg, gprT1, sFLAG.read);
		xAND(reg, _Imm12_);
		mVU.regAlloc->clearNeeded(reg);
		mVU.profiler.EmitOp(opFSAND);
	}
	pass3 { mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_); }
	pass4 { mVUregs.needExactMatch |= 1; }
}

mVUop(mVU_FSOR)
{
	pass1 { mVUanalyzeSflag(mVU, _It_); }
	pass2
	{
		const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
		mVUallocSFLAGc(reg, gprT2, sFLAG.read);
		xOR(reg, _Imm12_);
		mVU.regAlloc->clearNeeded(reg);
		mVU.profiler.EmitOp(opFSOR);
	}
	pass3 { mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_); }
	pass4 { mVUregs.needExactMatch |= 1; }
}
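
// FSEQ/FSSET below translate the architectural 12-bit status-flag immediate
// into microVU's internal status layout. As implied by the remap tables:
// bits 0-3 hold sticky-Z, 4-7 sticky-S, 8-11 Z, 12-15 S, bit 16 U, 17 O,
// 18 I, 19 D, and bits 22-25 the US/OS/IS/DS sticky bits. (An observation
// read off the tables themselves, not an external spec.)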

mVUop(mVU_FSEQ)
{
	pass1 { mVUanalyzeSflag(mVU, _It_); }
	pass2
	{
		int imm = 0;
		if (_Imm12_ & 0x0c30) DevCon.WriteLn(Color_Green, "mVU_FSEQ: Checking I/D/IS/DS Flags");
		if (_Imm12_ & 0x030c) DevCon.WriteLn(Color_Green, "mVU_FSEQ: Checking U/O/US/OS Flags");
		if (_Imm12_ & 0x0001) imm |= 0x0000f00; // Z
		if (_Imm12_ & 0x0002) imm |= 0x000f000; // S
		if (_Imm12_ & 0x0004) imm |= 0x0010000; // U
		if (_Imm12_ & 0x0008) imm |= 0x0020000; // O
		if (_Imm12_ & 0x0010) imm |= 0x0040000; // I
		if (_Imm12_ & 0x0020) imm |= 0x0080000; // D
		if (_Imm12_ & 0x0040) imm |= 0x000000f; // ZS
		if (_Imm12_ & 0x0080) imm |= 0x00000f0; // SS
		if (_Imm12_ & 0x0100) imm |= 0x0400000; // US
		if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS
		if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
		if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS

		const xRegister32& reg = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
		mVUallocSFLAGa(reg, sFLAG.read);
		setBitFSEQ(reg, 0x0f00); // Z  bit
		setBitFSEQ(reg, 0xf000); // S  bit
		setBitFSEQ(reg, 0x000f); // ZS bit
		setBitFSEQ(reg, 0x00f0); // SS bit
		xXOR(reg, imm);
		xSUB(reg, 1);
		xSHR(reg, 31);
		mVU.regAlloc->clearNeeded(reg);
		mVU.profiler.EmitOp(opFSEQ);
	}
	pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); }
	pass4 { mVUregs.needExactMatch |= 1; }
}

mVUop(mVU_FSSET)
{
	pass1 { mVUanalyzeFSSET(mVU); }
	pass2
	{
		int imm = 0;
		if (_Imm12_ & 0x0040) imm |= 0x000000f; // ZS
		if (_Imm12_ & 0x0080) imm |= 0x00000f0; // SS
		if (_Imm12_ & 0x0100) imm |= 0x0400000; // US
		if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS
		if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS
		if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS
		if (!(sFLAG.doFlag || mVUinfo.doDivFlag))
		{
			mVUallocSFLAGa(getFlagReg(sFLAG.write), sFLAG.lastWrite); // Get Prev Status Flag
		}
		xAND(getFlagReg(sFLAG.write), 0xfff00); // Keep Non-Sticky Bits
		if (imm)
			xOR(getFlagReg(sFLAG.write), imm);
		mVU.profiler.EmitOp(opFSSET);
	}
	pass3 { mVUlog("FSSET $%x", _Imm12_); }
}

//------------------------------------------------------------------
// IADD/IADDI/IADDIU/IAND/IOR/ISUB/ISUBIU
//------------------------------------------------------------------

mVUop(mVU_IADD)
{
	pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
	pass2
	{
		if (_Is_ == 0 || _It_ == 0)
		{
			const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_ ? _Is_ : _It_, -1);
			const xRegister32& regD = mVU.regAlloc->allocGPR(-1, _Id_, mVUlow.backupVI);
			xMOV(regD, regS);
			mVU.regAlloc->clearNeeded(regD);
			mVU.regAlloc->clearNeeded(regS);
		}
		else
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
			const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
			xADD(regS, regT);
			mVU.regAlloc->clearNeeded(regS);
			mVU.regAlloc->clearNeeded(regT);
		}
		mVU.profiler.EmitOp(opIADD);
	}
	pass3 { mVUlog("IADD vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
}

mVUop(mVU_IADDI)
{
	pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_); }
	pass2
	{
		if (_Is_ == 0)
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
			if (_Imm5_ != 0)
				xMOV(regT, _Imm5_);
			else
				xXOR(regT, regT);
			mVU.regAlloc->clearNeeded(regT);
		}
		else
		{
			const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
			if (_Imm5_ != 0)
				xADD(regS, _Imm5_);
			mVU.regAlloc->clearNeeded(regS);
		}
		mVU.profiler.EmitOp(opIADDI);
	}
	pass3 { mVUlog("IADDI vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm5_); }
}

mVUop(mVU_IADDIU)
{
	pass1 { mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_); }
	pass2
	{
		if (_Is_ == 0)
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
			if (_Imm15_ != 0)
				xMOV(regT, _Imm15_);
			else
				xXOR(regT, regT);
			mVU.regAlloc->clearNeeded(regT);
		}
		else
		{
			const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
			if (_Imm15_ != 0)
				xADD(regS, _Imm15_);
			mVU.regAlloc->clearNeeded(regS);
		}
		mVU.profiler.EmitOp(opIADDIU);
	}
	pass3 { mVUlog("IADDIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
}

mVUop(mVU_IAND)
{
	pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
	pass2
	{
		const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
		const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
		if (_It_ != _Is_)
			xAND(regS, regT);
		mVU.regAlloc->clearNeeded(regS);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opIAND);
	}
	pass3 { mVUlog("IAND vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
}

mVUop(mVU_IOR)
{
	pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
	pass2
	{
		const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
		const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
		if (_It_ != _Is_)
			xOR(regS, regT);
		mVU.regAlloc->clearNeeded(regS);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opIOR);
	}
	pass3 { mVUlog("IOR vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
}

mVUop(mVU_ISUB)
{
	pass1 { mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_); }
	pass2
	{
		if (_It_ != _Is_)
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1);
			const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Id_, mVUlow.backupVI);
			xSUB(regS, regT);
			mVU.regAlloc->clearNeeded(regS);
			mVU.regAlloc->clearNeeded(regT);
		}
		else
		{
			const xRegister32& regD = mVU.regAlloc->allocGPR(-1, _Id_, mVUlow.backupVI);
			xXOR(regD, regD);
			mVU.regAlloc->clearNeeded(regD);
		}
		mVU.profiler.EmitOp(opISUB);
	}
	pass3 { mVUlog("ISUB vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_); }
}

mVUop(mVU_ISUBIU)
{
	pass1 { mVUanalyzeIALU2(mVU, _Is_, _It_); }
	pass2
	{
		const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _It_, mVUlow.backupVI);
		if (_Imm15_ != 0)
			xSUB(regS, _Imm15_);
		mVU.regAlloc->clearNeeded(regS);
		mVU.profiler.EmitOp(opISUBIU);
	}
	pass3 { mVUlog("ISUBIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_); }
}

//------------------------------------------------------------------
// MFIR/MFP/MOVE/MR32/MTIR
//------------------------------------------------------------------

mVUop(mVU_MFIR)
{
	pass1
	{
		if (!_Ft_)
		{
			mVUlow.isNOP = true;
		}
		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeReg2 (mVU, _Ft_, mVUlow.VF_write, 1);
	}
	pass2
	{
		const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		if (_Is_ != 0)
		{
			const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, -1);
			xMOVSX(xRegister32(regS), xRegister16(regS));
			// TODO: Broadcast instead
			xMOVDZX(Ft, regS);
			if (!_XYZW_SS)
				mVUunpack_xyzw(Ft, Ft, 0);
			mVU.regAlloc->clearNeeded(regS);
		}
		else
		{
			xPXOR(Ft, Ft);
		}
		mVU.regAlloc->clearNeeded(Ft);
		mVU.profiler.EmitOp(opMFIR);
	}
	pass3 { mVUlog("MFIR.%s vf%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
}

mVUop(mVU_MFP)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeMFP(mVU, _Ft_);
	}
	pass2
	{
		const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		getPreg(mVU, Ft);
		mVU.regAlloc->clearNeeded(Ft);
		mVU.profiler.EmitOp(opMFP);
	}
	pass3 { mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_); }
}

mVUop(mVU_MOVE)
{
	pass1 { mVUanalyzeMOVE(mVU, _Fs_, _Ft_); }
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opMOVE);
	}
	pass3 { mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
}

mVUop(mVU_MR32)
{
	pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); }
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_);
		const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		if (_XYZW_SS)
			mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0))));
		else
			xPSHUF.D(Ft, Fs, 0x39);
		mVU.regAlloc->clearNeeded(Ft);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opMR32);
	}
	pass3 { mVUlog("MR32.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
}

mVUop(mVU_MTIR)
{
	pass1
	{
		if (!_It_)
			mVUlow.isNOP = true;

		analyzeReg5(mVU, _Fs_, _Fsf_, mVUlow.VF_read[0]);
		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1);
	}
	pass2
	{
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
		xMOVD(regT, Fs);
		mVU.regAlloc->clearNeeded(regT);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opMTIR);
	}
	pass3 { mVUlog("MTIR vi%02d, vf%02d%s", _Ft_, _Fs_, _Fsf_String); }
}

//------------------------------------------------------------------
// ILW/ILWR
//------------------------------------------------------------------
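
// ILW/ILWR load one 16-bit integer from VU data memory; offsetSS (assumed
// here to resolve to 0/4/8/12 from the dest field) picks which word of the
// 128-bit quadword the x/y/z/w field selects, and mVUaddrFix scales and
// masks the VI address into the unit's memory map.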

mVUop(mVU_ILW)
{
	pass1
	{
		if (!_It_)
			mVUlow.isNOP = true;

		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
	}
	pass2
	{
		void* ptr = mVU.regs().Mem + offsetSS;
		std::optional<xAddressVoid> optaddr(mVUoptimizeConstantAddr(mVU, _Is_, _Imm11_, offsetSS));
		if (!optaddr.has_value())
		{
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
			if (_Imm11_ != 0)
				xADD(gprT1, _Imm11_);
			mVUaddrFix(mVU, gprT1q);
		}

		const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
		xMOVZX(regT, ptr16[optaddr.has_value() ? optaddr.value() : xComplexAddress(gprT2q, ptr, gprT1q)]);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opILW);
	}
	pass3 { mVUlog("ILW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
}

mVUop(mVU_ILWR)
{
	pass1
	{
		if (!_It_)
			mVUlow.isNOP = true;

		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
	}
	pass2
	{
		void* ptr = mVU.regs().Mem + offsetSS;
		if (_Is_)
		{
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
			mVUaddrFix(mVU, gprT1q);

			const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
			xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
			mVU.regAlloc->clearNeeded(regT);
		}
		else
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
			xMOVZX(regT, ptr16[ptr]);
			mVU.regAlloc->clearNeeded(regT);
		}
		mVU.profiler.EmitOp(opILWR);
	}
	pass3 { mVUlog("ILWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
}

//------------------------------------------------------------------
// ISW/ISWR
//------------------------------------------------------------------

mVUop(mVU_ISW)
{
	pass1
	{
		mVUlow.isMemWrite = true;
		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]);
	}
	pass2
	{
		std::optional<xAddressVoid> optaddr(mVUoptimizeConstantAddr(mVU, _Is_, _Imm11_, 0));
		if (!optaddr.has_value())
		{
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
			if (_Imm11_ != 0)
				xADD(gprT1, _Imm11_);
			mVUaddrFix(mVU, gprT1q);
		}

		// If regT is dirty, the high bits might not be zero.
		const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
		const xAddressVoid ptr(optaddr.has_value() ? optaddr.value() : xComplexAddress(gprT2q, mVU.regs().Mem, gprT1q));
		if (_X) xMOV(ptr32[ptr], regT);
		if (_Y) xMOV(ptr32[ptr + 4], regT);
		if (_Z) xMOV(ptr32[ptr + 8], regT);
		if (_W) xMOV(ptr32[ptr + 12], regT);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opISW);
	}
	pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
}

mVUop(mVU_ISWR)
{
	pass1
	{
		mVUlow.isMemWrite = true;
		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]);
	}
	pass2
	{
		void* base = mVU.regs().Mem;
		xAddressReg is = xEmptyReg;
		if (_Is_)
		{
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
			mVUaddrFix(mVU, gprT1q);
			is = gprT1q;
		}
		const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
		if (!is.IsEmpty() && (sptr)base != (s32)(sptr)base)
		{
			int register_offset = -1;
			auto writeBackAt = [&](int offset) {
				if (register_offset == -1)
				{
					xLEA(gprT2q, ptr[(void*)((sptr)base + offset)]);
					register_offset = offset;
				}
				xMOV(ptr32[gprT2q + is + (offset - register_offset)], regT);
			};
			if (_X) writeBackAt(0);
			if (_Y) writeBackAt(4);
			if (_Z) writeBackAt(8);
			if (_W) writeBackAt(12);
		}
		else if (is.IsEmpty())
		{
			if (_X) xMOV(ptr32[(void*)((uptr)base)], regT);
			if (_Y) xMOV(ptr32[(void*)((uptr)base + 4)], regT);
			if (_Z) xMOV(ptr32[(void*)((uptr)base + 8)], regT);
			if (_W) xMOV(ptr32[(void*)((uptr)base + 12)], regT);
		}
		else
		{
			if (_X) xMOV(ptr32[base + is], regT);
			if (_Y) xMOV(ptr32[base + is + 4], regT);
			if (_Z) xMOV(ptr32[base + is + 8], regT);
			if (_W) xMOV(ptr32[base + is + 12], regT);
		}
		mVU.regAlloc->clearNeeded(regT);

		mVU.profiler.EmitOp(opISWR);
	}
	pass3 { mVUlog("ISWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
}

//------------------------------------------------------------------
// LQ/LQD/LQI
//------------------------------------------------------------------

mVUop(mVU_LQ)
{
	pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, false); }
	pass2
	{
		const std::optional<xAddressVoid> optaddr(mVUoptimizeConstantAddr(mVU, _Is_, _Imm11_, 0));
		if (!optaddr.has_value())
		{
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
			if (_Imm11_ != 0)
				xADD(gprT1, _Imm11_);
			mVUaddrFix(mVU, gprT1q);
		}

		const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		mVUloadReg(Ft, optaddr.has_value() ? optaddr.value() : xComplexAddress(gprT2q, mVU.regs().Mem, gprT1q), _X_Y_Z_W);
		mVU.regAlloc->clearNeeded(Ft);
		mVU.profiler.EmitOp(opLQ);
	}
	pass3 { mVUlog("LQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
}

mVUop(mVU_LQD)
{
	pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); }
	pass2
	{
		void* ptr = mVU.regs().Mem;
		xAddressReg is = xEmptyReg;
		if (_Is_ || isVU0) // Access VU1 regs mem-map in !_Is_ case
		{
			const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Is_, mVUlow.backupVI);
			xDEC(regS);
			xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
			mVU.regAlloc->clearNeeded(regS);
			mVUaddrFix(mVU, gprT1q);
			is = gprT1q;
		}
		else
		{
			ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize - 8)));
		}
		if (!mVUlow.noWriteVF)
		{
			const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
			if (is.IsEmpty())
			{
				mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
			}
			else
			{
				mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, is), _X_Y_Z_W);
			}
			mVU.regAlloc->clearNeeded(Ft);
		}
		mVU.profiler.EmitOp(opLQD);
	}
	pass3 { mVUlog("LQD.%s vf%02d, --vi%02d", _XYZW_String, _Ft_, _Is_); }
}

mVUop(mVU_LQI)
{
	pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); }
	pass2
	{
		void* ptr = mVU.regs().Mem;
		xAddressReg is = xEmptyReg;
		if (_Is_)
		{
			const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, _Is_, mVUlow.backupVI);
			xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
			xINC(regS);
			mVU.regAlloc->clearNeeded(regS);
			mVUaddrFix(mVU, gprT1q);
			is = gprT1q;
		}
		if (!mVUlow.noWriteVF)
		{
			const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
			if (is.IsEmpty())
				mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
			else
				mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, is), _X_Y_Z_W);
			mVU.regAlloc->clearNeeded(Ft);
		}
		mVU.profiler.EmitOp(opLQI);
	}
	pass3 { mVUlog("LQI.%s vf%02d, vi%02d++", _XYZW_String, _Ft_, _Fs_); }
}

//------------------------------------------------------------------
// SQ/SQD/SQI
//------------------------------------------------------------------

mVUop(mVU_SQ)
{
	pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, false); }
	pass2
	{
		const std::optional<xAddressVoid> optptr(mVUoptimizeConstantAddr(mVU, _It_, _Imm11_, 0));
		if (!optptr.has_value())
		{
			mVU.regAlloc->moveVIToGPR(gprT1, _It_);
			if (_Imm11_ != 0)
				xADD(gprT1, _Imm11_);
			mVUaddrFix(mVU, gprT1q);
		}

		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
		mVUsaveReg(Fs, optptr.has_value() ? optptr.value() : xComplexAddress(gprT2q, mVU.regs().Mem, gprT1q), _X_Y_Z_W, 1);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opSQ);
	}
	pass3 { mVUlog("SQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Fs_, _Ft_, _Imm11_); }
}

mVUop(mVU_SQD)
{
	pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); }
	pass2
	{
		void* ptr = mVU.regs().Mem;
		xAddressReg it = xEmptyReg;
		if (_It_ || isVU0) // Access VU1 regs mem-map in !_It_ case
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, _It_, mVUlow.backupVI);
			xDEC(regT);
			xMOVZX(gprT1, xRegister16(regT));
			mVU.regAlloc->clearNeeded(regT);
			mVUaddrFix(mVU, gprT1q);
			it = gprT1q;
		}
		else
		{
			ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize - 8)));
		}
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
		if (it.IsEmpty())
			mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
		else
			mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, it), _X_Y_Z_W, 1);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opSQD);
	}
	pass3 { mVUlog("SQD.%s vf%02d, --vi%02d", _XYZW_String, _Fs_, _Ft_); }
}

mVUop(mVU_SQI)
{
	pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); }
	pass2
	{
		void* ptr = mVU.regs().Mem;
		if (_It_)
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, _It_, mVUlow.backupVI);
			xMOVZX(gprT1, xRegister16(regT));
			xINC(regT);
			mVU.regAlloc->clearNeeded(regT);
			mVUaddrFix(mVU, gprT1q);
		}
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
		if (_It_)
			mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W, 1);
		else
			mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
		mVU.regAlloc->clearNeeded(Fs);
		mVU.profiler.EmitOp(opSQI);
	}
	pass3 { mVUlog("SQI.%s vf%02d, vi%02d++", _XYZW_String, _Fs_, _Ft_); }
}

//------------------------------------------------------------------
// RINIT/RGET/RNEXT/RXOR
//------------------------------------------------------------------

mVUop(mVU_RINIT)
{
	pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
	pass2
	{
		if (_Fs_ || (_Fsf_ == 3))
		{
			const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
			xMOVD(gprT1, Fs);
			xAND(gprT1, 0x007fffff);
			xOR (gprT1, 0x3f800000);
			xMOV(ptr32[Rmem], gprT1);
			mVU.regAlloc->clearNeeded(Fs);
		}
		else
			xMOV(ptr32[Rmem], 0x3f800000);
		mVU.profiler.EmitOp(opRINIT);
	}
	pass3 { mVUlog("RINIT R, vf%02d%s", _Fs_, _Fsf_String); }
}

static __fi void mVU_RGET_(mV, const x32& Rreg)
{
	if (!mVUlow.noWriteVF)
	{
		const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		xMOVDZX(Ft, Rreg);
		if (!_XYZW_SS)
			mVUunpack_xyzw(Ft, Ft, 0);
		mVU.regAlloc->clearNeeded(Ft);
	}
}

mVUop(mVU_RGET)
{
	pass1 { mVUanalyzeR2(mVU, _Ft_, true); }
	pass2
	{
		xMOV(gprT1, ptr32[Rmem]);
		mVU_RGET_(mVU, gprT1);
		mVU.profiler.EmitOp(opRGET);
	}
	pass3 { mVUlog("RGET.%s vf%02d, R", _XYZW_String, _Ft_); }
}
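
// RNEXT below advances the R register as a shift-register generator kept in
// float form. A plain-C sketch of exactly what the emitted x86 computes
// (illustrative; the bit taps are read straight from the code):
//   u32 r  = R;
//   u32 fb = ((r >> 4) ^ (r >> 22)) & 1;   // xor of bit 4 and bit 22
//   r      = ((r << 1) ^ fb) & 0x007fffff; // shift left, inject feedback
//   R      = r | 0x3f800000;               // re-bias mantissa into [1.0, 2.0)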

mVUop(mVU_RNEXT)
{
	pass1 { mVUanalyzeR2(mVU, _Ft_, false); }
	pass2
	{
		// algorithm from www.project-fao.org
		const xRegister32& temp3 = mVU.regAlloc->allocGPR();
		xMOV(temp3, ptr32[Rmem]);
		xMOV(gprT1, temp3);
		xSHR(gprT1, 4);
		xAND(gprT1, 1);

		xMOV(gprT2, temp3);
		xSHR(gprT2, 22);
		xAND(gprT2, 1);

		xSHL(temp3, 1);
		xXOR(gprT1, gprT2);
		xXOR(temp3, gprT1);
		xAND(temp3, 0x007fffff);
		xOR (temp3, 0x3f800000);
		xMOV(ptr32[Rmem], temp3);
		mVU_RGET_(mVU, temp3);
		mVU.regAlloc->clearNeeded(temp3);
		mVU.profiler.EmitOp(opRNEXT);
	}
	pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); }
}

mVUop(mVU_RXOR)
{
	pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
	pass2
	{
		if (_Fs_ || (_Fsf_ == 3))
		{
			const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
			xMOVD(gprT1, Fs);
			xAND(gprT1, 0x7fffff);
			xXOR(ptr32[Rmem], gprT1);
			mVU.regAlloc->clearNeeded(Fs);
		}
		mVU.profiler.EmitOp(opRXOR);
	}
	pass3 { mVUlog("RXOR R, vf%02d%s", _Fs_, _Fsf_String); }
}

//------------------------------------------------------------------
// WaitP/WaitQ
//------------------------------------------------------------------

mVUop(mVU_WAITP)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUstall = std::max(mVUstall, (u8)((mVUregs.p) ? (mVUregs.p - 1) : 0));
	}
	pass2 { mVU.profiler.EmitOp(opWAITP); }
	pass3 { mVUlog("WAITP"); }
}

mVUop(mVU_WAITQ)
{
	pass1 { mVUstall = std::max(mVUstall, mVUregs.q); }
	pass2 { mVU.profiler.EmitOp(opWAITQ); }
	pass3 { mVUlog("WAITQ"); }
}

//------------------------------------------------------------------
// XTOP/XITOP
//------------------------------------------------------------------

mVUop(mVU_XTOP)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}

		if (!_It_)
			mVUlow.isNOP = true;

		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1);
	}
	pass2
	{
		const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
		xMOVZX(regT, ptr16[&mVU.getVifRegs().top]);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opXTOP);
	}
	pass3 { mVUlog("XTOP vi%02d", _Ft_); }
}

mVUop(mVU_XITOP)
{
	pass1
	{
		if (!_It_)
			mVUlow.isNOP = true;

		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1);
	}
	pass2
	{
		const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
		xMOVZX(regT, ptr16[&mVU.getVifRegs().itop]);
		xAND(regT, isVU1 ? 0x3ff : 0xff);
		mVU.regAlloc->clearNeeded(regT);
		mVU.profiler.EmitOp(opXITOP);
	}
	pass3 { mVUlog("XITOP vi%02d", _Ft_); }
}

//------------------------------------------------------------------
// XGkick
//------------------------------------------------------------------
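
// VU1 data memory is 16KB (0x4000 bytes, 0x400 quadwords). XGKICK addresses
// are quadword indices, so the helpers below mask with 0x3ff and multiply by
// 16; 'diff' is the byte count up to the wrap point, and packets that cross
// it are transferred in two pieces.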

void mVU_XGKICK_(u32 addr)
{
	addr = (addr & 0x3ff) * 16;
	u32 diff = 0x4000 - addr;
	u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, addr, ~0u, true);

	if (size > diff)
	{
		//DevCon.WriteLn(Color_Green, "microVU1: XGkick Wrap!");
		gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&vuRegs[1].Mem[addr], diff, true);
		gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[0], size - diff, true);
	}
	else
	{
		gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[addr], size, true);
	}
}

void _vuXGKICKTransfermVU(bool flush)
{
	while (VU1.xgkickenable && (flush || VU1.xgkickcyclecount >= 2))
	{
		u32 transfersize = 0;

		if (VU1.xgkicksizeremaining == 0)
		{
			//VUM_LOG("XGKICK reading new tag from %x", VU1.xgkickaddr);
			u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, VU1.xgkickaddr, ~0u, flush);
			VU1.xgkicksizeremaining = size & 0xFFFF;
			VU1.xgkickendpacket = size >> 31;
			VU1.xgkickdiff = 0x4000 - VU1.xgkickaddr;

			if (VU1.xgkicksizeremaining == 0)
			{
				//VUM_LOG("Invalid GS packet size returned, cancelling XGKick");
				VU1.xgkickenable = false;
				break;
			}
			//else
				//VUM_LOG("XGKICK New tag size %d bytes EOP %d", VU1.xgkicksizeremaining, VU1.xgkickendpacket);
		}

		if (!flush)
		{
			transfersize = std::min(VU1.xgkicksizeremaining, VU1.xgkickcyclecount * 8);
			transfersize = std::min(transfersize, VU1.xgkickdiff);
		}
		else
		{
			transfersize = VU1.xgkicksizeremaining;
			transfersize = std::min(transfersize, VU1.xgkickdiff);
		}

		//VUM_LOG("XGKICK Transferring %x bytes from %x size %x", transfersize * 0x10, VU1.xgkickaddr, VU1.xgkicksizeremaining);

		// Would be "nicer" to do the copy until it's all up, however this really screws up PATH3 masking stuff
		// So let's just do it the other way :)
		if (THREAD_VU1)
		{
			if (transfersize < VU1.xgkicksizeremaining)
				gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU1.Mem[VU1.xgkickaddr], transfersize, true);
			else
				gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[VU1.xgkickaddr], transfersize, true);
		}
		else
		{
			gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[VU1.xgkickaddr], transfersize, true);
		}

		if (flush)
			VU1.cycle += transfersize / 8;

		VU1.xgkickcyclecount -= transfersize / 8;

		VU1.xgkickaddr = (VU1.xgkickaddr + transfersize) & 0x3FFF;
		VU1.xgkicksizeremaining -= transfersize;
		VU1.xgkickdiff = 0x4000 - VU1.xgkickaddr;

		if (VU1.xgkickendpacket && !VU1.xgkicksizeremaining)
		{
			//VUM_LOG("XGKICK transfer finished");
			VU1.xgkickenable = false;
			// Check if VIF is waiting for the GIF to not be busy
		}
		//else
			//VUM_LOG("XGKICK next addr %x left size %x", VU1.xgkickaddr, VU1.xgkicksizeremaining);
	}
	//VUM_LOG("XGKick run complete Enabled %d", VU1.xgkickenable);
}

static __fi void mVU_XGKICK_SYNC(mV, bool flush)
{
	mVU.regAlloc->flushCallerSavedRegisters();

	// Add the single cycle remainder after this instruction, some games do the store
	// on the second instruction after the kick and that needs to go through first
	// but that's VERY close..
	xTEST(ptr32[&VU1.xgkickenable], 0x1);
	xForwardJZ32 skipxgkick;
	xADD(ptr32[&VU1.xgkickcyclecount], mVUlow.kickcycles - 1);
	xCMP(ptr32[&VU1.xgkickcyclecount], 2);
	xForwardJL32 needcycles;
	mVUbackupRegs(mVU, true, true);
	xFastCall(_vuXGKICKTransfermVU, flush);
	mVUrestoreRegs(mVU, true, true);
	needcycles.SetTarget();
	xADD(ptr32[&VU1.xgkickcyclecount], 1);
	skipxgkick.SetTarget();
}

static __fi void mVU_XGKICK_DELAY(mV)
{
	mVU.regAlloc->flushCallerSavedRegisters();

	mVUbackupRegs(mVU, true, true);
#if 0 // XGkick Break - ToDo: Change "SomeGifPathValue" to w/e needs to be tested
	xTEST (ptr32[&SomeGifPathValue], 1); // If '1', breaks execution
	xMOV  (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6);
	xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr() + 6));
#endif
	xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]);
	mVUrestoreRegs(mVU, true, true);
}

mVUop(mVU_XGKICK)
{
	pass1
	{
		if (isVU0)
		{
			mVUlow.isNOP = true;
			return;
		}
		mVUanalyzeXGkick(mVU, _Is_, 1);
	}
	pass2
	{
		if (CHECK_XGKICKHACK)
		{
			mVUlow.kickcycles = 99;
			mVU_XGKICK_SYNC(mVU, true);
			mVUlow.kickcycles = 0;
		}
		if (mVUinfo.doXGKICK) // check for XGkick Transfer
		{
			mVU_XGKICK_DELAY(mVU);
			mVUinfo.doXGKICK = false;
		}

		const xRegister32& regS = mVU.regAlloc->allocGPR(_Is_, -1);
		if (!CHECK_XGKICKHACK)
		{
			xMOV(ptr32[&mVU.VIxgkick], regS);
		}
		else
		{
			xMOV(ptr32[&VU1.xgkickenable], 1);
			xMOV(ptr32[&VU1.xgkickendpacket], 0);
			xMOV(ptr32[&VU1.xgkicksizeremaining], 0);
			xMOV(ptr32[&VU1.xgkickcyclecount], 0);
			xMOV(gprT2, ptr32[&mVU.totalCycles]);
			xSUB(gprT2, ptr32[&mVU.cycles]);
			xADD(gprT2, ptr32[&VU1.cycle]);
			xMOV(ptr32[&VU1.xgkicklastcycle], gprT2);
			xMOV(gprT1, regS);
			xAND(gprT1, 0x3FF);
			xSHL(gprT1, 4);
			xMOV(ptr32[&VU1.xgkickaddr], gprT1);
		}
		mVU.regAlloc->clearNeeded(regS);
		mVU.profiler.EmitOp(opXGKICK);
	}
	pass3 { mVUlog("XGKICK vi%02d", _Fs_); }
}

//------------------------------------------------------------------
// Branches/Jumps
//------------------------------------------------------------------
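
// mVUbranch codes used below (as assigned by setBranchA and the jump ops):
// 1 = B, 2 = BAL, 3-8 = the conditional IB** branches in opcode order,
// 9 = JR, 10 = JALR. A value already set when another branch is analyzed
// means the instruction sits in a branch delay slot.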

void setBranchA(mP, int x, int _x_)
{
	bool isBranchDelaySlot = false;

	incPC(-2);
	if (mVUlow.branch)
		isBranchDelaySlot = true;
	incPC(2);

	pass1
	{
		if (_Imm11_ == 1 && !_x_ && !isBranchDelaySlot)
		{
			DevCon.WriteLn(Color_Green, "microVU%d: Branch Optimization", mVU.index);
			mVUlow.isNOP = true;
			return;
		}
		mVUbranch     = x;
		mVUlow.branch = x;
	}
	pass2 { if (_Imm11_ == 1 && !_x_ && !isBranchDelaySlot) { return; } mVUbranch = x; }
	pass3 { mVUbranch = x; }
	pass4 { if (_Imm11_ == 1 && !_x_ && !isBranchDelaySlot) { return; } mVUbranch = x; }
}
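
// condEvilBranch handles a conditional branch sitting in the delay slot of
// another branch: badBranch holds the first branch's taken target, and
// evilBranch the address the delay-slot branch selects. A sketch of the
// badBranch case below (illustrative, reading the emitted jump structure):
//   mVU.branch    = vi;                        // condition value for later
//   mVU.badBranch = branchAddr;                // assume taken
//   if (!taken(vi)) mVU.badBranch = pc + 8;    // not taken -> fall-through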

void condEvilBranch(mV, int JMPcc)
{
	if (mVUlow.badBranch)
	{
		xMOV(ptr32[&mVU.branch], gprT1);
		xMOV(ptr32[&mVU.badBranch], branchAddr(mVU));

		xCMP(gprT1b, 0);
		xForwardJump8 cJMP((JccComparisonType)JMPcc);
		incPC(4); // Branch Not Taken Addr
		xMOV(ptr32[&mVU.badBranch], xPC);
		incPC(-4);
		cJMP.SetTarget();
		return;
	}
	if (isEvilBlock)
	{
		xMOV(ptr32[&mVU.evilevilBranch], branchAddr(mVU));
		xCMP(gprT1b, 0);
		xForwardJump8 cJMP((JccComparisonType)JMPcc);
		xMOV(gprT1, ptr32[&mVU.evilBranch]); // Branch Not Taken
		xADD(gprT1, 8); // We have already executed 1 instruction from the original branch
		xMOV(ptr32[&mVU.evilevilBranch], gprT1);
		cJMP.SetTarget();
	}
	else
	{
		xMOV(ptr32[&mVU.evilBranch], branchAddr(mVU));
		xCMP(gprT1b, 0);
		xForwardJump8 cJMP((JccComparisonType)JMPcc);
		xMOV(gprT1, ptr32[&mVU.badBranch]); // Branch Not Taken
		xADD(gprT1, 8); // We have already executed 1 instruction from the original branch
		xMOV(ptr32[&mVU.evilBranch], gprT1);
		cJMP.SetTarget();
		incPC(-2);
		if (mVUlow.branch >= 9)
			DevCon.Warning("Conditional in JALR/JR delay slot - If game broken report to PCSX2 Team");
		incPC(2);
	}
}

mVUop(mVU_B)
{
	setBranchA(mX, 1, 0);
	pass1 { mVUanalyzeNormBranch(mVU, 0, false); }
	pass2
	{
		if (mVUlow.badBranch)  { xMOV(ptr32[&mVU.badBranch],  branchAddr(mVU)); }
		if (mVUlow.evilBranch) { if (isEvilBlock) xMOV(ptr32[&mVU.evilevilBranch], branchAddr(mVU)); else xMOV(ptr32[&mVU.evilBranch], branchAddr(mVU)); }
		mVU.profiler.EmitOp(opB);
	}
	pass3 { mVUlog("B [<a href=\"#addr%04x\">%04x</a>]", branchAddr(mVU), branchAddr(mVU)); }
}

mVUop(mVU_BAL)
{
	setBranchA(mX, 2, _It_);
	pass1 { mVUanalyzeNormBranch(mVU, _It_, true); }
	pass2
	{
		if (!mVUlow.evilBranch)
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
			xMOV(regT, bSaveAddr);
			mVU.regAlloc->clearNeeded(regT);
		}
		else
		{
			incPC(-2);
			DevCon.Warning("Linking BAL from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
			incPC(2);

			const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
			if (isEvilBlock)
				xMOV(regT, ptr32[&mVU.evilBranch]);
			else
				xMOV(regT, ptr32[&mVU.badBranch]);

			xADD(regT, 8);
			xSHR(regT, 3);
			mVU.regAlloc->clearNeeded(regT);
		}

		if (mVUlow.badBranch)  { xMOV(ptr32[&mVU.badBranch],  branchAddr(mVU)); }
		if (mVUlow.evilBranch) { if (isEvilBlock) xMOV(ptr32[&mVU.evilevilBranch], branchAddr(mVU)); else xMOV(ptr32[&mVU.evilBranch], branchAddr(mVU)); }
		mVU.profiler.EmitOp(opBAL);
	}
	pass3 { mVUlog("BAL vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Ft_, branchAddr(mVU), branchAddr(mVU)); }
}

mVUop(mVU_IBEQ)
{
	setBranchA(mX, 3, 0);
	pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
	pass2
	{
		if (mVUlow.memReadIs)
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		else
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);

		if (mVUlow.memReadIt)
			xXOR(gprT1, ptr32[&mVU.VIbackup]);
		else
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(_It_);
			xXOR(gprT1, regT);
			mVU.regAlloc->clearNeeded(regT);
		}

		if (!(isBadOrEvil))
			xMOV(ptr32[&mVU.branch], gprT1);
		else
			condEvilBranch(mVU, Jcc_Equal);
		mVU.profiler.EmitOp(opIBEQ);
	}
	pass3 { mVUlog("IBEQ vi%02d, vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Ft_, _Fs_, branchAddr(mVU), branchAddr(mVU)); }
}

mVUop(mVU_IBGEZ)
{
	setBranchA(mX, 4, 0);
	pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
	pass2
	{
		if (mVUlow.memReadIs)
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		else
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
		if (!(isBadOrEvil))
			xMOV(ptr32[&mVU.branch], gprT1);
		else
			condEvilBranch(mVU, Jcc_GreaterOrEqual);
		mVU.profiler.EmitOp(opIBGEZ);
	}
	pass3 { mVUlog("IBGEZ vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Fs_, branchAddr(mVU), branchAddr(mVU)); }
}

mVUop(mVU_IBGTZ)
{
	setBranchA(mX, 5, 0);
	pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
	pass2
	{
		if (mVUlow.memReadIs)
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		else
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
		if (!(isBadOrEvil))
			xMOV(ptr32[&mVU.branch], gprT1);
		else
			condEvilBranch(mVU, Jcc_Greater);
		mVU.profiler.EmitOp(opIBGTZ);
	}
	pass3 { mVUlog("IBGTZ vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Fs_, branchAddr(mVU), branchAddr(mVU)); }
}

mVUop(mVU_IBLEZ)
{
	setBranchA(mX, 6, 0);
	pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
	pass2
	{
		if (mVUlow.memReadIs)
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		else
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
		if (!(isBadOrEvil))
			xMOV(ptr32[&mVU.branch], gprT1);
		else
			condEvilBranch(mVU, Jcc_LessOrEqual);
		mVU.profiler.EmitOp(opIBLEZ);
	}
	pass3 { mVUlog("IBLEZ vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Fs_, branchAddr(mVU), branchAddr(mVU)); }
}

mVUop(mVU_IBLTZ)
{
	setBranchA(mX, 7, 0);
	pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
	pass2
	{
		if (mVUlow.memReadIs)
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		else
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
		if (!(isBadOrEvil))
			xMOV(ptr32[&mVU.branch], gprT1);
		else
			condEvilBranch(mVU, Jcc_Less);
		mVU.profiler.EmitOp(opIBLTZ);
	}
	pass3 { mVUlog("IBLTZ vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Fs_, branchAddr(mVU), branchAddr(mVU)); }
}

mVUop(mVU_IBNE)
{
	setBranchA(mX, 8, 0);
	pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
	pass2
	{
		if (mVUlow.memReadIs)
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		else
			mVU.regAlloc->moveVIToGPR(gprT1, _Is_);

		if (mVUlow.memReadIt)
			xXOR(gprT1, ptr32[&mVU.VIbackup]);
		else
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(_It_);
			xXOR(gprT1, regT);
			mVU.regAlloc->clearNeeded(regT);
		}

		if (!(isBadOrEvil))
			xMOV(ptr32[&mVU.branch], gprT1);
		else
			condEvilBranch(mVU, Jcc_NotEqual);
		mVU.profiler.EmitOp(opIBNE);
	}
	pass3 { mVUlog("IBNE vi%02d, vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Ft_, _Fs_, branchAddr(mVU), branchAddr(mVU)); }
}

void normJumpPass2(mV)
{
	if (!mVUlow.constJump.isValid || mVUlow.evilBranch)
	{
		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
		xSHL(gprT1, 3);
		xAND(gprT1, mVU.microMemSize - 8);

		if (!mVUlow.evilBranch)
		{
			xMOV(ptr32[&mVU.branch], gprT1);
		}
		else
		{
			if (isEvilBlock)
				xMOV(ptr32[&mVU.evilevilBranch], gprT1);
			else
				xMOV(ptr32[&mVU.evilBranch], gprT1);
		}
		// If the delay slot is conditional, it uses badBranch to go to its target
		if (mVUlow.badBranch)
		{
			xMOV(ptr32[&mVU.badBranch], gprT1);
		}
	}
}

mVUop(mVU_JR)
{
	mVUbranch = 9;
	pass1 { mVUanalyzeJump(mVU, _Is_, 0, false); }
	pass2
	{
		normJumpPass2(mVU);
		mVU.profiler.EmitOp(opJR);
	}
	pass3 { mVUlog("JR [vi%02d]", _Fs_); }
}

mVUop(mVU_JALR)
{
	mVUbranch = 10;
	pass1 { mVUanalyzeJump(mVU, _Is_, _It_, 1); }
	pass2
	{
		normJumpPass2(mVU);
		if (!mVUlow.evilBranch)
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
			xMOV(regT, bSaveAddr);
			mVU.regAlloc->clearNeeded(regT);
		}
		if (mVUlow.evilBranch)
		{
			const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
			if (isEvilBlock)
			{
				xMOV(regT, ptr32[&mVU.evilBranch]);
				xADD(regT, 8);
				xSHR(regT, 3);
			}
			else
			{
				incPC(-2);
				DevCon.Warning("Linking JALR from %s branch taken/not taken target! - If game broken report to PCSX2 Team", branchSTR[mVUlow.branch & 0xf]);
				incPC(2);

				xMOV(regT, ptr32[&mVU.badBranch]);
				xADD(regT, 8);
				xSHR(regT, 3);
			}
			mVU.regAlloc->clearNeeded(regT);
		}

		mVU.profiler.EmitOp(opJALR);
	}
	pass3 { mVUlog("JALR vi%02d, [vi%02d]", _Ft_, _Fs_); }
}