mirror of https://github.com/PCSX2/pcsx2.git
1448 lines
42 KiB
C++
1448 lines
42 KiB
C++
/* PCSX2 - PS2 Emulator for PCs
|
|
* Copyright (C) 2002-2010 PCSX2 Dev Team
|
|
*
|
|
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
|
* of the GNU Lesser General Public License as published by the Free Software Found-
|
|
* ation, either version 3 of the License, or (at your option) any later version.
|
|
*
|
|
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with PCSX2.
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
//------------------------------------------------------------------
|
|
// Micro VU Micromode Lower instructions
|
|
//------------------------------------------------------------------
|
|
|
|
//------------------------------------------------------------------
|
|
// DIV/SQRT/RSQRT
|
|
//------------------------------------------------------------------
|
|
|
|
// Test if Vector is +/- Zero
|
|
static __fi void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprTemp)
|
|
{
|
|
xXOR.PS(xmmTemp, xmmTemp);
|
|
xCMPEQ.SS(xmmTemp, xmmReg);
|
|
if (!x86caps.hasStreamingSIMD4Extensions) {
|
|
xMOVMSKPS(gprTemp, xmmTemp);
|
|
xTEST(gprTemp, 1);
|
|
}
|
|
else xPTEST(xmmTemp, xmmTemp);
|
|
}
|
|
|
|
// Test if Vector is Negative (Set Flags and Makes Positive)
|
|
static __fi void testNeg(mV, const xmm& xmmReg, const x32& gprTemp)
|
|
{
|
|
xMOVMSKPS(gprTemp, xmmReg);
|
|
xTEST(gprTemp, 1);
|
|
xForwardJZ8 skip;
|
|
xMOV(ptr32[&mVU.divFlag], divI);
|
|
xAND.PS(xmmReg, ptr128[mVUglob.absclip]);
|
|
skip.SetTarget();
|
|
}
|
|
|
|
// DIV: Q = Fs[fsf] / Ft[ftf].
// pass1 runs the FDIV analysis, pass2 emits the code, pass3 logs the disassembly.
// The emitted code writes mVU.divFlag: divI for 0/0, divD for x/0, 0 otherwise.
mVUop(mVU_DIV) {
	pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); }
	pass2 {
		// Allocation order (divisor, dividend, scratch) is kept as-is.
		xmm divisor = _Ftf_ ? mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)))
		                    : mVU.regAlloc->allocReg(_Ft_);
		const xmm& dividend = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& scratch  = mVU.regAlloc->allocReg();

		testZero(divisor, scratch, gprT1); // Is the divisor zero?
		xForwardJZ8 divisorNonZero;        // No -> go do the real division

			testZero(dividend, scratch, gprT1); // Is the dividend zero too?
			xForwardJZ8 dividendNonZero;
				xMOV(ptr32[&mVU.divFlag], divI); // 0/0 -> invalid flag
				xForwardJump8 flagStored;
			dividendNonZero.SetTarget();
				xMOV(ptr32[&mVU.divFlag], divD); // x/0 -> zero-divide flag
			flagStored.SetTarget();

			// Result on division by zero is +/- fmax, sign = sign(Fs) ^ sign(Ft)
			xXOR.PS(dividend, divisor);
			xAND.PS(dividend, ptr128[mVUglob.signbit]);
			xOR.PS (dividend, ptr128[mVUglob.maxvals]);

			xForwardJump8 finished;
		divisorNonZero.SetTarget();
			xMOV(ptr32[&mVU.divFlag], 0); // Normal case: clear I/D flags
			SSE_DIVSS(mVU, dividend, divisor);
			mVUclamp1(dividend, scratch, 8, true);
		finished.SetTarget();

		writeQreg(dividend, mVUinfo.writeQ);

		mVU.regAlloc->clearNeeded(dividend);
		mVU.regAlloc->clearNeeded(divisor);
		mVU.regAlloc->clearNeeded(scratch);
		mVU.profiler.EmitOp(opDIV);
	}
	pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
}
|
|
|
|
// SQRT: Q = sqrt(|Ft[ftf]|).
// The emitted code clears mVU.divFlag first; testNeg then sets divI and strips
// the sign when the operand is negative.
mVUop(mVU_SQRT) {
	pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); }
	pass2 {
		const xmm& operand = mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));

		xMOV(ptr32[&mVU.divFlag], 0);  // Clear I/D flags
		testNeg(mVU, operand, gprT1);  // Flag and fix up a negative operand

		// Operand is non-negative here, so only the upper bound needs clamping.
		if (CHECK_VU_OVERFLOW) {
			xMIN.SS(operand, ptr32[mVUglob.maxvals]);
		}
		xSQRT.SS(operand, operand);
		writeQreg(operand, mVUinfo.writeQ);

		mVU.regAlloc->clearNeeded(operand);
		mVU.profiler.EmitOp(opSQRT);
	}
	pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); }
}
|
|
|
|
// RSQRT: Q = Fs[fsf] / sqrt(|Ft[ftf]|).
// mVU.divFlag receives divI for a negative Ft (via testNeg) or for 0/0, and
// divD for x/0; it is cleared up front.
mVUop(mVU_RSQRT) {
	pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); }
	pass2 {
		const xmm& numer   = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& denom   = mVU.regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
		const xmm& scratch = mVU.regAlloc->allocReg();

		xMOV(ptr32[&mVU.divFlag], 0); // Clear I/D flags
		testNeg(mVU, denom, gprT1);   // Flag and fix up a negative sqrt operand

		xSQRT.SS(denom, denom);
		testZero(denom, scratch, gprT1); // sqrt(Ft) == 0 ?
		xForwardJZ8 denomNonZero;        // No -> regular division

			testZero(numer, scratch, gprT1); // Fs == 0 as well?
			xForwardJZ8 numerNonZero;
				xMOV(ptr32[&mVU.divFlag], divI); // 0/0 -> invalid flag
				xForwardJump8 flagStored;
			numerNonZero.SetTarget();
				xMOV(ptr32[&mVU.divFlag], divD); // x/0 -> zero-divide flag
			flagStored.SetTarget();

			// Result becomes +/- max, keeping the sign of Fs
			xAND.PS(numer, ptr128[mVUglob.signbit]);
			xOR.PS (numer, ptr128[mVUglob.maxvals]);

			xForwardJump8 finished;
		denomNonZero.SetTarget();
			SSE_DIVSS(mVU, numer, denom);
			mVUclamp1(numer, scratch, 8, true);
		finished.SetTarget();

		writeQreg(numer, mVUinfo.writeQ);

		mVU.regAlloc->clearNeeded(numer);
		mVU.regAlloc->clearNeeded(denom);
		mVU.regAlloc->clearNeeded(scratch);
		mVU.profiler.EmitOp(opRSQRT);
	}
	pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
}
|
|
|
|
//------------------------------------------------------------------
|
|
// EATAN/EEXP/ELENG/ERCPR/ERLENG/ERSADD/ERSQRT/ESADD/ESIN/ESQRT/ESUM
|
|
//------------------------------------------------------------------
|
|
|
|
// One term of the EATAN series.
// Uses mVU, PQ, Fs, t1 and t2 from the expanding scope:
//   t2 *= Fs * Fs;  t1 = t2 * [addr];  PQ += t1
// Wrapped in do/while(0) so that `EATANhelper(x);` is a single statement and
// stays correct inside an unbraced if/else.
#define EATANhelper(addr) do { \
	SSE_MULSS(mVU, t2, Fs); \
	SSE_MULSS(mVU, t2, Fs); \
	xMOVAPS  (t1, t2); \
	xMUL.SS  (t1, ptr32[addr]); \
	SSE_ADDSS(mVU, PQ, t1); \
} while (0)
|
|
|
|
// ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d)
|
|
static __fi void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, const xmm& t2) {
|
|
xMOVSS(PQ, Fs);
|
|
xMUL.SS(PQ, ptr32[mVUglob.T1]);
|
|
xMOVAPS(t2, Fs);
|
|
EATANhelper(mVUglob.T2);
|
|
EATANhelper(mVUglob.T3);
|
|
EATANhelper(mVUglob.T4);
|
|
EATANhelper(mVUglob.T5);
|
|
EATANhelper(mVUglob.T6);
|
|
EATANhelper(mVUglob.T7);
|
|
EATANhelper(mVUglob.T8);
|
|
xADD.SS(PQ, ptr32[mVUglob.Pi4]);
|
|
xPSHUF.D(PQ, PQ, mVUinfo.writeP ? 0x27 : 0xC6);
|
|
}
|
|
|
|
// EATAN: feeds (x - 1) / (x + 1), with x = Fs[fsf], into the shared
// mVU_EATAN_ series and stores the result in P.
mVUop(mVU_EATAN) {
	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 54); }
	pass2 {
		const xmm& src  = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& tmpA = mVU.regAlloc->allocReg();
		const xmm& tmpB = mVU.regAlloc->allocReg();
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		xMOVSS  (xmmPQ, src);
		xSUB.SS (src,   ptr32[mVUglob.one]); // src = x - 1
		xADD.SS (xmmPQ, ptr32[mVUglob.one]); // pq  = x + 1
		SSE_DIVSS(mVU, src, xmmPQ);
		mVU_EATAN_(mVU, xmmPQ, src, tmpA, tmpB); // also flips xmmPQ back

		mVU.regAlloc->clearNeeded(src);
		mVU.regAlloc->clearNeeded(tmpA);
		mVU.regAlloc->clearNeeded(tmpB);
		mVU.profiler.EmitOp(opEATAN);
	}
	pass3 { mVUlog("EATAN P"); }
}
|
|
|
|
// EATANxy: feeds (y - x) / (x + y) of Fs into the shared mVU_EATAN_ series
// and stores the result in P.
mVUop(mVU_EATANxy) {
	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
	pass2 {
		const xmm& vec  = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); // full vector, x in lane 0
		const xmm& elem = mVU.regAlloc->allocReg();
		const xmm& tmp  = mVU.regAlloc->allocReg();
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(elem, vec, 0x01);      // lane 0 of elem = y
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		xMOVSS  (xmmPQ, elem);
		SSE_SUBSS(mVU, elem, vec);   // y-x, not y-1? ><
		SSE_ADDSS(mVU, vec,  xmmPQ); // x+y
		SSE_DIVSS(mVU, elem, vec);
		mVU_EATAN_(mVU, xmmPQ, elem, vec, tmp); // also flips xmmPQ back

		mVU.regAlloc->clearNeeded(elem);
		mVU.regAlloc->clearNeeded(vec);
		mVU.regAlloc->clearNeeded(tmp);
		mVU.profiler.EmitOp(opEATANxy);
	}
	pass3 { mVUlog("EATANxy P"); }
}
|
|
|
|
// EATANxz: feeds (z - x) / (x + z) of Fs into the shared mVU_EATAN_ series
// and stores the result in P.
mVUop(mVU_EATANxz) {
	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
	pass2 {
		const xmm& vec  = mVU.regAlloc->allocReg(_Fs_, 0, 0xf); // full vector, x in lane 0
		const xmm& elem = mVU.regAlloc->allocReg();
		const xmm& tmp  = mVU.regAlloc->allocReg();
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(elem, vec, 0x02);      // lane 0 of elem = z
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		xMOVSS  (xmmPQ, elem);
		SSE_SUBSS(mVU, elem, vec);   // z-x
		SSE_ADDSS(mVU, vec,  xmmPQ); // x+z
		SSE_DIVSS(mVU, elem, vec);
		mVU_EATAN_(mVU, xmmPQ, elem, vec, tmp); // also flips xmmPQ back

		mVU.regAlloc->clearNeeded(elem);
		mVU.regAlloc->clearNeeded(vec);
		mVU.regAlloc->clearNeeded(tmp);
		mVU.profiler.EmitOp(opEATANxz);
	}
	pass3 { mVUlog("EATANxz P"); }
}
|
|
|
|
// One term of the EEXP polynomial.
// Uses mVU, xmmPQ, Fs, t1 and t2 from the expanding scope:
//   t2 *= Fs;  t1 = t2 * [addr];  xmmPQ += t1
// Wrapped in do/while(0) so that `eexpHelper(x);` is a single statement and
// stays correct inside an unbraced if/else.
#define eexpHelper(addr) do { \
	SSE_MULSS(mVU, t2, Fs); \
	xMOVAPS  (t1, t2); \
	xMUL.SS  (t1, ptr32[addr]); \
	SSE_ADDSS(mVU, xmmPQ, t1); \
} while (0)
|
|
|
|
// EEXP: evaluates the polynomial 1 + E1*x + E2*x^2 + ... + E6*x^6 for
// x = Fs[fsf], raises it to the 4th power and stores the reciprocal in P.
// The local names Fs/t1/t2 are captured by eexpHelper and must not be renamed.
mVUop(mVU_EEXP) {
	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); }
	pass2 {
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& t1 = mVU.regAlloc->allocReg();
		const xmm& t2 = mVU.regAlloc->allocReg();
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance

		// pq = 1 + E1 * x
		xMOVSS  (xmmPQ, Fs);
		xMUL.SS (xmmPQ, ptr32[mVUglob.E1]);
		xADD.SS (xmmPQ, ptr32[mVUglob.one]);

		// pq += E2 * x^2   (t2 keeps the running power of x)
		xMOVAPS (t1, Fs);
		SSE_MULSS(mVU, t1, Fs);
		xMOVAPS (t2, t1);
		xMUL.SS (t1, ptr32[mVUglob.E2]);
		SSE_ADDSS(mVU, xmmPQ, t1);

		// pq += E3 * x^3 + E4 * x^4 + E5 * x^5
		eexpHelper(&mVUglob.E3);
		eexpHelper(&mVUglob.E4);
		eexpHelper(&mVUglob.E5);

		// pq += E6 * x^6
		SSE_MULSS(mVU, t2, Fs);
		xMUL.SS (t2, ptr32[mVUglob.E6]);
		SSE_ADDSS(mVU, xmmPQ, t2);

		// pq = pq^4
		SSE_MULSS(mVU, xmmPQ, xmmPQ);
		SSE_MULSS(mVU, xmmPQ, xmmPQ);

		// pq = 1 / pq
		xMOVSSZX(t2, ptr32[mVUglob.one]);
		SSE_DIVSS(mVU, t2, xmmPQ);
		xMOVSS  (xmmPQ, t2);

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(t1);
		mVU.regAlloc->clearNeeded(t2);
		mVU.profiler.EmitOp(opEEXP);
	}
	pass3 { mVUlog("EEXP P"); }
}
|
|
|
|
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
|
|
static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) {
|
|
if (x86caps.hasStreamingSIMD4Extensions) {
|
|
xDP.PS(Fs, Fs, 0x71);
|
|
xMOVSS(PQ, Fs);
|
|
}
|
|
else {
|
|
SSE_MULPS(mVU, Fs, Fs); // wzyx ^ 2
|
|
xMOVSS (PQ, Fs); // x ^ 2
|
|
xPSHUF.D (Fs, Fs, 0xe1); // wzyx -> wzxy
|
|
SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2
|
|
xPSHUF.D (Fs, Fs, 0xd2); // wzxy -> wxyz
|
|
SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2
|
|
}
|
|
}
|
|
|
|
// ELENG: P = sqrt(x^2 + y^2 + z^2) of Fs.
mVUop(mVU_ELENG) {
	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
	pass2 {
		const xmm& vec = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		mVU_sumXYZ(mVU, xmmPQ, vec);
		xSQRT.SS(xmmPQ, xmmPQ);
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(vec);
		mVU.profiler.EmitOp(opELENG);
	}
	pass3 { mVUlog("ELENG P"); }
}
|
|
|
|
// ERCPR: P = 1 / Fs[fsf].
mVUop(mVU_ERCPR) {
	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
	pass2 {
		const xmm& work = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		xMOVSS  (xmmPQ, work);                  // pq   = x
		xMOVSSZX(work, ptr32[mVUglob.one]);     // work = 1
		SSE_DIVSS(mVU, work, xmmPQ);            // work = 1 / x
		xMOVSS  (xmmPQ, work);
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(work);
		mVU.profiler.EmitOp(opERCPR);
	}
	pass3 { mVUlog("ERCPR P"); }
}
|
|
|
|
// ERLENG: P = 1 / sqrt(x^2 + y^2 + z^2) of Fs.
mVUop(mVU_ERLENG) {
	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); }
	pass2 {
		const xmm& work = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		mVU_sumXYZ(mVU, xmmPQ, work);
		xSQRT.SS(xmmPQ, xmmPQ);
		xMOVSSZX(work, ptr32[mVUglob.one]);     // work = 1
		SSE_DIVSS(mVU, work, xmmPQ);            // work = 1 / length
		xMOVSS  (xmmPQ, work);
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(work);
		mVU.profiler.EmitOp(opERLENG);
	}
	pass3 { mVUlog("ERLENG P"); }
}
|
|
|
|
// ERSADD: P = 1 / (x^2 + y^2 + z^2) of Fs.
mVUop(mVU_ERSADD) {
	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
	pass2 {
		const xmm& work = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		mVU_sumXYZ(mVU, xmmPQ, work);
		xMOVSSZX(work, ptr32[mVUglob.one]);     // work = 1
		SSE_DIVSS(mVU, work, xmmPQ);            // work = 1 / sum
		xMOVSS  (xmmPQ, work);
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(work);
		mVU.profiler.EmitOp(opERSADD);
	}
	pass3 { mVUlog("ERSADD P"); }
}
|
|
|
|
// ERSQRT: P = 1 / sqrt(|Fs[fsf]|).
mVUop(mVU_ERSQRT) {
	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); }
	pass2 {
		const xmm& work = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		xAND.PS (work, ptr128[mVUglob.absclip]); // drop the sign before the sqrt
		xSQRT.SS(xmmPQ, work);
		xMOVSSZX(work, ptr32[mVUglob.one]);      // work = 1
		SSE_DIVSS(mVU, work, xmmPQ);             // work = 1 / sqrt
		xMOVSS  (xmmPQ, work);
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(work);
		mVU.profiler.EmitOp(opERSQRT);
	}
	pass3 { mVUlog("ERSQRT P"); }
}
|
|
|
|
// ESADD: P = x^2 + y^2 + z^2 of Fs.
mVUop(mVU_ESADD) {
	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); }
	pass2 {
		const xmm& vec = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		mVU_sumXYZ(mVU, xmmPQ, vec);
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(vec);
		mVU.profiler.EmitOp(opESADD);
	}
	pass3 { mVUlog("ESADD P"); }
}
|
|
|
|
// ESIN: P = X + s2*X^3 + s3*X^5 + s4*X^7 + s5*X^9, with X = Fs[fsf].
// pass1 uses the single-field EFU analysis (mVUanalyzeEFU1 with _Fsf_), which
// matches the register read in pass2: only the fsf-selected component of Fs
// is allocated and consumed, same as the other single-field EFU ops in this file.
mVUop(mVU_ESIN) {
	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 29); }
	pass2 {
		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const xmm& t1 = mVU.regAlloc->allocReg();
		const xmm& t2 = mVU.regAlloc->allocReg();
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
		xMOVSS   (xmmPQ, Fs);              // pq = X
		SSE_MULSS(mVU, Fs, Fs);            // fs = X^2
		xMOVAPS  (t1, Fs);                 // t1 = X^2
		SSE_MULSS(mVU, Fs, xmmPQ);         // fs = X^3
		xMOVAPS  (t2, Fs);                 // t2 = X^3
		xMUL.SS  (Fs, ptr32[mVUglob.S2]);  // fs = s2 * X^3
		SSE_ADDSS(mVU, xmmPQ, Fs);         // pq = X + s2 * X^3

		SSE_MULSS(mVU, t2, t1);            // t2 = X^3 * X^2
		xMOVAPS  (Fs, t2);                 // fs = X^5
		xMUL.SS  (Fs, ptr32[mVUglob.S3]);  // fs = s3 * X^5
		SSE_ADDSS(mVU, xmmPQ, Fs);         // pq = X + s2 * X^3 + s3 * X^5

		SSE_MULSS(mVU, t2, t1);            // t2 = X^5 * X^2
		xMOVAPS  (Fs, t2);                 // fs = X^7
		xMUL.SS  (Fs, ptr32[mVUglob.S4]);  // fs = s4 * X^7
		SSE_ADDSS(mVU, xmmPQ, Fs);         // pq = X + s2 * X^3 + s3 * X^5 + s4 * X^7

		SSE_MULSS(mVU, t2, t1);            // t2 = X^7 * X^2
		xMUL.SS  (t2, ptr32[mVUglob.S5]);  // t2 = s5 * X^9
		SSE_ADDSS(mVU, xmmPQ, t2);         // pq = X + s2 * X^3 + s3 * X^5 + s4 * X^7 + s5 * X^9
		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
		mVU.regAlloc->clearNeeded(Fs);
		mVU.regAlloc->clearNeeded(t1);
		mVU.regAlloc->clearNeeded(t2);
		mVU.profiler.EmitOp(opESIN);
	}
	pass3 { mVUlog("ESIN P"); }
}
|
|
|
|
// ESQRT: P = sqrt(|Fs[fsf]|).
mVUop(mVU_ESQRT) {
	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
	pass2 {
		const xmm& src = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		xAND.PS (src, ptr128[mVUglob.absclip]); // drop the sign before the sqrt
		xSQRT.SS(xmmPQ, src);
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(src);
		mVU.profiler.EmitOp(opESQRT);
	}
	pass3 { mVUlog("ESQRT P"); }
}
|
|
|
|
// ESUM: P = x + y + z + w of Fs.
mVUop(mVU_ESUM) {
	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); }
	pass2 {
		const xmm& acc = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		const xmm& tmp = mVU.regAlloc->allocReg();
		const int  flipPQ = mVUinfo.writeP ? 0x27 : 0xC6;

		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip xmmPQ to get the valid P instance
		xPSHUF.D(tmp, acc, 0x1b);       // tmp = acc with lane order reversed
		SSE_ADDPS(mVU, acc, tmp);       // lane 0 = x+w, lane 1 = y+z
		xPSHUF.D(tmp, acc, 0x01);       // bring lane 1 down to lane 0
		SSE_ADDSS(mVU, acc, tmp);       // lane 0 = x+y+z+w
		xMOVSS  (xmmPQ, acc);
		xPSHUF.D(xmmPQ, xmmPQ, flipPQ); // Flip back

		mVU.regAlloc->clearNeeded(acc);
		mVU.regAlloc->clearNeeded(tmp);
		mVU.profiler.EmitOp(opESUM);
	}
	pass3 { mVUlog("ESUM P"); }
}
|
|
|
|
//------------------------------------------------------------------
|
|
// FCAND/FCEQ/FCGET/FCOR/FCSET
|
|
//------------------------------------------------------------------
|
|
|
|
// FCAND: vi01 = ((clip flag & imm24) != 0) ? 1 : 0.
mVUop(mVU_FCAND) {
	pass1 {
		mVUanalyzeCflag(mVU, 1);
	}
	pass2 {
		mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
		xAND(gprT1, _Imm24_);
		// Any surviving low bit carries into bit 24 when 0xffffff is added
		// (assumes _Imm24_ stays within 24 bits); the shift then isolates that carry.
		xADD(gprT1, 0xffffff);
		xSHR(gprT1, 24);
		mVUallocVIb(mVU, gprT1, 1);
		mVU.profiler.EmitOp(opFCAND);
	}
	pass3 {
		mVUlog("FCAND vi01, $%x", _Imm24_);
	}
	pass4 {
		mVUregs.needExactMatch |= 4;
	}
}
|
|
|
|
// FCEQ: vi01 = (clip flag == imm24) ? 1 : 0.
mVUop(mVU_FCEQ) {
	pass1 {
		mVUanalyzeCflag(mVU, 1);
	}
	pass2 {
		mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
		xXOR(gprT1, _Imm24_);
		// An XOR result of zero underflows to all ones, setting bit 31; a small
		// non-zero result does not (assumes both values fit in 24 bits).
		xSUB(gprT1, 1);
		xSHR(gprT1, 31);
		mVUallocVIb(mVU, gprT1, 1);
		mVU.profiler.EmitOp(opFCEQ);
	}
	pass3 {
		mVUlog("FCEQ vi01, $%x", _Imm24_);
	}
	pass4 {
		mVUregs.needExactMatch |= 4;
	}
}
|
|
|
|
// FCGET: VI[it] = low 12 bits of the clip flag.
mVUop(mVU_FCGET) {
	pass1 {
		mVUanalyzeCflag(mVU, _It_);
	}
	pass2 {
		mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
		xAND(gprT1, 0xfff);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opFCGET);
	}
	pass3 {
		mVUlog("FCGET vi%02d", _Ft_);
	}
	pass4 {
		mVUregs.needExactMatch |= 4;
	}
}
|
|
|
|
// FCOR: vi01 = 1 when (clip flag | imm24) has all 24 low bits set, else 0.
mVUop(mVU_FCOR) {
	pass1 {
		mVUanalyzeCflag(mVU, 1);
	}
	pass2 {
		mVUallocCFLAGa(mVU, gprT1, cFLAG.read);
		xOR(gprT1, _Imm24_);
		xADD(gprT1, 1);  // 24 ones + 1 carries into the 25th bit; anything else does not
		xSHR(gprT1, 24); // Keep only the 25th bit (also drops the remaining garbage)
		mVUallocVIb(mVU, gprT1, 1);
		mVU.profiler.EmitOp(opFCOR);
	}
	pass3 {
		mVUlog("FCOR vi01, $%x", _Imm24_);
	}
	pass4 {
		mVUregs.needExactMatch |= 4;
	}
}
|
|
|
|
// FCSET: clip flag = imm24.
mVUop(mVU_FCSET) {
	pass1 {
		cFLAG.doFlag = true;
	}
	pass2 {
		xMOV(gprT1, _Imm24_);
		mVUallocCFLAGb(mVU, gprT1, cFLAG.write);
		mVU.profiler.EmitOp(opFCSET);
	}
	pass3 {
		mVUlog("FCSET $%x", _Imm24_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// FMAND/FMEQ/FMOR
|
|
//------------------------------------------------------------------
|
|
|
|
// FMAND: VI[it] = MAC flag & VI[is].
mVUop(mVU_FMAND) {
	pass1 {
		mVUanalyzeMflag(mVU, _Is_, _It_);
	}
	pass2 {
		mVUallocMFLAGa(mVU, gprT1, mFLAG.read); // T1 = MAC flag
		mVUallocVIa(mVU, gprT2, _Is_);          // T2 = VI[is]
		xAND(gprT1b, gprT2b);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opFMAND);
	}
	pass3 {
		mVUlog("FMAND vi%02d, vi%02d", _Ft_, _Fs_);
	}
	pass4 {
		mVUregs.needExactMatch |= 2;
	}
}
|
|
|
|
// FMEQ: VI[it] = (MAC flag == VI[is]) ? 1 : 0.
mVUop(mVU_FMEQ) {
	pass1 {
		mVUanalyzeMflag(mVU, _Is_, _It_);
	}
	pass2 {
		mVUallocMFLAGa(mVU, gprT1, mFLAG.read); // T1 = MAC flag
		mVUallocVIa(mVU, gprT2, _Is_);          // T2 = VI[is]
		xXOR(gprT1, gprT2);
		// Zero (equal) underflows to all ones, so bit 31 ends up as the result.
		xSUB(gprT1, 1);
		xSHR(gprT1, 31);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opFMEQ);
	}
	pass3 {
		mVUlog("FMEQ vi%02d, vi%02d", _Ft_, _Fs_);
	}
	pass4 {
		mVUregs.needExactMatch |= 2;
	}
}
|
|
|
|
// FMOR: VI[it] = MAC flag | VI[is].
mVUop(mVU_FMOR) {
	pass1 {
		mVUanalyzeMflag(mVU, _Is_, _It_);
	}
	pass2 {
		mVUallocMFLAGa(mVU, gprT1, mFLAG.read); // T1 = MAC flag
		mVUallocVIa(mVU, gprT2, _Is_);          // T2 = VI[is]
		xOR(gprT1b, gprT2b);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opFMOR);
	}
	pass3 {
		mVUlog("FMOR vi%02d, vi%02d", _Ft_, _Fs_);
	}
	pass4 {
		mVUregs.needExactMatch |= 2;
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// FSAND/FSEQ/FSOR/FSSET
|
|
//------------------------------------------------------------------
|
|
|
|
// FSAND: VI[it] = status flag & imm12.
// Prints a dev-console note when the immediate touches the I/D or U/O flag groups.
mVUop(mVU_FSAND) {
	pass1 {
		mVUanalyzeSflag(mVU, _It_);
	}
	pass2 {
		if (_Imm12_ & 0x0c30) {
			DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking I/D/IS/DS Flags");
		}
		if (_Imm12_ & 0x030c) {
			DevCon.WriteLn(Color_Green, "mVU_FSAND: Checking U/O/US/OS Flags");
		}
		mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
		xAND(gprT1, _Imm12_);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opFSAND);
	}
	pass3 {
		mVUlog("FSAND vi%02d, $%x", _Ft_, _Imm12_);
	}
	pass4 {
		mVUregs.needExactMatch |= 1;
	}
}
|
|
|
|
// FSOR: VI[it] = status flag | imm12.
mVUop(mVU_FSOR) {
	pass1 {
		mVUanalyzeSflag(mVU, _It_);
	}
	pass2 {
		mVUallocSFLAGc(gprT1, gprT2, sFLAG.read);
		xOR(gprT1, _Imm12_);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opFSOR);
	}
	pass3 {
		mVUlog("FSOR vi%02d, $%x", _Ft_, _Imm12_);
	}
	pass4 {
		mVUregs.needExactMatch |= 1;
	}
}
|
|
|
|
// FSEQ: VI[it] = (status flag == imm12) ? 1 : 0.
// The 12-bit immediate is first translated into the internal status-flag
// layout, then compared against the (normalized) internal flag register.
mVUop(mVU_FSEQ) {
	pass1 {
		mVUanalyzeSflag(mVU, _It_);
	}
	pass2 {
		// Immediate bit -> mask in the internal status-flag layout.
		static const struct { int immBit; int flagMask; } kStatusMap[] = {
			{ 0x0001, 0x0000f00 }, // Z
			{ 0x0002, 0x000f000 }, // S
			{ 0x0004, 0x0010000 }, // U
			{ 0x0008, 0x0020000 }, // O
			{ 0x0010, 0x0040000 }, // I
			{ 0x0020, 0x0080000 }, // D
			{ 0x0040, 0x000000f }, // ZS
			{ 0x0080, 0x00000f0 }, // SS
			{ 0x0100, 0x0400000 }, // US
			{ 0x0200, 0x0800000 }, // OS
			{ 0x0400, 0x1000000 }, // IS
			{ 0x0800, 0x2000000 }, // DS
		};

		if (_Imm12_ & 0x0c30) {
			DevCon.WriteLn(Color_Green, "mVU_FSEQ: Checking I/D/IS/DS Flags");
		}
		if (_Imm12_ & 0x030c) {
			DevCon.WriteLn(Color_Green, "mVU_FSEQ: Checking U/O/US/OS Flags");
		}

		int imm = 0;
		for (unsigned i = 0; i < sizeof(kStatusMap) / sizeof(kStatusMap[0]); i++) {
			if (_Imm12_ & kStatusMap[i].immBit) {
				imm |= kStatusMap[i].flagMask;
			}
		}

		mVUallocSFLAGa(gprT1, sFLAG.read);
		setBitFSEQ(gprT1, 0x0f00); // Z bit
		setBitFSEQ(gprT1, 0xf000); // S bit
		setBitFSEQ(gprT1, 0x000f); // ZS bit
		setBitFSEQ(gprT1, 0x00f0); // SS bit
		xXOR(gprT1, imm);
		// Zero (equal) underflows to all ones, so bit 31 ends up as the result.
		xSUB(gprT1, 1);
		xSHR(gprT1, 31);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opFSEQ);
	}
	pass3 {
		mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_);
	}
	pass4 {
		mVUregs.needExactMatch |= 1;
	}
}
|
|
|
|
// FSSET: replaces the sticky bits of the status flag with those selected by
// imm12, leaving the non-sticky bits untouched.
mVUop(mVU_FSSET) {
	pass1 {
		mVUanalyzeFSSET(mVU);
	}
	pass2 {
		// Sticky immediate bit -> mask in the internal status-flag layout.
		static const struct { int immBit; int flagMask; } kStickyMap[] = {
			{ 0x0040, 0x000000f }, // ZS
			{ 0x0080, 0x00000f0 }, // SS
			{ 0x0100, 0x0400000 }, // US
			{ 0x0200, 0x0800000 }, // OS
			{ 0x0400, 0x1000000 }, // IS
			{ 0x0800, 0x2000000 }, // DS
		};

		int imm = 0;
		for (unsigned i = 0; i < sizeof(kStickyMap) / sizeof(kStickyMap[0]); i++) {
			if (_Imm12_ & kStickyMap[i].immBit) {
				imm |= kStickyMap[i].flagMask;
			}
		}

		// Nothing else refreshes the flag instance this cycle: start from the previous one.
		if (!sFLAG.doFlag && !mVUinfo.doDivFlag) {
			mVUallocSFLAGa(getFlagReg(sFLAG.write), sFLAG.lastWrite); // Get Prev Status Flag
		}
		xAND(getFlagReg(sFLAG.write), 0xfff00); // Keep Non-Sticky Bits
		if (imm) {
			xOR(getFlagReg(sFLAG.write), imm);
		}
		mVU.profiler.EmitOp(opFSSET);
	}
	pass3 {
		mVUlog("FSSET $%x", _Imm12_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// IADD/IADDI/IADDIU/IAND/IOR/ISUB/ISUBIU
|
|
//------------------------------------------------------------------
|
|
|
|
// IADD: VI[id] = VI[is] + VI[it].
mVUop(mVU_IADD) {
	pass1 {
		mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_);
	}
	pass2 {
		mVUallocVIa(mVU, gprT1, _Is_);
		if (_It_ == _Is_) {
			// Same source twice: double it without a second load.
			xADD(gprT1b, gprT1b);
		}
		else {
			mVUallocVIa(mVU, gprT2, _It_);
			xADD(gprT1b, gprT2b);
		}
		mVUallocVIb(mVU, gprT1, _Id_);
		mVU.profiler.EmitOp(opIADD);
	}
	pass3 {
		mVUlog("IADD vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_);
	}
}
|
|
|
|
// IADDI: VI[it] = VI[is] + imm5.
mVUop(mVU_IADDI) {
	pass1 {
		mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm5_);
	}
	pass2 {
		mVUallocVIa(mVU, gprT1, _Is_);
		xADD(gprT1b, _Imm5_);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opIADDI);
	}
	pass3 {
		mVUlog("IADDI vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm5_);
	}
}
|
|
|
|
// IADDIU: VI[it] = VI[is] + imm15.
mVUop(mVU_IADDIU) {
	pass1 {
		mVUanalyzeIADDI(mVU, _Is_, _It_, _Imm15_);
	}
	pass2 {
		mVUallocVIa(mVU, gprT1, _Is_);
		xADD(gprT1b, _Imm15_);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opIADDIU);
	}
	pass3 {
		mVUlog("IADDIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_);
	}
}
|
|
|
|
// IAND: VI[id] = VI[is] & VI[it].
mVUop(mVU_IAND) {
	pass1 {
		mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_);
	}
	pass2 {
		mVUallocVIa(mVU, gprT1, _Is_);
		// x & x == x, so the AND is only emitted for distinct sources.
		if (_It_ != _Is_) {
			mVUallocVIa(mVU, gprT2, _It_);
			xAND(gprT1, gprT2);
		}
		mVUallocVIb(mVU, gprT1, _Id_);
		mVU.profiler.EmitOp(opIAND);
	}
	pass3 {
		mVUlog("IAND vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_);
	}
}
|
|
|
|
// IOR: VI[id] = VI[is] | VI[it].
mVUop(mVU_IOR) {
	pass1 {
		mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_);
	}
	pass2 {
		mVUallocVIa(mVU, gprT1, _Is_);
		// x | x == x, so the OR is only emitted for distinct sources.
		if (_It_ != _Is_) {
			mVUallocVIa(mVU, gprT2, _It_);
			xOR(gprT1, gprT2);
		}
		mVUallocVIb(mVU, gprT1, _Id_);
		mVU.profiler.EmitOp(opIOR);
	}
	pass3 {
		mVUlog("IOR vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_);
	}
}
|
|
|
|
// ISUB: VI[id] = VI[is] - VI[it].
mVUop(mVU_ISUB) {
	pass1 {
		mVUanalyzeIALU1(mVU, _Id_, _Is_, _It_);
	}
	pass2 {
		if (_It_ == _Is_) {
			// x - x is always zero; no loads needed.
			xXOR(gprT1, gprT1);
		}
		else {
			mVUallocVIa(mVU, gprT1, _Is_);
			mVUallocVIa(mVU, gprT2, _It_);
			xSUB(gprT1b, gprT2b);
		}
		mVUallocVIb(mVU, gprT1, _Id_);
		mVU.profiler.EmitOp(opISUB);
	}
	pass3 {
		mVUlog("ISUB vi%02d, vi%02d, vi%02d", _Fd_, _Fs_, _Ft_);
	}
}
|
|
|
|
// ISUBIU: VI[it] = VI[is] - imm15.
mVUop(mVU_ISUBIU) {
	pass1 {
		mVUanalyzeIALU2(mVU, _Is_, _It_);
	}
	pass2 {
		mVUallocVIa(mVU, gprT1, _Is_);
		xSUB(gprT1b, _Imm15_);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opISUBIU);
	}
	pass3 {
		mVUlog("ISUBIU vi%02d, vi%02d, %d", _Ft_, _Fs_, _Imm15_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// MFIR/MFP/MOVE/MR32/MTIR
|
|
//------------------------------------------------------------------
|
|
|
|
// MFIR: copies VI[is] into the selected fields of VF[ft].
// Marked as a NOP during analysis when the destination is vf00.
mVUop(mVU_MFIR) {
	pass1 {
		if (!_Ft_) {
			mVUlow.isNOP = true;
		}
		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeReg2  (mVU, _Ft_, mVUlow.VF_write, 1);
	}
	pass2 {
		const xmm& dest = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		// NOTE(review): the trailing `true` presumably requests a sign-extended load — confirm in mVUallocVIa.
		mVUallocVIa(mVU, gprT1, _Is_, true);
		xMOVDZX(dest, gprT1);
		// More than one destination field: broadcast lane 0 to all lanes.
		if (!_XYZW_SS) {
			mVUunpack_xyzw(dest, dest, 0);
		}
		mVU.regAlloc->clearNeeded(dest);
		mVU.profiler.EmitOp(opMFIR);
	}
	pass3 {
		mVUlog("MFIR.%s vf%02d, vi%02d", _XYZW_String, _Ft_, _Fs_);
	}
}
|
|
|
|
// MFP: copies the P register into the selected fields of VF[ft].
mVUop(mVU_MFP) {
	pass1 {
		mVUanalyzeMFP(mVU, _Ft_);
	}
	pass2 {
		const xmm& dest = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		getPreg(mVU, dest);
		mVU.regAlloc->clearNeeded(dest);
		mVU.profiler.EmitOp(opMFP);
	}
	pass3 {
		mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_);
	}
}
|
|
|
|
// MOVE: VF[ft] = VF[fs] for the selected fields.
// No explicit move is emitted here; the register is allocated as "read Fs,
// write Ft" and released again, leaving the transfer to the allocator.
mVUop(mVU_MOVE) {
	pass1 {
		mVUanalyzeMOVE(mVU, _Fs_, _Ft_);
	}
	pass2 {
		const xmm& reg = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W);
		mVU.regAlloc->clearNeeded(reg);
		mVU.profiler.EmitOp(opMOVE);
	}
	pass3 {
		mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_);
	}
}
|
|
|
|
// MR32: VF[ft] = VF[fs] rotated by one field (x<-y, y<-z, z<-w, w<-x).
mVUop(mVU_MR32) {
	pass1 {
		mVUanalyzeMR32(mVU, _Fs_, _Ft_);
	}
	pass2 {
		const xmm& src  = mVU.regAlloc->allocReg(_Fs_);
		const xmm& dest = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		if (_XYZW_SS) {
			// Single destination field: broadcast the source lane that rotates into it.
			mVUunpack_xyzw(dest, src, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0))));
		}
		else {
			xPSHUF.D(dest, src, 0x39);
		}
		mVU.regAlloc->clearNeeded(dest);
		mVU.regAlloc->clearNeeded(src);
		mVU.profiler.EmitOp(opMR32);
	}
	pass3 {
		mVUlog("MR32.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_);
	}
}
|
|
|
|
// MTIR: VI[it] = VF[fs][fsf] (raw bits moved through a GPR).
// Marked as a NOP during analysis when the destination is vi00.
mVUop(mVU_MTIR) {
	pass1 {
		if (!_It_) {
			mVUlow.isNOP = true;
		}
		analyzeReg5  (mVU, _Fs_, _Fsf_, mVUlow.VF_read[0]);
		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1);
	}
	pass2 {
		const xmm& src = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
		xMOVD(gprT1, src);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.regAlloc->clearNeeded(src);
		mVU.profiler.EmitOp(opMTIR);
	}
	pass3 {
		mVUlog("MTIR vi%02d, vf%02d%s", _Ft_, _Fs_, _Fsf_String);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// ILW/ILWR
|
|
//------------------------------------------------------------------
|
|
|
|
// ILW: VI[it] = 16-bit load from VU memory at (VI[is] + imm11), field offset offsetSS.
// Marked as a NOP during analysis when the destination is vi00.
mVUop(mVU_ILW) {
	pass1 {
		if (!_It_) {
			mVUlow.isNOP = true;
		}
		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
	}
	pass2 {
		xAddressVoid ptr(mVU.regs().Mem + offsetSS);
		if (!_Is_) {
			// Base is vi00: the address is known while recompiling.
			ptr += getVUmem(_Imm11_);
		}
		else {
			mVUallocVIa(mVU, gprT2, _Is_);
			xADD(gprT2, _Imm11_);
			mVUaddrFix(mVU, gprT2);
			ptr += gprT2;
		}
		xMOVZX(gprT1, ptr16[ptr]);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opILW);
	}
	pass3 {
		mVUlog("ILW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_);
	}
}
|
|
|
|
// ILWR: VI[it] = 16-bit load from VU memory at VI[is], field offset offsetSS.
// Marked as a NOP during analysis when the destination is vi00.
mVUop(mVU_ILWR) {
	pass1 {
		if (!_It_) {
			mVUlow.isNOP = true;
		}
		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
	}
	pass2 {
		xAddressVoid ptr(mVU.regs().Mem + offsetSS);
		// With vi00 as base the address is simply the start of VU memory.
		if (_Is_) {
			mVUallocVIa(mVU, gprT2, _Is_);
			mVUaddrFix(mVU, gprT2);
			ptr += gprT2;
		}
		xMOVZX(gprT1, ptr16[ptr]);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opILWR);
	}
	pass3 {
		mVUlog("ILWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// ISW/ISWR
|
|
//------------------------------------------------------------------
|
|
|
|
// ISW: stores VI[it] as a 32-bit value into each selected field of the
// quadword at VU memory address (VI[is] + imm11).
mVUop(mVU_ISW) {
	pass1 {
		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]);
	}
	pass2 {
		xAddressVoid ptr(mVU.regs().Mem);
		if (!_Is_) {
			// Base is vi00: the address is known while recompiling.
			ptr += getVUmem(_Imm11_);
		}
		else {
			mVUallocVIa(mVU, gprT2, _Is_);
			xADD(gprT2, _Imm11_);
			mVUaddrFix(mVU, gprT2);
			ptr += gprT2;
		}
		mVUallocVIa(mVU, gprT1, _It_);
		if (_X) { xMOV(ptr32[ptr],    gprT1); }
		if (_Y) { xMOV(ptr32[ptr+4],  gprT1); }
		if (_Z) { xMOV(ptr32[ptr+8],  gprT1); }
		if (_W) { xMOV(ptr32[ptr+12], gprT1); }
		mVU.profiler.EmitOp(opISW);
	}
	pass3 {
		mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_);
	}
}
|
|
|
|
// ISWR: stores VI[it] as a 32-bit value into each selected field of the
// quadword at VU memory address VI[is].
mVUop(mVU_ISWR) {
	pass1 {
		analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
		analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]);
	}
	pass2 {
		xAddressVoid ptr(mVU.regs().Mem);
		// With vi00 as base the address is simply the start of VU memory.
		if (_Is_) {
			mVUallocVIa(mVU, gprT2, _Is_);
			mVUaddrFix(mVU, gprT2);
			ptr += gprT2;
		}
		mVUallocVIa(mVU, gprT1, _It_);
		if (_X) { xMOV(ptr32[ptr],    gprT1); }
		if (_Y) { xMOV(ptr32[ptr+4],  gprT1); }
		if (_Z) { xMOV(ptr32[ptr+8],  gprT1); }
		if (_W) { xMOV(ptr32[ptr+12], gprT1); }
		mVU.profiler.EmitOp(opISWR);
	}
	pass3 {
		mVUlog("ISWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// LQ/LQD/LQI
|
|
//------------------------------------------------------------------
|
|
|
|
// LQ: loads the selected fields of VF[ft] from VU memory at (VI[is] + imm11).
mVUop(mVU_LQ) {
	pass1 {
		mVUanalyzeLQ(mVU, _Ft_, _Is_, false);
	}
	pass2 {
		xAddressVoid ptr(mVU.regs().Mem);
		if (!_Is_) {
			// Base is vi00: the address is known while recompiling.
			ptr += getVUmem(_Imm11_);
		}
		else {
			mVUallocVIa(mVU, gprT2, _Is_);
			xADD(gprT2, _Imm11_);
			mVUaddrFix(mVU, gprT2);
			ptr += gprT2;
		}
		const xmm& dest = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
		mVUloadReg(dest, ptr, _X_Y_Z_W);
		mVU.regAlloc->clearNeeded(dest);
		mVU.profiler.EmitOp(opLQ);
	}
	pass3 {
		mVUlog("LQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_);
	}
}
|
|
|
|
// LQD: pre-decrements VI[is], then loads the selected fields of VF[ft] from
// that address. The VF write is skipped when analysis set noWriteVF.
mVUop(mVU_LQD) {
	pass1 {
		mVUanalyzeLQ(mVU, _Ft_, _Is_, true);
	}
	pass2 {
		xAddressVoid ptr(mVU.regs().Mem);
		if (_Is_ || isVU0) { // Access VU1 regs mem-map in !_Is_ case
			mVUallocVIa(mVU, gprT2, _Is_);
			xSUB(gprT2b, 1);
			// vi00 is never written back.
			if (_Is_) {
				mVUallocVIb(mVU, gprT2, _Is_);
			}
			mVUaddrFix(mVU, gprT2);
			ptr += gprT2;
		}
		else {
			ptr += (0xffff & (mVU.microMemSize-8));
		}

		if (!mVUlow.noWriteVF) {
			const xmm& dest = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
			mVUloadReg(dest, ptr, _X_Y_Z_W);
			mVU.regAlloc->clearNeeded(dest);
		}
		mVU.profiler.EmitOp(opLQD);
	}
	pass3 {
		mVUlog("LQD.%s vf%02d, --vi%02d", _XYZW_String, _Ft_, _Is_);
	}
}
|
|
|
|
// LQI: loads the selected fields of VF[ft] from the address in VI[is], then
// post-increments VI[is]. The VF write is skipped when analysis set noWriteVF.
mVUop(mVU_LQI) {
	pass1 {
		mVUanalyzeLQ(mVU, _Ft_, _Is_, true);
	}
	pass2 {
		xAddressVoid ptr(mVU.regs().Mem);
		if (_Is_) {
			mVUallocVIa(mVU, gprT1, _Is_);
			xMOV(gprT2, gprT1);            // T2 keeps the un-incremented address
			xADD(gprT1b, 1);
			mVUallocVIb(mVU, gprT1, _Is_); // write back VI[is] + 1
			mVUaddrFix(mVU, gprT2);
			ptr += gprT2;
		}

		if (!mVUlow.noWriteVF) {
			const xmm& dest = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
			mVUloadReg(dest, ptr, _X_Y_Z_W);
			mVU.regAlloc->clearNeeded(dest);
		}
		mVU.profiler.EmitOp(opLQI);
	}
	pass3 {
		mVUlog("LQI.%s vf%02d, vi%02d++", _XYZW_String, _Ft_, _Fs_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// SQ/SQD/SQI
|
|
//------------------------------------------------------------------
|
|
|
|
// SQ: stores the selected fields of vf[Fs] to VU memory at vi[It] + Imm11
// (operand layout as printed by the pass3 log string).
mVUop(mVU_SQ) {
	pass1 {
		mVUanalyzeSQ(mVU, _Fs_, _It_, false);
	}
	pass2 {
		xAddressVoid memAddr(mVU.regs().Mem);
		if (!_It_) {
			// No base register given: the immediate alone selects the address.
			memAddr += getVUmem(_Imm11_);
		}
		else {
			mVUallocVIa(mVU, gprT2, _It_);
			xADD(gprT2, _Imm11_);
			mVUaddrFix(mVU, gprT2);
			memAddr += gprT2;
		}
		const xmm& srcReg = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		mVUsaveReg(srcReg, memAddr, _X_Y_Z_W, 1);
		mVU.regAlloc->clearNeeded(srcReg);
		mVU.profiler.EmitOp(opSQ);
	}
	pass3 {
		mVUlog("SQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Fs_, _Ft_, _Imm11_);
	}
}
|
|
|
|
// SQD: pre-decrements vi[It], then stores vf[Fs] at the decremented address.
mVUop(mVU_SQD) {
	pass1 {
		mVUanalyzeSQ(mVU, _Fs_, _It_, true);
	}
	pass2 {
		xAddressVoid memAddr(mVU.regs().Mem);
		if (!(_It_ || isVU0)) {
			// Neither a base register nor VU0: use a fixed offset derived from microMemSize.
			memAddr += (0xffff & (mVU.microMemSize-8));
		}
		else { // Access VU1 regs mem-map in !_It_ case
			mVUallocVIa(mVU, gprT2, _It_);
			xSUB(gprT2b, 1);
			// The decremented value is only written back when a real register was named.
			if (_It_) {
				mVUallocVIb(mVU, gprT2, _It_);
			}
			mVUaddrFix(mVU, gprT2);
			memAddr += gprT2;
		}
		const xmm& srcReg = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		mVUsaveReg(srcReg, memAddr, _X_Y_Z_W, 1);
		mVU.regAlloc->clearNeeded(srcReg);
		mVU.profiler.EmitOp(opSQD);
	}
	pass3 {
		mVUlog("SQD.%s vf%02d, --vi%02d", _XYZW_String, _Fs_, _Ft_);
	}
}
|
|
|
|
// SQI: stores vf[Fs] at the address in vi[It], then post-increments vi[It].
mVUop(mVU_SQI) {
	pass1 {
		mVUanalyzeSQ(mVU, _Fs_, _It_, true);
	}
	pass2 {
		xAddressVoid memAddr(mVU.regs().Mem);
		if (_It_) {
			mVUallocVIa(mVU, gprT1, _It_);
			// gprT2 keeps the pre-increment value used as the address;
			// gprT1 carries the incremented value that is written back.
			xMOV(gprT2, gprT1);
			xADD(gprT1b, 1);
			mVUallocVIb(mVU, gprT1, _It_);
			mVUaddrFix(mVU, gprT2);
			memAddr += gprT2;
		}
		// With no base register the address stays at the start of Mem.
		const xmm& srcReg = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
		mVUsaveReg(srcReg, memAddr, _X_Y_Z_W, 1);
		mVU.regAlloc->clearNeeded(srcReg);
		mVU.profiler.EmitOp(opSQI);
	}
	pass3 {
		mVUlog("SQI.%s vf%02d, vi%02d++", _XYZW_String, _Fs_, _Ft_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// RINIT/RGET/RNEXT/RXOR
|
|
//------------------------------------------------------------------
|
|
|
|
// RINIT: seeds R from one field of vf[Fs]. The low 23 bits are taken from
// the source and the upper bits are forced to 0x3f800000.
mVUop(mVU_RINIT) {
	pass1 {
		mVUanalyzeR1(mVU, _Fs_, _Fsf_);
	}
	pass2 {
		if (!(_Fs_ || (_Fsf_ == 3))) {
			// Neither a nonzero Fs nor field 3: store the bare 0x3f800000 pattern.
			xMOV(ptr32[Rmem], 0x3f800000);
		}
		else {
			const xmm& srcReg = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
			xMOVD(gprT1, srcReg);
			xAND(gprT1, 0x007fffff);
			xOR (gprT1, 0x3f800000);
			xMOV(ptr32[Rmem], gprT1);
			mVU.regAlloc->clearNeeded(srcReg);
		}
		mVU.profiler.EmitOp(opRINIT);
	}
	pass3 {
		mVUlog("RINIT R, vf%02d%s", _Fs_, _Fsf_String);
	}
}
|
|
|
|
// Shared tail of RGET/RNEXT: moves the 32-bit value in Rreg into vf[Ft].
// Does nothing when the analysis flagged the VF write as unneeded.
static __fi void mVU_RGET_(mV, const x32& Rreg) {
	if (mVUlow.noWriteVF) {
		return;
	}
	const xmm& dstReg = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
	xMOVDZX(dstReg, Rreg);
	// Unless this is a single-scalar write, mVUunpack_xyzw is applied with
	// index 0 (presumably spreading the value to all fields).
	if (!_XYZW_SS) {
		mVUunpack_xyzw(dstReg, dstReg, 0);
	}
	mVU.regAlloc->clearNeeded(dstReg);
}
|
|
|
|
// RGET: copies the current R value into vf[Ft] without advancing it.
mVUop(mVU_RGET) {
	pass1 {
		mVUanalyzeR2(mVU, _Ft_, true);
	}
	pass2 {
		xMOV(gprT1, ptr32[Rmem]);
		mVU_RGET_(mVU, gprT1);
		mVU.profiler.EmitOp(opRGET);
	}
	pass3 {
		mVUlog("RGET.%s vf%02d, R", _XYZW_String, _Ft_);
	}
}
|
|
|
|
// RNEXT: advances R by one step and copies the new value into vf[Ft].
// The step computed below is:
//   R = (((R << 1) ^ (bit4(R) ^ bit22(R))) & 0x007fffff) | 0x3f800000
mVUop(mVU_RNEXT) {
	pass1 {
		mVUanalyzeR2(mVU, _Ft_, false);
	}
	pass2 {
		// algorithm from www.project-fao.org
		xMOV(gprT3, ptr32[Rmem]);

		// gprT1 = bit 4 of R
		xMOV(gprT1, gprT3);
		xSHR(gprT1, 4);
		xAND(gprT1, 1);

		// gprT2 = bit 22 of R
		xMOV(gprT2, gprT3);
		xSHR(gprT2, 22);
		xAND(gprT2, 1);

		// Shift R left and fold the XOR of the two tapped bits into bit 0.
		xSHL(gprT3, 1);
		xXOR(gprT1, gprT2);
		xXOR(gprT3, gprT1);

		// Keep the low 23 bits and force the upper bits to 0x3f800000.
		xAND(gprT3, 0x007fffff);
		xOR (gprT3, 0x3f800000);
		xMOV(ptr32[Rmem], gprT3);

		mVU_RGET_(mVU, gprT3);
		mVU.profiler.EmitOp(opRNEXT);
	}
	pass3 {
		mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_);
	}
}
|
|
|
|
// RXOR: XORs the low 23 bits of one field of vf[Fs] into R.
mVUop(mVU_RXOR) {
	pass1 {
		mVUanalyzeR1(mVU, _Fs_, _Fsf_);
	}
	pass2 {
		// Nothing is emitted for the XOR when Fs is zero and the field is not 3.
		if (_Fs_ || (_Fsf_ == 3)) {
			const xmm& srcReg = mVU.regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
			xMOVD(gprT1, srcReg);
			xAND(gprT1, 0x7fffff);
			xXOR(ptr32[Rmem], gprT1);
			mVU.regAlloc->clearNeeded(srcReg);
		}
		mVU.profiler.EmitOp(opRXOR);
	}
	pass3 {
		mVUlog("RXOR R, vf%02d%s", _Fs_, _Fsf_String);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// WaitP/WaitQ
|
|
//------------------------------------------------------------------
|
|
|
|
// WAITP: raises the stall count to at least (mVUregs.p - 1), or 0 when
// mVUregs.p is zero. No code is emitted beyond the profiler hook.
mVUop(mVU_WAITP) {
	pass1 {
		const u8 pStall = (u8)((mVUregs.p) ? (mVUregs.p - 1) : 0);
		mVUstall = std::max(mVUstall, pStall);
	}
	pass2 {
		mVU.profiler.EmitOp(opWAITP);
	}
	pass3 {
		mVUlog("WAITP");
	}
}
|
|
|
|
// WAITQ: raises the stall count to at least mVUregs.q.
// No code is emitted beyond the profiler hook.
mVUop(mVU_WAITQ) {
	pass1 {
		mVUstall = std::max(mVUstall, mVUregs.q);
	}
	pass2 {
		mVU.profiler.EmitOp(opWAITQ);
	}
	pass3 {
		mVUlog("WAITQ");
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// XTOP/XITOP
|
|
//------------------------------------------------------------------
|
|
|
|
// XTOP: copies the 16-bit VIF 'top' register (zero-extended) into vi[It].
mVUop(mVU_XTOP) {
	pass1 {
		// The op is flagged as a NOP when the destination is register 0.
		if (!_It_) {
			mVUlow.isNOP = true;
		}

		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1);
	}
	pass2 {
		xMOVZX(gprT1, ptr16[&mVU.getVifRegs().top]);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opXTOP);
	}
	pass3 {
		mVUlog("XTOP vi%02d", _Ft_);
	}
}
|
|
|
|
// XITOP: copies the 16-bit VIF 'itop' register into vi[It], masked to
// 0x3ff on VU1 and 0xff otherwise.
mVUop(mVU_XITOP) {
	pass1 {
		// The op is flagged as a NOP when the destination is register 0.
		if (!_It_) {
			mVUlow.isNOP = true;
		}

		analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 1);
	}
	pass2 {
		xMOVZX(gprT1, ptr16[&mVU.getVifRegs().itop]);
		xAND (gprT1, isVU1 ? 0x3ff : 0xff);
		mVUallocVIb(mVU, gprT1, _It_);
		mVU.profiler.EmitOp(opXITOP);
	}
	pass3 {
		mVUlog("XITOP vi%02d", _Ft_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// XGkick
|
|
//------------------------------------------------------------------
|
|
|
|
// Sends the GS packet that starts at VU1 memory slot 'addr' down GIF PATH1.
// 'addr' is masked to 10 bits and scaled by 16 to get a byte offset into
// vuRegs[1].Mem. A packet that would run past byte 0x4000 is split: the
// part up to 0x4000 is copied first, then the remainder is transferred
// starting from Mem[0].
void __fastcall mVU_XGKICK_(u32 addr) {
	u32 byteOffset = (addr & 0x3ff) * 16;
	u32 bytesToEnd = 0x4000 - byteOffset;
	u32 packetSize = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, byteOffset);

	if (packetSize <= bytesToEnd) {
		// Packet fits without wrapping: a single transfer is enough.
		gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[byteOffset], packetSize, true);
		return;
	}

	//DevCon.WriteLn(Color_Green, "microVU1: XGkick Wrap!");
	gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&vuRegs[1].Mem[byteOffset], bytesToEnd, true);
	gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[0], packetSize - bytesToEnd, true);
}
|
|
|
|
// Emits the call to mVU_XGKICK_ with the value stored in mVU.VIxgkick,
// wrapped in mVUbackupRegs / mVUrestoreRegs.
static __fi void mVU_XGKICK_DELAY(mV) {
	mVUbackupRegs(mVU);

#if 0 // XGkick Break - ToDo: Change "SomeGifPathValue" to w/e needs to be tested
	xTEST (ptr32[&SomeGifPathValue], 1); // If '1', breaks execution
	xMOV  (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6);
	xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6));
#endif

	xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]);

	mVUrestoreRegs(mVU);
}
|
|
|
|
// XGKICK: records vi[Is] in mVU.VIxgkick for a later transfer. If a
// previous kick is still flagged as pending, it is emitted first.
mVUop(mVU_XGKICK) {
	pass1 {
		mVUanalyzeXGkick(mVU, _Is_, mVU_XGKICK_CYCLES);
	}
	pass2 {
		// check for XGkick Transfer
		if (mVUinfo.doXGKICK) {
			mVU_XGKICK_DELAY(mVU);
			mVUinfo.doXGKICK = false;
		}

		mVUallocVIa(mVU, gprT1, _Is_);
		xMOV(ptr32[&mVU.VIxgkick], gprT1);
		mVU.profiler.EmitOp(opXGKICK);
	}
	pass3 {
		mVUlog("XGKICK vi%02d", _Fs_);
	}
}
|
|
|
|
//------------------------------------------------------------------
|
|
// Branches/Jumps
|
|
//------------------------------------------------------------------
|
|
|
|
// Records branch type 'x' for the current instruction in every pass.
// In passes 1, 2 and 4, a branch with Imm11 == 1 and a zero '_x_' is
// skipped instead: pass1 flags it as a NOP and all three return before
// mVUbranch is set.
void setBranchA(mP, int x, int _x_) {
	pass1 {
		if (_Imm11_ == 1 && !_x_) {
			DevCon.WriteLn(Color_Green, "microVU%d: Branch Optimization", mVU.index);
			mVUlow.isNOP = true;
			return;
		}
		mVUbranch     = x;
		mVUlow.branch = x;
	}
	pass2 {
		if (_Imm11_ == 1 && !_x_) {
			return;
		}
		mVUbranch = x;
	}
	pass3 {
		mVUbranch = x;
	}
	pass4 {
		if (_Imm11_ == 1 && !_x_) {
			return;
		}
		mVUbranch = x;
	}
}
|
|
|
|
// Emits the conditional-branch handling used when the current branch is
// flagged "bad" or "evil". gprT1 must already hold the condition value;
// JMPcc is the x86 condition under which the not-taken fixup is skipped.
void condEvilBranch(mV, int JMPcc) {
	if (mVUlow.badBranch) {
		xMOV(ptr32[&mVU.branch], gprT1);
		xMOV(ptr32[&mVU.badBranch], branchAddrN(mVU));

		xCMP(gprT1b, 0);
		xForwardJump8 skipNotTaken((JccComparisonType)JMPcc);
			incPC(6); // Branch Not Taken Addr + 8
			xMOV(ptr32[&mVU.badBranch], xPC);
			incPC(-6);
		skipNotTaken.SetTarget();
		return;
	}

	xMOV(ptr32[&mVU.evilBranch], branchAddr(mVU));
	xCMP(gprT1b, 0);
	xForwardJump8 skipNotTaken((JccComparisonType)JMPcc);
		xMOV(gprT1, ptr32[&mVU.badBranch]); // Branch Not Taken
		xMOV(ptr32[&mVU.evilBranch], gprT1);
	skipNotTaken.SetTarget();

	// Step back to look at the earlier instruction's branch type, then restore the PC.
	incPC(-2);
	if (mVUlow.branch >= 9) {
		DevCon.Warning("Conditional in JALR/JR delay slot - If game broken report to PCSX2 Team");
	}
	incPC(2);
}
|
|
|
|
// B: unconditional branch (branch type 1).
mVUop(mVU_B) {
	setBranchA(mX, 1, 0);
	pass1 {
		mVUanalyzeNormBranch(mVU, 0, false);
	}
	pass2 {
		// Bad/evil branches publish their target addresses for later use.
		if (mVUlow.badBranch) {
			xMOV(ptr32[&mVU.badBranch], branchAddrN(mVU));
		}
		if (mVUlow.evilBranch) {
			xMOV(ptr32[&mVU.evilBranch], branchAddr(mVU));
		}
		mVU.profiler.EmitOp(opB);
	}
	pass3 {
		mVUlog("B [<a href=\"#addr%04x\">%04x</a>]", branchAddr(mVU), branchAddr(mVU));
	}
}
|
|
|
|
// BAL: branch and link (branch type 2); bSaveAddr is written to vi[It].
mVUop(mVU_BAL) {
	setBranchA(mX, 2, _It_);
	pass1 {
		mVUanalyzeNormBranch(mVU, _It_, true);
	}
	pass2 {
		// The link register is only written here when this is not an evil branch.
		if (!mVUlow.evilBranch) {
			xMOV(gprT1, bSaveAddr);
			mVUallocVIb(mVU, gprT1, _It_);
		}

		if (mVUlow.badBranch) {
			xMOV(ptr32[&mVU.badBranch], branchAddrN(mVU));
		}
		if (mVUlow.evilBranch) {
			xMOV(ptr32[&mVU.evilBranch], branchAddr(mVU));
		}
		mVU.profiler.EmitOp(opBAL);
	}
	pass3 {
		mVUlog("BAL vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Ft_, branchAddr(mVU), branchAddr(mVU));
	}
}
|
|
|
|
// IBEQ: branch type 3. The condition value left in gprT1 is vi[Is] XOR vi[It],
// so it is zero exactly when the two operands are equal.
mVUop(mVU_IBEQ) {
	setBranchA(mX, 3, 0);
	pass1 {
		mVUanalyzeCondBranch2(mVU, _Is_, _It_);
	}
	pass2 {
		// An operand flagged memRead* is taken from VIbackup instead of the live register.
		if (mVUlow.memReadIs) {
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		}
		else {
			mVUallocVIa(mVU, gprT1, _Is_);
		}

		if (mVUlow.memReadIt) {
			xXOR(gprT1, ptr32[&mVU.VIbackup]);
		}
		else {
			mVUallocVIa(mVU, gprT2, _It_);
			xXOR(gprT1, gprT2);
		}

		if (isBadOrEvil) {
			condEvilBranch(mVU, Jcc_Equal);
		}
		else {
			xMOV(ptr32[&mVU.branch], gprT1);
		}
		mVU.profiler.EmitOp(opIBEQ);
	}
	pass3 {
		mVUlog("IBEQ vi%02d, vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Ft_, _Fs_, branchAddr(mVU), branchAddr(mVU));
	}
}
|
|
|
|
// IBGEZ: branch type 4. The condition value left in gprT1 is vi[Is].
mVUop(mVU_IBGEZ) {
	setBranchA(mX, 4, 0);
	pass1 {
		mVUanalyzeCondBranch1(mVU, _Is_);
	}
	pass2 {
		// An operand flagged memReadIs is taken from VIbackup instead of the live register.
		if (mVUlow.memReadIs) {
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		}
		else {
			mVUallocVIa(mVU, gprT1, _Is_);
		}

		if (isBadOrEvil) {
			condEvilBranch(mVU, Jcc_GreaterOrEqual);
		}
		else {
			xMOV(ptr32[&mVU.branch], gprT1);
		}
		mVU.profiler.EmitOp(opIBGEZ);
	}
	pass3 {
		mVUlog("IBGEZ vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Fs_, branchAddr(mVU), branchAddr(mVU));
	}
}
|
|
|
|
// IBGTZ: branch type 5. The condition value left in gprT1 is vi[Is].
mVUop(mVU_IBGTZ) {
	setBranchA(mX, 5, 0);
	pass1 {
		mVUanalyzeCondBranch1(mVU, _Is_);
	}
	pass2 {
		// An operand flagged memReadIs is taken from VIbackup instead of the live register.
		if (mVUlow.memReadIs) {
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		}
		else {
			mVUallocVIa(mVU, gprT1, _Is_);
		}

		if (isBadOrEvil) {
			condEvilBranch(mVU, Jcc_Greater);
		}
		else {
			xMOV(ptr32[&mVU.branch], gprT1);
		}
		mVU.profiler.EmitOp(opIBGTZ);
	}
	pass3 {
		mVUlog("IBGTZ vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Fs_, branchAddr(mVU), branchAddr(mVU));
	}
}
|
|
|
|
// IBLEZ: branch type 6. The condition value left in gprT1 is vi[Is].
mVUop(mVU_IBLEZ) {
	setBranchA(mX, 6, 0);
	pass1 {
		mVUanalyzeCondBranch1(mVU, _Is_);
	}
	pass2 {
		// An operand flagged memReadIs is taken from VIbackup instead of the live register.
		if (mVUlow.memReadIs) {
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		}
		else {
			mVUallocVIa(mVU, gprT1, _Is_);
		}

		if (isBadOrEvil) {
			condEvilBranch(mVU, Jcc_LessOrEqual);
		}
		else {
			xMOV(ptr32[&mVU.branch], gprT1);
		}
		mVU.profiler.EmitOp(opIBLEZ);
	}
	pass3 {
		mVUlog("IBLEZ vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Fs_, branchAddr(mVU), branchAddr(mVU));
	}
}
|
|
|
|
// IBLTZ: branch type 7. The condition value left in gprT1 is vi[Is].
mVUop(mVU_IBLTZ) {
	setBranchA(mX, 7, 0);
	pass1 {
		mVUanalyzeCondBranch1(mVU, _Is_);
	}
	pass2 {
		// An operand flagged memReadIs is taken from VIbackup instead of the live register.
		if (mVUlow.memReadIs) {
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		}
		else {
			mVUallocVIa(mVU, gprT1, _Is_);
		}

		if (isBadOrEvil) {
			condEvilBranch(mVU, Jcc_Less);
		}
		else {
			xMOV(ptr32[&mVU.branch], gprT1);
		}
		mVU.profiler.EmitOp(opIBLTZ);
	}
	pass3 {
		mVUlog("IBLTZ vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Fs_, branchAddr(mVU), branchAddr(mVU));
	}
}
|
|
|
|
// IBNE: branch type 8. The condition value left in gprT1 is vi[Is] XOR vi[It],
// so it is nonzero exactly when the two operands differ.
mVUop(mVU_IBNE) {
	setBranchA(mX, 8, 0);
	pass1 {
		mVUanalyzeCondBranch2(mVU, _Is_, _It_);
	}
	pass2 {
		// An operand flagged memRead* is taken from VIbackup instead of the live register.
		if (mVUlow.memReadIs) {
			xMOV(gprT1, ptr32[&mVU.VIbackup]);
		}
		else {
			mVUallocVIa(mVU, gprT1, _Is_);
		}

		if (mVUlow.memReadIt) {
			xXOR(gprT1, ptr32[&mVU.VIbackup]);
		}
		else {
			mVUallocVIa(mVU, gprT2, _It_);
			xXOR(gprT1, gprT2);
		}

		if (isBadOrEvil) {
			condEvilBranch(mVU, Jcc_NotEqual);
		}
		else {
			xMOV(ptr32[&mVU.branch], gprT1);
		}
		mVU.profiler.EmitOp(opIBNE);
	}
	pass3 {
		mVUlog("IBNE vi%02d, vi%02d [<a href=\"#addr%04x\">%04x</a>]", _Ft_, _Fs_, branchAddr(mVU), branchAddr(mVU));
	}
}
|
|
|
|
// Shared pass2 body of JR/JALR: computes the jump target from vi[Is]
// (value * 8, masked to microMemSize - 8) and stores it in mVU.branch or,
// for an evil branch, in mVU.evilBranch. Nothing is emitted when the
// target is a valid constant jump and the branch is not evil.
void normJumpPass2(mV) {
	if (mVUlow.constJump.isValid && !mVUlow.evilBranch) {
		return;
	}

	mVUallocVIa(mVU, gprT1, _Is_);
	xSHL(gprT1, 3);
	xAND(gprT1, mVU.microMemSize - 8);

	if (mVUlow.evilBranch) {
		xMOV(ptr32[&mVU.evilBranch], gprT1);
	}
	else {
		xMOV(ptr32[&mVU.branch], gprT1);
	}

	//If delay slot is conditional, it uses badBranch to go to its target
	if (mVUlow.badBranch) {
		xADD(gprT1, 8);
		xMOV(ptr32[&mVU.badBranch], gprT1);
	}
}
|
|
|
|
// JR: jump to the address held in vi[Is] (branch type 9).
mVUop(mVU_JR) {
	mVUbranch = 9;
	pass1 {
		mVUanalyzeJump(mVU, _Is_, 0, false);
	}
	pass2 {
		normJumpPass2(mVU);
		mVU.profiler.EmitOp(opJR);
	}
	pass3 {
		mVUlog("JR [vi%02d]", _Fs_);
	}
}
|
|
|
|
// JALR: jump to the address held in vi[Is] and write a link value to
// vi[It] (branch type 10).
mVUop(mVU_JALR) {
	mVUbranch = 10;
	pass1 {
		mVUanalyzeJump(mVU, _Is_, _It_, 1);
	}
	pass2 {
		normJumpPass2(mVU);

		// Normal case: the link value is bSaveAddr.
		if (!mVUlow.evilBranch) {
			xMOV(gprT1, bSaveAddr);
			mVUallocVIb(mVU, gprT1, _It_);
		}

		// Evil case: inspect the earlier instruction (two PC steps back).
		// Kept as a separate check rather than an else, as in the original.
		if (mVUlow.evilBranch) {
			incPC(-2);
			if (mVUlow.branch >= 9) {
				// Previous branch is a jump of some type so
				// we need to take the branch address from the register it uses.
				DevCon.Warning("Linking JALR from JALR/JR branch target! - If game broken report to PCSX2 Team");
				mVUallocVIa(mVU, gprT1, _Is_);
				xADD(gprT1, 8);
				xSHR(gprT1, 3);
				// The PC is restored before the write so _It_ is read from this instruction.
				incPC(2);
				mVUallocVIb(mVU, gprT1, _It_);
			}
			else incPC(2);
		}

		mVU.profiler.EmitOp(opJALR);
	}
	pass3 {
		mVUlog("JALR vi%02d, [vi%02d]", _Ft_, _Fs_);
	}
}
|