mirror of https://github.com/PCSX2/pcsx2.git
* Disable newVifUnpack, which I left enabled in the prev commit (it's not ready yet!)
* Added feature to align call targets for EErec functions and blocks on P4s and AMDs, and pack them on Core2/i7s. * Fixed some svn:native props. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2347 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
b5f643950c
commit
b3fead5dc9
|
@ -157,9 +157,12 @@ template< typename T > void xWrite( T val );
|
|||
class ModSibBase;
|
||||
|
||||
extern void xSetPtr( void* ptr );
|
||||
extern u8* xGetPtr();
|
||||
extern void xAlignPtr( uint bytes );
|
||||
extern void xAdvancePtr( uint bytes );
|
||||
extern void xAlignCallTarget();
|
||||
|
||||
extern u8* xGetPtr();
|
||||
extern u8* xGetAlignedCallTarget();
|
||||
|
||||
extern JccComparisonType xInvertCond( JccComparisonType src );
|
||||
|
||||
|
|
|
@ -395,6 +395,32 @@ __emitinline void xAlignPtr( uint bytes )
|
|||
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~(bytes - 1) );
|
||||
}
|
||||
|
||||
// Performs best-case alignment for the target CPU, for use prior to starting a new
|
||||
// function. This is not meant to be used prior to jump targets, since it doesn't
|
||||
// add padding (additionally, speed benefit from jump alignment is minimal, and often
|
||||
// a loss).
|
||||
__emitinline void xAlignCallTarget()
{
	// Targets are left packed (unaligned) only on release builds running on a CPU
	// that reports SSSE3, which serves as the filter for Core2/i7-class chips that
	// prefer unaligned addresses.  Debug builds always align, which keeps
	// disassembly listings easier to read.
	const bool leavePacked = !IsDebugBuild && x86caps.hasSupplementalStreamingSIMD3Extensions;
	if( leavePacked ) return;

	// Everything else gets 16 byte alignment:
	//  - P4's and earlier prefer 16 bytes.
	//  - AMD Athlons and Phenoms prefer 8 bytes, but there is no easy heuristic
	//    for detecting them yet.
	//  - AMD Phenom IIs are unknown (either 8 byte, or unaligned).
	xAlignPtr( 16 );
}
|
||||
|
||||
// Applies xAlignCallTarget() to the emitter pointer and returns the resulting
// x86Ptr, i.e. the address at which the next call target will be emitted.
__emitinline u8* xGetAlignedCallTarget()
{
	xAlignCallTarget();

	u8* const callTarget = x86Ptr;
	return callTarget;
}
|
||||
|
||||
__emitinline void xAdvancePtr( uint bytes )
|
||||
{
|
||||
if( IsDevBuild )
|
||||
|
|
|
@ -58,6 +58,11 @@ __forceinline void vif1FLUSH()
|
|||
|
||||
void vif1Init()
|
||||
{
|
||||
#ifdef newVif1
|
||||
extern void initNewVif(int idx);
|
||||
initNewVif(1);
|
||||
#endif
|
||||
|
||||
SetNewMask(g_vif1Masks, g_vif1HasMask3, 0, 0xffffffff);
|
||||
}
|
||||
|
||||
|
@ -313,19 +318,13 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
|
|||
|
||||
return ret;
|
||||
}
|
||||
#ifdef newVif1
|
||||
extern void initNewVif(int idx);
|
||||
extern int nVifUnpack(int idx, u32 *data);
|
||||
static int testVif = 0;
|
||||
#endif
|
||||
static int __fastcall Vif1TransUnpack(u32 *data)
|
||||
{
|
||||
#ifdef newVif1
|
||||
if (!testVif) { initNewVif(1); testVif = 1; }
|
||||
//int temp = nVifUnpack(1, data);
|
||||
//if (temp >= 0) return temp;
|
||||
extern int nVifUnpack(int idx, u32 *data);
|
||||
return nVifUnpack(1, data);
|
||||
#endif
|
||||
|
||||
XMMRegisters::Freeze();
|
||||
|
||||
if (vif1.vifpacketsize < vif1.tag.size)
|
||||
|
|
|
@ -60,7 +60,7 @@ static __forceinline u32 vif_size(u8 num)
|
|||
return (num == 0) ? 0x1000 : 0x4000;
|
||||
}
|
||||
|
||||
#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
|
||||
#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
|
||||
//#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
|
||||
//#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
|
||||
//#define newVif0 // Use New Code for Vif0 Unpacks (not implemented)
|
||||
#endif
|
||||
|
|
|
@ -371,7 +371,7 @@ static DynGenFunc* _DynGen_JITCompile()
|
|||
{
|
||||
pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." );
|
||||
|
||||
u8* retval = xGetPtr();
|
||||
u8* retval = xGetAlignedCallTarget();
|
||||
_DynGen_StackFrameCheck();
|
||||
|
||||
xMOV( ecx, &cpuRegs.pc );
|
||||
|
@ -388,7 +388,7 @@ static DynGenFunc* _DynGen_JITCompile()
|
|||
|
||||
// Emits a stub that simply jumps to JITCompile, and returns the stub's address.
// The stub is a call target, so its start address goes through
// xGetAlignedCallTarget().  'retval' is declared exactly once; the older
// xGetPtr()-based declaration is dropped because declaring it twice is a
// redefinition error.
static DynGenFunc* _DynGen_JITCompileInBlock()
{
	u8* retval = xGetAlignedCallTarget();
	xJMP( JITCompile );
	return (DynGenFunc*)retval;
}
|
||||
|
@ -396,7 +396,7 @@ static DynGenFunc* _DynGen_JITCompileInBlock()
|
|||
// called when jumping to variable pc address
|
||||
static DynGenFunc* _DynGen_DispatcherReg()
|
||||
{
|
||||
u8* retval = xGetPtr();
|
||||
u8* retval = xGetPtr(); // fallthrough target, can't align it!
|
||||
_DynGen_StackFrameCheck();
|
||||
|
||||
xMOV( eax, &cpuRegs.pc );
|
||||
|
@ -410,7 +410,7 @@ static DynGenFunc* _DynGen_DispatcherReg()
|
|||
|
||||
static DynGenFunc* _DynGen_EnterRecompiledCode()
|
||||
{
|
||||
u8* retval = xGetPtr();
|
||||
u8* retval = xGetAlignedCallTarget();
|
||||
|
||||
// "standard" frame pointer setup for aligned stack: Record the original
|
||||
// esp into ebp, and then align esp. ebp references the original esp base
|
||||
|
@ -446,6 +446,8 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
|
|||
xMOV( &s_store_ebp, ebp );
|
||||
|
||||
xJMP( ptr32[&DispatcherReg] );
|
||||
|
||||
xAlignCallTarget();
|
||||
imm = (uptr)xGetPtr();
|
||||
ExitRecompiledCode = (DynGenFunc*)xGetPtr();
|
||||
|
||||
|
@ -1254,7 +1256,7 @@ void recompileNextInstruction(int delayslot)
|
|||
// _flushCachedRegs();
|
||||
// g_cpuHasConstReg = 1;
|
||||
|
||||
if (!delayslot && x86Ptr - recPtr > 0x1000)
|
||||
if (!delayslot && (xGetPtr() - recPtr > 0x1000) )
|
||||
s_nEndBlock = pc;
|
||||
}
|
||||
|
||||
|
@ -1335,9 +1337,8 @@ static void __fastcall recRecompile( const u32 startpc )
|
|||
recResetEE();
|
||||
}
|
||||
|
||||
x86SetPtr( recPtr );
|
||||
x86Align(16);
|
||||
recPtr = x86Ptr;
|
||||
xSetPtr( recPtr );
|
||||
recPtr = xGetAlignedCallTarget();
|
||||
|
||||
s_nBlockFF = false;
|
||||
if (HWADDR(startpc) == 0x81fc0)
|
||||
|
@ -1718,14 +1719,14 @@ StartRecomp:
|
|||
}
|
||||
}
|
||||
|
||||
pxAssert( x86Ptr < recMem+REC_CACHEMEM );
|
||||
pxAssert( xGetPtr() < recMem+REC_CACHEMEM );
|
||||
pxAssert( recConstBufPtr < recConstBuf + RECCONSTBUF_SIZE );
|
||||
pxAssert( x86FpuState == 0 );
|
||||
|
||||
pxAssert(x86Ptr - recPtr < 0x10000);
|
||||
s_pCurBlockEx->x86size = x86Ptr - recPtr;
|
||||
pxAssert(xGetPtr() - recPtr < 0x10000);
|
||||
s_pCurBlockEx->x86size = xGetPtr() - recPtr;
|
||||
|
||||
recPtr = x86Ptr;
|
||||
recPtr = xGetPtr();
|
||||
|
||||
pxAssert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg );
|
||||
|
||||
|
|
|
@ -1,441 +1,441 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Pass 1 Functions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Helper Macros
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Entry for VF register x in the current register state (mVUregs).
#define aReg(x)    mVUregs.VF[x]
// Records x as the VF register tracked in temp slot y, then yields that slot's
// mVUregsTemp.VF entry so the caller can append a field store, e.g.
// bReg(reg, 0).x = 4;  Expands to TWO statements, so only use it inside braces.
#define bReg(x, y) mVUregsTemp.VFreg[y] = x; mVUregsTemp.VF[y]
// Max/min of two expressions.  Arguments are fully parenthesized so operands that
// contain low-precedence operators (|, &, ?:) are compared as a whole.
// Each argument may still be evaluated twice, so avoid side effects.
#define aMax(x, y) (((x) > (y)) ? (x) : (y))
#define aMin(x, y) (((x) < (y)) ? (x) : (y))
|
||||
|
||||
// Read a VF reg
// For every field enabled by _X/_Y/_Z/_W: raises mVUstall to at least the matching
// aReg(xReg) field, then records xReg and that field as read in vfRead.
// Does nothing when xReg is 0.
#define analyzeReg1(xReg, vfRead) { \
	if (xReg) { \
		if (_X) { \
			mVUstall = aMax(mVUstall, aReg(xReg).x); \
			vfRead.reg = xReg; \
			vfRead.x = 1; \
		} \
		if (_Y) { \
			mVUstall = aMax(mVUstall, aReg(xReg).y); \
			vfRead.reg = xReg; \
			vfRead.y = 1; \
		} \
		if (_Z) { \
			mVUstall = aMax(mVUstall, aReg(xReg).z); \
			vfRead.reg = xReg; \
			vfRead.z = 1; \
		} \
		if (_W) { \
			mVUstall = aMax(mVUstall, aReg(xReg).w); \
			vfRead.reg = xReg; \
			vfRead.w = 1; \
		} \
	} \
}
|
||||
|
||||
// Write to a VF reg
// For every field enabled by _X/_Y/_Z/_W: stores 4 into that field of the temp
// register entry selected by isLowOp (via bReg), and records xReg plus a value of
// 4 for that field in vfWrite.  Does nothing when xReg is 0.
#define analyzeReg2(xReg, vfWrite, isLowOp) { \
	if (xReg) { \
		if (_X) { \
			bReg(xReg, isLowOp).x = 4; \
			vfWrite.reg = xReg; \
			vfWrite.x = 4; \
		} \
		if (_Y) { \
			bReg(xReg, isLowOp).y = 4; \
			vfWrite.reg = xReg; \
			vfWrite.y = 4; \
		} \
		if (_Z) { \
			bReg(xReg, isLowOp).z = 4; \
			vfWrite.reg = xReg; \
			vfWrite.z = 4; \
		} \
		if (_W) { \
			bReg(xReg, isLowOp).w = 4; \
			vfWrite.reg = xReg; \
			vfWrite.w = 4; \
		} \
	} \
}
|
||||
|
||||
// Read a VF reg (BC opcodes)
// Exactly one field is read: the first of _bc_x/_bc_y/_bc_z that is set, or w when
// none of them is.  mVUstall is raised to at least that aReg(xReg) field and the
// read is recorded in vfRead.  Does nothing when xReg is 0.
#define analyzeReg3(xReg, vfRead) { \
	if (xReg) { \
		if (_bc_x) { \
			mVUstall = aMax(mVUstall, aReg(xReg).x); \
			vfRead.reg = xReg; \
			vfRead.x = 1; \
		} \
		else if (_bc_y) { \
			mVUstall = aMax(mVUstall, aReg(xReg).y); \
			vfRead.reg = xReg; \
			vfRead.y = 1; \
		} \
		else if (_bc_z) { \
			mVUstall = aMax(mVUstall, aReg(xReg).z); \
			vfRead.reg = xReg; \
			vfRead.z = 1; \
		} \
		else { \
			mVUstall = aMax(mVUstall, aReg(xReg).w); \
			vfRead.reg = xReg; \
			vfRead.w = 1; \
		} \
	} \
}
|
||||
|
||||
// For Clip Opcode
// Reads only the w field: raises mVUstall to at least aReg(xReg).w and records
// xReg / w as read in vfRead.  Does nothing when xReg is 0.
#define analyzeReg4(xReg, vfRead) { \
	if (xReg) { \
		mVUstall = aMax(mVUstall, aReg(xReg).w); \
		vfRead.reg = xReg; \
		vfRead.w = 1; \
	} \
}
|
||||
|
||||
// Read VF reg (FsF/FtF)
// fxf selects the single field that is read (0 = x, 1 = y, 2 = z, 3 = w).  mVUstall
// is raised to at least that aReg(xReg) field and the read is recorded in vfRead.
// Any other fxf value, or xReg == 0, leaves everything untouched.
#define analyzeReg5(xReg, fxf, vfRead) { \
	if (xReg) { \
		switch (fxf) { \
			case 0: \
				mVUstall = aMax(mVUstall, aReg(xReg).x); \
				vfRead.reg = xReg; \
				vfRead.x = 1; \
				break; \
			case 1: \
				mVUstall = aMax(mVUstall, aReg(xReg).y); \
				vfRead.reg = xReg; \
				vfRead.y = 1; \
				break; \
			case 2: \
				mVUstall = aMax(mVUstall, aReg(xReg).z); \
				vfRead.reg = xReg; \
				vfRead.z = 1; \
				break; \
			case 3: \
				mVUstall = aMax(mVUstall, aReg(xReg).w); \
				vfRead.reg = xReg; \
				vfRead.w = 1; \
				break; \
		} \
	} \
}
|
||||
|
||||
// Flips xyzw stalls to yzwx (MR32 Opcode)
// Each enabled destination field reads the NEXT source field: _X reads y, _Y reads
// z, _Z reads w and _W reads x.  mVUstall and vfRead are updated for the field
// that is actually read.  Does nothing when xReg is 0.
#define analyzeReg6(xReg, vfRead) { \
	if (xReg) { \
		if (_X) { \
			mVUstall = aMax(mVUstall, aReg(xReg).y); \
			vfRead.reg = xReg; \
			vfRead.y = 1; \
		} \
		if (_Y) { \
			mVUstall = aMax(mVUstall, aReg(xReg).z); \
			vfRead.reg = xReg; \
			vfRead.z = 1; \
		} \
		if (_Z) { \
			mVUstall = aMax(mVUstall, aReg(xReg).w); \
			vfRead.reg = xReg; \
			vfRead.w = 1; \
		} \
		if (_W) { \
			mVUstall = aMax(mVUstall, aReg(xReg).x); \
			vfRead.reg = xReg; \
			vfRead.x = 1; \
		} \
	} \
}
|
||||
|
||||
// Reading a VI reg
// Raises mVUstall to at least mVUregs.VI[xReg] and marks xReg as used in viRead.
// Does nothing when xReg is 0.
#define analyzeVIreg1(xReg, viRead) { \
	if (xReg) { \
		mVUstall = aMax(mVUstall, mVUregs.VI[xReg]); \
		viRead.reg = xReg; \
		viRead.used = 1; \
	} \
}
|
||||
|
||||
// Writing to a VI reg
// Invalidates the tracked constant for xReg, stores xReg / aCycles in the temp
// register state, and records the write (register + aCycles) in viWrite.
// Does nothing when xReg is 0.
#define analyzeVIreg2(xReg, viWrite, aCycles) { \
	if (xReg) { \
		mVUconstReg[xReg].isValid = 0; \
		mVUregsTemp.VIreg = xReg; \
		mVUregsTemp.VI = aCycles; \
		viWrite.reg = xReg; \
		viWrite.used = aCycles; \
	} \
}
|
||||
|
||||
// Stores x in the temp q entry and stalls on the current q value.
#define analyzeQreg(x) { \
	mVUregsTemp.q = x; \
	mVUstall = aMax(mVUstall, mVUregs.q); \
}
// Stores x in the temp p entry and stalls on the current p value minus one
// (or on 0 when p is already 0).
#define analyzePreg(x) { \
	mVUregsTemp.p = x; \
	mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); \
}
// Sets the temp r entry to 1.
#define analyzeRreg() { \
	mVUregsTemp.r = 1; \
}
// Stalls on the current xgkick value.
#define analyzeXGkick1() { \
	mVUstall = aMax(mVUstall, mVUregs.xgkick); \
}
// Stores x in the temp xgkick entry.
#define analyzeXGkick2(x) { \
	mVUregsTemp.xgkick = x; \
}
// Marks VI register x as holding the known constant v (ignored when x is 0).
#define setConstReg(x, v) { \
	if (x) { \
		mVUconstReg[x].isValid = 1; \
		mVUconstReg[x].regValue = v; \
	} \
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC1 - Normal FMAC Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Sets sFLAG.doFlag, registers Fs and Ft as upper-op VF reads (accumulating any
// stall) and Fd as the upper-op VF write.
microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft)
{
	sFLAG.doFlag = 1;

	// Source operands
	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg1(Ft, mVUup.VF_read[1]);

	// Destination operand (isLowOp = 0)
	analyzeReg2(Fd, mVUup.VF_write, 0);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC2 - ABS/FTOI/ITOF Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Registers Fs as the upper-op VF read and Ft as the upper-op VF write.
microVUt(void) mVUanalyzeFMAC2(mV, int Fs, int Ft)
{
	// Source operand
	analyzeReg1(Fs, mVUup.VF_read[0]);

	// Destination operand (isLowOp = 0)
	analyzeReg2(Ft, mVUup.VF_write, 0);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC3 - BC(xyzw) FMAC Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Same as the normal FMAC analysis, except that Ft is read through the broadcast
// (BC) field selector instead of the xyzw mask.
microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft)
{
	sFLAG.doFlag = 1;

	// Source operands: Fs by mask, Ft by broadcast field
	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg3(Ft, mVUup.VF_read[1]);

	// Destination operand (isLowOp = 0)
	analyzeReg2(Fd, mVUup.VF_write, 0);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC4 - Clip FMAC Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Sets cFLAG.doFlag, registers Fs as a masked VF read and Ft as a w-only VF read.
// No VF register is written.
microVUt(void) mVUanalyzeFMAC4(mV, int Fs, int Ft)
{
	cFLAG.doFlag = 1;

	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg4(Ft, mVUup.VF_read[1]);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// IALU - IALU Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Three-register integer op: flags the op as a NOP when the destination is VI0,
// registers Is and It as VI reads and Id as a VI write with a cycle value of 1.
microVUt(void) mVUanalyzeIALU1(mV, int Id, int Is, int It)
{
	if (Id == 0) {
		mVUlow.isNOP = 1;
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[1]);
	analyzeVIreg2(Id, mVUlow.VI_write, 1);
}
|
||||
|
||||
// Two-register integer op: flags the op as a NOP when the destination It is VI0,
// registers Is as a VI read and It as a VI write with a cycle value of 1.
microVUt(void) mVUanalyzeIALU2(mV, int Is, int It)
{
	if (It == 0) {
		mVUlow.isNOP = 1;
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg2(It, mVUlow.VI_write, 1);
}
|
||||
|
||||
// Runs the two-register integer analysis, then, when the source is VI0, records
// imm as the known constant value of It.
microVUt(void) mVUanalyzeIADDI(mV, int Is, int It, s16 imm)
{
	mVUanalyzeIALU2(mVU, Is, It);

	// Must come after the call above, which invalidates It's tracked constant.
	if (Is == 0) {
		setConstReg(It, imm);
	}
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MR32 - MR32 Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Flags the op as a NOP when Ft is VF0, registers Fs as a rotated-field VF read
// and Ft as the lower-op VF write.
microVUt(void) mVUanalyzeMR32(mV, int Fs, int Ft)
{
	if (Ft == 0) {
		mVUlow.isNOP = 1;
	}

	analyzeReg6(Fs, mVUlow.VF_read[0]);
	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FDIV - DIV/SQRT/RSQRT Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Registers the single fields Fs.Fsf and Ft.Ftf as lower-op VF reads, then records
// xCycles for the Q register and stalls on its current value.
microVUt(void) mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles)
{
	mVUprint("microVU: DIV Opcode");

	// Source operands (one field each)
	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
	analyzeReg5(Ft, Ftf, mVUlow.VF_read[1]);

	// Result goes to Q
	analyzeQreg(xCycles);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// EFU - EFU Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// EFU op with a single-field source: registers Fs.Fsf as a lower-op VF read, then
// records xCycles for the P register and stalls on its current value.
microVUt(void) mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles)
{
	mVUprint("microVU: EFU Opcode");

	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
	analyzePreg(xCycles);
}
|
||||
|
||||
// EFU op with a masked source: registers Fs (by xyzw mask) as a lower-op VF read,
// then records xCycles for the P register and stalls on its current value.
microVUt(void) mVUanalyzeEFU2(mV, int Fs, u8 xCycles)
{
	mVUprint("microVU: EFU Opcode");

	analyzeReg1(Fs, mVUlow.VF_read[0]);
	analyzePreg(xCycles);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MFP - MFP Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Flags the op as a NOP when Ft is VF0 and registers Ft as the lower-op VF write.
microVUt(void) mVUanalyzeMFP(mV, int Ft)
{
	if (Ft == 0) {
		mVUlow.isNOP = 1;
	}

	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MOVE - MOVE Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Flags the op as a NOP when the destination is VF0 or equals the source, then
// registers Fs as a lower-op VF read and Ft as the lower-op VF write.
microVUt(void) mVUanalyzeMOVE(mV, int Fs, int Ft)
{
	const bool hasNoEffect = (Ft == 0) || (Ft == Fs);
	if (hasNoEffect) {
		mVUlow.isNOP = 1;
	}

	analyzeReg1(Fs, mVUlow.VF_read[0]);
	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// LQx - LQ/LQD/LQI Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Registers Is as a VI read and Ft as the lower-op VF write.  With Ft == VF0 the
// load itself is pointless: the op becomes a NOP unless it also updates a non-zero
// Is (writeIs), in which case only the VF write is suppressed.  When writeIs is
// set, Is is additionally registered as a VI write.
microVUt(void) mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs)
{
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeReg2(Ft, mVUlow.VF_write, 1);

	if (Ft == 0) {
		if (writeIs && Is) {
			mVUlow.noWriteVF = 1;
		}
		else {
			mVUlow.isNOP = 1;
		}
	}

	if (writeIs) {
		analyzeVIreg2(Is, mVUlow.VI_write, 1);
	}
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// SQx - SQ/SQD/SQI Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Registers Fs as a lower-op VF read and It as a VI read; when writeIt is set, It
// is also registered as a VI write with a cycle value of 1.
microVUt(void) mVUanalyzeSQ(mV, int Fs, int It, bool writeIt)
{
	analyzeReg1(Fs, mVUlow.VF_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[0]);

	if (writeIt) {
		analyzeVIreg2(It, mVUlow.VI_write, 1);
	}
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// R*** - R Reg Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Registers the single field Fs.Fsf as a lower-op VF read and marks the R
// register in the temp register state.
microVUt(void) mVUanalyzeR1(mV, int Fs, int Fsf)
{
	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
	analyzeRreg();
}
|
||||
|
||||
// When Ft is VF0 the op either becomes a NOP (canBeNOP) or only has its VF write
// suppressed.  Ft is then registered as the lower-op VF write and the R register
// is marked in the temp register state.
microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP)
{
	if (Ft == 0) {
		if (canBeNOP) {
			mVUlow.isNOP = 1;
		}
		else {
			mVUlow.noWriteVF = 1;
		}
	}

	analyzeReg2(Ft, mVUlow.VF_write, 1);
	analyzeRreg();
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Sflag - Status Flag Opcodes
|
||||
//------------------------------------------------------------------
|
||||
// Walks backwards over the instructions already analyzed in this block.  Each step
// adds the instruction's stall plus one to a running distance j.  The first
// instruction found with sFLAG.doFlag set at a distance of at least 3 gets
// mFLAG.doFlag (setMacFlag) or sFLAG.doNonSticky set, and the search stops.
// iPC is restored before returning.
microVUt(void) flagSet(mV, bool setMacFlag)
{
	const int savedPC = iPC;
	int j = 0;

	for (int i = mVUcount; i > 0; i--, j++) {
		j += mVUstall;	// read before stepping back
		incPC2(-2);

		if (!sFLAG.doFlag || (j < 3)) continue;

		if (setMacFlag) mFLAG.doFlag      = 1;
		else            sFLAG.doNonSticky = 1;
		break;
	}

	iPC = savedPC;
}
|
||||
|
||||
// Marks the lower op as reading flags and registers It as a VI write.  With
// It == VI0 the op is a NOP; otherwise the status-flag hack is disabled for this
// block, the ops are swapped, flagSet() is run in status mode, and bit 0 of
// needExactMatch is set when fewer than 4 instructions precede this one.
microVUt(void) mVUanalyzeSflag(mV, int It)
{
	mVUlow.readFlags = 1;
	analyzeVIreg2(It, mVUlow.VI_write, 1);

	if (It == 0) {
		mVUlow.isNOP = 1;
		return;
	}

	mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block
	mVUinfo.swapOps = 1;
	flagSet(mVU, 0);
	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 1;
	}
}
|
||||
|
||||
// Tags the lower op as an FSSET and as an instruction that reads flags.
microVUt(void) mVUanalyzeFSSET(mV)
{
	mVUlow.isFSSET   = 1;
	mVUlow.readFlags = 1;
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Mflag - Mac Flag Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Marks the lower op as reading flags, registers Is as a VI read and It as a VI
// write.  With It == VI0 the op is a NOP; otherwise the ops are swapped, flagSet()
// is run in MAC mode, and bit 1 of needExactMatch is set when fewer than 4
// instructions precede this one.
microVUt(void) mVUanalyzeMflag(mV, int Is, int It)
{
	mVUlow.readFlags = 1;
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg2(It, mVUlow.VI_write, 1);

	if (It == 0) {
		mVUlow.isNOP = 1;
		return;
	}

	mVUinfo.swapOps = 1;
	flagSet(mVU, 1);
	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 2;
	}
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Cflag - Clip Flag Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Swaps the ops, marks the lower op as reading flags, sets bit 2 of
// needExactMatch when fewer than 4 instructions precede this one, and registers
// It as a VI write.
microVUt(void) mVUanalyzeCflag(mV, int It)
{
	mVUinfo.swapOps  = 1;
	mVUlow.readFlags = 1;

	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 4;
	}

	analyzeVIreg2(It, mVUlow.VI_write, 1);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// XGkick
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Registers Fs as a VI read (it is passed to the VI-register macro despite its
// name), stalls on the current xgkick value, and stores xCycles as the new one.
//
// Note: Technically XGKICK should stall on the next instruction, whereas this
// code stalls on the same instruction.  That only matters when FMxxx or FSxxx
// opcodes that check flags are placed very specifically near an XGKICK that
// actually stalls; no game should be affected by this minor difference.
microVUt(void) mVUanalyzeXGkick(mV, int Fs, int xCycles)
{
	analyzeVIreg1(Fs, mVUlow.VI_read[0]);
	analyzeXGkick1();
	analyzeXGkick2(xCycles);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Branches - Branch Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Scans up to 5 previous instructions for a chain of writes to VI register xReg.
// 'i' counts how many instructions back the chain reaches.  When i ends up
// non-zero, infoVar is set and, unless the block's viBackUp entry already
// supplied it, backupVI is set on the instruction just after the last one
// stepped to.  iPC is always restored to its value on entry.
microVUt(void) analyzeBranchVI(mV, int xReg, bool &infoVar)
{
	if (!xReg) return;

	int i;
	// iEnd never exceeds 5, so i stays below 5 inside the loop.
	int iEnd    = aMin(5, (mVUcount+1));
	int entryPC = iPC;

	incPC2(-2);
	for (i = 0; i < iEnd; i++) {
		if (i == mVUcount) {
			// Reached the start of the block: fall back on the block's viBackUp.
			if (mVUpBlock->pState.viBackUp == xReg) {
				infoVar = 1;
				i++;
			}
			break;
		}

		const bool writesReg = (mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used;
		if (!writesReg) break;
		if (mVUlow.readFlags) break;

		// Step further back for the first instruction, or while the instruction
		// also reads xReg; otherwise the chain ends here.
		if ((i == 0)
		|| ((mVUlow.VI_read[0].reg == xReg) && (mVUlow.VI_read[0].used))
		|| ((mVUlow.VI_read[1].reg == xReg) && (mVUlow.VI_read[1].used))) {
			incPC2(-2);
			continue;
		}
		break;
	}

	if (i && !infoVar) {
		incPC2(2);
		mVUlow.backupVI = 1;
		infoVar = 1;
	}

	iPC = entryPC;

	if (i) {
		Console.WriteLn( Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x]", getIndex, i, xPC);
	}
}
|
||||
|
||||
// Branch in Branch Delay-Slots
|
||||
microVUt(int) mVUbranchCheck(mV) {
|
||||
if (!mVUcount) return 0;
|
||||
incPC(-2);
|
||||
if (mVUlow.branch) {
|
||||
mVUlow.badBranch = 1;
|
||||
incPC(2);
|
||||
mVUlow.evilBranch = 1;
|
||||
mVUregs.blockType = 2;
|
||||
Console.Warning("microVU%d Warning: Branch in Branch delay slot! [%04x]", mVU->index, xPC);
|
||||
return 1;
|
||||
}
|
||||
incPC(2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Registers Is as a VI read.  The VI-delay analysis only runs when the
// instruction does not stall and is not a branch inside a branch delay slot.
microVUt(void) mVUanalyzeCondBranch1(mV, int Is)
{
	analyzeVIreg1(Is, mVUlow.VI_read[0]);

	if (mVUstall) return;
	if (mVUbranchCheck(mVU)) return;

	analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
}
|
||||
|
||||
// Registers Is and It as VI reads.  The VI-delay analysis for both registers only
// runs when the instruction does not stall and is not a branch inside a branch
// delay slot.
microVUt(void) mVUanalyzeCondBranch2(mV, int Is, int It)
{
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[1]);

	if (mVUstall) return;
	if (mVUbranchCheck(mVU)) return;

	analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
	analyzeBranchVI(mVU, It, mVUlow.memReadIt);
}
|
||||
|
||||
// Runs the branch-in-delay-slot check.  For BAL, It is additionally registered as
// a VI write and tracked as the constant bSaveAddr.
microVUt(void) mVUanalyzeNormBranch(mV, int It, bool isBAL)
{
	mVUbranchCheck(mVU);

	if (!isBAL) return;

	analyzeVIreg2(It, mVUlow.VI_write, 1);
	setConstReg(It, bSaveAddr);
}
|
||||
|
||||
// Runs the branch-in-delay-slot check and sets the branch type (10 for JALR,
// 9 otherwise).  When Is holds a tracked constant and CHECK_VU_CONSTHACK is off,
// that constant is recorded as the jump target.  Is is registered as a VI read;
// for JALR, It is registered as a VI write and tracked as the constant bSaveAddr.
microVUt(void) mVUanalyzeJump(mV, int Is, int It, bool isJALR)
{
	mVUbranchCheck(mVU);

	if (isJALR) mVUlow.branch = 10;
	else        mVUlow.branch = 9;

	if (mVUconstReg[Is].isValid && !CHECK_VU_CONSTHACK) {
		mVUlow.constJump.isValid  = 1;
		mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
		//DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU->index);
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);

	if (!isJALR) return;

	analyzeVIreg2(It, mVUlow.VI_write, 1);
	setConstReg(It, bSaveAddr);
}
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Pass 1 Functions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Helper Macros
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Entry for VF register x in the current register state (mVUregs).
#define aReg(x)    mVUregs.VF[x]
// Records x as the VF register tracked in temp slot y, then yields that slot's
// mVUregsTemp.VF entry so the caller can append a field store, e.g.
// bReg(reg, 0).x = 4;  Expands to TWO statements, so only use it inside braces.
#define bReg(x, y) mVUregsTemp.VFreg[y] = x; mVUregsTemp.VF[y]
// Max/min of two expressions.  Arguments are fully parenthesized so operands that
// contain low-precedence operators (|, &, ?:) are compared as a whole.
// Each argument may still be evaluated twice, so avoid side effects.
#define aMax(x, y) (((x) > (y)) ? (x) : (y))
#define aMin(x, y) (((x) < (y)) ? (x) : (y))
|
||||
|
||||
// Read a VF reg
// For every field enabled by _X/_Y/_Z/_W: raises mVUstall to at least the matching
// aReg(xReg) field, then records xReg and that field as read in vfRead.
// Does nothing when xReg is 0.
#define analyzeReg1(xReg, vfRead) { \
	if (xReg) { \
		if (_X) { \
			mVUstall = aMax(mVUstall, aReg(xReg).x); \
			vfRead.reg = xReg; \
			vfRead.x = 1; \
		} \
		if (_Y) { \
			mVUstall = aMax(mVUstall, aReg(xReg).y); \
			vfRead.reg = xReg; \
			vfRead.y = 1; \
		} \
		if (_Z) { \
			mVUstall = aMax(mVUstall, aReg(xReg).z); \
			vfRead.reg = xReg; \
			vfRead.z = 1; \
		} \
		if (_W) { \
			mVUstall = aMax(mVUstall, aReg(xReg).w); \
			vfRead.reg = xReg; \
			vfRead.w = 1; \
		} \
	} \
}
|
||||
|
||||
// Write to a VF reg
// For every field enabled by _X/_Y/_Z/_W: stores 4 into that field of the temp
// register entry selected by isLowOp (via bReg), and records xReg plus a value of
// 4 for that field in vfWrite.  Does nothing when xReg is 0.
#define analyzeReg2(xReg, vfWrite, isLowOp) { \
	if (xReg) { \
		if (_X) { \
			bReg(xReg, isLowOp).x = 4; \
			vfWrite.reg = xReg; \
			vfWrite.x = 4; \
		} \
		if (_Y) { \
			bReg(xReg, isLowOp).y = 4; \
			vfWrite.reg = xReg; \
			vfWrite.y = 4; \
		} \
		if (_Z) { \
			bReg(xReg, isLowOp).z = 4; \
			vfWrite.reg = xReg; \
			vfWrite.z = 4; \
		} \
		if (_W) { \
			bReg(xReg, isLowOp).w = 4; \
			vfWrite.reg = xReg; \
			vfWrite.w = 4; \
		} \
	} \
}
|
||||
|
||||
// Read a VF reg (BC opcodes)
// Exactly one field is read: the first of _bc_x/_bc_y/_bc_z that is set, or w when
// none of them is.  mVUstall is raised to at least that aReg(xReg) field and the
// read is recorded in vfRead.  Does nothing when xReg is 0.
#define analyzeReg3(xReg, vfRead) { \
	if (xReg) { \
		if (_bc_x) { \
			mVUstall = aMax(mVUstall, aReg(xReg).x); \
			vfRead.reg = xReg; \
			vfRead.x = 1; \
		} \
		else if (_bc_y) { \
			mVUstall = aMax(mVUstall, aReg(xReg).y); \
			vfRead.reg = xReg; \
			vfRead.y = 1; \
		} \
		else if (_bc_z) { \
			mVUstall = aMax(mVUstall, aReg(xReg).z); \
			vfRead.reg = xReg; \
			vfRead.z = 1; \
		} \
		else { \
			mVUstall = aMax(mVUstall, aReg(xReg).w); \
			vfRead.reg = xReg; \
			vfRead.w = 1; \
		} \
	} \
}
|
||||
|
||||
// For Clip Opcode
// Reads only the w field: raises mVUstall to at least aReg(xReg).w and records
// xReg / w as read in vfRead.  Does nothing when xReg is 0.
#define analyzeReg4(xReg, vfRead) { \
	if (xReg) { \
		mVUstall = aMax(mVUstall, aReg(xReg).w); \
		vfRead.reg = xReg; \
		vfRead.w = 1; \
	} \
}
|
||||
|
||||
// Read VF reg (FsF/FtF)
// fxf selects the single field that is read (0 = x, 1 = y, 2 = z, 3 = w).  mVUstall
// is raised to at least that aReg(xReg) field and the read is recorded in vfRead.
// Any other fxf value, or xReg == 0, leaves everything untouched.
#define analyzeReg5(xReg, fxf, vfRead) { \
	if (xReg) { \
		switch (fxf) { \
			case 0: \
				mVUstall = aMax(mVUstall, aReg(xReg).x); \
				vfRead.reg = xReg; \
				vfRead.x = 1; \
				break; \
			case 1: \
				mVUstall = aMax(mVUstall, aReg(xReg).y); \
				vfRead.reg = xReg; \
				vfRead.y = 1; \
				break; \
			case 2: \
				mVUstall = aMax(mVUstall, aReg(xReg).z); \
				vfRead.reg = xReg; \
				vfRead.z = 1; \
				break; \
			case 3: \
				mVUstall = aMax(mVUstall, aReg(xReg).w); \
				vfRead.reg = xReg; \
				vfRead.w = 1; \
				break; \
		} \
	} \
}
|
||||
|
||||
// Flips xyzw stalls to yzwx (MR32 Opcode)
// Each enabled destination field reads the NEXT source field: _X reads y, _Y reads
// z, _Z reads w and _W reads x.  mVUstall and vfRead are updated for the field
// that is actually read.  Does nothing when xReg is 0.
#define analyzeReg6(xReg, vfRead) { \
	if (xReg) { \
		if (_X) { \
			mVUstall = aMax(mVUstall, aReg(xReg).y); \
			vfRead.reg = xReg; \
			vfRead.y = 1; \
		} \
		if (_Y) { \
			mVUstall = aMax(mVUstall, aReg(xReg).z); \
			vfRead.reg = xReg; \
			vfRead.z = 1; \
		} \
		if (_Z) { \
			mVUstall = aMax(mVUstall, aReg(xReg).w); \
			vfRead.reg = xReg; \
			vfRead.w = 1; \
		} \
		if (_W) { \
			mVUstall = aMax(mVUstall, aReg(xReg).x); \
			vfRead.reg = xReg; \
			vfRead.x = 1; \
		} \
	} \
}
|
||||
|
||||
// Reading a VI reg
// Raises mVUstall to at least mVUregs.VI[xReg] and marks xReg as used in viRead.
// Does nothing when xReg is 0.
#define analyzeVIreg1(xReg, viRead) { \
	if (xReg) { \
		mVUstall = aMax(mVUstall, mVUregs.VI[xReg]); \
		viRead.reg = xReg; \
		viRead.used = 1; \
	} \
}
|
||||
|
||||
// Writing to a VI reg
// Invalidates the tracked constant for xReg, stores xReg / aCycles in the temp
// register state, and records the write (register + aCycles) in viWrite.
// Does nothing when xReg is 0.
#define analyzeVIreg2(xReg, viWrite, aCycles) { \
	if (xReg) { \
		mVUconstReg[xReg].isValid = 0; \
		mVUregsTemp.VIreg = xReg; \
		mVUregsTemp.VI = aCycles; \
		viWrite.reg = xReg; \
		viWrite.used = aCycles; \
	} \
}
|
||||
|
||||
// Stores x in the temp q entry and stalls on the current q value.
#define analyzeQreg(x) { \
	mVUregsTemp.q = x; \
	mVUstall = aMax(mVUstall, mVUregs.q); \
}
// Stores x in the temp p entry and stalls on the current p value minus one
// (or on 0 when p is already 0).
#define analyzePreg(x) { \
	mVUregsTemp.p = x; \
	mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); \
}
// Sets the temp r entry to 1.
#define analyzeRreg() { \
	mVUregsTemp.r = 1; \
}
// Stalls on the current xgkick value.
#define analyzeXGkick1() { \
	mVUstall = aMax(mVUstall, mVUregs.xgkick); \
}
// Stores x in the temp xgkick entry.
#define analyzeXGkick2(x) { \
	mVUregsTemp.xgkick = x; \
}
// Marks VI register x as holding the known constant v (ignored when x is 0).
#define setConstReg(x, v) { \
	if (x) { \
		mVUconstReg[x].isValid = 1; \
		mVUconstReg[x].regValue = v; \
	} \
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC1 - Normal FMAC Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Sets sFLAG.doFlag, registers Fs and Ft as upper-op VF reads (accumulating any
// stall) and Fd as the upper-op VF write.
microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft)
{
	sFLAG.doFlag = 1;

	// Source operands
	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg1(Ft, mVUup.VF_read[1]);

	// Destination operand (isLowOp = 0)
	analyzeReg2(Fd, mVUup.VF_write, 0);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC2 - ABS/FTOI/ITOF Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Registers Fs as the upper-op VF read and Ft as the upper-op VF write.
microVUt(void) mVUanalyzeFMAC2(mV, int Fs, int Ft)
{
	// Source operand
	analyzeReg1(Fs, mVUup.VF_read[0]);

	// Destination operand (isLowOp = 0)
	analyzeReg2(Ft, mVUup.VF_write, 0);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC3 - BC(xyzw) FMAC Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Same as the normal FMAC analysis, except that Ft is read through the broadcast
// (BC) field selector instead of the xyzw mask.
microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft)
{
	sFLAG.doFlag = 1;

	// Source operands: Fs by mask, Ft by broadcast field
	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg3(Ft, mVUup.VF_read[1]);

	// Destination operand (isLowOp = 0)
	analyzeReg2(Fd, mVUup.VF_write, 0);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FMAC4 - Clip FMAC Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Sets cFLAG.doFlag, registers Fs as a masked VF read and Ft as a w-only VF read.
// No VF register is written.
microVUt(void) mVUanalyzeFMAC4(mV, int Fs, int Ft)
{
	cFLAG.doFlag = 1;

	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg4(Ft, mVUup.VF_read[1]);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// IALU - IALU Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Three-register integer op: flags the op as a NOP when the destination is VI0,
// registers Is and It as VI reads and Id as a VI write with a cycle value of 1.
microVUt(void) mVUanalyzeIALU1(mV, int Id, int Is, int It)
{
	if (Id == 0) {
		mVUlow.isNOP = 1;
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[1]);
	analyzeVIreg2(Id, mVUlow.VI_write, 1);
}
|
||||
|
||||
// Two-register integer op: flags the op as a NOP when the destination It is VI0,
// registers Is as a VI read and It as a VI write with a cycle value of 1.
microVUt(void) mVUanalyzeIALU2(mV, int Is, int It)
{
	if (It == 0) {
		mVUlow.isNOP = 1;
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg2(It, mVUlow.VI_write, 1);
}
|
||||
|
||||
// Runs the two-register integer analysis, then, when the source is VI0, records
// imm as the known constant value of It.
microVUt(void) mVUanalyzeIADDI(mV, int Is, int It, s16 imm)
{
	mVUanalyzeIALU2(mVU, Is, It);

	// Must come after the call above, which invalidates It's tracked constant.
	if (Is == 0) {
		setConstReg(It, imm);
	}
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MR32 - MR32 Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Flags the op as a NOP when Ft is VF0, registers Fs as a rotated-field VF read
// and Ft as the lower-op VF write.
microVUt(void) mVUanalyzeMR32(mV, int Fs, int Ft)
{
	if (Ft == 0) {
		mVUlow.isNOP = 1;
	}

	analyzeReg6(Fs, mVUlow.VF_read[0]);
	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FDIV - DIV/SQRT/RSQRT Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for the DIV/SQRT/RSQRT lower opcodes.
//   Fs/Fsf, Ft/Ftf - source VF register indices with their field selectors
//   xCycles        - cycle count handed to the Q-register analysis
microVUt(void) mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles)
{
	mVUprint("microVU: DIV Opcode");

	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
	analyzeReg5(Ft, Ftf, mVUlow.VF_read[1]);

	analyzeQreg(xCycles);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// EFU - EFU Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for EFU opcodes that read a single field of Fs.
//   Fs/Fsf  - source VF register index and its field selector
//   xCycles - cycle count handed to the P-register analysis
microVUt(void) mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles)
{
	mVUprint("microVU: EFU Opcode");

	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
	analyzePreg(xCycles);
}
|
||||
|
||||
// Analysis pass for EFU opcodes that read Fs without a field selector.
//   Fs      - source VF register index
//   xCycles - cycle count handed to the P-register analysis
microVUt(void) mVUanalyzeEFU2(mV, int Fs, u8 xCycles)
{
	mVUprint("microVU: EFU Opcode");

	analyzeReg1(Fs, mVUlow.VF_read[0]);
	analyzePreg(xCycles);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MFP - MFP Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for the MFP lower opcode.
//   Ft - destination VF register index (index 0 marks the op as a NOP)
microVUt(void) mVUanalyzeMFP(mV, int Ft)
{
	if (Ft == 0) {
		mVUlow.isNOP = true;
	}

	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MOVE - MOVE Opcode
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for the MOVE lower opcode.
//   Fs - source VF register index
//   Ft - destination VF register index
// The op is marked as a NOP when the destination index is 0 or when source
// and destination are the same register.
microVUt(void) mVUanalyzeMOVE(mV, int Fs, int Ft)
{
	const bool hasNoEffect = (Ft == 0) || (Ft == Fs);
	if (hasNoEffect) {
		mVUlow.isNOP = true;
	}

	analyzeReg1(Fs, mVUlow.VF_read[0]);
	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// LQx - LQ/LQD/LQI Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for the LQ/LQD/LQI lower opcodes.
//   Ft      - destination VF register index
//   Is      - VI register index used as the address source
//   writeIs - true when the opcode also writes Is back
microVUt(void) mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs)
{
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeReg2  (Ft, mVUlow.VF_write, 1);

	if (Ft == 0) {
		// With destination index 0 the VF write is dropped; the whole op is a
		// NOP unless it still has to update a non-zero Is.
		if (writeIs && Is) {
			mVUlow.noWriteVF = true;
		}
		else {
			mVUlow.isNOP = true;
		}
	}

	if (writeIs) {
		analyzeVIreg2(Is, mVUlow.VI_write, 1);
	}
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// SQx - SQ/SQD/SQI Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for the SQ/SQD/SQI lower opcodes.
//   Fs      - source VF register index
//   It      - VI register index used as the address source
//   writeIt - true when the opcode also writes It back
microVUt(void) mVUanalyzeSQ(mV, int Fs, int It, bool writeIt)
{
	analyzeReg1  (Fs, mVUlow.VF_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[0]);

	if (writeIt) {
		analyzeVIreg2(It, mVUlow.VI_write, 1);
	}
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// R*** - R Reg Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for R-register opcodes that read one field of Fs.
//   Fs/Fsf - source VF register index and its field selector
microVUt(void) mVUanalyzeR1(mV, int Fs, int Fsf)
{
	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
	analyzeRreg();
}
|
||||
|
||||
// Analysis pass for R-register opcodes that write Ft.
//   Ft       - destination VF register index
//   canBeNOP - when Ft is index 0: true marks the whole op as a NOP,
//              false only suppresses the VF write-back
microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP)
{
	if (Ft == 0) {
		if (canBeNOP) {
			mVUlow.isNOP = true;
		}
		else {
			mVUlow.noWriteVF = true;
		}
	}

	analyzeReg2(Ft, mVUlow.VF_write, 1);
	analyzeRreg();
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Sflag - Status Flag Opcodes
|
||||
//------------------------------------------------------------------
|
||||
// Walks backwards through the instructions already analyzed in this block
// and, on the first one that has sFLAG.doFlag set at an accumulated distance
// of at least 3, requests either a MAC-flag update (setMacFlag) or a
// non-sticky status-flag update. iPC is restored before returning.
microVUt(void) flagSet(mV, bool setMacFlag)
{
	const int savedPC = iPC;
	int distance = 0;

	for (int remaining = mVUcount; remaining > 0; remaining--) {
		// The stall of the instruction at the current iPC is added before
		// stepping back to the previous instruction.
		distance += mVUstall;
		incPC2(-2);

		if (sFLAG.doFlag && (distance >= 3)) {
			if (setMacFlag) {
				mFLAG.doFlag = true;
			}
			else {
				sFLAG.doNonSticky = true;
			}
			break;
		}

		distance++;
	}

	iPC = savedPC;
}
|
||||
|
||||
// Analysis pass for opcodes that read the status flag into It.
//   It - destination VI register index (index 0 marks the op as a NOP)
microVUt(void) mVUanalyzeSflag(mV, int It)
{
	mVUlow.readFlags = true;
	analyzeVIreg2(It, mVUlow.VI_write, 1);

	if (It == 0) {
		mVUlow.isNOP = true;
		return;
	}

	mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block
	mVUinfo.swapOps = true;
	flagSet(mVU, false);

	// Fewer than 4 instructions analyzed so far in this block: require an
	// exact pipeline-state match (bit 0 of needExactMatch).
	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 1;
	}
}
|
||||
|
||||
// Analysis pass for FSSET: tags the lower op as an FSSET and as a
// flag-reading instruction.
microVUt(void) mVUanalyzeFSSET(mV)
{
	mVUlow.isFSSET   = true;
	mVUlow.readFlags = true;
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Mflag - Mac Flag Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for opcodes that read the MAC flag.
//   Is - source VI register index
//   It - destination VI register index (index 0 marks the op as a NOP)
microVUt(void) mVUanalyzeMflag(mV, int Is, int It)
{
	mVUlow.readFlags = true;
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg2(It, mVUlow.VI_write, 1);

	if (It == 0) {
		mVUlow.isNOP = true;
		return;
	}

	mVUinfo.swapOps = true;
	flagSet(mVU, true);

	// Fewer than 4 instructions analyzed so far in this block: require an
	// exact pipeline-state match (bit 1 of needExactMatch).
	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 2;
	}
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Cflag - Clip Flag Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for opcodes that read the clip flag.
//   It - destination VI register index
microVUt(void) mVUanalyzeCflag(mV, int It)
{
	mVUinfo.swapOps  = true;
	mVUlow.readFlags = true;

	// Fewer than 4 instructions analyzed so far in this block: require an
	// exact pipeline-state match (bit 2 of needExactMatch).
	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 4;
	}

	analyzeVIreg2(It, mVUlow.VI_write, 1);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// XGkick
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Analysis pass for the XGKICK lower opcode.
//   Fs      - index passed to the VI read analysis
//   xCycles - cycle count handed to analyzeXGkick2
//
// Note: Technically XGKICK should stall on the next instruction, whereas
// this code stalls on the same instruction. This only matters when FMxxx or
// FSxxx flag-checking opcodes are placed very specifically near this
// instruction AND the XGKICK itself stalls. No game should be affected by
// this minor difference.
microVUt(void) mVUanalyzeXGkick(mV, int Fs, int xCycles)
{
	analyzeVIreg1(Fs, mVUlow.VI_read[0]);
	analyzeXGkick1();
	analyzeXGkick2(xCycles);
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Branches - Branch Opcodes
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Checks whether VI register xReg, which a branch is about to read, was
// written by the instructions immediately preceding the branch.
//   xReg    - VI register index read by the branch (index 0 is ignored)
//   infoVar - flag of the branch instruction, set when a delayed value of
//             xReg has to be used
// Scans backwards from the instruction before the branch. When a VI delay is
// found and infoVar is not already set, the instruction following the point
// where the scan stopped gets backupVI set. iPC is always restored.
microVUt(void) analyzeBranchVI(mV, int xReg, bool &infoVar)
{
	if (!xReg) return;

	const int scanEnd = aMin(5, (mVUcount+1));
	const int savedPC = iPC;
	int delay;

	incPC2(-2);
	for (delay = 0; delay < scanEnd; delay++) {
		if ((delay == mVUcount) && (delay < 5)) {
			// Reached the start of the block: consult the backed-up VI
			// recorded in the block's entry pipeline state.
			if (mVUpBlock->pState.viBackUp == xReg) {
				infoVar = 1;
				delay++;
			}
			break;
		}

		const bool writesReg = (mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used;
		if (!writesReg) break;
		if (mVUlow.readFlags || delay == 5) break;

		// Keep stepping back for the instruction directly before the branch,
		// or for one that also reads the register it writes.
		if ((delay == 0)
		||  ((mVUlow.VI_read[0].reg == xReg) && (mVUlow.VI_read[0].used))
		||  ((mVUlow.VI_read[1].reg == xReg) && (mVUlow.VI_read[1].used))) {
			incPC2(-2);
			continue;
		}
		break;
	}

	if (delay && !infoVar) {
		incPC2(2);
		mVUlow.backupVI = true;
		infoVar = 1;
	}

	// Restore the PC before logging; xPC is derived from it.
	iPC = savedPC;
	if (delay) {
		Console.WriteLn( Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x]", getIndex, delay, xPC);
	}
}
|
||||
|
||||
// Branch in Branch Delay-Slots
|
||||
microVUt(int) mVUbranchCheck(mV) {
|
||||
if (!mVUcount) return 0;
|
||||
incPC(-2);
|
||||
if (mVUlow.branch) {
|
||||
mVUlow.badBranch = 1;
|
||||
incPC(2);
|
||||
mVUlow.evilBranch = 1;
|
||||
mVUregs.blockType = 2;
|
||||
Console.Warning("microVU%d Warning: Branch in Branch delay slot! [%04x]", mVU->index, xPC);
|
||||
return 1;
|
||||
}
|
||||
incPC(2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Analysis pass for conditional branches that test a single VI register.
//   Is - VI register index read by the branch
microVUt(void) mVUanalyzeCondBranch1(mV, int Is)
{
	analyzeVIreg1(Is, mVUlow.VI_read[0]);

	// The VI-delay check is skipped when the branch stalled or when it sits
	// in another branch's delay slot.
	if (mVUstall || mVUbranchCheck(mVU)) return;

	analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
}
|
||||
|
||||
// Analysis pass for conditional branches that compare two VI registers.
//   Is, It - VI register indices read by the branch
microVUt(void) mVUanalyzeCondBranch2(mV, int Is, int It)
{
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[1]);

	// The VI-delay checks are skipped when the branch stalled or when it
	// sits in another branch's delay slot.
	if (mVUstall || mVUbranchCheck(mVU)) return;

	analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
	analyzeBranchVI(mVU, It, mVUlow.memReadIt);
}
|
||||
|
||||
// Analysis pass for the unconditional B/BAL branches.
//   It    - link register index (only used when isBAL is set)
//   isBAL - true for the linking form, which writes bSaveAddr into It
microVUt(void) mVUanalyzeNormBranch(mV, int It, bool isBAL)
{
	mVUbranchCheck(mVU);

	if (!isBAL) return;

	analyzeVIreg2(It, mVUlow.VI_write, 1);
	setConstReg(It, bSaveAddr);
}
|
||||
|
||||
// Analysis pass for the JR/JALR register jumps.
//   Is     - VI register index holding the jump target
//   It     - link register index (only used when isJALR is set)
//   isJALR - true for the linking form, which writes bSaveAddr into It
microVUt(void) mVUanalyzeJump(mV, int Is, int It, bool isJALR)
{
	mVUbranchCheck(mVU);

	// Branch type: 9 = JR, 10 = JALR.
	if (isJALR) {
		mVUlow.branch = 10;
	}
	else {
		mVUlow.branch = 9;
	}

	// If the target register holds a known constant (and the const hack is
	// not active), remember it for the jump.
	if (mVUconstReg[Is].isValid && !CHECK_VU_CONSTHACK) {
		mVUlow.constJump.isValid  = 1;
		mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
		//DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU->index);
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);

	if (isJALR) {
		analyzeVIreg2(It, mVUlow.VI_write, 1);
		setConstReg(It, bSaveAddr);
	}
}
|
||||
|
|
|
@ -1,106 +1,106 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Clamp Functions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Operands for the unsigned-minimum step of the SSE4 clamp in mVUclamp2.
// Row 0 is used for single-component masks, row 1 for all other masks.
const __aligned16 u32 sse4_minvals[2][4] = {
	{ 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, // 1000
	{ 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, // 1111
};
|
||||
// Operands for the signed-minimum step of the SSE4 clamp in mVUclamp2.
// Row 0 is used for single-component masks, row 1 for all other masks.
const __aligned16 u32 sse4_maxvals[2][4] = {
	{ 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, // 1000
	{ 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, // 1111
};
|
||||
|
||||
// Used for Result Clamping
|
||||
// Note: This function will not preserve NaN values' sign.
|
||||
// The theory behind this is that when we compute a result, and we've
|
||||
// gotten a NaN value, then something went wrong; and the NaN's sign
|
||||
// is not to be trusted. Games like positive values better usually,
|
||||
// and its faster... so just always make NaNs into positive infinity.
|
||||
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
break;
|
||||
default:
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Used for Operand Clamping
|
||||
// Note 1: If 'preserve sign' mode is on, it will preserve the sign of NaN values.
|
||||
// Note 2: Using regalloc here seems to contaminate some regs in certain games.
|
||||
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
|
||||
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
|
||||
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1;
|
||||
SSE4_PMINSD_M128_to_XMM(reg, (uptr)&sse4_maxvals[i][0]);
|
||||
SSE4_PMINUD_M128_to_XMM(reg, (uptr)&sse4_minvals[i][0]);
|
||||
return;
|
||||
}
|
||||
int regT1b = 0;
|
||||
if (regT1 < 0) {
|
||||
regT1b = 1; regT1=(reg+1)%8;
|
||||
SSE_MOVAPS_XMM_to_M128((uptr)mVU->xmmCTemp, regT1);
|
||||
//regT1 = mVU->regAlloc->allocReg();
|
||||
}
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
|
||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
|
||||
SSE_MINSS_M32_to_XMM (reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXSS_M32_to_XMM (reg, (uptr)mVUglob.minvals);
|
||||
SSE_ORPS_XMM_to_XMM (reg, regT1);
|
||||
break;
|
||||
default:
|
||||
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
|
||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
SSE_ORPS_XMM_to_XMM (reg, regT1);
|
||||
break;
|
||||
}
|
||||
//if (regT1b) mVU->regAlloc->clearNeeded(regT1);
|
||||
if (regT1b) SSE_MOVAPS_M128_to_XMM(regT1, (uptr)mVU->xmmCTemp);
|
||||
}
|
||||
else mVUclamp1(reg, regT1, xyzw, bClampE);
|
||||
}
|
||||
|
||||
// Used for operand clamping on every SSE instruction (add/sub/mul/div)
|
||||
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
|
||||
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
|
||||
}
|
||||
|
||||
// Used for result clamping on every SSE instruction (add/sub/mul/div)
|
||||
// Note: Disabled in "preserve sign" mode because in certain cases it
|
||||
// makes too much code-gen, and you get jump8-overflows in certain
|
||||
// emulated opcodes (causing crashes). Since we're clamping the operands
|
||||
// with mVUclamp3, we should almost never be getting a NaN result,
|
||||
// but this clamp is just a precaution just-in-case.
|
||||
void mVUclamp4(int reg, int regT1, int xyzw) {
|
||||
if (clampE && !CHECK_VU_SIGN_OVERFLOW) mVUclamp1(reg, regT1, xyzw, 1);
|
||||
}
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Clamp Functions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Operands for the unsigned-minimum step of the SSE4 clamp in mVUclamp2.
// Row 0 is used for single-component masks, row 1 for all other masks.
const __aligned16 u32 sse4_minvals[2][4] = {
	{ 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, // 1000
	{ 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, // 1111
};
|
||||
// Operands for the signed-minimum step of the SSE4 clamp in mVUclamp2.
// Row 0 is used for single-component masks, row 1 for all other masks.
const __aligned16 u32 sse4_maxvals[2][4] = {
	{ 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, // 1000
	{ 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, // 1111
};
|
||||
|
||||
// Used for Result Clamping
|
||||
// Note: This function will not preserve NaN values' sign.
|
||||
// The theory behind this is that when we compute a result, and we've
|
||||
// gotten a NaN value, then something went wrong; and the NaN's sign
|
||||
// is not to be trusted. Games like positive values better usually,
|
||||
// and its faster... so just always make NaNs into positive infinity.
|
||||
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
break;
|
||||
default:
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Used for Operand Clamping
|
||||
// Note 1: If 'preserve sign' mode is on, it will preserve the sign of NaN values.
|
||||
// Note 2: Using regalloc here seems to contaminate some regs in certain games.
|
||||
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
|
||||
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
|
||||
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1;
|
||||
SSE4_PMINSD_M128_to_XMM(reg, (uptr)&sse4_maxvals[i][0]);
|
||||
SSE4_PMINUD_M128_to_XMM(reg, (uptr)&sse4_minvals[i][0]);
|
||||
return;
|
||||
}
|
||||
int regT1b = 0;
|
||||
if (regT1 < 0) {
|
||||
regT1b = 1; regT1=(reg+1)%8;
|
||||
SSE_MOVAPS_XMM_to_M128((uptr)mVU->xmmCTemp, regT1);
|
||||
//regT1 = mVU->regAlloc->allocReg();
|
||||
}
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
|
||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
|
||||
SSE_MINSS_M32_to_XMM (reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXSS_M32_to_XMM (reg, (uptr)mVUglob.minvals);
|
||||
SSE_ORPS_XMM_to_XMM (reg, regT1);
|
||||
break;
|
||||
default:
|
||||
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
|
||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
SSE_ORPS_XMM_to_XMM (reg, regT1);
|
||||
break;
|
||||
}
|
||||
//if (regT1b) mVU->regAlloc->clearNeeded(regT1);
|
||||
if (regT1b) SSE_MOVAPS_M128_to_XMM(regT1, (uptr)mVU->xmmCTemp);
|
||||
}
|
||||
else mVUclamp1(reg, regT1, xyzw, bClampE);
|
||||
}
|
||||
|
||||
// Used for operand clamping on every SSE instruction (add/sub/mul/div)
|
||||
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
|
||||
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
|
||||
}
|
||||
|
||||
// Used for result clamping on every SSE instruction (add/sub/mul/div)
|
||||
// Note: Disabled in "preserve sign" mode because in certain cases it
|
||||
// makes too much code-gen, and you get jump8-overflows in certain
|
||||
// emulated opcodes (causing crashes). Since we're clamping the operands
|
||||
// with mVUclamp3, we should almost never be getting a NaN result,
|
||||
// but this clamp is just a precaution just-in-case.
|
||||
void mVUclamp4(int reg, int regT1, int xyzw) {
|
||||
if (clampE && !CHECK_VU_SIGN_OVERFLOW) mVUclamp1(reg, regT1, xyzw, 1);
|
||||
}
|
||||
|
|
|
@ -1,337 +1,337 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
union regInfo {
|
||||
u32 reg;
|
||||
struct {
|
||||
u8 x;
|
||||
u8 y;
|
||||
u8 z;
|
||||
u8 w;
|
||||
};
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma pack(1)
|
||||
# pragma warning(disable:4996) // 'function': was declared deprecated
|
||||
#endif
|
||||
|
||||
struct __aligned16 microRegInfo { // Ordered for Faster Compares
|
||||
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
|
||||
u8 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||
u8 q;
|
||||
u8 p;
|
||||
u8 r;
|
||||
u8 xgkick;
|
||||
u8 viBackUp;
|
||||
u8 VI[16];
|
||||
regInfo VF[32];
|
||||
u8 flags; // clip x2 :: status x2
|
||||
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
|
||||
u8 padding[5]; // 160 bytes
|
||||
} __packed;
|
||||
|
||||
struct __aligned16 microBlock {
|
||||
microRegInfo pState; // Detailed State of Pipeline
|
||||
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
||||
u8* x86ptrStart; // Start of code
|
||||
} __packed;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma pack()
|
||||
#endif
|
||||
|
||||
struct microTempRegInfo {
|
||||
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
|
||||
u8 VFreg[2]; // Index of the VF reg
|
||||
u8 VI; // Holds cycle info for Id
|
||||
u8 VIreg; // Index of the VI reg
|
||||
u8 q; // Holds cycle info for Q reg
|
||||
u8 p; // Holds cycle info for P reg
|
||||
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
|
||||
u8 xgkick; // Holds the cycle info for XGkick
|
||||
};
|
||||
|
||||
struct microVFreg {
|
||||
u8 reg; // Reg Index
|
||||
u8 x; // X vector read/written to?
|
||||
u8 y; // Y vector read/written to?
|
||||
u8 z; // Z vector read/written to?
|
||||
u8 w; // W vector read/written to?
|
||||
};
|
||||
|
||||
struct microVIreg {
|
||||
u8 reg; // Reg Index
|
||||
u8 used; // Reg is Used? (Read/Written)
|
||||
};
|
||||
|
||||
struct microConstInfo {
|
||||
u8 isValid; // Is the constant in regValue valid?
|
||||
u32 regValue; // Constant Value
|
||||
};
|
||||
|
||||
struct microUpperOp {
|
||||
bool eBit; // Has E-bit set
|
||||
bool iBit; // Has I-bit set
|
||||
bool mBit; // Has M-bit set
|
||||
microVFreg VF_write; // VF Vectors written to by this instruction
|
||||
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
||||
};
|
||||
|
||||
struct microLowerOp {
|
||||
microVFreg VF_write; // VF Vectors written to by this instruction
|
||||
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
||||
microVIreg VI_write; // VI reg written to by this instruction
|
||||
microVIreg VI_read[2]; // VI regs read by this instruction
|
||||
microConstInfo constJump; // Constant Reg Info for JR/JARL instructions
|
||||
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
|
||||
bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot
|
||||
bool evilBranch;// This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch)
|
||||
bool isNOP; // This instruction is a NOP
|
||||
bool isFSSET; // This instruction is a FSSET
|
||||
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
|
||||
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
|
||||
bool memReadIs; // Read Is (VI reg) from memory (used by branches)
|
||||
bool memReadIt; // Read If (VI reg) from memory (used by branches)
|
||||
bool readFlags; // Current Instruction reads Status, Mac, or Clip flags
|
||||
};
|
||||
|
||||
struct microFlagInst {
|
||||
bool doFlag; // Update Flag on this Instruction
|
||||
bool doNonSticky; // Update O,U,S,Z (non-sticky) bits on this Instruction (status flag only)
|
||||
u8 write; // Points to the instance that should be written to (s-stage write)
|
||||
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
|
||||
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
|
||||
};
|
||||
|
||||
// Cycle values for the four instances of each flag, plus a cycle counter.
struct microFlagCycles {
	int xStatus[4];
	int xMac[4];
	int xClip[4];
	int cycles;
};
|
||||
|
||||
struct microOp {
|
||||
u8 stall; // Info on how much current instruction stalled
|
||||
bool isEOB; // Cur Instruction is last instruction in block (End of Block)
|
||||
bool isBdelay; // Cur Instruction in Branch Delay slot
|
||||
bool swapOps; // Run Lower Instruction before Upper Instruction
|
||||
bool backupVF; // Backup mVUlow.VF_write.reg, and restore it before the Upper Instruction is called
|
||||
bool doXGKICK; // Do XGKICK transfer on this instruction
|
||||
bool doDivFlag; // Transfer Div flag to Status Flag on this instruction
|
||||
int readQ; // Q instance for reading
|
||||
int writeQ; // Q instance for writing
|
||||
int readP; // P instance for reading
|
||||
int writeP; // P instance for writing
|
||||
microFlagInst sFlag; // Status Flag Instance Info
|
||||
microFlagInst mFlag; // Mac Flag Instance Info
|
||||
microFlagInst cFlag; // Clip Flag Instance Info
|
||||
microUpperOp uOp; // Upper Op Info
|
||||
microLowerOp lOp; // Lower Op Info
|
||||
};
|
||||
|
||||
template<u32 pSize>
|
||||
struct microIR {
|
||||
microBlock block; // Block/Pipeline info
|
||||
microBlock* pBlock; // Pointer to a block in mVUblocks
|
||||
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
||||
microOp info[pSize/2]; // Info for Instructions in current block
|
||||
microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks
|
||||
u8 branch;
|
||||
u32 cycles; // Cycles for current block
|
||||
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
|
||||
u32 curPC; // Current PC
|
||||
u32 startPC; // Start PC for Cur Block
|
||||
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Reg Alloc
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Forward declarations of the register transfer helpers that microRegAlloc
// calls; their definitions are not part of this header.
void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW);
|
||||
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
|
||||
void mVUloadReg(int reg, uptr offset, int xyzw);
|
||||
void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs);
|
||||
|
||||
// Bookkeeping for one host XMM register managed by microRegAlloc.
struct microXMM {
	int  reg;		// VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
	int  xyzw;		// xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
	int  count;		// Count of when last used
	bool isNeeded;	// Is needed for current instruction
};
|
||||
|
||||
#define xmmTotal 7 // Don't allocate PQ?
|
||||
class microRegAlloc {
|
||||
private:
|
||||
microXMM xmmReg[xmmTotal];
|
||||
VURegs* vuRegs;
|
||||
int counter;
|
||||
int findFreeRegRec(int startIdx) {
|
||||
for (int i = startIdx; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded) {
|
||||
int x = findFreeRegRec(i+1);
|
||||
if (x == -1) return i;
|
||||
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
int findFreeReg() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded && (xmmReg[i].reg < 0)) {
|
||||
return i; // Reg is not needed and was a temp reg
|
||||
}
|
||||
}
|
||||
int x = findFreeRegRec(0);
|
||||
if (x < 0) { DevCon.Error("microVU Allocation Error!"); return 0; }
|
||||
return x;
|
||||
}
|
||||
|
||||
public:
|
||||
microRegAlloc(VURegs* vuRegsPtr) {
|
||||
vuRegs = vuRegsPtr;
|
||||
reset();
|
||||
}
|
||||
void reset() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
clearReg(i);
|
||||
}
|
||||
counter = 0;
|
||||
}
|
||||
void flushAll(bool clearState = 1) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
writeBackReg(i);
|
||||
if (clearState) clearReg(i);
|
||||
}
|
||||
}
|
||||
void clearReg(int reg) {
|
||||
xmmReg[reg].reg = -1;
|
||||
xmmReg[reg].count = 0;
|
||||
xmmReg[reg].xyzw = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
}
|
||||
void clearRegVF(int VFreg) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (xmmReg[i].reg == VFreg) clearReg(i);
|
||||
}
|
||||
}
|
||||
void writeBackReg(int reg, bool invalidateRegs = 1) {
|
||||
if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0
|
||||
if (xmmReg[reg].reg == 33) SSE_MOVSS_XMM_to_M32((uptr)&vuRegs->VI[REG_I].UL, reg);
|
||||
else if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
|
||||
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
||||
if (invalidateRegs) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if ((i == reg) || xmmReg[i].isNeeded) continue;
|
||||
if (xmmReg[i].reg == xmmReg[reg].reg) {
|
||||
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", xmmReg[i].reg);
|
||||
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
||||
}
|
||||
}
|
||||
}
|
||||
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
|
||||
xmmReg[reg].count = counter;
|
||||
xmmReg[reg].xyzw = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
clearReg(reg); // Clear Reg
|
||||
}
|
||||
void clearNeeded(int reg) {
|
||||
if ((reg < 0) || (reg >= xmmTotal)) return;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
if (xmmReg[reg].xyzw) { // Reg was modified
|
||||
if (xmmReg[reg].reg > 0) {
|
||||
int mergeRegs = 0;
|
||||
if (xmmReg[reg].xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
|
||||
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
|
||||
if (i == reg) continue;
|
||||
if (xmmReg[i].reg == xmmReg[reg].reg) {
|
||||
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", xmmReg[i].reg);
|
||||
if (mergeRegs == 1) {
|
||||
mVUmergeRegs(i, reg, xmmReg[reg].xyzw, 1);
|
||||
xmmReg[i].xyzw = 0xf;
|
||||
xmmReg[i].count = counter;
|
||||
mergeRegs = 2;
|
||||
}
|
||||
else clearReg(i);
|
||||
}
|
||||
}
|
||||
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
|
||||
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
|
||||
}
|
||||
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
||||
}
|
||||
}
|
||||
int allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
|
||||
counter++;
|
||||
if (vfLoadReg >= 0) { // Search For Cached Regs
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if ((xmmReg[i].reg == vfLoadReg) && (!xmmReg[i].xyzw // Reg Was Not Modified
|
||||
|| (xmmReg[i].reg && (xmmReg[i].xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
||||
int z = i;
|
||||
if (vfWriteReg >= 0) { // Reg will be modified
|
||||
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
||||
z = findFreeReg();
|
||||
writeBackReg(z);
|
||||
if (z!=i && xyzw==8) SSE_MOVAPS_XMM_to_XMM (z, i);
|
||||
else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
|
||||
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
|
||||
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
|
||||
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
|
||||
xmmReg[i].count = counter; // Reg i was used, so update counter
|
||||
}
|
||||
else { // Don't clone reg, but shuffle to adjust for SS ops
|
||||
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(z); }
|
||||
if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
|
||||
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
|
||||
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
|
||||
}
|
||||
xmmReg[z].reg = vfWriteReg;
|
||||
xmmReg[z].xyzw = xyzw;
|
||||
}
|
||||
xmmReg[z].count = counter;
|
||||
xmmReg[z].isNeeded = 1;
|
||||
return z;
|
||||
}
|
||||
}
|
||||
}
|
||||
int x = findFreeReg();
|
||||
writeBackReg(x);
|
||||
|
||||
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
||||
if ((vfLoadReg == 0) && !(xyzw & 1)) { SSE2_PXOR_XMM_to_XMM(x, x); }
|
||||
else if (vfLoadReg == 33) mVUloadIreg(x, xyzw, vuRegs);
|
||||
else if (vfLoadReg == 32) mVUloadReg (x, (uptr)&vuRegs->ACC.UL[0], xyzw);
|
||||
else if (vfLoadReg >= 0) mVUloadReg (x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw);
|
||||
xmmReg[x].reg = vfWriteReg;
|
||||
xmmReg[x].xyzw = xyzw;
|
||||
}
|
||||
else { // Reg Will Not Be Modified (always load full reg for caching)
|
||||
if (vfLoadReg == 33) mVUloadIreg(x, 0xf, vuRegs);
|
||||
else if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]);
|
||||
else if (vfLoadReg >= 0) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0]);
|
||||
xmmReg[x].reg = vfLoadReg;
|
||||
xmmReg[x].xyzw = 0;
|
||||
}
|
||||
xmmReg[x].count = counter;
|
||||
xmmReg[x].isNeeded = 1;
|
||||
return x;
|
||||
}
|
||||
};
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
union regInfo {
|
||||
u32 reg;
|
||||
struct {
|
||||
u8 x;
|
||||
u8 y;
|
||||
u8 z;
|
||||
u8 w;
|
||||
};
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma pack(1)
|
||||
# pragma warning(disable:4996) // 'function': was declared deprecated
|
||||
#endif
|
||||
|
||||
struct __aligned16 microRegInfo { // Ordered for Faster Compares
|
||||
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
|
||||
u8 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||
u8 q;
|
||||
u8 p;
|
||||
u8 r;
|
||||
u8 xgkick;
|
||||
u8 viBackUp;
|
||||
u8 VI[16];
|
||||
regInfo VF[32];
|
||||
u8 flags; // clip x2 :: status x2
|
||||
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
|
||||
u8 padding[5]; // 160 bytes
|
||||
} __packed;
|
||||
|
||||
struct __aligned16 microBlock {
|
||||
microRegInfo pState; // Detailed State of Pipeline
|
||||
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
||||
u8* x86ptrStart; // Start of code
|
||||
} __packed;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma pack()
|
||||
#endif
|
||||
|
||||
struct microTempRegInfo {
|
||||
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
|
||||
u8 VFreg[2]; // Index of the VF reg
|
||||
u8 VI; // Holds cycle info for Id
|
||||
u8 VIreg; // Index of the VI reg
|
||||
u8 q; // Holds cycle info for Q reg
|
||||
u8 p; // Holds cycle info for P reg
|
||||
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
|
||||
u8 xgkick; // Holds the cycle info for XGkick
|
||||
};
|
||||
|
||||
struct microVFreg {
|
||||
u8 reg; // Reg Index
|
||||
u8 x; // X vector read/written to?
|
||||
u8 y; // Y vector read/written to?
|
||||
u8 z; // Z vector read/written to?
|
||||
u8 w; // W vector read/written to?
|
||||
};
|
||||
|
||||
struct microVIreg {
|
||||
u8 reg; // Reg Index
|
||||
u8 used; // Reg is Used? (Read/Written)
|
||||
};
|
||||
|
||||
struct microConstInfo {
|
||||
u8 isValid; // Is the constant in regValue valid?
|
||||
u32 regValue; // Constant Value
|
||||
};
|
||||
|
||||
struct microUpperOp {
|
||||
bool eBit; // Has E-bit set
|
||||
bool iBit; // Has I-bit set
|
||||
bool mBit; // Has M-bit set
|
||||
microVFreg VF_write; // VF Vectors written to by this instruction
|
||||
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
||||
};
|
||||
|
||||
struct microLowerOp {
|
||||
microVFreg VF_write; // VF Vectors written to by this instruction
|
||||
microVFreg VF_read[2]; // VF Vectors read by this instruction
|
||||
microVIreg VI_write; // VI reg written to by this instruction
|
||||
microVIreg VI_read[2]; // VI regs read by this instruction
|
||||
microConstInfo constJump; // Constant Reg Info for JR/JARL instructions
|
||||
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
|
||||
bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot
|
||||
bool evilBranch;// This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch)
|
||||
bool isNOP; // This instruction is a NOP
|
||||
bool isFSSET; // This instruction is a FSSET
|
||||
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
|
||||
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
|
||||
bool memReadIs; // Read Is (VI reg) from memory (used by branches)
|
||||
bool memReadIt; // Read If (VI reg) from memory (used by branches)
|
||||
bool readFlags; // Current Instruction reads Status, Mac, or Clip flags
|
||||
};
|
||||
|
||||
struct microFlagInst {
|
||||
bool doFlag; // Update Flag on this Instruction
|
||||
bool doNonSticky; // Update O,U,S,Z (non-sticky) bits on this Instruction (status flag only)
|
||||
u8 write; // Points to the instance that should be written to (s-stage write)
|
||||
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
|
||||
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
|
||||
};
|
||||
|
||||
// Four int slots each for the Status, Mac and Clip flags, plus a cycle count.
struct microFlagCycles {
	int xStatus[4];
	int xMac[4];
	int xClip[4];
	int cycles;
};
|
||||
|
||||
struct microOp {
|
||||
u8 stall; // Info on how much current instruction stalled
|
||||
bool isEOB; // Cur Instruction is last instruction in block (End of Block)
|
||||
bool isBdelay; // Cur Instruction in Branch Delay slot
|
||||
bool swapOps; // Run Lower Instruction before Upper Instruction
|
||||
bool backupVF; // Backup mVUlow.VF_write.reg, and restore it before the Upper Instruction is called
|
||||
bool doXGKICK; // Do XGKICK transfer on this instruction
|
||||
bool doDivFlag; // Transfer Div flag to Status Flag on this instruction
|
||||
int readQ; // Q instance for reading
|
||||
int writeQ; // Q instance for writing
|
||||
int readP; // P instance for reading
|
||||
int writeP; // P instance for writing
|
||||
microFlagInst sFlag; // Status Flag Instance Info
|
||||
microFlagInst mFlag; // Mac Flag Instance Info
|
||||
microFlagInst cFlag; // Clip Flag Instance Info
|
||||
microUpperOp uOp; // Upper Op Info
|
||||
microLowerOp lOp; // Lower Op Info
|
||||
};
|
||||
|
||||
template<u32 pSize>
|
||||
struct microIR {
|
||||
microBlock block; // Block/Pipeline info
|
||||
microBlock* pBlock; // Pointer to a block in mVUblocks
|
||||
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
||||
microOp info[pSize/2]; // Info for Instructions in current block
|
||||
microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks
|
||||
u8 branch;
|
||||
u32 cycles; // Cycles for current block
|
||||
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
|
||||
u32 curPC; // Current PC
|
||||
u32 startPC; // Start PC for Cur Block
|
||||
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Reg Alloc
|
||||
//------------------------------------------------------------------
|
||||
|
||||
void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW);
|
||||
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
|
||||
void mVUloadReg(int reg, uptr offset, int xyzw);
|
||||
void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs);
|
||||
|
||||
// Bookkeeping for one xmm register managed by the register allocator.
struct microXMM {
	int  reg;      // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
	int  xyzw;     // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
	int  count;    // Value of the allocator's counter when last used
	bool isNeeded; // Is needed for current instruction
};
|
||||
|
||||
#define xmmTotal 7 // Don't allocate PQ?
|
||||
class microRegAlloc {
|
||||
private:
|
||||
microXMM xmmReg[xmmTotal];
|
||||
VURegs* vuRegs;
|
||||
int counter;
|
||||
int findFreeRegRec(int startIdx) {
|
||||
for (int i = startIdx; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded) {
|
||||
int x = findFreeRegRec(i+1);
|
||||
if (x == -1) return i;
|
||||
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
int findFreeReg() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (!xmmReg[i].isNeeded && (xmmReg[i].reg < 0)) {
|
||||
return i; // Reg is not needed and was a temp reg
|
||||
}
|
||||
}
|
||||
int x = findFreeRegRec(0);
|
||||
if (x < 0) { DevCon.Error("microVU Allocation Error!"); return 0; }
|
||||
return x;
|
||||
}
|
||||
|
||||
public:
|
||||
microRegAlloc(VURegs* vuRegsPtr) {
|
||||
vuRegs = vuRegsPtr;
|
||||
reset();
|
||||
}
|
||||
void reset() {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
clearReg(i);
|
||||
}
|
||||
counter = 0;
|
||||
}
|
||||
void flushAll(bool clearState = 1) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
writeBackReg(i);
|
||||
if (clearState) clearReg(i);
|
||||
}
|
||||
}
|
||||
void clearReg(int reg) {
|
||||
xmmReg[reg].reg = -1;
|
||||
xmmReg[reg].count = 0;
|
||||
xmmReg[reg].xyzw = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
}
|
||||
void clearRegVF(int VFreg) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (xmmReg[i].reg == VFreg) clearReg(i);
|
||||
}
|
||||
}
|
||||
void writeBackReg(int reg, bool invalidateRegs = 1) {
|
||||
if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0
|
||||
if (xmmReg[reg].reg == 33) SSE_MOVSS_XMM_to_M32((uptr)&vuRegs->VI[REG_I].UL, reg);
|
||||
else if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
|
||||
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
||||
if (invalidateRegs) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if ((i == reg) || xmmReg[i].isNeeded) continue;
|
||||
if (xmmReg[i].reg == xmmReg[reg].reg) {
|
||||
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", xmmReg[i].reg);
|
||||
clearReg(i); // Invalidate any Cached Regs of same vf Reg
|
||||
}
|
||||
}
|
||||
}
|
||||
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
|
||||
xmmReg[reg].count = counter;
|
||||
xmmReg[reg].xyzw = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
clearReg(reg); // Clear Reg
|
||||
}
|
||||
void clearNeeded(int reg) {
|
||||
if ((reg < 0) || (reg >= xmmTotal)) return;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
if (xmmReg[reg].xyzw) { // Reg was modified
|
||||
if (xmmReg[reg].reg > 0) {
|
||||
int mergeRegs = 0;
|
||||
if (xmmReg[reg].xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
|
||||
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
|
||||
if (i == reg) continue;
|
||||
if (xmmReg[i].reg == xmmReg[reg].reg) {
|
||||
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", xmmReg[i].reg);
|
||||
if (mergeRegs == 1) {
|
||||
mVUmergeRegs(i, reg, xmmReg[reg].xyzw, 1);
|
||||
xmmReg[i].xyzw = 0xf;
|
||||
xmmReg[i].count = counter;
|
||||
mergeRegs = 2;
|
||||
}
|
||||
else clearReg(i);
|
||||
}
|
||||
}
|
||||
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
|
||||
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
|
||||
}
|
||||
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
||||
}
|
||||
}
|
||||
int allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
|
||||
counter++;
|
||||
if (vfLoadReg >= 0) { // Search For Cached Regs
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if ((xmmReg[i].reg == vfLoadReg) && (!xmmReg[i].xyzw // Reg Was Not Modified
|
||||
|| (xmmReg[i].reg && (xmmReg[i].xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
|
||||
int z = i;
|
||||
if (vfWriteReg >= 0) { // Reg will be modified
|
||||
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
|
||||
z = findFreeReg();
|
||||
writeBackReg(z);
|
||||
if (z!=i && xyzw==8) SSE_MOVAPS_XMM_to_XMM (z, i);
|
||||
else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
|
||||
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
|
||||
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
|
||||
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
|
||||
xmmReg[i].count = counter; // Reg i was used, so update counter
|
||||
}
|
||||
else { // Don't clone reg, but shuffle to adjust for SS ops
|
||||
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(z); }
|
||||
if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
|
||||
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
|
||||
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
|
||||
}
|
||||
xmmReg[z].reg = vfWriteReg;
|
||||
xmmReg[z].xyzw = xyzw;
|
||||
}
|
||||
xmmReg[z].count = counter;
|
||||
xmmReg[z].isNeeded = 1;
|
||||
return z;
|
||||
}
|
||||
}
|
||||
}
|
||||
int x = findFreeReg();
|
||||
writeBackReg(x);
|
||||
|
||||
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
||||
if ((vfLoadReg == 0) && !(xyzw & 1)) { SSE2_PXOR_XMM_to_XMM(x, x); }
|
||||
else if (vfLoadReg == 33) mVUloadIreg(x, xyzw, vuRegs);
|
||||
else if (vfLoadReg == 32) mVUloadReg (x, (uptr)&vuRegs->ACC.UL[0], xyzw);
|
||||
else if (vfLoadReg >= 0) mVUloadReg (x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw);
|
||||
xmmReg[x].reg = vfWriteReg;
|
||||
xmmReg[x].xyzw = xyzw;
|
||||
}
|
||||
else { // Reg Will Not Be Modified (always load full reg for caching)
|
||||
if (vfLoadReg == 33) mVUloadIreg(x, 0xf, vuRegs);
|
||||
else if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]);
|
||||
else if (vfLoadReg >= 0) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0]);
|
||||
xmmReg[x].reg = vfLoadReg;
|
||||
xmmReg[x].xyzw = 0;
|
||||
}
|
||||
xmmReg[x].count = counter;
|
||||
xmmReg[x].isNeeded = 1;
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1,67 +1,67 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef newVif
|
||||
#include "x86emitter/x86emitter.h"
|
||||
using namespace x86Emitter;
|
||||
extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0);
|
||||
extern void _nVifUnpack(int idx, u8 *data, u32 size);
|
||||
|
||||
typedef u32 (__fastcall *nVifCall)(void*, void*);
|
||||
|
||||
static __pagealigned u8 nVifUpkExec[__pagesize*16];
|
||||
static __aligned16 nVifCall nVifUpk[(2*2*16)*4*4]; // ([USN][Masking][Unpack Type]) [curCycle][CyclesToWrite-1]
|
||||
static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
|
||||
|
||||
#define _v0 0
|
||||
#define _v1 0x55
|
||||
#define _v2 0xaa
|
||||
#define _v3 0xff
|
||||
#define aMax(x, y) std::max(x,y)
|
||||
#define aMin(x, y) std::min(x,y)
|
||||
#define _f __forceinline
|
||||
|
||||
#define xShiftR(regX, n) { \
|
||||
if (usn) { xPSRL.D(regX, n); } \
|
||||
else { xPSRA.D(regX, n); } \
|
||||
}
|
||||
|
||||
// Per-unpack-type table with one entry per 4-bit unpack type; the entries marked
// "----" are 0.
// NOTE(review): values look like the source size in bytes of one vector for each
// format (e.g. V4-32 = 16) - confirm against the unpack code that indexes it.
static const u32 nVifT[16] = {
	 4, 2, 1, 0, // S-32,  S-16,  S-8,  ----
	 8, 4, 2, 0, // V2-32, V2-16, V2-8, ----
	12, 6, 3, 0, // V3-32, V3-16, V3-8, ----
	16, 8, 4, 2, // V4-32, V4-16, V4-8, V4-5
};
|
||||
|
||||
#include "newVif_BlockBuffer.h"
|
||||
#include "newVif_OldUnpack.inl"
|
||||
#include "newVif_UnpackGen.inl"
|
||||
#include "newVif_Unpack.inl"
|
||||
|
||||
#endif
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef newVif
|
||||
#include "x86emitter/x86emitter.h"
|
||||
using namespace x86Emitter;
|
||||
extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0);
|
||||
extern void _nVifUnpack(int idx, u8 *data, u32 size);
|
||||
|
||||
typedef u32 (__fastcall *nVifCall)(void*, void*);
|
||||
|
||||
static __pagealigned u8 nVifUpkExec[__pagesize*16];
|
||||
static __aligned16 nVifCall nVifUpk[(2*2*16)*4*4]; // ([USN][Masking][Unpack Type]) [curCycle][CyclesToWrite-1]
|
||||
static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
|
||||
|
||||
#define _v0 0
|
||||
#define _v1 0x55
|
||||
#define _v2 0xaa
|
||||
#define _v3 0xff
|
||||
#define aMax(x, y) std::max(x,y)
|
||||
#define aMin(x, y) std::min(x,y)
|
||||
#define _f __forceinline
|
||||
|
||||
#define xShiftR(regX, n) { \
|
||||
if (usn) { xPSRL.D(regX, n); } \
|
||||
else { xPSRA.D(regX, n); } \
|
||||
}
|
||||
|
||||
// Per-unpack-type table with one entry per 4-bit unpack type; the entries marked
// "----" are 0.
// NOTE(review): values look like the source size in bytes of one vector for each
// format (e.g. V4-32 = 16) - confirm against the unpack code that indexes it.
static const u32 nVifT[16] = {
	 4, 2, 1, 0, // S-32,  S-16,  S-8,  ----
	 8, 4, 2, 0, // V2-32, V2-16, V2-8, ----
	12, 6, 3, 0, // V3-32, V3-16, V3-8, ----
	16, 8, 4, 2, // V4-32, V4-16, V4-8, V4-5
};
|
||||
|
||||
#include "newVif_BlockBuffer.h"
|
||||
#include "newVif_OldUnpack.inl"
|
||||
#include "newVif_UnpackGen.inl"
|
||||
#include "newVif_Unpack.inl"
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,40 +1,40 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
class BlockBuffer {
|
||||
private:
|
||||
u32 mSize; // Cur Size
|
||||
u32 mSizeT; // Total Size
|
||||
u8* mData; // Data Ptr
|
||||
void grow(u32 newSize) {
|
||||
u8* temp = new u8[newSize];
|
||||
memcpy(temp, mData, mSizeT);
|
||||
safe_delete( mData );
|
||||
mData = temp;
|
||||
}
|
||||
public:
|
||||
BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; mData = new u8[mSizeT]; }
|
||||
virtual ~BlockBuffer() { safe_delete(mData); }
|
||||
void append(void *addr, u32 size) {
|
||||
if (mSize + size > mSizeT) grow(mSize*2 + size);
|
||||
memcpy(&mData[mSize], addr, size);
|
||||
mSize += size;
|
||||
}
|
||||
void clear() { mSize = 0; }
|
||||
u32 getSize() { return mSize; }
|
||||
u8* getBlock() { return mData; }
|
||||
};
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
class BlockBuffer {
|
||||
private:
|
||||
u32 mSize; // Cur Size
|
||||
u32 mSizeT; // Total Size
|
||||
u8* mData; // Data Ptr
|
||||
void grow(u32 newSize) {
|
||||
u8* temp = new u8[newSize];
|
||||
memcpy(temp, mData, mSizeT);
|
||||
safe_delete( mData );
|
||||
mData = temp;
|
||||
}
|
||||
public:
|
||||
BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; mData = new u8[mSizeT]; }
|
||||
virtual ~BlockBuffer() { safe_delete(mData); }
|
||||
void append(void *addr, u32 size) {
|
||||
if (mSize + size > mSizeT) grow(mSize*2 + size);
|
||||
memcpy(&mData[mSize], addr, size);
|
||||
mSize += size;
|
||||
}
|
||||
void clear() { mSize = 0; }
|
||||
u32 getSize() { return mSize; }
|
||||
u8* getBlock() { return mData; }
|
||||
};
|
||||
|
|
|
@ -1,167 +1,167 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Old Vif Unpack Code
|
||||
// Only here for testing/reference
|
||||
// If newVif is defined and newVif1 isn't, vif1 will use this code
|
||||
// same goes for vif0...
|
||||
template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
|
||||
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
|
||||
// Old (reference) VIF unpack path.
//   VIFdmanum - 0 selects the VU0/vif0 globals, anything else the VU1/vif1 globals
//   data      - source data to unpack
//   v         - vif code being processed; v->addr is the destination offset into VU->Mem
//   size      - source size; multiplied by 4 on entry (size <<= 2)
// Writes go through the unpack function chosen from VIFfuncTable by the low 4 bits
// of v->cmd and by vif->usn. vifRegs->num and vif->cl are updated as vectors are written.
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
	//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
	VURegs* VU;
	u8*     cdata    = (u8*)data;
	u32     tempsize = 0;
	const u32 memlimit = vif_size(VIFdmanum);

	// Point the shared vif globals at the selected unit
	if (VIFdmanum == 0) {
		VU          = &VU0;
		vifRegs     = vif0Regs;
		vifMaskRegs = g_vif0Masks;
		vif         = &vif0;
		vifRow      = g_vifmask.Row0;
	}
	else {
		VU          = &VU1;
		vifRegs     = vif1Regs;
		vifMaskRegs = g_vif1Masks;
		vif         = &vif1;
		vifRow      = g_vifmask.Row1;
	}

	u32* dest       = (u32*)(VU->Mem + v->addr);
	u32  unpackType = v->cmd & 0xf;

	const VIFUnpackFuncTable* ft   = &VIFfuncTable[ unpackType ];
	UNPACKFUNCTYPE            func = vif->usn ? ft->funcU : ft->funcS;
	size <<= 2;

	if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
		if (v->addr >= memlimit) {
			DevCon.Warning("Overflown at the start");
			v->addr &= (memlimit - 1);
			dest = (u32*)(VU->Mem + v->addr);
		}

		size = min(size, (int)vifRegs->num * ft->gsize); //size will always be the same or smaller

		// Offset one past the last vector this unpack would write
		tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
			(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);

		//Sanity Check (memory overflow) - only affects what gets logged
		if (tempsize > memlimit) {
			const bool redHerring = (vifRegs->cycle.cl != vifRegs->cycle.wl) &&
				((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize);
			if (redHerring) {
				//It's a red herring, so ignore it! SSE unpacks will be much quicker.
				DevCon.WriteLn("what!!!!!!!!!");
			}
			else {
				DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
			}
		}
		// From here on tempsize is the number of source bytes left to consume
		tempsize = size;
		size     = 0;

		if (tempsize) {
			int incdest   = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
			int addrstart = v->addr;

			VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);

			while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) {
				if (v->addr >= memlimit) {
					DevCon.Warning("Mem limit overflow");
					v->addr &= (memlimit - 1);
					dest = (u32*)(VU->Mem + v->addr);
				}

				func(dest, (u32*)cdata, ft->qsize);
				cdata    += ft->gsize;
				tempsize -= ft->gsize;

				vifRegs->num--;
				vif->cl++;

				if (vif->cl == vifRegs->cycle.wl) { // wrote wl vectors: jump over the skipped ones
					dest    += incdest;
					v->addr += (incdest * 4);
					vif->cl  = 0;
				}
				else {
					dest    += 4;
					v->addr += 16;
				}
			}

			if (v->addr >= memlimit) {
				v->addr &= (memlimit - 1);
				dest = (u32*)(VU->Mem + v->addr);
			}
			v->addr = addrstart; // v->addr is restored; dest keeps its advanced position
			if (tempsize > 0) size = tempsize;
		}

		if (size >= ft->dsize && vifRegs->num > 0) { //Else write what we do have
			DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
			VIF_LOG("warning, end with size = %d", size);
			// unpack one qword
			func(dest, (u32*)cdata, size / ft->dsize);
			size = 0;
			VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
		}
	}
	else { // filling write
		// The cl > 0 test comes first so the division below is never by zero
		if ((vifRegs->cycle.cl > 0) &&
			((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num))
		{
			DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
		}

		DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);

		while (vifRegs->num > 0) {
			if (vif->cl == vifRegs->cycle.wl) {
				vif->cl = 0;
			}

			// unpack one qword
			if (vif->cl < vifRegs->cycle.cl) { // this write consumes source data
				if (size < ft->gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
				func(dest, (u32*)cdata, ft->qsize);
				cdata += ft->gsize;
				size  -= ft->gsize;
				vif->cl++;
				vifRegs->num--;
				if (vif->cl == vifRegs->cycle.wl) {
					vif->cl = 0;
				}
			}
			else { // this write does not advance the source pointer
				func(dest, (u32*)cdata, ft->qsize);
				v->addr += 16;
				vifRegs->num--;
				vif->cl++;
			}
			dest += 4;
			if (vifRegs->num == 0) break;
		}
	}
}
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Old Vif Unpack Code
|
||||
// Only here for testing/reference
|
||||
// If newVif is defined and newVif1 isn't, vif1 will use this code
|
||||
// same goes for vif0...
|
||||
template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
|
||||
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
|
||||
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
|
||||
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
|
||||
UNPACKFUNCTYPE func;
|
||||
const VIFUnpackFuncTable *ft;
|
||||
VURegs * VU;
|
||||
u8 *cdata = (u8*)data;
|
||||
u32 tempsize = 0;
|
||||
const u32 memlimit = vif_size(VIFdmanum);
|
||||
|
||||
if (VIFdmanum == 0) {
|
||||
VU = &VU0;
|
||||
vifRegs = vif0Regs;
|
||||
vifMaskRegs = g_vif0Masks;
|
||||
vif = &vif0;
|
||||
vifRow = g_vifmask.Row0;
|
||||
}
|
||||
else {
|
||||
VU = &VU1;
|
||||
vifRegs = vif1Regs;
|
||||
vifMaskRegs = g_vif1Masks;
|
||||
vif = &vif1;
|
||||
vifRow = g_vifmask.Row1;
|
||||
}
|
||||
|
||||
u32 *dest = (u32*)(VU->Mem + v->addr);
|
||||
u32 unpackType = v->cmd & 0xf;
|
||||
|
||||
ft = &VIFfuncTable[ unpackType ];
|
||||
func = vif->usn ? ft->funcU : ft->funcS;
|
||||
size <<= 2;
|
||||
|
||||
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
|
||||
if (v->addr >= memlimit) {
|
||||
DevCon.Warning("Overflown at the start");
|
||||
v->addr &= (memlimit - 1);
|
||||
dest = (u32*)(VU->Mem + v->addr);
|
||||
}
|
||||
|
||||
size = min(size, (int)vifRegs->num * ft->gsize); //size will always be the same or smaller
|
||||
|
||||
tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
|
||||
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
|
||||
|
||||
//Sanity Check (memory overflow)
|
||||
if (tempsize > memlimit) {
|
||||
if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
|
||||
((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
|
||||
//It's a red herring, so ignore it! SSE unpacks will be much quicker.
|
||||
DevCon.WriteLn("what!!!!!!!!!");
|
||||
//tempsize = 0;
|
||||
tempsize = size;
|
||||
size = 0;
|
||||
}
|
||||
else {
|
||||
DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
|
||||
tempsize = size;
|
||||
size = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
tempsize = size;
|
||||
size = 0;
|
||||
}
|
||||
if (tempsize) {
|
||||
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
|
||||
size = 0;
|
||||
int addrstart = v->addr;
|
||||
//if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
|
||||
|
||||
VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
|
||||
|
||||
while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) {
|
||||
if(v->addr >= memlimit) {
|
||||
DevCon.Warning("Mem limit overflow");
|
||||
v->addr &= (memlimit - 1);
|
||||
dest = (u32*)(VU->Mem + v->addr);
|
||||
}
|
||||
|
||||
func(dest, (u32*)cdata, ft->qsize);
|
||||
cdata += ft->gsize;
|
||||
tempsize -= ft->gsize;
|
||||
|
||||
vifRegs->num--;
|
||||
vif->cl++;
|
||||
|
||||
if (vif->cl == vifRegs->cycle.wl) {
|
||||
dest += incdest;
|
||||
v->addr +=(incdest * 4);
|
||||
vif->cl = 0;
|
||||
}
|
||||
else {
|
||||
dest += 4;
|
||||
v->addr += 16;
|
||||
}
|
||||
}
|
||||
if (v->addr >= memlimit) {
|
||||
v->addr &=(memlimit - 1);
|
||||
dest = (u32*)(VU->Mem + v->addr);
|
||||
}
|
||||
v->addr = addrstart;
|
||||
if(tempsize > 0) size = tempsize;
|
||||
}
|
||||
|
||||
if (size >= ft->dsize && vifRegs->num > 0) { //Else write what we do have
|
||||
DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
|
||||
VIF_LOG("warning, end with size = %d", size);
|
||||
// unpack one qword
|
||||
//v->addr += (size / ft->dsize) * 4;
|
||||
func(dest, (u32*)cdata, size / ft->dsize);
|
||||
size = 0;
|
||||
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
|
||||
}
|
||||
}
|
||||
else { // filling write
|
||||
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
|
||||
if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
|
||||
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
|
||||
|
||||
DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);
|
||||
while (vifRegs->num > 0) {
|
||||
if (vif->cl == vifRegs->cycle.wl) {
|
||||
vif->cl = 0;
|
||||
}
|
||||
// unpack one qword
|
||||
if (vif->cl < vifRegs->cycle.cl) {
|
||||
if(size < ft->gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
|
||||
func(dest, (u32*)cdata, ft->qsize);
|
||||
cdata += ft->gsize;
|
||||
size -= ft->gsize;
|
||||
vif->cl++;
|
||||
vifRegs->num--;
|
||||
if (vif->cl == vifRegs->cycle.wl) {
|
||||
vif->cl = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
func(dest, (u32*)cdata, ft->qsize);
|
||||
v->addr += 16;
|
||||
vifRegs->num--;
|
||||
vif->cl++;
|
||||
}
|
||||
dest += 4;
|
||||
if (vifRegs->num == 0) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,261 +1,279 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// newVif! - author: cottonvibes(@gmail.com)
|
||||
|
||||
#pragma once
|
||||
|
||||
struct nVifStruct {
|
||||
u32 idx; // VIF0 or VIF1
|
||||
vifStruct* vif; // Vif Struct ptr
|
||||
VIFregisters* vifRegs; // Vif Regs ptr
|
||||
VURegs* VU; // VU Regs ptr
|
||||
u8* vuMemEnd; // End of VU Memory
|
||||
u32 vuMemLimit; // Use for fast AND
|
||||
BlockBuffer* vifBlock; // Block Buffer
|
||||
};
|
||||
nVifStruct nVif[2];
|
||||
|
||||
void initNewVif(int idx) {
|
||||
nVif[idx].idx = idx;
|
||||
nVif[idx].VU = idx ? &VU1 : &VU0;
|
||||
nVif[idx].vif = idx ? &vif1 : &vif0;
|
||||
nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
|
||||
nVif[idx].vifBlock = new BlockBuffer(0x2000); // 8kb Block Buffer
|
||||
nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
|
||||
nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
|
||||
|
||||
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
|
||||
memset8<0xcc>( nVifUpkExec );
|
||||
|
||||
xSetPtr( nVifUpkExec );
|
||||
|
||||
for (int a = 0; a < 2; a++) {
|
||||
for (int b = 0; b < 2; b++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
for (int d = 0; d < 3; d++) {
|
||||
nVifGen(a, b, c, d);
|
||||
}}}}
|
||||
|
||||
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
|
||||
}
|
||||
|
||||
int nVifUnpack(int idx, u32 *data) {
|
||||
XMMRegisters::Freeze();
|
||||
//BlockBuffer* vB = nVif[idx].vifBlock;
|
||||
int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
|
||||
vif1.tag.size -= ret;
|
||||
_nVifUnpack(idx, (u8*)data, ret<<2);
|
||||
if (vif1.tag.size <= 0) vif1.tag.size = 0;
|
||||
if (vif1.tag.size <= 0) vif1.cmd = 0;
|
||||
XMMRegisters::Thaw();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns a pointer into the VU memory of unit `idx` at `offset`, after masking
// the offset with that unit's vuMemLimit.
_f u8* setVUptr(int idx, int offset) {
	const nVifStruct& unit = nVif[idx];
	const u32 wrapped = offset & unit.vuMemLimit;
	return (u8*)(unit.VU->Mem + wrapped);
}
|
||||
|
||||
// Advances `ptr` by `amount` bytes inside the VU memory of unit `idx`.  When the
// pointer reaches or passes vuMemEnd it wraps back to the start of VU memory,
// keeping the overshoot.  Logs a message if the result is not 16-byte aligned.
_f void incVUptr(int idx, u8* &ptr, int amount) {
	const nVifStruct& unit = nVif[idx];

	ptr += amount;

	const int overshoot = ptr - unit.vuMemEnd;
	if (overshoot >= 0)
		ptr = unit.VU->Mem + overshoot;

	if ((uptr)ptr & 0xf)
		DevCon.WriteLn("unaligned wtf :(");
}
|
||||
|
||||
static void setMasks(const VIFregisters& v) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
int m = (v.mask >> (i*2)) & 3;
|
||||
switch (m) {
|
||||
case 0: // Data
|
||||
nVifMask[0][i/4][i%4] = 0xffffffff;
|
||||
nVifMask[1][i/4][i%4] = 0;
|
||||
nVifMask[2][i/4][i%4] = 0;
|
||||
break;
|
||||
case 1: // Row
|
||||
nVifMask[0][i/4][i%4] = 0;
|
||||
nVifMask[1][i/4][i%4] = 0;
|
||||
nVifMask[2][i/4][i%4] = ((u32*)&v.r0)[(i%4)*4];
|
||||
break;
|
||||
case 2: // Col
|
||||
nVifMask[0][i/4][i%4] = 0;
|
||||
nVifMask[1][i/4][i%4] = 0;
|
||||
nVifMask[2][i/4][i%4] = ((u32*)&v.c0)[(i/4)*4];
|
||||
break;
|
||||
case 3: // Write Protect
|
||||
nVifMask[0][i/4][i%4] = 0;
|
||||
nVifMask[1][i/4][i%4] = 0xffffffff;
|
||||
nVifMask[2][i/4][i%4] = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Unpacking Optimization notes:
|
||||
// ----------------------------------------------------------------------------
|
||||
// Some games send a LOT of small packets. This is a problem because the new VIF unpacker
|
||||
// has a lot of setup code to establish which unpack function to call. The best way to
|
||||
// optimize this is to cache the unpack function's base (see fnbase below) and update it
|
||||
// when the variables it depends on are modified: writes to vif->tag.cmd and vif->usn.
|
||||
//
|
||||
// A secondary optimization would be adding special handlers for packets where vifRegs->num==1.
|
||||
// (which would remove the loop, simplify the incVUptr code, etc). But checking for it has
|
||||
// to be simple enough that it doesn't offset the benefits (which I'm not sure is possible).
|
||||
// -- air
|
||||
|
||||
|
||||
template< int idx, bool doMode, bool isFill >
|
||||
__releaseinline void __fastcall _nVifUnpackLoop( u8 *data, u32 size )
|
||||
{
|
||||
// Eh... template attempt, tho not sure it helped much. There's too much setup code (see
|
||||
// optimization note above) -- air
|
||||
|
||||
const int usn = !!(vif->usn);
|
||||
const int doMask = !!(vif->tag.cmd & 0x10);
|
||||
const int upkNum = vif->tag.cmd & 0xf;
|
||||
const u32& vift = nVifT[upkNum];
|
||||
|
||||
u8* dest = setVUptr(idx, vif->tag.addr);
|
||||
const VIFUnpackFuncTable& ft = VIFfuncTable[vif->tag.cmd & 0xf];
|
||||
UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
|
||||
|
||||
const nVifCall* fnbase = &nVifUpk[
|
||||
((usn*2*16) + (doMask*16) + (upkNum)) * (4*4)
|
||||
];
|
||||
|
||||
const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
|
||||
const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;
|
||||
|
||||
if (doMask)
|
||||
setMasks(*vifRegs);
|
||||
|
||||
if (vif->cl >= blockSize) {
|
||||
vif->cl = 0;
|
||||
}
|
||||
|
||||
while (vifRegs->num > 0) {
|
||||
if (vif->cl < cycleSize) {
|
||||
//if (size <= 0) { DbgCon.WriteLn("_nVifUnpack: Out of Data!"); break; }
|
||||
if (doMode /*|| doMask*/) {
|
||||
//if (doMask)
|
||||
//DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum);
|
||||
func((u32*)dest, (u32*)data, ft.qsize);
|
||||
data += ft.gsize;
|
||||
size -= ft.gsize;
|
||||
vifRegs->num--;
|
||||
}
|
||||
else if (1) {
|
||||
//DevCon.WriteLn("SSE Unpack!");
|
||||
fnbase[aMin(vif->cl, 4) * 4](dest, data);
|
||||
data += vift;
|
||||
size -= vift;
|
||||
vifRegs->num--;
|
||||
}
|
||||
else {
|
||||
|
||||
//DevCon.WriteLn("SSE Unpack!");
|
||||
int c = aMin((cycleSize - vif->cl), 3);
|
||||
size -= vift * c;
|
||||
//if (c>1) { DevCon.WriteLn("C > 1!"); }
|
||||
if (c<0||c>3) { DbgCon.WriteLn("C wtf!"); }
|
||||
if (size < 0) { DbgCon.WriteLn("Size Shit"); size+=vift*c;c=1;size-=vift*c;}
|
||||
fnbase[(aMin(vif->cl, 4) * 4) + c-1](dest, data);
|
||||
data += vift * c;
|
||||
vifRegs->num -= c;
|
||||
}
|
||||
}
|
||||
else if (isFill) {
|
||||
func((u32*)dest, (u32*)data, ft.qsize);
|
||||
vifRegs->num--;
|
||||
}
|
||||
incVUptr(idx, dest, 16);
|
||||
|
||||
// Removing this modulo was a huge speedup for God of War. (62->73 fps)
|
||||
// (GoW uses a lot of blockSize==1 packets, resulting in tons of loops -- so the biggest
|
||||
// factor in performance ends up being the top-level conditionals of the loop, and
|
||||
// also the loop prep code.) --air
|
||||
|
||||
//vif->cl = (vif->cl+1) % blockSize;
|
||||
if( ++vif->cl == blockSize ) vif->cl = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void _nVifUnpack(int idx, u8 *data, u32 size) {
|
||||
/*if (nVif[idx].vifRegs->cycle.cl >= nVif[idx].vifRegs->cycle.wl) { // skipping write
|
||||
if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);
|
||||
else VIFunpack<1>((u32*)data, &vif1.tag, size>>2);
|
||||
return;
|
||||
}
|
||||
else*/ { // filling write
|
||||
vif = nVif[idx].vif;
|
||||
vifRegs = nVif[idx].vifRegs;
|
||||
|
||||
const bool doMode = !!vifRegs->mode;
|
||||
const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
|
||||
|
||||
//UnpackLoopTable[idx][doMode][isFill]( data, size );
|
||||
|
||||
if( idx )
|
||||
{
|
||||
if( doMode )
|
||||
{
|
||||
if( isFill )
|
||||
_nVifUnpackLoop<1,true,true>( data, size );
|
||||
else
|
||||
_nVifUnpackLoop<1,true,false>( data, size );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( isFill )
|
||||
_nVifUnpackLoop<1,false,true>( data, size );
|
||||
else
|
||||
_nVifUnpackLoop<1,false,false>( data, size );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pxFailDev( "No VIF0 support yet, sorry!" );
|
||||
}
|
||||
|
||||
//if (isFill)
|
||||
//DevCon.WriteLn("%s Write! [num = %d][%s]", (isFill?"Filling":"Skipping"), vifRegs->num, (vifRegs->num%3 ? "bad!" : "ok"));
|
||||
//DevCon.WriteLn("%s Write! [mask = %08x][type = %02d][num = %d]", (isFill?"Filling":"Skipping"), vifRegs->mask, upkNum, vifRegs->num);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//int nVifUnpack(int idx, u32 *data) {
|
||||
// XMMRegisters::Freeze();
|
||||
// BlockBuffer* vB = nVif[idx].vifBlock;
|
||||
// int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
|
||||
// //vB->append(data, ret<<2);
|
||||
// vif1.tag.size -= ret;
|
||||
// //DevCon.WriteLn("2 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
|
||||
// //if (vif1.tag.size <= 0) {
|
||||
// //DevCon.WriteLn("3 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
|
||||
// //VIFunpack<1>(vB->getBlock(), &vif1.tag, vB->getSize()>>2);
|
||||
// //_nVifUnpack(idx, vB->getBlock(), vB->getSize());
|
||||
// _nVifUnpack(idx, (u8*)data, ret<<2);
|
||||
// if (vif1.tag.size <= 0) vif1.tag.size = 0;
|
||||
// if (vif1.tag.size <= 0) vif1.cmd = 0;
|
||||
// //vB->clear();
|
||||
// //}
|
||||
// //else { vif1.tag.size+=ret; ret = -1; vB->clear(); }
|
||||
// XMMRegisters::Thaw();
|
||||
// return ret;
|
||||
//}
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// newVif! - author: cottonvibes(@gmail.com)
|
||||
|
||||
#pragma once
|
||||
|
||||
struct nVifStruct {
|
||||
u32 idx; // VIF0 or VIF1
|
||||
vifStruct* vif; // Vif Struct ptr
|
||||
VIFregisters* vifRegs; // Vif Regs ptr
|
||||
VURegs* VU; // VU Regs ptr
|
||||
u8* vuMemEnd; // End of VU Memory
|
||||
u32 vuMemLimit; // Use for fast AND
|
||||
BlockBuffer* vifBlock; // Block Buffer
|
||||
};
|
||||
|
||||
static __aligned16 nVifStruct nVif[2];
|
||||
|
||||
void initNewVif(int idx) {
|
||||
nVif[idx].idx = idx;
|
||||
nVif[idx].VU = idx ? &VU1 : &VU0;
|
||||
nVif[idx].vif = idx ? &vif1 : &vif0;
|
||||
nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
|
||||
nVif[idx].vifBlock = new BlockBuffer(0x2000); // 8kb Block Buffer
|
||||
nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
|
||||
nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
|
||||
|
||||
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
|
||||
memset8<0xcc>( nVifUpkExec );
|
||||
|
||||
xSetPtr( nVifUpkExec );
|
||||
|
||||
for (int a = 0; a < 2; a++) {
|
||||
for (int b = 0; b < 2; b++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
for (int d = 0; d < 3; d++) {
|
||||
nVifGen(a, b, c, d);
|
||||
}}}}
|
||||
|
||||
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
|
||||
}
|
||||
|
||||
int nVifUnpack(int idx, u32 *data) {
|
||||
XMMRegisters::Freeze();
|
||||
//BlockBuffer* vB = nVif[idx].vifBlock;
|
||||
int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
|
||||
vif1.tag.size -= ret;
|
||||
_nVifUnpack(idx, (u8*)data, ret<<2);
|
||||
if (vif1.tag.size <= 0) vif1.tag.size = 0;
|
||||
if (vif1.tag.size <= 0) vif1.cmd = 0;
|
||||
XMMRegisters::Thaw();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns a pointer into the VU memory of unit `idx` at `offset`, after masking
// the offset with that unit's vuMemLimit.
_f u8* setVUptr(int idx, int offset) {
	const nVifStruct& unit = nVif[idx];
	const u32 wrapped = offset & unit.vuMemLimit;
	return (u8*)(unit.VU->Mem + wrapped);
}
|
||||
|
||||
// Advances `ptr` by `amount` bytes inside the VU memory of unit `idx`.  When the
// pointer reaches or passes vuMemEnd it wraps back to the start of VU memory,
// keeping the overshoot.  Logs a message if the result is not 16-byte aligned.
_f void incVUptr(int idx, u8* &ptr, int amount) {
	const nVifStruct& unit = nVif[idx];

	ptr += amount;

	const int overshoot = ptr - unit.vuMemEnd;
	if (overshoot >= 0)
		ptr = unit.VU->Mem + overshoot;

	if ((uptr)ptr & 0xf)
		DevCon.WriteLn("unaligned wtf :(");
}
|
||||
|
||||
static void setMasks(const VIFregisters& v) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
int m = (v.mask >> (i*2)) & 3;
|
||||
switch (m) {
|
||||
case 0: // Data
|
||||
nVifMask[0][i/4][i%4] = 0xffffffff;
|
||||
nVifMask[1][i/4][i%4] = 0;
|
||||
nVifMask[2][i/4][i%4] = 0;
|
||||
break;
|
||||
case 1: // Row
|
||||
nVifMask[0][i/4][i%4] = 0;
|
||||
nVifMask[1][i/4][i%4] = 0;
|
||||
nVifMask[2][i/4][i%4] = ((u32*)&v.r0)[(i%4)*4];
|
||||
break;
|
||||
case 2: // Col
|
||||
nVifMask[0][i/4][i%4] = 0;
|
||||
nVifMask[1][i/4][i%4] = 0;
|
||||
nVifMask[2][i/4][i%4] = ((u32*)&v.c0)[(i/4)*4];
|
||||
break;
|
||||
case 3: // Write Protect
|
||||
nVifMask[0][i/4][i%4] = 0;
|
||||
nVifMask[1][i/4][i%4] = 0xffffffff;
|
||||
nVifMask[2][i/4][i%4] = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Unpacking Optimization notes:
|
||||
// ----------------------------------------------------------------------------
|
||||
// Some games send a LOT of small packets. This is a problem because the new VIF unpacker
|
||||
// has a lot of setup code to establish which unpack function to call. The best way to
|
||||
// optimize this is to cache the unpack function's base (see fnbase below) and update it
|
||||
// when the variables it depends on are modified: writes to vif->tag.cmd and vif->usn.
|
||||
// Problem: vif->tag.cmd is modified a lot. Like, constantly. So won't work.
|
||||
//
|
||||
// A secondary optimization would be adding special handlers for packets where vifRegs->num==1.
|
||||
// (which would remove the loop, simplify the incVUptr code, etc). But checking for it has
|
||||
// to be simple enough that it doesn't offset the benefits (which I'm not sure is possible).
|
||||
// -- air
|
||||
|
||||
|
||||
//template< int idx, bool doMode, bool isFill >
|
||||
//__releaseinline void __fastcall _nVifUnpackLoop( u8 *data, u32 size )
|
||||
// Core unpack loop for one UNPACK transfer.  Operates on the file-level vif /
// vifRegs pointers, which the caller must have bound to the unit being processed.
//   idx  - VIF unit index; selects the VU memory used for the destination.
//   data - packed source; advanced by one vector per consumed element.
//   size - byte count of `data`; only decremented, never checked here.
//
// Per iteration: while vif->cl is below cycleSize one vector is consumed and
// written; past that point a filling write still writes (without consuming data)
// and a skipping write just steps over the destination slot.  The destination
// then advances 16 bytes with wrap-around and vif->cl cycles modulo blockSize.
__releaseinline void __fastcall _nVifUnpackLoop( int idx, u8 *data, u32 size )
{
	// comment out the following 2 lines to test templated version...
	const bool doMode = !!vifRegs->mode;                            // non-zero mode: use the C unpackers
	const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);    // filling write when cl < wl

	const int  usn    = !!(vif->usn);
	const int  doMask = !!(vif->tag.cmd & 0x10);
	const int  upkNum = vif->tag.cmd & 0xf;
	const u32& vift   = nVifT[upkNum];

	u8* dest = setVUptr(idx, vif->tag.addr);
	const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum];
	UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS;

	// Did a bunch of work to make it so I could optimize this index lookup to outside
	// the main loop but it was for naught -- too often the loop is only 1-2 iterations,
	// so this setup code ends up being slower (1 iter) or same speed (2 iters).
	//
	// Each (usn, mask, type) combination owns 16 consecutive slots in nVifUpk,
	// laid out as [curCycle 0..3][cycles 0..3].
	const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + (doMask*16) + (upkNum)) * (4*4) ];

	const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
	const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;

	if (doMask)
		setMasks(*vifRegs);

	if (vif->cl >= blockSize) {
		// This condition doesn't appear to ever occur, and really it never should.
		// Normally it wouldn't matter, but even simple setup code matters here (see
		// optimization notes above) >_<
		vif->cl = 0;
	}

	while (vifRegs->num > 0) {
		if (vif->cl < cycleSize) {
			if (doMode /*|| doMask*/) {
				func((u32*)dest, (u32*)data, ft.qsize);
				data += ft.gsize;
				size -= ft.gsize;
				vifRegs->num--;
			}
			else {
				// Routines are only generated for curCycle 0..3, so the cycle index
				// is clamped to 3.  A clamp of 4 would select slot 16, i.e. the
				// first routine of the *next* unpack type.
				fnbase[aMin(vif->cl, 3) * 4](dest, data);
				data += vift;
				size -= vift;
				vifRegs->num--;
			}
		}
		else if (isFill) {
			func((u32*)dest, (u32*)data, ft.qsize);
			vifRegs->num--;
		}
		incVUptr(idx, dest, 16);

		// Removing this modulo was a huge speedup for God of War start menu. (62->73 fps)
		// (GoW and tri-ace games both use a lot of blockSize==1 packets, resulting in tons
		// of loops -- so the biggest factor in performance ends up being the top-level
		// conditionals of the loop, and also the loop prep code.) --air

		//vif->cl = (vif->cl+1) % blockSize;
		if( ++vif->cl == blockSize ) vif->cl = 0;
	}
}
|
||||
|
||||
void _nVifUnpack(int idx, u8 *data, u32 size) {
|
||||
/*if (nVif[idx].vifRegs->cycle.cl >= nVif[idx].vifRegs->cycle.wl) { // skipping write
|
||||
if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);
|
||||
else VIFunpack<1>((u32*)data, &vif1.tag, size>>2);
|
||||
return;
|
||||
}
|
||||
else*/ { // filling write
|
||||
|
||||
vif = nVif[idx].vif;
|
||||
vifRegs = nVif[idx].vifRegs;
|
||||
|
||||
#if 1
|
||||
_nVifUnpackLoop( idx, data, size );
|
||||
#else
|
||||
// Eh... template attempt, tho it didn't help much. There's too much setup code,
|
||||
// and the template only optimizes code inside the loop, which often times seems to
|
||||
// only be run once or twice anyway. Better to use recompilation than templating
|
||||
// anyway, but I'll leave it in for now for reference. -- air
|
||||
|
||||
const bool doMode = !!vifRegs->mode;
|
||||
const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
|
||||
|
||||
//UnpackLoopTable[idx][doMode][isFill]( data, size );
|
||||
|
||||
if( idx )
|
||||
{
|
||||
if( doMode )
|
||||
{
|
||||
if( isFill )
|
||||
_nVifUnpackLoop<1,true,true>( data, size );
|
||||
else
|
||||
_nVifUnpackLoop<1,true,false>( data, size );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( isFill )
|
||||
_nVifUnpackLoop<1,false,true>( data, size );
|
||||
else
|
||||
_nVifUnpackLoop<1,false,false>( data, size );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pxFailDev( "No VIF0 support yet, sorry!" );
|
||||
}
|
||||
#endif
|
||||
//if (isFill)
|
||||
//DevCon.WriteLn("%s Write! [num = %d][%s]", (isFill?"Filling":"Skipping"), vifRegs->num, (vifRegs->num%3 ? "bad!" : "ok"));
|
||||
//DevCon.WriteLn("%s Write! [mask = %08x][type = %02d][num = %d]", (isFill?"Filling":"Skipping"), vifRegs->mask, upkNum, vifRegs->num);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//int nVifUnpack(int idx, u32 *data) {
|
||||
// XMMRegisters::Freeze();
|
||||
// BlockBuffer* vB = nVif[idx].vifBlock;
|
||||
// int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
|
||||
// //vB->append(data, ret<<2);
|
||||
// vif1.tag.size -= ret;
|
||||
// //DevCon.WriteLn("2 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
|
||||
// //if (vif1.tag.size <= 0) {
|
||||
// //DevCon.WriteLn("3 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
|
||||
// //VIFunpack<1>(vB->getBlock(), &vif1.tag, vB->getSize()>>2);
|
||||
// //_nVifUnpack(idx, vB->getBlock(), vB->getSize());
|
||||
// _nVifUnpack(idx, (u8*)data, ret<<2);
|
||||
// if (vif1.tag.size <= 0) vif1.tag.size = 0;
|
||||
// if (vif1.tag.size <= 0) vif1.cmd = 0;
|
||||
// //vB->clear();
|
||||
// //}
|
||||
// //else { vif1.tag.size+=ret; ret = -1; vB->clear(); }
|
||||
// XMMRegisters::Thaw();
|
||||
// return ret;
|
||||
//}
|
||||
|
|
|
@ -1,256 +1,255 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Emits a masked store of regX into destination vector x (0..2), located at
// ecx + x*0x10.  Expects `curCycle` to be visible in the expanding scope and
// uses xmm7 as scratch in the emitted code.
//
//   dest = (regX & nVifMask[0][row]) | (dest & nVifMask[1][row]) | nVifMask[2][row]
//
// The mask row is clamped to 3: setMasks() only fills rows 0..3, so the fourth
// row is the one that applies to every later write cycle.  (The previous clamp
// of 4 selected a row that is never populated.)
#define xMaskWrite(regX, x) { \
	if (x==0) xMOVAPS(xmm7, ptr32[ecx]); \
	if (x==1) xMOVAPS(xmm7, ptr32[ecx+0x10]); \
	if (x==2) xMOVAPS(xmm7, ptr32[ecx+0x20]); \
	int offX = aMin(curCycle+x, 3); \
	xPAND(regX, ptr32[nVifMask[0][offX]]); \
	xPAND(xmm7, ptr32[nVifMask[1][offX]]); \
	xPOR (regX, ptr32[nVifMask[2][offX]]); \
	xPOR (regX, xmm7); \
	if (x==0) xMOVAPS(ptr32[ecx], regX); \
	if (x==1) xMOVAPS(ptr32[ecx+0x10], regX); \
	if (x==2) xMOVAPS(ptr32[ecx+0x20], regX); \
}
|
||||
|
||||
// Emits the stores of up to three unpacked vectors to ecx, ecx+0x10 and ecx+0x20.
// Expects `mask` and `cycles` in the expanding scope: vector N is stored when
// cycles >= N, and a non-zero mask routes each store through xMaskWrite instead
// of a plain xMOVAPS.
#define xMovDest(reg0, reg1, reg2) { \
	if (mask!=0) { \
		if (cycles>=0) { xMaskWrite(reg0, 0); } \
		if (cycles>=1) { xMaskWrite(reg1, 1); } \
		if (cycles>=2) { xMaskWrite(reg2, 2); } \
	} \
	else { \
		if (cycles>=0) { xMOVAPS (ptr32[ecx],      reg0); } \
		if (cycles>=1) { xMOVAPS (ptr32[ecx+0x10], reg1); } \
		if (cycles>=2) { xMOVAPS (ptr32[ecx+0x20], reg2); } \
	} \
}
|
||||
|
||||
// xmm2 gets result
|
||||
// Emits code that expands the packed 5:5:5:1 colour held in the low 16 bits of
// xmm1 (layout A|B5|G5|R5, R in the lowest bits) into one byte-sized component
// per 32-bit lane, leaving the result in xmm2.
// The emitted code clobbers xmm1, xmm3 and xmm4.
// Each 5-bit channel is shifted left by 3 so it occupies bits 3..7 of its byte;
// the alpha bit is shifted to bit 7.
// NOTE(review): assumes _v0 broadcasts lane 0 and that the mVUmergeRegs masks
// select the lanes described in the trailing comments -- confirm against the
// definitions of _v0 and mVUmergeRegs.
void convertRGB() {
|
||||
xPSLL.D (xmm1, 3); // ABG|R5.000
|
||||
xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // ABG
|
||||
xPSLL.D (xmm1, 3); // AB|G5.000
|
||||
xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // AB
|
||||
xPSLL.D (xmm1, 3); // A|B5.000
|
||||
xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // A
|
||||
xPSLL.D (xmm1, 7); // A.0000000
|
||||
|
||||
xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A
|
||||
xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G
|
||||
xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B
|
||||
mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R
|
||||
mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R
|
||||
mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R
|
||||
|
||||
// Shift each lane up then down by 24 to drop the garbage above the low byte.
xPSLL.D (xmm2, 24); // can optimize to
|
||||
xPSRL.D (xmm2, 24); // single AND...
|
||||
}
|
||||
|
||||
struct VifUnpackIndexer
|
||||
{
|
||||
int usn, mask;
|
||||
int curCycle, cyclesToWrite;
|
||||
|
||||
nVifCall& GetCall( int packType ) const
|
||||
{
|
||||
int usnpart = usn*2*16;
|
||||
int maskpart = mask*16;
|
||||
int packpart = packType;
|
||||
|
||||
int curpart = curCycle*4;
|
||||
int cycpespart = cyclesToWrite;
|
||||
|
||||
return nVifUpk[((usnpart+maskpart+packpart)*(4*4)) + (curpart+cycpespart)];
|
||||
}
|
||||
|
||||
void xSetCall( int packType ) const
|
||||
{
|
||||
xAlignPtr(16);
|
||||
GetCall( packType ) = (nVifCall)xGetPtr();
|
||||
}
|
||||
|
||||
void xSetNullCall( int packType ) const
|
||||
{
|
||||
GetCall( packType ) = NULL;
|
||||
}
|
||||
};
|
||||
|
||||
// ecx = dest, edx = src
|
||||
void nVifGen(int usn, int mask, int curCycle, int cycles) {
|
||||
const VifUnpackIndexer indexer = { usn, mask, curCycle, cycles };
|
||||
|
||||
indexer.xSetCall(0x0); // S-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x1); // S-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x2); // S-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
indexer.xSetNullCall(0x3); // ----
|
||||
|
||||
indexer.xSetCall(0x4); // V2-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x10]);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x5); // V2-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x6); // V2-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetNullCall(0x7); // ----
|
||||
|
||||
indexer.xSetCall(0x8); // V3-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+12]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+24]);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x9); // V3-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+6]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+12]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=1) xShiftR (xmm1, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0xa); // V3-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+3]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+6]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=1) xShiftR (xmm1, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetNullCall(0xb); // ----
|
||||
|
||||
indexer.xSetCall(0xc); // V4-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0xd); // V4-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=1) xShiftR (xmm1, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0xe); // V4-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+4]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+8]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=1) xShiftR (xmm1, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
// A | B5 | G5 | R5
|
||||
// ..0.. A 0000000 | ..0.. B 000 | ..0.. G 000 | ..0.. R 000
|
||||
indexer.xSetCall(0xf); // V4-5
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xMOVAPS (xmm1, xmm0);
|
||||
if (cycles>=0) convertRGB();
|
||||
if (cycles>=0) xMOVAPS (ptr32[ecx], xmm2);
|
||||
if (cycles>=1) xMOVAPS (xmm1, xmm0);
|
||||
if (cycles>=1) xPSRL.D (xmm1, 16);
|
||||
if (cycles>=1) convertRGB();
|
||||
if (cycles>=1) xMOVAPS (ptr32[ecx+0x10], xmm2);
|
||||
if (cycles>=2) xPSHUF.D (xmm1, xmm0, _v1);
|
||||
if (cycles>=2) convertRGB();
|
||||
if (cycles>=2) xMOVAPS (ptr32[ecx+0x20], xmm2);
|
||||
xRET();
|
||||
|
||||
pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
|
||||
}
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Emits a masked read-modify-write of one 16-byte destination slot.
//
//   regX - register holding the freshly unpacked vector (modified by the emitted code)
//   x    - slot selector: 0, 1 or 2 picks [ecx], [ecx+0x10] or [ecx+0x20]
//
// Emitted sequence: the current destination contents are loaded into xmm7, regX and
// xmm7 are ANDed with rows of nVifMask[0] and nVifMask[1], nVifMask[2] and xmm7 are
// ORed into regX, and regX is stored back to the same slot.  The mask row is
// aMin(curCycle+x, 4), so `curCycle` must be visible at the point of expansion.
// For any other value of x only the AND/OR part is emitted (no load, no store).
#define xMaskWrite(regX, x) { \
	if      (x==0) xMOVAPS(xmm7, ptr32[ecx]); \
	else if (x==1) xMOVAPS(xmm7, ptr32[ecx+0x10]); \
	else if (x==2) xMOVAPS(xmm7, ptr32[ecx+0x20]); \
	int maskRow = aMin(curCycle+x, 4); \
	xPAND(regX, ptr32[nVifMask[0][maskRow]]); \
	xPAND(xmm7, ptr32[nVifMask[1][maskRow]]); \
	xPOR (regX, ptr32[nVifMask[2][maskRow]]); \
	xPOR (regX, xmm7); \
	if      (x==0) xMOVAPS(ptr32[ecx], regX); \
	else if (x==1) xMOVAPS(ptr32[ecx+0x10], regX); \
	else if (x==2) xMOVAPS(ptr32[ecx+0x20], regX); \
}
|
||||
|
||||
// Emits the stores of up to three unpacked vectors to the destination at [ecx].
//
// `mask` and `cycles` are taken from the scope where the macro is expanded.
// With mask==0 the registers are stored directly with xMOVAPS; otherwise each
// store goes through xMaskWrite.  reg0 is written when cycles>=0, reg1 when
// cycles>=1 and reg2 when cycles>=2, to [ecx], [ecx+0x10] and [ecx+0x20].
#define xMovDest(reg0, reg1, reg2) { \
	if (mask != 0) { \
		if (cycles >= 0) { \
			xMaskWrite(reg0, 0); \
			if (cycles >= 1) { \
				xMaskWrite(reg1, 1); \
				if (cycles >= 2) { xMaskWrite(reg2, 2); } \
			} \
		} \
	} \
	else { \
		if (cycles >= 0) { \
			xMOVAPS (ptr32[ecx], reg0); \
			if (cycles >= 1) { \
				xMOVAPS (ptr32[ecx+0x10], reg1); \
				if (cycles >= 2) { xMOVAPS (ptr32[ecx+0x20], reg2); } \
			} \
		} \
	} \
}
|
||||
|
||||
// Emits the instruction sequence that expands a packed 5:5:5:1 colour value.
//
// Input (in the generated code): xmm1 holds the packed value.
// Output: xmm2 (see the author's note below).  The emitted code also overwrites
// xmm1, xmm3 and xmm4.
//
// Each 5-bit field is isolated by shifting left by 3 (so it lands in bits 3..7 of a
// byte) and then shifting the consumed 8 bits away; the top 1-bit field is shifted
// left by 7 instead.  The per-line comments track which fields are still in xmm1.
// NOTE(review): the lane selection performed by mVUmergeRegs and the _v0 shuffle
// constant are defined outside this view -- the lane comments are the original
// author's and have not been re-verified here.
// xmm2 gets result
|
||||
void convertRGB() {
|
||||
xPSLL.D (xmm1, 3); // ABG|R5.000
|
||||
xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // ABG
|
||||
xPSLL.D (xmm1, 3); // AB|G5.000
|
||||
xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // AB
|
||||
xPSLL.D (xmm1, 3); // A|B5.000
|
||||
xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // A
|
||||
xPSLL.D (xmm1, 7); // A.0000000
|
||||
|
||||
xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A
|
||||
xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G
|
||||
xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B
|
||||
mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R
|
||||
mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R
|
||||
mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R
|
||||
|
||||
// Shifting each dword left then right by 24 keeps only its low 8 bits, which
// discards the "garbage upper bits" carried along by the copies above.
xPSLL.D (xmm2, 24); // can optimize to
|
||||
xPSRL.D (xmm2, 24); // single AND...
|
||||
}
|
||||
|
||||
struct VifUnpackIndexer
|
||||
{
|
||||
int usn, mask;
|
||||
int curCycle, cyclesToWrite;
|
||||
|
||||
nVifCall& GetCall( int packType ) const
|
||||
{
|
||||
int usnpart = usn*2*16;
|
||||
int maskpart = mask*16;
|
||||
int packpart = packType;
|
||||
|
||||
int curpart = curCycle*4;
|
||||
int cycpespart = cyclesToWrite;
|
||||
|
||||
return nVifUpk[((usnpart+maskpart+packpart)*(4*4)) + (curpart+cycpespart)];
|
||||
}
|
||||
|
||||
void xSetCall( int packType ) const
|
||||
{
|
||||
GetCall( packType ) = (nVifCall)xGetAlignedCallTarget();
|
||||
}
|
||||
|
||||
void xSetNullCall( int packType ) const
|
||||
{
|
||||
GetCall( packType ) = NULL;
|
||||
}
|
||||
};
|
||||
|
||||
// ecx = dest, edx = src
|
||||
void nVifGen(int usn, int mask, int curCycle, int cycles) {
|
||||
const VifUnpackIndexer indexer = { usn, mask, curCycle, cycles };
|
||||
|
||||
indexer.xSetCall(0x0); // S-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x1); // S-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x2); // S-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
indexer.xSetNullCall(0x3); // ----
|
||||
|
||||
indexer.xSetCall(0x4); // V2-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x10]);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x5); // V2-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x6); // V2-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetNullCall(0x7); // ----
|
||||
|
||||
indexer.xSetCall(0x8); // V3-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+12]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+24]);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0x9); // V3-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+6]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+12]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=1) xShiftR (xmm1, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0xa); // V3-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+3]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+6]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=1) xShiftR (xmm1, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetNullCall(0xb); // ----
|
||||
|
||||
indexer.xSetCall(0xc); // V4-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0xd); // V4-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=1) xShiftR (xmm1, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
indexer.xSetCall(0xe); // V4-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+4]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+8]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=1) xShiftR (xmm1, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
// A | B5 | G5 | R5
|
||||
// ..0.. A 0000000 | ..0.. B 000 | ..0.. G 000 | ..0.. R 000
|
||||
indexer.xSetCall(0xf); // V4-5
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xMOVAPS (xmm1, xmm0);
|
||||
if (cycles>=0) convertRGB();
|
||||
if (cycles>=0) xMOVAPS (ptr32[ecx], xmm2);
|
||||
if (cycles>=1) xMOVAPS (xmm1, xmm0);
|
||||
if (cycles>=1) xPSRL.D (xmm1, 16);
|
||||
if (cycles>=1) convertRGB();
|
||||
if (cycles>=1) xMOVAPS (ptr32[ecx+0x10], xmm2);
|
||||
if (cycles>=2) xPSHUF.D (xmm1, xmm0, _v1);
|
||||
if (cycles>=2) convertRGB();
|
||||
if (cycles>=2) xMOVAPS (ptr32[ecx+0x20], xmm2);
|
||||
xRET();
|
||||
|
||||
pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,283 +1,283 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "VUmicro.h"
|
||||
|
||||
extern u32 vudump;
|
||||
|
||||
#define VU0_MEMSIZE 0x1000
|
||||
#define VU1_MEMSIZE 0x4000
|
||||
|
||||
void recResetVU0();
|
||||
void recExecuteVU0Block();
|
||||
void recClearVU0( u32 Addr, u32 Size );
|
||||
|
||||
void recVU1Init();
|
||||
void recVU1Shutdown();
|
||||
void recResetVU1();
|
||||
void recExecuteVU1Block();
|
||||
void recClearVU1( u32 Addr, u32 Size );
|
||||
|
||||
|
||||
u32 GetVIAddr(VURegs * VU, int reg, int read, int info); // returns the correct VI addr
|
||||
void recUpdateFlags(VURegs * VU, int reg, int info);
|
||||
|
||||
void _recvuTestPipes(VURegs * VU);
|
||||
void _recvuFlushFDIV(VURegs * VU);
|
||||
void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
|
||||
void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
|
||||
void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
|
||||
void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
|
||||
|
||||
#define VUOP_READ 2
|
||||
#define VUOP_WRITE 4
|
||||
|
||||
// save on mem
|
||||
struct _vuopinfo {
|
||||
int cycle;
|
||||
int cycles;
|
||||
u8 statusflag;
|
||||
u8 macflag;
|
||||
u8 clipflag;
|
||||
u8 dummy;
|
||||
u8 q;
|
||||
u8 p;
|
||||
u16 pqinst; // bit of instruction specifying index (srec only)
|
||||
};
|
||||
|
||||
void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
|
||||
int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
|
||||
void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
|
||||
|
||||
extern int vucycle;
|
||||
typedef void (*vFloat)(int regd, int regTemp);
|
||||
extern vFloat vFloats1[16];
|
||||
extern vFloat vFloats1_useEAX[16];
|
||||
extern vFloat vFloats2[16];
|
||||
extern vFloat vFloats4[16];
|
||||
extern vFloat vFloats4_useEAX[16];
|
||||
extern const __aligned16 float s_fones[8];
|
||||
extern const __aligned16 u32 s_mask[4];
|
||||
extern const __aligned16 u32 s_expmask[4];
|
||||
extern const __aligned16 u32 g_minvals[4];
|
||||
extern const __aligned16 u32 g_maxvals[4];
|
||||
extern const __aligned16 u32 const_clip[8];
|
||||
|
||||
u32 GetVIAddr(VURegs * VU, int reg, int read, int info);
|
||||
int _vuGetTempXMMreg(int info);
|
||||
void vuFloat(int info, int regd, int XYZW);
|
||||
void vuFloat_useEAX(int regd, int regTemp, int XYZW);
|
||||
void vuFloat2(int regd, int regTemp, int XYZW);
|
||||
void vuFloat3(uptr x86ptr);
|
||||
void vuFloat4(int regd, int regTemp, int XYZW);
|
||||
void vuFloat4_useEAX(int regd, int regTemp, int XYZW);
|
||||
void vuFloat5(int regd, int regTemp, int XYZW);
|
||||
void vuFloat5_useEAX(int regd, int regTemp, int XYZW);
|
||||
void _vuFlipRegSS(VURegs * VU, int reg);
|
||||
void _vuFlipRegSS_xyzw(int reg, int xyzw);
|
||||
void _vuMoveSS(VURegs * VU, int dstreg, int srcreg);
|
||||
void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw);
|
||||
void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw);
|
||||
void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw);
|
||||
void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw);
|
||||
// Shorthand for VU_MERGE_REGS_CUSTOM that passes _X_Y_Z_W as the xyzw selector.
#define VU_MERGE_REGS(dest, src) { VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W); }
|
||||
|
||||
// Helpers for allocating x86 registers for VI regs.  All three expect `info`
// (and, for the VI variants, `VU`) to be visible where they are expanded.
// MODE_NOFRAME is added to the mode unless PROCESS_VU_SUPER is set in `info`;
// X86TYPE_VU1 is added to the type when `VU` is &VU1.

// Allocates a temporary x86 register.
#define ALLOCTEMPX86(mode) \
	_allocX86reg(-1, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode)

// Allocates an x86 register for VI register `vi`.
#define ALLOCVI(vi, mode) \
	_allocX86reg(-1, X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), vi, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode)

// Registers VI register `vi` as needed (note: the expansion ends with a semicolon).
#define ADD_VI_NEEDED(vi) \
	_addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), vi);
|
||||
|
||||
// Exchanges the low 32 bits of x and y in place (both are accessed as u32).
// A temporary is used instead of a chained XOR swap: the chained form modifies
// one operand twice within a single unsequenced expression, and it zeroes the
// value when x and y name the same object.
#define SWAP(x, y) { \
	const u32 swap_tmp_ = *(u32*)&(x); \
	*(u32*)&(x) = *(u32*)&(y); \
	*(u32*)&(y) = swap_tmp_; \
}
|
||||
|
||||
/*****************************************
|
||||
VU Micromode Upper instructions
|
||||
*****************************************/
|
||||
|
||||
void recVUMI_ABS(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDi(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDq(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDx(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDy(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDz(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDw(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDA(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUB(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBi(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBq(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBx(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBy(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBz(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBw(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBA(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MUL(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULA(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDA(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUB(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBA(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAX(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINI(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIw(VURegs *vuRegs, int info);
|
||||
void recVUMI_OPMULA(VURegs *vuRegs, int info);
|
||||
void recVUMI_OPMSUB(VURegs *vuRegs, int info);
|
||||
void recVUMI_NOP(VURegs *vuRegs, int info);
|
||||
void recVUMI_FTOI0(VURegs *vuRegs, int info);
|
||||
void recVUMI_FTOI4(VURegs *vuRegs, int info);
|
||||
void recVUMI_FTOI12(VURegs *vuRegs, int info);
|
||||
void recVUMI_FTOI15(VURegs *vuRegs, int info);
|
||||
void recVUMI_ITOF0(VURegs *vuRegs, int info);
|
||||
void recVUMI_ITOF4(VURegs *vuRegs, int info);
|
||||
void recVUMI_ITOF12(VURegs *vuRegs, int info);
|
||||
void recVUMI_ITOF15(VURegs *vuRegs, int info);
|
||||
void recVUMI_CLIP(VURegs *vuRegs, int info);
|
||||
|
||||
/*****************************************
|
||||
VU Micromode Lower instructions
|
||||
*****************************************/
|
||||
|
||||
void recVUMI_DIV(VURegs *vuRegs, int info);
|
||||
void recVUMI_SQRT(VURegs *vuRegs, int info);
|
||||
void recVUMI_RSQRT(VURegs *vuRegs, int info);
|
||||
void recVUMI_IADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_IADDI(VURegs *vuRegs, int info);
|
||||
void recVUMI_IADDIU(VURegs *vuRegs, int info);
|
||||
void recVUMI_IAND(VURegs *vuRegs, int info);
|
||||
void recVUMI_IOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_ISUB(VURegs *vuRegs, int info);
|
||||
void recVUMI_ISUBIU(VURegs *vuRegs, int info);
|
||||
void recVUMI_MOVE(VURegs *vuRegs, int info);
|
||||
void recVUMI_MFIR(VURegs *vuRegs, int info);
|
||||
void recVUMI_MTIR(VURegs *vuRegs, int info);
|
||||
void recVUMI_MR32(VURegs *vuRegs, int info);
|
||||
void recVUMI_LQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_LQD(VURegs *vuRegs, int info);
|
||||
void recVUMI_LQI(VURegs *vuRegs, int info);
|
||||
void recVUMI_SQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_SQD(VURegs *vuRegs, int info);
|
||||
void recVUMI_SQI(VURegs *vuRegs, int info);
|
||||
void recVUMI_ILW(VURegs *vuRegs, int info);
|
||||
void recVUMI_ISW(VURegs *vuRegs, int info);
|
||||
void recVUMI_ILWR(VURegs *vuRegs, int info);
|
||||
void recVUMI_ISWR(VURegs *vuRegs, int info);
|
||||
void recVUMI_LOI(VURegs *vuRegs, int info);
|
||||
void recVUMI_RINIT(VURegs *vuRegs, int info);
|
||||
void recVUMI_RGET(VURegs *vuRegs, int info);
|
||||
void recVUMI_RNEXT(VURegs *vuRegs, int info);
|
||||
void recVUMI_RXOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_WAITQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_FSAND(VURegs *vuRegs, int info);
|
||||
void recVUMI_FSEQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_FSOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_FSSET(VURegs *vuRegs, int info);
|
||||
void recVUMI_FMAND(VURegs *vuRegs, int info);
|
||||
void recVUMI_FMEQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_FMOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCAND(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCEQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCSET(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCGET(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBEQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBGEZ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBGTZ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBLTZ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBLEZ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBNE(VURegs *vuRegs, int info);
|
||||
void recVUMI_B(VURegs *vuRegs, int info);
|
||||
void recVUMI_BAL(VURegs *vuRegs, int info);
|
||||
void recVUMI_JR(VURegs *vuRegs, int info);
|
||||
void recVUMI_JALR(VURegs *vuRegs, int info);
|
||||
void recVUMI_MFP(VURegs *vuRegs, int info);
|
||||
void recVUMI_WAITP(VURegs *vuRegs, int info);
|
||||
void recVUMI_ESADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_ERSADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_ELENG(VURegs *vuRegs, int info);
|
||||
void recVUMI_ERLENG(VURegs *vuRegs, int info);
|
||||
void recVUMI_EATANxy(VURegs *vuRegs, int info);
|
||||
void recVUMI_EATANxz(VURegs *vuRegs, int info);
|
||||
void recVUMI_ESUM(VURegs *vuRegs, int info);
|
||||
void recVUMI_ERCPR(VURegs *vuRegs, int info);
|
||||
void recVUMI_ESQRT(VURegs *vuRegs, int info);
|
||||
void recVUMI_ERSQRT(VURegs *vuRegs, int info);
|
||||
void recVUMI_ESIN(VURegs *vuRegs, int info);
|
||||
void recVUMI_EATAN(VURegs *vuRegs, int info);
|
||||
void recVUMI_EEXP(VURegs *vuRegs, int info);
|
||||
void recVUMI_XGKICK(VURegs *vuRegs, int info);
|
||||
void recVUMI_XTOP(VURegs *vuRegs, int info);
|
||||
void recVUMI_XITOP(VURegs *vuRegs, int info);
|
||||
void recVUMI_XTOP( VURegs *VU , int info);
|
||||
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "VUmicro.h"
|
||||
|
||||
extern u32 vudump;
|
||||
|
||||
#define VU0_MEMSIZE 0x1000
|
||||
#define VU1_MEMSIZE 0x4000
|
||||
|
||||
void recResetVU0();
|
||||
void recExecuteVU0Block();
|
||||
void recClearVU0( u32 Addr, u32 Size );
|
||||
|
||||
void recVU1Init();
|
||||
void recVU1Shutdown();
|
||||
void recResetVU1();
|
||||
void recExecuteVU1Block();
|
||||
void recClearVU1( u32 Addr, u32 Size );
|
||||
|
||||
|
||||
u32 GetVIAddr(VURegs * VU, int reg, int read, int info); // returns the correct VI addr
|
||||
void recUpdateFlags(VURegs * VU, int reg, int info);
|
||||
|
||||
void _recvuTestPipes(VURegs * VU);
|
||||
void _recvuFlushFDIV(VURegs * VU);
|
||||
void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
|
||||
void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
|
||||
void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
|
||||
void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
|
||||
|
||||
#define VUOP_READ 2
|
||||
#define VUOP_WRITE 4
|
||||
|
||||
// save on mem
|
||||
struct _vuopinfo {
|
||||
int cycle;
|
||||
int cycles;
|
||||
u8 statusflag;
|
||||
u8 macflag;
|
||||
u8 clipflag;
|
||||
u8 dummy;
|
||||
u8 q;
|
||||
u8 p;
|
||||
u16 pqinst; // bit of instruction specifying index (srec only)
|
||||
};
|
||||
|
||||
void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
|
||||
int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
|
||||
void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
|
||||
|
||||
extern int vucycle;
|
||||
typedef void (*vFloat)(int regd, int regTemp);
|
||||
extern vFloat vFloats1[16];
|
||||
extern vFloat vFloats1_useEAX[16];
|
||||
extern vFloat vFloats2[16];
|
||||
extern vFloat vFloats4[16];
|
||||
extern vFloat vFloats4_useEAX[16];
|
||||
extern const __aligned16 float s_fones[8];
|
||||
extern const __aligned16 u32 s_mask[4];
|
||||
extern const __aligned16 u32 s_expmask[4];
|
||||
extern const __aligned16 u32 g_minvals[4];
|
||||
extern const __aligned16 u32 g_maxvals[4];
|
||||
extern const __aligned16 u32 const_clip[8];
|
||||
|
||||
u32 GetVIAddr(VURegs * VU, int reg, int read, int info);
|
||||
int _vuGetTempXMMreg(int info);
|
||||
void vuFloat(int info, int regd, int XYZW);
|
||||
void vuFloat_useEAX(int regd, int regTemp, int XYZW);
|
||||
void vuFloat2(int regd, int regTemp, int XYZW);
|
||||
void vuFloat3(uptr x86ptr);
|
||||
void vuFloat4(int regd, int regTemp, int XYZW);
|
||||
void vuFloat4_useEAX(int regd, int regTemp, int XYZW);
|
||||
void vuFloat5(int regd, int regTemp, int XYZW);
|
||||
void vuFloat5_useEAX(int regd, int regTemp, int XYZW);
|
||||
void _vuFlipRegSS(VURegs * VU, int reg);
|
||||
void _vuFlipRegSS_xyzw(int reg, int xyzw);
|
||||
void _vuMoveSS(VURegs * VU, int dstreg, int srcreg);
|
||||
void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw);
|
||||
void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw);
|
||||
void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw);
|
||||
void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw);
|
||||
// Shorthand for VU_MERGE_REGS_CUSTOM that passes _X_Y_Z_W as the xyzw selector.
#define VU_MERGE_REGS(dest, src) { VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W); }
|
||||
|
||||
// Helpers for allocating x86 registers for VI regs.  All three expect `info`
// (and, for the VI variants, `VU`) to be visible where they are expanded.
// MODE_NOFRAME is added to the mode unless PROCESS_VU_SUPER is set in `info`;
// X86TYPE_VU1 is added to the type when `VU` is &VU1.

// Allocates a temporary x86 register.
#define ALLOCTEMPX86(mode) \
	_allocX86reg(-1, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode)

// Allocates an x86 register for VI register `vi`.
#define ALLOCVI(vi, mode) \
	_allocX86reg(-1, X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), vi, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode)

// Registers VI register `vi` as needed (note: the expansion ends with a semicolon).
#define ADD_VI_NEEDED(vi) \
	_addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), vi);
|
||||
|
||||
// Exchanges the low 32 bits of x and y in place (both are accessed as u32).
// A temporary is used instead of a chained XOR swap: the chained form modifies
// one operand twice within a single unsequenced expression, and it zeroes the
// value when x and y name the same object.
#define SWAP(x, y) { \
	const u32 swap_tmp_ = *(u32*)&(x); \
	*(u32*)&(x) = *(u32*)&(y); \
	*(u32*)&(y) = swap_tmp_; \
}
|
||||
|
||||
/*****************************************
|
||||
VU Micromode Upper instructions
|
||||
*****************************************/
|
||||
|
||||
void recVUMI_ABS(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDi(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDq(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDx(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDy(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDz(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDw(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDA(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_ADDAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUB(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBi(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBq(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBx(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBy(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBz(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBw(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBA(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_SUBAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MUL(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULA(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MULAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDA(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MADDAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUB(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBA(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAq(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MSUBAw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAX(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MAXw(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINI(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIi(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIx(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIy(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIz(VURegs *vuRegs, int info);
|
||||
void recVUMI_MINIw(VURegs *vuRegs, int info);
|
||||
void recVUMI_OPMULA(VURegs *vuRegs, int info);
|
||||
void recVUMI_OPMSUB(VURegs *vuRegs, int info);
|
||||
void recVUMI_NOP(VURegs *vuRegs, int info);
|
||||
void recVUMI_FTOI0(VURegs *vuRegs, int info);
|
||||
void recVUMI_FTOI4(VURegs *vuRegs, int info);
|
||||
void recVUMI_FTOI12(VURegs *vuRegs, int info);
|
||||
void recVUMI_FTOI15(VURegs *vuRegs, int info);
|
||||
void recVUMI_ITOF0(VURegs *vuRegs, int info);
|
||||
void recVUMI_ITOF4(VURegs *vuRegs, int info);
|
||||
void recVUMI_ITOF12(VURegs *vuRegs, int info);
|
||||
void recVUMI_ITOF15(VURegs *vuRegs, int info);
|
||||
void recVUMI_CLIP(VURegs *vuRegs, int info);
|
||||
|
||||
/*****************************************
|
||||
VU Micromode Lower instructions
|
||||
*****************************************/
|
||||
|
||||
void recVUMI_DIV(VURegs *vuRegs, int info);
|
||||
void recVUMI_SQRT(VURegs *vuRegs, int info);
|
||||
void recVUMI_RSQRT(VURegs *vuRegs, int info);
|
||||
void recVUMI_IADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_IADDI(VURegs *vuRegs, int info);
|
||||
void recVUMI_IADDIU(VURegs *vuRegs, int info);
|
||||
void recVUMI_IAND(VURegs *vuRegs, int info);
|
||||
void recVUMI_IOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_ISUB(VURegs *vuRegs, int info);
|
||||
void recVUMI_ISUBIU(VURegs *vuRegs, int info);
|
||||
void recVUMI_MOVE(VURegs *vuRegs, int info);
|
||||
void recVUMI_MFIR(VURegs *vuRegs, int info);
|
||||
void recVUMI_MTIR(VURegs *vuRegs, int info);
|
||||
void recVUMI_MR32(VURegs *vuRegs, int info);
|
||||
void recVUMI_LQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_LQD(VURegs *vuRegs, int info);
|
||||
void recVUMI_LQI(VURegs *vuRegs, int info);
|
||||
void recVUMI_SQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_SQD(VURegs *vuRegs, int info);
|
||||
void recVUMI_SQI(VURegs *vuRegs, int info);
|
||||
void recVUMI_ILW(VURegs *vuRegs, int info);
|
||||
void recVUMI_ISW(VURegs *vuRegs, int info);
|
||||
void recVUMI_ILWR(VURegs *vuRegs, int info);
|
||||
void recVUMI_ISWR(VURegs *vuRegs, int info);
|
||||
void recVUMI_LOI(VURegs *vuRegs, int info);
|
||||
void recVUMI_RINIT(VURegs *vuRegs, int info);
|
||||
void recVUMI_RGET(VURegs *vuRegs, int info);
|
||||
void recVUMI_RNEXT(VURegs *vuRegs, int info);
|
||||
void recVUMI_RXOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_WAITQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_FSAND(VURegs *vuRegs, int info);
|
||||
void recVUMI_FSEQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_FSOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_FSSET(VURegs *vuRegs, int info);
|
||||
void recVUMI_FMAND(VURegs *vuRegs, int info);
|
||||
void recVUMI_FMEQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_FMOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCAND(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCEQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCOR(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCSET(VURegs *vuRegs, int info);
|
||||
void recVUMI_FCGET(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBEQ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBGEZ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBGTZ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBLTZ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBLEZ(VURegs *vuRegs, int info);
|
||||
void recVUMI_IBNE(VURegs *vuRegs, int info);
|
||||
void recVUMI_B(VURegs *vuRegs, int info);
|
||||
void recVUMI_BAL(VURegs *vuRegs, int info);
|
||||
void recVUMI_JR(VURegs *vuRegs, int info);
|
||||
void recVUMI_JALR(VURegs *vuRegs, int info);
|
||||
void recVUMI_MFP(VURegs *vuRegs, int info);
|
||||
void recVUMI_WAITP(VURegs *vuRegs, int info);
|
||||
void recVUMI_ESADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_ERSADD(VURegs *vuRegs, int info);
|
||||
void recVUMI_ELENG(VURegs *vuRegs, int info);
|
||||
void recVUMI_ERLENG(VURegs *vuRegs, int info);
|
||||
void recVUMI_EATANxy(VURegs *vuRegs, int info);
|
||||
void recVUMI_EATANxz(VURegs *vuRegs, int info);
|
||||
void recVUMI_ESUM(VURegs *vuRegs, int info);
|
||||
void recVUMI_ERCPR(VURegs *vuRegs, int info);
|
||||
void recVUMI_ESQRT(VURegs *vuRegs, int info);
|
||||
void recVUMI_ERSQRT(VURegs *vuRegs, int info);
|
||||
void recVUMI_ESIN(VURegs *vuRegs, int info);
|
||||
void recVUMI_EATAN(VURegs *vuRegs, int info);
|
||||
void recVUMI_EEXP(VURegs *vuRegs, int info);
|
||||
void recVUMI_XGKICK(VURegs *vuRegs, int info);
|
||||
// XTOP / XITOP recompiler entry points. Each is declared exactly once, using the
// same (VURegs *vuRegs, int info) signature as every other recVUMI_* prototype in
// this list.
void recVUMI_XTOP(VURegs *vuRegs, int info);
|
||||
void recVUMI_XITOP(VURegs *vuRegs, int info);
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,73 +1,73 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Super VU recompiler - author: zerofrog(@gmail.com)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "sVU_Micro.h"
|
||||
|
||||
//Using assembly code from an external file.
|
||||
#ifdef __LINUX__
|
||||
extern "C" {
|
||||
#endif
|
||||
extern void SuperVUExecuteProgram(u32 startpc, int vuindex);
|
||||
extern void SuperVUEndProgram();
|
||||
extern void svudispfntemp();
|
||||
#ifdef __LINUX__
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void SuperVUDestroy(int vuindex);
|
||||
extern void SuperVUReset(int vuindex);
|
||||
|
||||
// read = 0, will write to reg
|
||||
// read = 1, will read from reg
|
||||
// read = 2, addr of previously written reg (used for status and clip flags)
|
||||
extern u32 SuperVUGetVIAddr(int reg, int read);
|
||||
|
||||
// if p == 0, flush q else flush p; if wait is != 0, waits for p/q
|
||||
extern void SuperVUFlush(int p, int wait);
|
||||
|
||||
|
||||
// BaseVUmicroCPU subclass that identifies itself as "sVU0" / "SuperVU0 Recompiler".
// Only the name getters are defined inline; every other member is declared here
// and defined elsewhere.
// NOTE(review): presumably this is the SuperVU recompiler front-end for VU0, and
// the member functions below override BaseVUmicroCPU virtuals -- confirm against
// the base class declaration.
class recSuperVU0 : public BaseVUmicroCPU
|
||||
{
|
||||
public:
|
||||
// Default constructor (defined out of line).
recSuperVU0();
|
||||
|
||||
// Short identifier for this CPU provider: always the narrow string "sVU0".
const char* GetShortName() const { return "sVU0"; }
|
||||
// Descriptive name for this CPU provider, as a wide-string literal.
wxString GetLongName() const { return L"SuperVU0 Recompiler"; }
|
||||
|
||||
// Lifecycle hooks. Shutdown() carries an empty exception specification
// (throw()), i.e. it is declared as non-throwing.
void Allocate();
|
||||
void Shutdown() throw();
|
||||
void Reset();
|
||||
// NOTE(review): presumably runs recompiled VU0 code -- confirm in the definition.
void ExecuteBlock();
|
||||
// Takes an address and a size, both u32.
// NOTE(review): presumably invalidates recompiled code covering that range --
// confirm units and semantics against the definition.
void Clear(u32 Addr, u32 Size);
|
||||
};
|
||||
|
||||
// BaseVUmicroCPU subclass that identifies itself as "sVU1" / "SuperVU1 Recompiler".
// Only the name getters are defined inline; every other member is declared here
// and defined elsewhere.
// NOTE(review): presumably this is the SuperVU recompiler front-end for VU1, and
// the member functions below override BaseVUmicroCPU virtuals -- confirm against
// the base class declaration.
class recSuperVU1 : public BaseVUmicroCPU
|
||||
{
|
||||
public:
|
||||
// Default constructor (defined out of line).
recSuperVU1();
|
||||
|
||||
// Short identifier for this CPU provider: always the narrow string "sVU1".
const char* GetShortName() const { return "sVU1"; }
|
||||
// Descriptive name for this CPU provider, as a wide-string literal.
wxString GetLongName() const { return L"SuperVU1 Recompiler"; }
|
||||
|
||||
// Lifecycle hooks. Shutdown() carries an empty exception specification
// (throw()), i.e. it is declared as non-throwing.
void Allocate();
|
||||
void Shutdown() throw();
|
||||
void Reset();
|
||||
// NOTE(review): presumably runs recompiled VU1 code -- confirm in the definition.
void ExecuteBlock();
|
||||
// Takes an address and a size, both u32.
// NOTE(review): presumably invalidates recompiled code covering that range --
// confirm units and semantics against the definition.
void Clear(u32 Addr, u32 Size);
|
||||
};
|
||||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Super VU recompiler - author: zerofrog(@gmail.com)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "sVU_Micro.h"
|
||||
|
||||
//Using assembly code from an external file.
|
||||
#ifdef __LINUX__
|
||||
extern "C" {
|
||||
#endif
|
||||
extern void SuperVUExecuteProgram(u32 startpc, int vuindex);
|
||||
extern void SuperVUEndProgram();
|
||||
extern void svudispfntemp();
|
||||
#ifdef __LINUX__
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void SuperVUDestroy(int vuindex);
|
||||
extern void SuperVUReset(int vuindex);
|
||||
|
||||
// read = 0, will write to reg
|
||||
// read = 1, will read from reg
|
||||
// read = 2, addr of previously written reg (used for status and clip flags)
|
||||
extern u32 SuperVUGetVIAddr(int reg, int read);
|
||||
|
||||
// if p == 0, flush q else flush p; if wait is != 0, waits for p/q
|
||||
extern void SuperVUFlush(int p, int wait);
|
||||
|
||||
|
||||
// BaseVUmicroCPU subclass that identifies itself as "sVU0" / "SuperVU0 Recompiler".
// Only the name getters are defined inline; every other member is declared here
// and defined elsewhere.
// NOTE(review): presumably this is the SuperVU recompiler front-end for VU0, and
// the member functions below override BaseVUmicroCPU virtuals -- confirm against
// the base class declaration.
class recSuperVU0 : public BaseVUmicroCPU
|
||||
{
|
||||
public:
|
||||
// Default constructor (defined out of line).
recSuperVU0();
|
||||
|
||||
// Short identifier for this CPU provider: always the narrow string "sVU0".
const char* GetShortName() const { return "sVU0"; }
|
||||
// Descriptive name for this CPU provider, as a wide-string literal.
wxString GetLongName() const { return L"SuperVU0 Recompiler"; }
|
||||
|
||||
// Lifecycle hooks. Shutdown() carries an empty exception specification
// (throw()), i.e. it is declared as non-throwing.
void Allocate();
|
||||
void Shutdown() throw();
|
||||
void Reset();
|
||||
// NOTE(review): presumably runs recompiled VU0 code -- confirm in the definition.
void ExecuteBlock();
|
||||
// Takes an address and a size, both u32.
// NOTE(review): presumably invalidates recompiled code covering that range --
// confirm units and semantics against the definition.
void Clear(u32 Addr, u32 Size);
|
||||
};
|
||||
|
||||
// BaseVUmicroCPU subclass that identifies itself as "sVU1" / "SuperVU1 Recompiler".
// Only the name getters are defined inline; every other member is declared here
// and defined elsewhere.
// NOTE(review): presumably this is the SuperVU recompiler front-end for VU1, and
// the member functions below override BaseVUmicroCPU virtuals -- confirm against
// the base class declaration.
class recSuperVU1 : public BaseVUmicroCPU
|
||||
{
|
||||
public:
|
||||
// Default constructor (defined out of line).
recSuperVU1();
|
||||
|
||||
// Short identifier for this CPU provider: always the narrow string "sVU1".
const char* GetShortName() const { return "sVU1"; }
|
||||
// Descriptive name for this CPU provider, as a wide-string literal.
wxString GetLongName() const { return L"SuperVU1 Recompiler"; }
|
||||
|
||||
// Lifecycle hooks. Shutdown() carries an empty exception specification
// (throw()), i.e. it is declared as non-throwing.
void Allocate();
|
||||
void Shutdown() throw();
|
||||
void Reset();
|
||||
// NOTE(review): presumably runs recompiled VU1 code -- confirm in the definition.
void ExecuteBlock();
|
||||
// Takes an address and a size, both u32.
// NOTE(review): presumably invalidates recompiled code covering that range --
// confirm units and semantics against the definition.
void Clear(u32 Addr, u32 Size);
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue