* Disable newVifUnpack, which I left enabled in the prev commit (it's not ready yet!)

* Added feature to align call targets for EErec functions and blocks on P4's and AMDs, and pack them on Core2/i7's.
 * Fixed some svn:native props.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2347 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-12-15 20:46:30 +00:00
parent b5f643950c
commit b3fead5dc9
17 changed files with 6908 additions and 6862 deletions

View File

@ -157,9 +157,12 @@ template< typename T > void xWrite( T val );
class ModSibBase;
extern void xSetPtr( void* ptr );
extern u8* xGetPtr();
extern void xAlignPtr( uint bytes );
extern void xAdvancePtr( uint bytes );
extern void xAlignCallTarget();
extern u8* xGetPtr();
extern u8* xGetAlignedCallTarget();
extern JccComparisonType xInvertCond( JccComparisonType src );

View File

@ -395,6 +395,32 @@ __emitinline void xAlignPtr( uint bytes )
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~(bytes - 1) );
}
// Performs best-case alignment for the target CPU, for use prior to starting a new
// function. This is not meant to be used prior to jump targets, since it doesn't
// add padding (additionally, speed benefit from jump alignment is minimal, and often
// a loss).
__emitinline void xAlignCallTarget()
{
	// Release builds running on CPUs that report SSSE3 leave the emit pointer
	// untouched: Core2/i7 CPUs prefer unaligned (packed) call targets, and SSSE3
	// support is used as a reasonable filter for that CPU class.
	if( !IsDebugBuild && x86caps.hasSupplementalStreamingSIMD3Extensions )
		return;

	// Everything else (and every debug build, for disasm convenience) is aligned:
	//  - P4's and earlier prefer 16 byte alignment.
	//  - AMD Athlons and Phenoms prefer 8 byte alignment, but there is no easy
	//    heuristic for detecting them yet, so they get 16 bytes as well.
	//  - AMD Phenom IIs are unknown (either prefer 8 byte, or unaligned).
	xAlignPtr( 16 );
}
// Applies xAlignCallTarget() to the emitter's write pointer and returns the
// resulting position, i.e. the address at which the next function should start.
__emitinline u8* xGetAlignedCallTarget()
{
	xAlignCallTarget();

	// Read the pointer only after alignment so the caller gets the adjusted address.
	u8* const alignedTarget = x86Ptr;
	return alignedTarget;
}
__emitinline void xAdvancePtr( uint bytes )
{
if( IsDevBuild )

View File

@ -58,6 +58,11 @@ __forceinline void vif1FLUSH()
// Initializes VIF1 state: optionally sets up the new unpacker (when newVif1 is
// defined) and then resets the VIF1 mask tables through SetNewMask.
void vif1Init()
{
#ifdef newVif1
// Declared locally so the dependency only exists when the new VIF code is enabled.
extern void initNewVif(int idx);
// Index 1 selects VIF1.
initNewVif(1);
#endif
SetNewMask(g_vif1Masks, g_vif1HasMask3, 0, 0xffffffff);
}
@ -313,19 +318,13 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
return ret;
}
#ifdef newVif1
extern void initNewVif(int idx);
extern int nVifUnpack(int idx, u32 *data);
static int testVif = 0;
#endif
static int __fastcall Vif1TransUnpack(u32 *data)
{
#ifdef newVif1
if (!testVif) { initNewVif(1); testVif = 1; }
//int temp = nVifUnpack(1, data);
//if (temp >= 0) return temp;
extern int nVifUnpack(int idx, u32 *data);
return nVifUnpack(1, data);
#endif
XMMRegisters::Freeze();
if (vif1.vifpacketsize < vif1.tag.size)

View File

@ -60,7 +60,7 @@ static __forceinline u32 vif_size(u8 num)
return (num == 0) ? 0x1000 : 0x4000;
}
#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
//#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
//#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
//#define newVif0 // Use New Code for Vif0 Unpacks (not implemented)
#endif

View File

@ -371,7 +371,7 @@ static DynGenFunc* _DynGen_JITCompile()
{
pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." );
u8* retval = xGetPtr();
u8* retval = xGetAlignedCallTarget();
_DynGen_StackFrameCheck();
xMOV( ecx, &cpuRegs.pc );
@ -388,7 +388,7 @@ static DynGenFunc* _DynGen_JITCompile()
static DynGenFunc* _DynGen_JITCompileInBlock()
{
u8* retval = xGetPtr();
u8* retval = xGetAlignedCallTarget();
xJMP( JITCompile );
return (DynGenFunc*)retval;
}
@ -396,7 +396,7 @@ static DynGenFunc* _DynGen_JITCompileInBlock()
// called when jumping to variable pc address
static DynGenFunc* _DynGen_DispatcherReg()
{
u8* retval = xGetPtr();
u8* retval = xGetPtr(); // fallthrough target, can't align it!
_DynGen_StackFrameCheck();
xMOV( eax, &cpuRegs.pc );
@ -410,7 +410,7 @@ static DynGenFunc* _DynGen_DispatcherReg()
static DynGenFunc* _DynGen_EnterRecompiledCode()
{
u8* retval = xGetPtr();
u8* retval = xGetAlignedCallTarget();
// "standard" frame pointer setup for aligned stack: Record the original
// esp into ebp, and then align esp. ebp references the original esp base
@ -446,6 +446,8 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
xMOV( &s_store_ebp, ebp );
xJMP( ptr32[&DispatcherReg] );
xAlignCallTarget();
imm = (uptr)xGetPtr();
ExitRecompiledCode = (DynGenFunc*)xGetPtr();
@ -1254,7 +1256,7 @@ void recompileNextInstruction(int delayslot)
// _flushCachedRegs();
// g_cpuHasConstReg = 1;
if (!delayslot && x86Ptr - recPtr > 0x1000)
if (!delayslot && (xGetPtr() - recPtr > 0x1000) )
s_nEndBlock = pc;
}
@ -1335,9 +1337,8 @@ static void __fastcall recRecompile( const u32 startpc )
recResetEE();
}
x86SetPtr( recPtr );
x86Align(16);
recPtr = x86Ptr;
xSetPtr( recPtr );
recPtr = xGetAlignedCallTarget();
s_nBlockFF = false;
if (HWADDR(startpc) == 0x81fc0)
@ -1718,14 +1719,14 @@ StartRecomp:
}
}
pxAssert( x86Ptr < recMem+REC_CACHEMEM );
pxAssert( xGetPtr() < recMem+REC_CACHEMEM );
pxAssert( recConstBufPtr < recConstBuf + RECCONSTBUF_SIZE );
pxAssert( x86FpuState == 0 );
pxAssert(x86Ptr - recPtr < 0x10000);
s_pCurBlockEx->x86size = x86Ptr - recPtr;
pxAssert(xGetPtr() - recPtr < 0x10000);
s_pCurBlockEx->x86size = xGetPtr() - recPtr;
recPtr = x86Ptr;
recPtr = xGetPtr();
pxAssert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg );

View File

@ -1,441 +1,441 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//------------------------------------------------------------------
// Micro VU - Pass 1 Functions
//------------------------------------------------------------------
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
#define aReg(x) mVUregs.VF[x]
#define bReg(x, y) mVUregsTemp.VFreg[y] = x; mVUregsTemp.VF[y]
// Max/min of two expressions. Every argument use is parenthesized so that
// operands containing lower-precedence operators (e.g. `a & b`, `c ? d : e`)
// are compared as a whole. Note: each argument may be evaluated twice, so do
// not pass expressions with side effects.
#define aMax(x, y) (((x) > (y)) ? (x) : (y))
#define aMin(x, y) (((x) < (y)) ? (x) : (y))
// Read a VF reg.
// For every field selected by _X/_Y/_Z/_W, raises mVUstall to at least the
// pending value tracked for that field of VF[xReg] and marks the field as read
// in vfRead. Does nothing when xReg is 0.
#define analyzeReg1(xReg, vfRead) { \
if (xReg) { \
if (_X) { mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; } \
if (_Y) { mVUstall = aMax(mVUstall, aReg(xReg).y); vfRead.reg = xReg; vfRead.y = 1; } \
if (_Z) { mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; } \
if (_W) { mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; } \
} \
}
// Write to a VF reg.
// For every field selected by _X/_Y/_Z/_W, records xReg in
// mVUregsTemp.VFreg[isLowOp] and stores 4 in the matching field of both
// mVUregsTemp.VF[isLowOp] and vfWrite (presumably the write latency in
// cycles -- confirm). Does nothing when xReg is 0.
#define analyzeReg2(xReg, vfWrite, isLowOp) { \
if (xReg) { \
if (_X) { bReg(xReg, isLowOp).x = 4; vfWrite.reg = xReg; vfWrite.x = 4; } \
if (_Y) { bReg(xReg, isLowOp).y = 4; vfWrite.reg = xReg; vfWrite.y = 4; } \
if (_Z) { bReg(xReg, isLowOp).z = 4; vfWrite.reg = xReg; vfWrite.z = 4; } \
if (_W) { bReg(xReg, isLowOp).w = 4; vfWrite.reg = xReg; vfWrite.w = 4; } \
} \
}
// Read a VF reg (BC opcodes).
// Exactly one field is read: the first of _bc_x/_bc_y/_bc_z that is set, or w
// when none of them is. mVUstall is raised to that field's pending value and
// the field is marked as read in vfRead. Does nothing when xReg is 0.
#define analyzeReg3(xReg, vfRead) { \
if (xReg) { \
if (_bc_x) { mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; } \
else if (_bc_y) { mVUstall = aMax(mVUstall, aReg(xReg).y); vfRead.reg = xReg; vfRead.y = 1; } \
else if (_bc_z) { mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; } \
else { mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; } \
} \
}
// For Clip Opcode.
// Only the w field of VF[xReg] is read: mVUstall is raised to its pending
// value and vfRead is marked as reading w. Does nothing when xReg is 0.
#define analyzeReg4(xReg, vfRead) { \
if (xReg) { \
mVUstall = aMax(mVUstall, aReg(xReg).w); \
vfRead.reg = xReg; vfRead.w = 1; \
} \
}
// Read VF reg (FsF/FtF).
// fxf selects the single field that is read (0 = x, 1 = y, 2 = z, 3 = w);
// any other value leaves mVUstall and vfRead untouched. Does nothing when
// xReg is 0.
#define analyzeReg5(xReg, fxf, vfRead) { \
if (xReg) { \
switch (fxf) { \
case 0: mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; break; \
case 1: mVUstall = aMax(mVUstall, aReg(xReg).y); vfRead.reg = xReg; vfRead.y = 1; break; \
case 2: mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; break; \
case 3: mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; break; \
} \
} \
}
// Flips xyzw stalls to yzwx (MR32 Opcode).
// Each selected destination field reads the *next* source field:
// _X reads y, _Y reads z, _Z reads w and _W wraps around to x. The stall and
// the vfRead markers are therefore taken from the rotated field. Does nothing
// when xReg is 0.
#define analyzeReg6(xReg, vfRead) { \
if (xReg) { \
if (_X) { mVUstall = aMax(mVUstall, aReg(xReg).y); vfRead.reg = xReg; vfRead.y = 1; } \
if (_Y) { mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; } \
if (_Z) { mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; } \
if (_W) { mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; } \
} \
}
// Reading a VI reg.
// Raises mVUstall to at least the pending value tracked in mVUregs.VI[xReg]
// and marks viRead as using xReg. Does nothing when xReg is 0.
#define analyzeVIreg1(xReg, viRead) { \
if (xReg) { \
mVUstall = aMax(mVUstall, mVUregs.VI[xReg]); \
viRead.reg = xReg; viRead.used = 1; \
} \
}
// Writing to a VI reg.
// Invalidates any constant tracked for xReg, records the pending write
// (register and aCycles) in mVUregsTemp, and fills viWrite. Note that
// viWrite.used receives aCycles rather than a plain 0/1 flag. Does nothing
// when xReg is 0.
#define analyzeVIreg2(xReg, viWrite, aCycles) { \
if (xReg) { \
mVUconstReg[xReg].isValid = 0; \
mVUregsTemp.VIreg = xReg; \
mVUregsTemp.VI = aCycles; \
viWrite.reg = xReg; \
viWrite.used = aCycles; \
} \
}
// Records x as the pending Q value and stalls on the currently tracked mVUregs.q.
#define analyzeQreg(x) { mVUregsTemp.q = x; mVUstall = aMax(mVUstall, mVUregs.q); }
// Records x as the pending P value and stalls on (mVUregs.p - 1), or on 0 when mVUregs.p is 0.
#define analyzePreg(x) { mVUregsTemp.p = x; mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); }
// Marks the R register as touched by the current op.
#define analyzeRreg() { mVUregsTemp.r = 1; }
// Stalls on the currently tracked mVUregs.xgkick value.
#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); }
// Records x as the pending xgkick value.
#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; }
// Marks register x as holding the known constant v; register 0 is never tracked.
#define setConstReg(x, v) { if (x) { mVUconstReg[x].isValid = 1; mVUconstReg[x].regValue = v; } }
//------------------------------------------------------------------
// FMAC1 - Normal FMAC Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) {
	// This op requests a status flag update.
	sFLAG.doFlag = 1;

	// Both sources are read using the _X/_Y/_Z/_W field selectors.
	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg1(Ft, mVUup.VF_read[1]);

	// Destination write is recorded in the upper-op slot (isLowOp == 0).
	analyzeReg2(Fd, mVUup.VF_write, 0);
}
//------------------------------------------------------------------
// FMAC2 - ABS/FTOI/ITOF Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFMAC2(mV, int Fs, int Ft) {
	// Single source read; no flag update is requested for these ops.
	analyzeReg1(Fs, mVUup.VF_read[0]);

	// Ft is the destination here, recorded in the upper-op slot.
	analyzeReg2(Ft, mVUup.VF_write, 0);
}
//------------------------------------------------------------------
// FMAC3 - BC(xyzw) FMAC Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) {
	// This op requests a status flag update.
	sFLAG.doFlag = 1;

	// Fs is read per field selector; Ft is read through the broadcast
	// (_bc_*) selector, i.e. a single field only.
	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg3(Ft, mVUup.VF_read[1]);

	// Destination write is recorded in the upper-op slot.
	analyzeReg2(Fd, mVUup.VF_write, 0);
}
//------------------------------------------------------------------
// FMAC4 - Clip FMAC Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFMAC4(mV, int Fs, int Ft) {
	// Clip updates the clip flag rather than the status flag.
	cFLAG.doFlag = 1;

	// Fs is read per field selector; only the w field of Ft is read.
	analyzeReg1(Fs, mVUup.VF_read[0]);
	analyzeReg4(Ft, mVUup.VF_read[1]);
}
//------------------------------------------------------------------
// IALU - IALU Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeIALU1(mV, int Id, int Is, int It) {
	// Destination index 0 marks the op as a NOP (analyzeVIreg2 ignores it too).
	if (Id == 0) {
		mVUlow.isNOP = 1;
	}

	// Two VI sources, then the VI destination with a 1-cycle pending value.
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[1]);
	analyzeVIreg2(Id, mVUlow.VI_write, 1);
}
microVUt(void) mVUanalyzeIALU2(mV, int Is, int It) {
	// It is the destination for this form; index 0 marks the op as a NOP.
	if (It == 0) {
		mVUlow.isNOP = 1;
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg2(It, mVUlow.VI_write, 1);
}
microVUt(void) mVUanalyzeIADDI(mV, int Is, int It, s16 imm) {
	// Same read/write bookkeeping as the two-operand IALU form.
	mVUanalyzeIALU2(mVU, Is, It);

	// With source register 0 the destination is tracked as the constant imm.
	// This must follow the call above, because analyzeVIreg2 invalidates the
	// constant entry for It.
	if (Is == 0) {
		setConstReg(It, imm);
	}
}
//------------------------------------------------------------------
// MR32 - MR32 Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMR32(mV, int Fs, int Ft) {
	// Destination index 0 marks the op as a NOP.
	if (Ft == 0) {
		mVUlow.isNOP = 1;
	}

	// Source fields are read rotated (x<-y, y<-z, z<-w, w<-x); the write is
	// recorded in the lower-op slot (isLowOp == 1).
	analyzeReg6(Fs, mVUlow.VF_read[0]);
	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
//------------------------------------------------------------------
// FDIV - DIV/SQRT/RSQRT Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) {
	mVUprint("microVU: DIV Opcode");

	// Each source contributes the single field chosen by its Fsf/Ftf selector.
	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
	analyzeReg5(Ft, Ftf, mVUlow.VF_read[1]);

	// Stall on the tracked Q value and record xCycles as the new pending one.
	analyzeQreg(xCycles);
}
//------------------------------------------------------------------
// EFU - EFU Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles) {
	mVUprint("microVU: EFU Opcode");

	// Single-field source read, selected by Fsf.
	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);

	// Stall on the tracked P value and record xCycles as the new pending one.
	analyzePreg(xCycles);
}
microVUt(void) mVUanalyzeEFU2(mV, int Fs, u8 xCycles) {
	mVUprint("microVU: EFU Opcode");

	// Source is read per _X/_Y/_Z/_W field selector.
	analyzeReg1(Fs, mVUlow.VF_read[0]);

	// Stall on the tracked P value and record xCycles as the new pending one.
	analyzePreg(xCycles);
}
//------------------------------------------------------------------
// MFP - MFP Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMFP(mV, int Ft) {
	// Destination index 0 marks the op as a NOP.
	if (Ft == 0) {
		mVUlow.isNOP = 1;
	}

	// Only a VF write is recorded here (lower-op slot).
	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
//------------------------------------------------------------------
// MOVE - MOVE Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMOVE(mV, int Fs, int Ft) {
	// Moving into register 0, or moving a register onto itself, is a NOP.
	if ((Ft == 0) || (Ft == Fs)) {
		mVUlow.isNOP = 1;
	}

	// The read and write are still recorded regardless of the NOP marking.
	analyzeReg1(Fs, mVUlow.VF_read[0]);
	analyzeReg2(Ft, mVUlow.VF_write, 1);
}
//------------------------------------------------------------------
// LQx - LQ/LQD/LQI Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) {
	// Address register read, then the VF destination (lower-op slot).
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeReg2 (Ft, mVUlow.VF_write, 1);

	// A load into VF register 0 is only a full NOP when it does not also
	// update a non-zero address register; otherwise just the VF write is dropped.
	if (Ft == 0) {
		if (writeIs && Is) {
			mVUlow.noWriteVF = 1;
		}
		else {
			mVUlow.isNOP = 1;
		}
	}

	// Forms with writeIs set also write the address register back.
	if (writeIs) {
		analyzeVIreg2(Is, mVUlow.VI_write, 1);
	}
}
//------------------------------------------------------------------
// SQx - SQ/SQD/SQI Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) {
	// The stored VF register and the address register are both reads.
	analyzeReg1 (Fs, mVUlow.VF_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[0]);

	// Forms with writeIt set also write the address register back.
	if (writeIt) {
		analyzeVIreg2(It, mVUlow.VI_write, 1);
	}
}
//------------------------------------------------------------------
// R*** - R Reg Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeR1(mV, int Fs, int Fsf) {
	// Single-field source read, selected by Fsf.
	analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);

	// Mark the R register as touched.
	analyzeRreg();
}
microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) {
	// With destination 0 the op is either dropped entirely (canBeNOP) or
	// kept with its VF write suppressed.
	if (Ft == 0) {
		if (canBeNOP) {
			mVUlow.isNOP = 1;
		}
		else {
			mVUlow.noWriteVF = 1;
		}
	}

	analyzeReg2(Ft, mVUlow.VF_write, 1);

	// Mark the R register as touched.
	analyzeRreg();
}
//------------------------------------------------------------------
// Sflag - Status Flag Opcodes
//------------------------------------------------------------------
// Walks backwards from the current position looking for an earlier op that
// has sFLAG.doFlag set and is far enough back (distance counter j >= 3).
// On the first such op it sets mFLAG.doFlag (setMacFlag true) or
// sFLAG.doNonSticky (setMacFlag false) and stops. iPC is restored on exit.
microVUt(void) flagSet(mV, bool setMacFlag) {
// Remember where we are so the scan leaves iPC unchanged.
int curPC = iPC;
// i bounds the walk by mVUcount; j is the distance counter.
for (int i = mVUcount, j = 0; i > 0; i--, j++) {
// mVUstall is added on every iteration before stepping back
// (presumably it refers to the op at the current iPC -- confirm).
j += mVUstall;
incPC2(-2);
if (sFLAG.doFlag && (j >= 3)) {
if (setMacFlag) { mFLAG.doFlag = 1; }
else { sFLAG.doNonSticky = 1; }
break;
}
}
iPC = curPC;
}
microVUt(void) mVUanalyzeSflag(mV, int It) {
	mVUlow.readFlags = 1;
	analyzeVIreg2(It, mVUlow.VI_write, 1);

	// Reading the flag into register 0 is a NOP; nothing else to analyze.
	if (It == 0) {
		mVUlow.isNOP = 1;
		return;
	}

	mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block
	mVUinfo.swapOps = 1;

	// Find the earlier op whose status flag result this read depends on.
	flagSet(mVU, 0);

	// Bit 0 of needExactMatch is set when the read occurs within the first
	// four ops of the block.
	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 1;
	}
}
microVUt(void) mVUanalyzeFSSET(mV) {
	// Tag the lower op as FSSET and as an op that reads flags.
	mVUlow.isFSSET = 1;
	mVUlow.readFlags = 1;
}
//------------------------------------------------------------------
// Mflag - Mac Flag Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMflag(mV, int Is, int It) {
	mVUlow.readFlags = 1;
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg2(It, mVUlow.VI_write, 1);

	// Reading the flag into register 0 is a NOP; nothing else to analyze.
	if (It == 0) {
		mVUlow.isNOP = 1;
		return;
	}

	mVUinfo.swapOps = 1;

	// Find the earlier op whose MAC flag result this read depends on.
	flagSet(mVU, 1);

	// Bit 1 of needExactMatch is set when the read occurs within the first
	// four ops of the block.
	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 2;
	}
}
//------------------------------------------------------------------
// Cflag - Clip Flag Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeCflag(mV, int It) {
	mVUinfo.swapOps = 1;
	mVUlow.readFlags = 1;

	// Bit 2 of needExactMatch is set when the read occurs within the first
	// four ops of the block.
	if (mVUcount < 4) {
		mVUpBlock->pState.needExactMatch |= 4;
	}

	analyzeVIreg2(It, mVUlow.VI_write, 1);
}
//------------------------------------------------------------------
// XGkick
//------------------------------------------------------------------
microVUt(void) mVUanalyzeXGkick(mV, int Fs, int xCycles) {
	// Despite its name, Fs is analyzed as a VI register read here.
	analyzeVIreg1(Fs, mVUlow.VI_read[0]);

	// Stall on any XGKICK still pending, then record this one's cycle count.
	analyzeXGkick1();
	analyzeXGkick2(xCycles);

	// Note: Technically XGKICK should stall on the next instruction, whereas
	// this code stalls on the same instruction. This only matters when FMxxx
	// or FSxxx opcodes that check flags are placed very specifically near
	// this instruction AND the XGKICK instruction stalls. No game should be
	// affected by this minor difference.
}
//------------------------------------------------------------------
// Branches - Branch Opcodes
//------------------------------------------------------------------
// Checks whether the VI register xReg used by a branch was written by the
// immediately preceding ops (scanning back at most 5 positions). If so, the
// branch is flagged through infoVar and, when infoVar was not already set,
// mVUlow.backupVI is set on the op one step forward from where the scan
// stopped. iPC is always restored before returning. Register 0 is ignored.
microVUt(void) analyzeBranchVI(mV, int xReg, bool &infoVar) {
if (!xReg) return;
int i;
// Scan window: at most 5 steps, and never more than mVUcount+1.
int iEnd = aMin(5, (mVUcount+1));
int bPC = iPC;
incPC2(-2);
for (i = 0; i < iEnd; i++) {
// Scan reached step mVUcount (presumably the start of the block -- confirm):
// fall back on the VI backup recorded in the block's pipeline state.
// NOTE(review): `i < 5` is always true here because the loop bound iEnd <= 5.
if ((i == mVUcount) && (i < 5)) {
if (mVUpBlock->pState.viBackUp == xReg) {
infoVar = 1;
i++;
}
break;
}
// The op at the scan position writes xReg.
if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used) {
// NOTE(review): `i == 5` cannot be true inside this loop (i < iEnd <= 5).
if (mVUlow.readFlags || i == 5) break;
// The op directly before the branch always continues the scan.
if (i == 0) { incPC2(-2); continue; }
// Older ops continue the scan only if they also read xReg.
if (((mVUlow.VI_read[0].reg == xReg) && (mVUlow.VI_read[0].used))
|| ((mVUlow.VI_read[1].reg == xReg) && (mVUlow.VI_read[1].used)))
{ incPC2(-2); continue; }
}
break;
}
// i != 0 means at least one preceding write to xReg was found.
if (i) {
if (!infoVar) {
// Step forward one position from where the scan stopped and mark that op.
incPC2(2);
mVUlow.backupVI = 1;
infoVar = 1;
}
iPC = bPC;
Console.WriteLn( Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x]", getIndex, i, xPC);
}
else iPC = bPC;
}
// Branch in Branch Delay-Slots
microVUt(int) mVUbranchCheck(mV) {
	// The first op of a block has no predecessor to inspect.
	if (!mVUcount) return 0;

	// Peek at the previous op.
	incPC(-2);
	if (!mVUlow.branch) {
		// Previous op is not a branch: restore the position, nothing to flag.
		incPC(2);
		return 0;
	}

	// Previous op is a branch, so the current op sits in its delay slot:
	// tag the previous op as a bad branch and the current one as an evil branch.
	mVUlow.badBranch = 1;
	incPC(2);
	mVUlow.evilBranch = 1;
	mVUregs.blockType = 2;
	Console.Warning("microVU%d Warning: Branch in Branch delay slot! [%04x]", mVU->index, xPC);
	return 1;
}
microVUt(void) mVUanalyzeCondBranch1(mV, int Is) {
	analyzeVIreg1(Is, mVUlow.VI_read[0]);

	// The VI-delay analysis is skipped when the branch already stalls, or when
	// it sits in another branch's delay slot (the check runs only if no stall).
	if (mVUstall || mVUbranchCheck(mVU)) {
		return;
	}

	analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
}
microVUt(void) mVUanalyzeCondBranch2(mV, int Is, int It) {
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[1]);

	// The VI-delay analysis is skipped when the branch already stalls, or when
	// it sits in another branch's delay slot (the check runs only if no stall).
	if (mVUstall || mVUbranchCheck(mVU)) {
		return;
	}

	// Each compared register is analyzed independently.
	analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
	analyzeBranchVI(mVU, It, mVUlow.memReadIt);
}
microVUt(void) mVUanalyzeNormBranch(mV, int It, bool isBAL) {
	// Called for its side effects only (delay-slot branch tagging).
	mVUbranchCheck(mVU);

	// Only the linking form writes a register.
	if (!isBAL) {
		return;
	}

	// The link register receives a value known at analysis time, so it is
	// tracked as a constant. The order matters: analyzeVIreg2 invalidates the
	// constant entry that setConstReg then fills in.
	analyzeVIreg2(It, mVUlow.VI_write, 1);
	setConstReg(It, bSaveAddr);
}
microVUt(void) mVUanalyzeJump(mV, int Is, int It, bool isJALR) {
	// Called for its side effects only (delay-slot branch tagging).
	mVUbranchCheck(mVU);

	// Branch type code: 10 for the linking form, 9 for the plain jump.
	if (isJALR) {
		mVUlow.branch = 10;
	}
	else {
		mVUlow.branch = 9;
	}

	// If the target register holds a tracked constant, remember the target so
	// the jump can be resolved at compile time (unless the const hack is on).
	if (mVUconstReg[Is].isValid && !CHECK_VU_CONSTHACK) {
		mVUlow.constJump.isValid = 1;
		mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
		//DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU->index);
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);

	// The linking form also writes the return address, which is a known constant.
	if (isJALR) {
		analyzeVIreg2(It, mVUlow.VI_write, 1);
		setConstReg(It, bSaveAddr);
	}
}
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//------------------------------------------------------------------
// Micro VU - Pass 1 Functions
//------------------------------------------------------------------
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
#define aReg(x) mVUregs.VF[x]
#define bReg(x, y) mVUregsTemp.VFreg[y] = x; mVUregsTemp.VF[y]
// Max/min of two expressions. Every argument use is parenthesized so that
// operands containing lower-precedence operators (e.g. `a & b`, `c ? d : e`)
// are compared as a whole. Note: each argument may be evaluated twice, so do
// not pass expressions with side effects.
#define aMax(x, y) (((x) > (y)) ? (x) : (y))
#define aMin(x, y) (((x) < (y)) ? (x) : (y))
// Read a VF reg
#define analyzeReg1(xReg, vfRead) { \
if (xReg) { \
if (_X) { mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; } \
if (_Y) { mVUstall = aMax(mVUstall, aReg(xReg).y); vfRead.reg = xReg; vfRead.y = 1; } \
if (_Z) { mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; } \
if (_W) { mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; } \
} \
}
// Write to a VF reg
#define analyzeReg2(xReg, vfWrite, isLowOp) { \
if (xReg) { \
if (_X) { bReg(xReg, isLowOp).x = 4; vfWrite.reg = xReg; vfWrite.x = 4; } \
if (_Y) { bReg(xReg, isLowOp).y = 4; vfWrite.reg = xReg; vfWrite.y = 4; } \
if (_Z) { bReg(xReg, isLowOp).z = 4; vfWrite.reg = xReg; vfWrite.z = 4; } \
if (_W) { bReg(xReg, isLowOp).w = 4; vfWrite.reg = xReg; vfWrite.w = 4; } \
} \
}
// Read a VF reg (BC opcodes)
#define analyzeReg3(xReg, vfRead) { \
if (xReg) { \
if (_bc_x) { mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; } \
else if (_bc_y) { mVUstall = aMax(mVUstall, aReg(xReg).y); vfRead.reg = xReg; vfRead.y = 1; } \
else if (_bc_z) { mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; } \
else { mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; } \
} \
}
// For Clip Opcode
#define analyzeReg4(xReg, vfRead) { \
if (xReg) { \
mVUstall = aMax(mVUstall, aReg(xReg).w); \
vfRead.reg = xReg; vfRead.w = 1; \
} \
}
// Read VF reg (FsF/FtF)
#define analyzeReg5(xReg, fxf, vfRead) { \
if (xReg) { \
switch (fxf) { \
case 0: mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; break; \
case 1: mVUstall = aMax(mVUstall, aReg(xReg).y); vfRead.reg = xReg; vfRead.y = 1; break; \
case 2: mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; break; \
case 3: mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; break; \
} \
} \
}
// Flips xyzw stalls to yzwx (MR32 Opcode)
#define analyzeReg6(xReg, vfRead) { \
if (xReg) { \
if (_X) { mVUstall = aMax(mVUstall, aReg(xReg).y); vfRead.reg = xReg; vfRead.y = 1; } \
if (_Y) { mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; } \
if (_Z) { mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; } \
if (_W) { mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; } \
} \
}
// Reading a VI reg
#define analyzeVIreg1(xReg, viRead) { \
if (xReg) { \
mVUstall = aMax(mVUstall, mVUregs.VI[xReg]); \
viRead.reg = xReg; viRead.used = 1; \
} \
}
// Writing to a VI reg
#define analyzeVIreg2(xReg, viWrite, aCycles) { \
if (xReg) { \
mVUconstReg[xReg].isValid = 0; \
mVUregsTemp.VIreg = xReg; \
mVUregsTemp.VI = aCycles; \
viWrite.reg = xReg; \
viWrite.used = aCycles; \
} \
}
#define analyzeQreg(x) { mVUregsTemp.q = x; mVUstall = aMax(mVUstall, mVUregs.q); }
#define analyzePreg(x) { mVUregsTemp.p = x; mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); }
#define analyzeRreg() { mVUregsTemp.r = 1; }
#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); }
#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; }
#define setConstReg(x, v) { if (x) { mVUconstReg[x].isValid = 1; mVUconstReg[x].regValue = v; } }
//------------------------------------------------------------------
// FMAC1 - Normal FMAC Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) {
sFLAG.doFlag = 1;
analyzeReg1(Fs, mVUup.VF_read[0]);
analyzeReg1(Ft, mVUup.VF_read[1]);
analyzeReg2(Fd, mVUup.VF_write, 0);
}
//------------------------------------------------------------------
// FMAC2 - ABS/FTOI/ITOF Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFMAC2(mV, int Fs, int Ft) {
analyzeReg1(Fs, mVUup.VF_read[0]);
analyzeReg2(Ft, mVUup.VF_write, 0);
}
//------------------------------------------------------------------
// FMAC3 - BC(xyzw) FMAC Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) {
sFLAG.doFlag = 1;
analyzeReg1(Fs, mVUup.VF_read[0]);
analyzeReg3(Ft, mVUup.VF_read[1]);
analyzeReg2(Fd, mVUup.VF_write, 0);
}
//------------------------------------------------------------------
// FMAC4 - Clip FMAC Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFMAC4(mV, int Fs, int Ft) {
cFLAG.doFlag = 1;
analyzeReg1(Fs, mVUup.VF_read[0]);
analyzeReg4(Ft, mVUup.VF_read[1]);
}
//------------------------------------------------------------------
// IALU - IALU Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeIALU1(mV, int Id, int Is, int It) {
if (!Id) { mVUlow.isNOP = 1; }
analyzeVIreg1(Is, mVUlow.VI_read[0]);
analyzeVIreg1(It, mVUlow.VI_read[1]);
analyzeVIreg2(Id, mVUlow.VI_write, 1);
}
microVUt(void) mVUanalyzeIALU2(mV, int Is, int It) {
if (!It) { mVUlow.isNOP = 1; }
analyzeVIreg1(Is, mVUlow.VI_read[0]);
analyzeVIreg2(It, mVUlow.VI_write, 1);
}
microVUt(void) mVUanalyzeIADDI(mV, int Is, int It, s16 imm) {
mVUanalyzeIALU2(mVU, Is, It);
if (!Is) { setConstReg(It, imm); }
}
//------------------------------------------------------------------
// MR32 - MR32 Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMR32(mV, int Fs, int Ft) {
if (!Ft) { mVUlow.isNOP = 1; }
analyzeReg6(Fs, mVUlow.VF_read[0]);
analyzeReg2(Ft, mVUlow.VF_write, 1);
}
//------------------------------------------------------------------
// FDIV - DIV/SQRT/RSQRT Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) {
mVUprint("microVU: DIV Opcode");
analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
analyzeReg5(Ft, Ftf, mVUlow.VF_read[1]);
analyzeQreg(xCycles);
}
//------------------------------------------------------------------
// EFU - EFU Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles) {
mVUprint("microVU: EFU Opcode");
analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
analyzePreg(xCycles);
}
microVUt(void) mVUanalyzeEFU2(mV, int Fs, u8 xCycles) {
mVUprint("microVU: EFU Opcode");
analyzeReg1(Fs, mVUlow.VF_read[0]);
analyzePreg(xCycles);
}
//------------------------------------------------------------------
// MFP - MFP Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMFP(mV, int Ft) {
if (!Ft) { mVUlow.isNOP = 1; }
analyzeReg2(Ft, mVUlow.VF_write, 1);
}
//------------------------------------------------------------------
// MOVE - MOVE Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMOVE(mV, int Fs, int Ft) {
if (!Ft || (Ft == Fs)) { mVUlow.isNOP = 1; }
analyzeReg1(Fs, mVUlow.VF_read[0]);
analyzeReg2(Ft, mVUlow.VF_write, 1);
}
//------------------------------------------------------------------
// LQx - LQ/LQD/LQI Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) {
analyzeVIreg1(Is, mVUlow.VI_read[0]);
analyzeReg2 (Ft, mVUlow.VF_write, 1);
if (!Ft) { if (writeIs && Is) { mVUlow.noWriteVF = 1; } else { mVUlow.isNOP = 1; } }
if (writeIs) { analyzeVIreg2(Is, mVUlow.VI_write, 1); }
}
//------------------------------------------------------------------
// SQx - SQ/SQD/SQI Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) {
analyzeReg1 (Fs, mVUlow.VF_read[0]);
analyzeVIreg1(It, mVUlow.VI_read[0]);
if (writeIt) { analyzeVIreg2(It, mVUlow.VI_write, 1); }
}
//------------------------------------------------------------------
// R*** - R Reg Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeR1(mV, int Fs, int Fsf) {
analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]);
analyzeRreg();
}
microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) {
if (!Ft) { if (canBeNOP) { mVUlow.isNOP = 1; } else { mVUlow.noWriteVF = 1; } }
analyzeReg2(Ft, mVUlow.VF_write, 1);
analyzeRreg();
}
//------------------------------------------------------------------
// Sflag - Status Flag Opcodes
//------------------------------------------------------------------
microVUt(void) flagSet(mV, bool setMacFlag) {
int curPC = iPC;
for (int i = mVUcount, j = 0; i > 0; i--, j++) {
j += mVUstall;
incPC2(-2);
if (sFLAG.doFlag && (j >= 3)) {
if (setMacFlag) { mFLAG.doFlag = 1; }
else { sFLAG.doNonSticky = 1; }
break;
}
}
iPC = curPC;
}
microVUt(void) mVUanalyzeSflag(mV, int It) {
mVUlow.readFlags = 1;
analyzeVIreg2(It, mVUlow.VI_write, 1);
if (!It) { mVUlow.isNOP = 1; }
else {
mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block
mVUinfo.swapOps = 1;
flagSet(mVU, 0);
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 1; }
}
}
microVUt(void) mVUanalyzeFSSET(mV) {
mVUlow.isFSSET = 1;
mVUlow.readFlags = 1;
}
//------------------------------------------------------------------
// Mflag - Mac Flag Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMflag(mV, int Is, int It) {
mVUlow.readFlags = 1;
analyzeVIreg1(Is, mVUlow.VI_read[0]);
analyzeVIreg2(It, mVUlow.VI_write, 1);
if (!It) { mVUlow.isNOP = 1; }
else {
mVUinfo.swapOps = 1;
flagSet(mVU, 1);
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 2; }
}
}
//------------------------------------------------------------------
// Cflag - Clip Flag Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeCflag(mV, int It) {
mVUinfo.swapOps = 1;
mVUlow.readFlags = 1;
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 4; }
analyzeVIreg2(It, mVUlow.VI_write, 1);
}
//------------------------------------------------------------------
// XGkick
//------------------------------------------------------------------
microVUt(void) mVUanalyzeXGkick(mV, int Fs, int xCycles) {
	// Despite its name, Fs is analyzed as a VI register read here.
	analyzeVIreg1(Fs, mVUlow.VI_read[0]);

	// Stall on any XGKICK still pending, then record this one's cycle count.
	analyzeXGkick1();
	analyzeXGkick2(xCycles);

	// Note: Technically XGKICK should stall on the next instruction, whereas
	// this code stalls on the same instruction. This only matters when FMxxx
	// or FSxxx opcodes that check flags are placed very specifically near
	// this instruction AND the XGKICK instruction stalls. No game should be
	// affected by this minor difference.
}
//------------------------------------------------------------------
// Branches - Branch Opcodes
//------------------------------------------------------------------
// Scans backwards from a branch for instructions that write VI register 'xReg'
// and decides whether the branch has to use an older (backed-up) value.
//   xReg    - VI register index tested by the branch; 0 returns immediately.
//   infoVar - ends up set to 1 whenever the scan counted at least one
//             instruction (i != 0), or matched the previous block's viBackUp.
// iPC is saved on entry (bPC) and restored on every exit path after the scan.
microVUt(void) analyzeBranchVI(mV, int xReg, bool &infoVar) {
if (!xReg) return;
int i;
int iEnd = aMin(5, (mVUcount+1));
int bPC = iPC;
incPC2(-2);
for (i = 0; i < iEnd; i++) {
// Stepped back as many instructions as the block has run: fall back to the
// VI register recorded in the block's starting pipeline state.
// NOTE(review): (i < 5) looks always true here, assuming aMin caps iEnd at 5.
if ((i == mVUcount) && (i < 5)) {
if (mVUpBlock->pState.viBackUp == xReg) {
infoVar = 1;
i++;
}
break;
}
if ((mVUlow.VI_write.reg == xReg) && mVUlow.VI_write.used) {
// NOTE(review): i == 5 looks unreachable inside this loop (i < iEnd <= 5).
if (mVUlow.readFlags || i == 5) break;
// The instruction directly before the branch only needs to write xReg...
if (i == 0) { incPC2(-2); continue; }
// ...earlier ones must also read xReg for the scan to keep going.
if (((mVUlow.VI_read[0].reg == xReg) && (mVUlow.VI_read[0].used))
|| ((mVUlow.VI_read[1].reg == xReg) && (mVUlow.VI_read[1].used)))
{ incPC2(-2); continue; }
}
break;
}
if (i) {
if (!infoVar) {
// Step forward to the instruction after the one the scan stopped on
// and mark it to back up the VI register.
incPC2(2);
mVUlow.backupVI = 1;
infoVar = 1;
}
iPC = bPC;
Console.WriteLn( Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x]", getIndex, i, xPC);
}
else iPC = bPC;
}
// Branch in Branch Delay-Slots
// Detects a branch placed in the delay slot of the preceding branch.
// Returns 1 when the previous instruction is a branch (after flagging both
// instructions, setting blockType to 2 and logging a warning), otherwise 0.
// Always returns 0 for the first instruction of a block.
microVUt(int) mVUbranchCheck(mV) {
	if (mVUcount == 0) return 0;

	incPC(-2); // Inspect the previous instruction
	const bool prevIsBranch = (mVUlow.branch != 0);
	if (prevIsBranch) mVUlow.badBranch = true;
	incPC(2);  // Return to the current instruction

	if (!prevIsBranch) return 0;

	mVUlow.evilBranch = true;
	mVUregs.blockType = 2;
	Console.Warning("microVU%d Warning: Branch in Branch delay slot! [%04x]", mVU->index, xPC);
	return 1;
}
// Analysis pass for conditional branches that test one VI register (Is).
// The VI-delay scan is skipped when the instruction stalls or when it sits in
// another branch's delay slot.
microVUt(void) mVUanalyzeCondBranch1(mV, int Is) {
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	if (mVUstall || mVUbranchCheck(mVU)) return;
	analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
}
// Analysis pass for conditional branches that compare two VI registers
// (Is and It). The VI-delay scans are skipped when the instruction stalls or
// when it sits in another branch's delay slot.
microVUt(void) mVUanalyzeCondBranch2(mV, int Is, int It) {
	analyzeVIreg1(Is, mVUlow.VI_read[0]);
	analyzeVIreg1(It, mVUlow.VI_read[1]);
	if (mVUstall || mVUbranchCheck(mVU)) return;
	analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
	analyzeBranchVI(mVU, It, mVUlow.memReadIt);
}
// Analysis pass for unconditional branches. For the linking form (isBAL) the
// VI write of 'It' is recorded and 'It' is registered as a constant holding
// bSaveAddr.
microVUt(void) mVUanalyzeNormBranch(mV, int It, bool isBAL) {
	mVUbranchCheck(mVU);
	if (!isBAL) return;
	analyzeVIreg2(It, mVUlow.VI_write, 1);
	setConstReg(It, bSaveAddr);
}
// Analysis pass for register jumps. Sets the branch type (10 for the linking
// form, 9 otherwise), captures a known-constant jump target when the constant
// hack is not active, and records the VI read of 'Is'. The linking form also
// records the VI write of 'It' and registers it as a constant (bSaveAddr).
microVUt(void) mVUanalyzeJump(mV, int Is, int It, bool isJALR) {
	mVUbranchCheck(mVU);

	if (isJALR) mVUlow.branch = 10;
	else        mVUlow.branch = 9;

	const bool targetIsConst = mVUconstReg[Is].isValid && !CHECK_VU_CONSTHACK;
	if (targetIsConst) {
		mVUlow.constJump.isValid  = 1;
		mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
		//DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU->index);
	}

	analyzeVIreg1(Is, mVUlow.VI_read[0]);

	if (isJALR) {
		analyzeVIreg2(It, mVUlow.VI_write, 1);
		setConstReg(It, bSaveAddr);
	}
}

View File

@ -1,106 +1,106 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//------------------------------------------------------------------
// Micro VU - Clamp Functions
//------------------------------------------------------------------
// Operand table for the PMINUD step of mVUclamp2's SSE4 path.
// Row 0 is used when xyzw selects a single component, row 1 otherwise.
// 0xff7fffff is the bit pattern of the most negative finite float.
const __aligned16 u32 sse4_minvals[2][4] = {
{ 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000
{ 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //1111
};
// Operand table for the PMINSD step of mVUclamp2's SSE4 path.
// Row 0 is used when xyzw selects a single component, row 1 otherwise.
// 0x7f7fffff is the bit pattern of the largest finite float.
const __aligned16 u32 sse4_maxvals[2][4] = {
{ 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1000
{ 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //1111
};
// Used for Result Clamping
// Note: This function will not preserve NaN values' sign.
// The theory behind this is that when we compute a result, and we've
// gotten a NaN value, then something went wrong; and the NaN's sign
// is not to be trusted. Games like positive values better usually,
// and it's faster... so just always make NaNs into positive infinity.
// Emits a min/max clamp of XMM register 'reg' against mVUglob.maxvals/minvals.
//   reg     - XMM register to clamp
//   regT1   - unused by this function
//   xyzw    - component mask; 1, 2, 4 or 8 selects the scalar (SS) form,
//             anything else the packed (PS) form
//   bClampE - when clampE is active, clamping is emitted only if this is set;
//             when clampE is inactive, CHECK_VU_OVERFLOW decides instead
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
	const bool doClamp = (!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE);
	if (!doClamp) return;

	const bool singleComponent = (xyzw == 1) || (xyzw == 2) || (xyzw == 4) || (xyzw == 8);
	if (singleComponent) {
		SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
		SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
	}
	else {
		SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
		SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
	}
}
// Used for Operand Clamping
// Note 1: If 'preserve sign' mode is on, it will preserve the sign of NaN values.
// Note 2: Using regalloc here seems to contaminate some regs in certain games.
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
// Emits a sign-preserving clamp of XMM register 'reg', or defers to mVUclamp1
// when sign-preserving mode does not apply.
//   reg     - XMM register to clamp
//   regT1   - scratch XMM register; a negative value makes this function
//             borrow (reg+1)%8, saving/restoring it through mVU->xmmCTemp
//   xyzw    - component mask; 1, 2, 4 or 8 selects the scalar form
//   bClampE - forwarded to mVUclamp1 and part of the mode test
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
	const bool keepSign = (!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW);
	if (!keepSign) {
		mVUclamp1(reg, regT1, xyzw, bClampE);
		return;
	}

	const bool singleComponent = (xyzw == 1) || (xyzw == 2) || (xyzw == 4) || (xyzw == 8);

	// SSE4: two integer min instructions do the whole job, no scratch needed.
	if (x86caps.hasStreamingSIMD4Extensions) {
		const int row = singleComponent ? 0 : 1;
		SSE4_PMINSD_M128_to_XMM(reg, (uptr)&sse4_maxvals[row][0]);
		SSE4_PMINUD_M128_to_XMM(reg, (uptr)&sse4_minvals[row][0]);
		return;
	}

	// No scratch register supplied: spill a neighbour to memory and use it.
	const bool borrowedTemp = (regT1 < 0);
	if (borrowedTemp) {
		regT1 = (reg + 1) % 8;
		SSE_MOVAPS_XMM_to_M128((uptr)mVU->xmmCTemp, regT1);
	}

	// Save the sign bits, clamp, then OR the sign bits back in.
	SSE_MOVAPS_XMM_to_XMM(regT1, reg);
	SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
	if (singleComponent) {
		SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
		SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
	}
	else {
		SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
		SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
	}
	SSE_ORPS_XMM_to_XMM(reg, regT1);

	if (borrowedTemp) {
		SSE_MOVAPS_M128_to_XMM(regT1, (uptr)mVU->xmmCTemp);
	}
}
// Used for operand clamping on every SSE instruction (add/sub/mul/div)
// Operand clamp applied only when clampE is active; forwards to mVUclamp2
// with bClampE forced on.
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
	if (!clampE) return;
	mVUclamp2(mVU, reg, regT1, xyzw, 1);
}
// Used for result clamping on every SSE instruction (add/sub/mul/div)
// Note: Disabled in "preserve sign" mode because in certain cases it
// makes too much code-gen, and you get jump8-overflows in certain
// emulated opcodes (causing crashes). Since we're clamping the operands
// with mVUclamp3, we should almost never be getting a NaN result,
// but this clamp is just a precaution just-in-case.
// Result clamp applied only when clampE is active and sign-preserving mode is
// off; forwards to mVUclamp1 with bClampE forced on.
void mVUclamp4(int reg, int regT1, int xyzw) {
	if (!clampE || CHECK_VU_SIGN_OVERFLOW) return;
	mVUclamp1(reg, regT1, xyzw, 1);
}
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//------------------------------------------------------------------
// Micro VU - Clamp Functions
//------------------------------------------------------------------
// Operand table for the PMINUD step of mVUclamp2's SSE4 path.
// Row 0 is used when xyzw selects a single component, row 1 otherwise.
// 0xff7fffff is the bit pattern of the most negative finite float.
const __aligned16 u32 sse4_minvals[2][4] = {
{ 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000
{ 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //1111
};
// Operand table for the PMINSD step of mVUclamp2's SSE4 path.
// Row 0 is used when xyzw selects a single component, row 1 otherwise.
// 0x7f7fffff is the bit pattern of the largest finite float.
const __aligned16 u32 sse4_maxvals[2][4] = {
{ 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1000
{ 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //1111
};
// Used for Result Clamping
// Note: This function will not preserve NaN values' sign.
// The theory behind this is that when we compute a result, and we've
// gotten a NaN value, then something went wrong; and the NaN's sign
// is not to be trusted. Games like positive values better usually,
// and it's faster... so just always make NaNs into positive infinity.
// Emits a min/max clamp of XMM register 'reg' against mVUglob.maxvals/minvals.
//   reg     - XMM register to clamp
//   regT1   - unused by this function
//   xyzw    - component mask; 1, 2, 4 or 8 selects the scalar (SS) form,
//             anything else the packed (PS) form
//   bClampE - when clampE is active, clamping is emitted only if this is set;
//             when clampE is inactive, CHECK_VU_OVERFLOW decides instead
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
	const bool doClamp = (!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE);
	if (!doClamp) return;

	const bool singleComponent = (xyzw == 1) || (xyzw == 2) || (xyzw == 4) || (xyzw == 8);
	if (singleComponent) {
		SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
		SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
	}
	else {
		SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
		SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
	}
}
// Used for Operand Clamping
// Note 1: If 'preserve sign' mode is on, it will preserve the sign of NaN values.
// Note 2: Using regalloc here seems to contaminate some regs in certain games.
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
// Emits a sign-preserving clamp of XMM register 'reg', or defers to mVUclamp1
// when sign-preserving mode does not apply.
//   reg     - XMM register to clamp
//   regT1   - scratch XMM register; a negative value makes this function
//             borrow (reg+1)%8, saving/restoring it through mVU->xmmCTemp
//   xyzw    - component mask; 1, 2, 4 or 8 selects the scalar form
//   bClampE - forwarded to mVUclamp1 and part of the mode test
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
	const bool keepSign = (!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW);
	if (!keepSign) {
		mVUclamp1(reg, regT1, xyzw, bClampE);
		return;
	}

	const bool singleComponent = (xyzw == 1) || (xyzw == 2) || (xyzw == 4) || (xyzw == 8);

	// SSE4: two integer min instructions do the whole job, no scratch needed.
	if (x86caps.hasStreamingSIMD4Extensions) {
		const int row = singleComponent ? 0 : 1;
		SSE4_PMINSD_M128_to_XMM(reg, (uptr)&sse4_maxvals[row][0]);
		SSE4_PMINUD_M128_to_XMM(reg, (uptr)&sse4_minvals[row][0]);
		return;
	}

	// No scratch register supplied: spill a neighbour to memory and use it.
	const bool borrowedTemp = (regT1 < 0);
	if (borrowedTemp) {
		regT1 = (reg + 1) % 8;
		SSE_MOVAPS_XMM_to_M128((uptr)mVU->xmmCTemp, regT1);
	}

	// Save the sign bits, clamp, then OR the sign bits back in.
	SSE_MOVAPS_XMM_to_XMM(regT1, reg);
	SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
	if (singleComponent) {
		SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
		SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
	}
	else {
		SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
		SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
	}
	SSE_ORPS_XMM_to_XMM(reg, regT1);

	if (borrowedTemp) {
		SSE_MOVAPS_M128_to_XMM(regT1, (uptr)mVU->xmmCTemp);
	}
}
// Used for operand clamping on every SSE instruction (add/sub/mul/div)
// Operand clamp applied only when clampE is active; forwards to mVUclamp2
// with bClampE forced on.
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
	if (!clampE) return;
	mVUclamp2(mVU, reg, regT1, xyzw, 1);
}
// Used for result clamping on every SSE instruction (add/sub/mul/div)
// Note: Disabled in "preserve sign" mode because in certain cases it
// makes too much code-gen, and you get jump8-overflows in certain
// emulated opcodes (causing crashes). Since we're clamping the operands
// with mVUclamp3, we should almost never be getting a NaN result,
// but this clamp is just a precaution just-in-case.
// Result clamp applied only when clampE is active and sign-preserving mode is
// off; forwards to mVUclamp1 with bClampE forced on.
void mVUclamp4(int reg, int regT1, int xyzw) {
	if (!clampE || CHECK_VU_SIGN_OVERFLOW) return;
	mVUclamp1(reg, regT1, xyzw, 1);
}

View File

@ -1,337 +1,337 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Per-register info with one byte for each of the x/y/z/w components; 'reg'
// overlays the four bytes so they can be read or compared as one value.
union regInfo {
u32 reg;
struct {
u8 x;
u8 y;
u8 z;
u8 w;
};
};
#ifdef _MSC_VER
# pragma pack(1)
# pragma warning(disable:4996) // 'function': was declared deprecated
#endif
// Pipeline state snapshot stored with each block (see microBlock).
// NOTE(review): the members below add up to 161 bytes (4+6+16+128+2+5) under
// 1-byte packing, not the 160 stated on 'padding' — confirm the intended size.
struct __aligned16 microRegInfo { // Ordered for Faster Compares
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
u8 needExactMatch; // If set, block needs an exact match of pipeline state
u8 q; // presumably cycle info for the Q reg (cf. microTempRegInfo) — confirm
u8 p; // presumably cycle info for the P reg — confirm
u8 r; // presumably info for the R reg — confirm
u8 xgkick; // presumably cycle info for XGkick — confirm
u8 viBackUp; // VI reg index compared against a branch's VI reg by analyzeBranchVI
u8 VI[16]; // one entry per VI register
regInfo VF[32]; // one entry per VF register
u8 flags; // clip x2 :: status x2
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
u8 padding[5]; // 160 bytes
} __packed;
// A compiled block: the pipeline state at its start and end plus a pointer to
// the start of its generated code.
struct __aligned16 microBlock {
microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code
} __packed;
#ifdef _MSC_VER
# pragma pack()
#endif
// Temporary pipeline info for the instruction being analyzed (used as
// microIR::regsTemp so upper and lower ops of one cycle do not conflict).
struct microTempRegInfo {
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
u8 VFreg[2]; // Index of the VF reg
u8 VI; // Holds cycle info for Id
u8 VIreg; // Index of the VI reg
u8 q; // Holds cycle info for Q reg
u8 p; // Holds cycle info for P reg
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
u8 xgkick; // Holds the cycle info for XGkick
};
// Describes one VF register access: which register and which components.
struct microVFreg {
u8 reg; // Reg Index
u8 x; // X vector read/written to?
u8 y; // Y vector read/written to?
u8 z; // Z vector read/written to?
u8 w; // W vector read/written to?
};
// Describes one VI register access; 'reg' is only meaningful when 'used' is set.
struct microVIreg {
u8 reg; // Reg Index
u8 used; // Reg is Used? (Read/Written)
};
// A possibly-known constant value for a VI register.
struct microConstInfo {
u8 isValid; // Is the constant in regValue valid?
u32 regValue; // Constant Value
};
// Analysis info recorded for the upper op of an instruction.
struct microUpperOp {
bool eBit; // Has E-bit set
bool iBit; // Has I-bit set
bool mBit; // Has M-bit set
microVFreg VF_write; // VF Vectors written to by this instruction
microVFreg VF_read[2]; // VF Vectors read by this instruction
};
// Analysis info recorded for the lower op of an instruction.
struct microLowerOp {
microVFreg VF_write; // VF Vectors written to by this instruction
microVFreg VF_read[2]; // VF Vectors read by this instruction
microVIreg VI_write; // VI reg written to by this instruction
microVIreg VI_read[2]; // VI regs read by this instruction
microConstInfo constJump; // Constant Reg Info for JR/JALR instructions
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JR, 10 = JALR)
bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot
bool evilBranch;// This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch)
bool isNOP; // This instruction is a NOP
bool isFSSET; // This instruction is a FSSET
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
bool memReadIs; // Read Is (VI reg) from memory (used by branches)
bool memReadIt; // Read It (VI reg) from memory (used by branches)
bool readFlags; // Current Instruction reads Status, Mac, or Clip flags
};
// Per-instruction bookkeeping for one flag type (used for the status, mac and
// clip flags in microOp).
struct microFlagInst {
bool doFlag; // Update Flag on this Instruction
bool doNonSticky; // Update O,U,S,Z (non-sticky) bits on this Instruction (status flag only)
u8 write; // Points to the instance that should be written to (s-stage write)
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
};
// Four-entry tables for the status, mac and clip flags plus a cycle counter.
// NOTE(review): presumably one entry per flag instance holding cycle info —
// confirm against the flag-allocation code.
struct microFlagCycles {
int xStatus[4];
int xMac[4];
int xClip[4];
int cycles;
};
// Everything the analysis pass records for one instruction (upper + lower op).
struct microOp {
u8 stall; // Info on how much current instruction stalled
bool isEOB; // Cur Instruction is last instruction in block (End of Block)
bool isBdelay; // Cur Instruction in Branch Delay slot
bool swapOps; // Run Lower Instruction before Upper Instruction
bool backupVF; // Backup mVUlow.VF_write.reg, and restore it before the Upper Instruction is called
bool doXGKICK; // Do XGKICK transfer on this instruction
bool doDivFlag; // Transfer Div flag to Status Flag on this instruction
int readQ; // Q instance for reading
int writeQ; // Q instance for writing
int readP; // P instance for reading
int writeP; // P instance for writing
microFlagInst sFlag; // Status Flag Instance Info
microFlagInst mFlag; // Mac Flag Instance Info
microFlagInst cFlag; // Clip Flag Instance Info
microUpperOp uOp; // Upper Op Info
microLowerOp lOp; // Lower Op Info
};
// Intermediate representation for the block currently being compiled.
// pSize sizes the per-instruction table: info[] has pSize/2 entries.
template<u32 pSize>
struct microIR {
microBlock block; // Block/Pipeline info
microBlock* pBlock; // Pointer to a block in mVUblocks
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
microOp info[pSize/2]; // Info for Instructions in current block
microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks
u8 branch;
u32 cycles; // Cycles for current block
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
u32 curPC; // Current PC
u32 startPC; // Start PC for Cur Block
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
};
//------------------------------------------------------------------
// Reg Alloc
//------------------------------------------------------------------
// Register load/store/merge helpers called by microRegAlloc; declared here,
// defined elsewhere.
void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW);
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
void mVUloadReg(int reg, uptr offset, int xyzw);
void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs);
// State that microRegAlloc tracks for one host XMM register.
struct microXMM {
int reg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
int count; // Count of when last used
bool isNeeded; // Is needed for current instruction
};
// Number of XMM registers managed by microRegAlloc (size of its xmmReg table).
#define xmmTotal 7 // Don't allocate PQ?
// Register allocator that maps VU registers (VF regs, ACC = 32, I = 33) onto
// xmmTotal host XMM registers. It caches loaded values, tracks which
// components were modified, and emits the loads/stores against vuRegs.
class microRegAlloc {
private:
microXMM xmmReg[xmmTotal];
VURegs* vuRegs;
int counter;
// Returns the index (>= startIdx) of the not-needed register with the
// smallest 'count' (the higher index wins ties), or -1 if all are needed.
int findFreeRegRec(int startIdx) {
for (int i = startIdx; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded) {
int x = findFreeRegRec(i+1);
if (x == -1) return i;
return ((xmmReg[i].count < xmmReg[x].count) ? i : x);
}
}
return -1;
}
// Picks a register to (re)use: first any not-needed temp register, otherwise
// the not-needed register with the smallest 'count'. If every register is
// needed an error is logged and index 0 is returned.
int findFreeReg() {
for (int i = 0; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded && (xmmReg[i].reg < 0)) {
return i; // Reg is not needed and was a temp reg
}
}
int x = findFreeRegRec(0);
if (x < 0) { DevCon.Error("microVU Allocation Error!"); return 0; }
return x;
}
public:
// Stores the VU register file pointer and resets all allocation state.
microRegAlloc(VURegs* vuRegsPtr) {
vuRegs = vuRegsPtr;
reset();
}
// Clears every register entry (without writing anything back) and zeroes
// the usage counter.
void reset() {
for (int i = 0; i < xmmTotal; i++) {
clearReg(i);
}
counter = 0;
}
// Writes back every register; with clearState set, each entry is also
// cleared afterwards so no cached copies remain.
void flushAll(bool clearState = 1) {
for (int i = 0; i < xmmTotal; i++) {
writeBackReg(i);
if (clearState) clearReg(i);
}
}
// Resets entry 'reg' to an unused temp; emits no code.
void clearReg(int reg) {
xmmReg[reg].reg = -1;
xmmReg[reg].count = 0;
xmmReg[reg].xyzw = 0;
xmmReg[reg].isNeeded = 0;
}
// Clears every entry currently associated with VU register 'VFreg'.
void clearRegVF(int VFreg) {
for (int i = 0; i < xmmTotal; i++) {
if (xmmReg[i].reg == VFreg) clearReg(i);
}
}
// Stores register 'reg' back to vuRegs if it holds a modified VF/ACC/I value.
// A fully written register (xyzw == 0xf) is then kept as an unmodified
// cached copy; in every other case the entry is cleared. With invalidateRegs
// set, other not-needed entries caching the same VU register are cleared
// after the store.
void writeBackReg(int reg, bool invalidateRegs = 1) {
if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0
if (xmmReg[reg].reg == 33) SSE_MOVSS_XMM_to_M32((uptr)&vuRegs->VI[REG_I].UL, reg);
else if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
if (invalidateRegs) {
for (int i = 0; i < xmmTotal; i++) {
if ((i == reg) || xmmReg[i].isNeeded) continue;
if (xmmReg[i].reg == xmmReg[reg].reg) {
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", xmmReg[i].reg);
clearReg(i); // Invalidate any Cached Regs of same vf Reg
}
}
}
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
xmmReg[reg].count = counter;
xmmReg[reg].xyzw = 0;
xmmReg[reg].isNeeded = 0;
return;
}
}
clearReg(reg); // Clear Reg
}
// Marks 'reg' as no longer needed by the current instruction (out-of-range
// indices are ignored). If it was modified and holds a VF/ACC/I value, other
// entries for the same VU register are either merged with it (partial write,
// first match only) or cleared; a partial write that found nothing to merge
// into is written back. A modified temp or vf0 entry is simply cleared.
void clearNeeded(int reg) {
if ((reg < 0) || (reg >= xmmTotal)) return;
xmmReg[reg].isNeeded = 0;
if (xmmReg[reg].xyzw) { // Reg was modified
if (xmmReg[reg].reg > 0) {
int mergeRegs = 0; // 0 = full write, 1 = partial write pending merge, 2 = merged
if (xmmReg[reg].xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
if (i == reg) continue;
if (xmmReg[i].reg == xmmReg[reg].reg) {
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", xmmReg[i].reg);
if (mergeRegs == 1) {
mVUmergeRegs(i, reg, xmmReg[reg].xyzw, 1);
xmmReg[i].xyzw = 0xf;
xmmReg[i].count = counter;
mergeRegs = 2;
}
else clearReg(i);
}
}
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
}
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
}
}
// Returns an XMM register index for the current instruction and marks it
// needed.
//   vfLoadReg  - VU register to load (-1 = none; 32 = ACC; 33 = I)
//   vfWriteReg - VU register the result will be written to (-1 = read-only)
//   xyzw       - components that will be written (stored as the entry's xyzw)
//   cloneWrite - when a cached copy of vfLoadReg exists and the register will
//                be written, copy it into another register instead of
//                modifying the cached one
int allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
counter++;
if (vfLoadReg >= 0) { // Search For Cached Regs
for (int i = 0; i < xmmTotal; i++) {
if ((xmmReg[i].reg == vfLoadReg) && (!xmmReg[i].xyzw // Reg Was Not Modified
|| (xmmReg[i].reg && (xmmReg[i].xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
int z = i;
if (vfWriteReg >= 0) { // Reg will be modified
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
z = findFreeReg();
writeBackReg(z);
if (z!=i && xyzw==8) SSE_MOVAPS_XMM_to_XMM (z, i);
else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
xmmReg[i].count = counter; // Reg i was used, so update counter
}
else { // Don't clone reg, but shuffle to adjust for SS ops
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(z); }
if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
}
xmmReg[z].reg = vfWriteReg;
xmmReg[z].xyzw = xyzw;
}
xmmReg[z].count = counter;
xmmReg[z].isNeeded = 1;
return z;
}
}
}
// No usable cached copy: take a free register and load it from memory.
int x = findFreeReg();
writeBackReg(x);
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
if ((vfLoadReg == 0) && !(xyzw & 1)) { SSE2_PXOR_XMM_to_XMM(x, x); }
else if (vfLoadReg == 33) mVUloadIreg(x, xyzw, vuRegs);
else if (vfLoadReg == 32) mVUloadReg (x, (uptr)&vuRegs->ACC.UL[0], xyzw);
else if (vfLoadReg >= 0) mVUloadReg (x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw);
xmmReg[x].reg = vfWriteReg;
xmmReg[x].xyzw = xyzw;
}
else { // Reg Will Not Be Modified (always load full reg for caching)
if (vfLoadReg == 33) mVUloadIreg(x, 0xf, vuRegs);
else if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]);
else if (vfLoadReg >= 0) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0]);
xmmReg[x].reg = vfLoadReg;
xmmReg[x].xyzw = 0;
}
xmmReg[x].count = counter;
xmmReg[x].isNeeded = 1;
return x;
}
};
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Per-register info with one byte for each of the x/y/z/w components; 'reg'
// overlays the four bytes so they can be read or compared as one value.
union regInfo {
u32 reg;
struct {
u8 x;
u8 y;
u8 z;
u8 w;
};
};
#ifdef _MSC_VER
# pragma pack(1)
# pragma warning(disable:4996) // 'function': was declared deprecated
#endif
// Pipeline state snapshot stored with each block (see microBlock).
// NOTE(review): the members below add up to 161 bytes (4+6+16+128+2+5) under
// 1-byte packing, not the 160 stated on 'padding' — confirm the intended size.
struct __aligned16 microRegInfo { // Ordered for Faster Compares
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
u8 needExactMatch; // If set, block needs an exact match of pipeline state
u8 q; // presumably cycle info for the Q reg (cf. microTempRegInfo) — confirm
u8 p; // presumably cycle info for the P reg — confirm
u8 r; // presumably info for the R reg — confirm
u8 xgkick; // presumably cycle info for XGkick — confirm
u8 viBackUp; // VI reg index compared against a branch's VI reg by analyzeBranchVI
u8 VI[16]; // one entry per VI register
regInfo VF[32]; // one entry per VF register
u8 flags; // clip x2 :: status x2
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
u8 padding[5]; // 160 bytes
} __packed;
// A compiled block: the pipeline state at its start and end plus a pointer to
// the start of its generated code.
struct __aligned16 microBlock {
microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code
} __packed;
#ifdef _MSC_VER
# pragma pack()
#endif
// Temporary pipeline info for the instruction being analyzed (used as
// microIR::regsTemp so upper and lower ops of one cycle do not conflict).
struct microTempRegInfo {
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
u8 VFreg[2]; // Index of the VF reg
u8 VI; // Holds cycle info for Id
u8 VIreg; // Index of the VI reg
u8 q; // Holds cycle info for Q reg
u8 p; // Holds cycle info for P reg
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
u8 xgkick; // Holds the cycle info for XGkick
};
// Describes one VF register access: which register and which components.
struct microVFreg {
u8 reg; // Reg Index
u8 x; // X vector read/written to?
u8 y; // Y vector read/written to?
u8 z; // Z vector read/written to?
u8 w; // W vector read/written to?
};
// Describes one VI register access; 'reg' is only meaningful when 'used' is set.
struct microVIreg {
u8 reg; // Reg Index
u8 used; // Reg is Used? (Read/Written)
};
// A possibly-known constant value for a VI register.
struct microConstInfo {
u8 isValid; // Is the constant in regValue valid?
u32 regValue; // Constant Value
};
// Analysis info recorded for the upper op of an instruction.
struct microUpperOp {
bool eBit; // Has E-bit set
bool iBit; // Has I-bit set
bool mBit; // Has M-bit set
microVFreg VF_write; // VF Vectors written to by this instruction
microVFreg VF_read[2]; // VF Vectors read by this instruction
};
// Analysis info recorded for the lower op of an instruction.
struct microLowerOp {
microVFreg VF_write; // VF Vectors written to by this instruction
microVFreg VF_read[2]; // VF Vectors read by this instruction
microVIreg VI_write; // VI reg written to by this instruction
microVIreg VI_read[2]; // VI regs read by this instruction
microConstInfo constJump; // Constant Reg Info for JR/JALR instructions
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JR, 10 = JALR)
bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot
bool evilBranch;// This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch)
bool isNOP; // This instruction is a NOP
bool isFSSET; // This instruction is a FSSET
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
bool memReadIs; // Read Is (VI reg) from memory (used by branches)
bool memReadIt; // Read It (VI reg) from memory (used by branches)
bool readFlags; // Current Instruction reads Status, Mac, or Clip flags
};
// Per-instruction bookkeeping for one flag type (used for the status, mac and
// clip flags in microOp).
struct microFlagInst {
bool doFlag; // Update Flag on this Instruction
bool doNonSticky; // Update O,U,S,Z (non-sticky) bits on this Instruction (status flag only)
u8 write; // Points to the instance that should be written to (s-stage write)
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
};
// Four-entry tables for the status, mac and clip flags plus a cycle counter.
// NOTE(review): presumably one entry per flag instance holding cycle info —
// confirm against the flag-allocation code.
struct microFlagCycles {
int xStatus[4];
int xMac[4];
int xClip[4];
int cycles;
};
// Everything the analysis pass records for one instruction (upper + lower op).
struct microOp {
u8 stall; // Info on how much current instruction stalled
bool isEOB; // Cur Instruction is last instruction in block (End of Block)
bool isBdelay; // Cur Instruction in Branch Delay slot
bool swapOps; // Run Lower Instruction before Upper Instruction
bool backupVF; // Backup mVUlow.VF_write.reg, and restore it before the Upper Instruction is called
bool doXGKICK; // Do XGKICK transfer on this instruction
bool doDivFlag; // Transfer Div flag to Status Flag on this instruction
int readQ; // Q instance for reading
int writeQ; // Q instance for writing
int readP; // P instance for reading
int writeP; // P instance for writing
microFlagInst sFlag; // Status Flag Instance Info
microFlagInst mFlag; // Mac Flag Instance Info
microFlagInst cFlag; // Clip Flag Instance Info
microUpperOp uOp; // Upper Op Info
microLowerOp lOp; // Lower Op Info
};
// Intermediate representation for the block currently being compiled.
// pSize sizes the per-instruction table: info[] has pSize/2 entries.
template<u32 pSize>
struct microIR {
microBlock block; // Block/Pipeline info
microBlock* pBlock; // Pointer to a block in mVUblocks
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
microOp info[pSize/2]; // Info for Instructions in current block
microConstInfo constReg[16]; // Simple Const Propagation Info for VI regs within blocks
u8 branch;
u32 cycles; // Cycles for current block
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
u32 curPC; // Current PC
u32 startPC; // Start PC for Cur Block
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
};
//------------------------------------------------------------------
// Reg Alloc
//------------------------------------------------------------------
// Register load/store/merge helpers called by microRegAlloc; declared here,
// defined elsewhere.
void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW);
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
void mVUloadReg(int reg, uptr offset, int xyzw);
void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs);
// State that microRegAlloc tracks for one host XMM register.
struct microXMM {
int reg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
int count; // Count of when last used
bool isNeeded; // Is needed for current instruction
};
// Number of XMM registers managed by microRegAlloc (size of its xmmReg table).
#define xmmTotal 7 // Don't allocate PQ?
class microRegAlloc {
private:
microXMM xmmReg[xmmTotal];
VURegs* vuRegs;
int counter;
// Returns the index (>= startIdx) of the not-needed register with the
// smallest 'count', or -1 if every register from startIdx on is needed.
// On equal counts the higher index wins, hence the back-to-front scan with a
// strict comparison.
int findFreeRegRec(int startIdx) {
	int best = -1;
	for (int idx = xmmTotal - 1; idx >= startIdx; idx--) {
		if (xmmReg[idx].isNeeded) continue;
		if ((best == -1) || (xmmReg[idx].count < xmmReg[best].count)) {
			best = idx;
		}
	}
	return best;
}
int findFreeReg() {
for (int i = 0; i < xmmTotal; i++) {
if (!xmmReg[i].isNeeded && (xmmReg[i].reg < 0)) {
return i; // Reg is not needed and was a temp reg
}
}
int x = findFreeRegRec(0);
if (x < 0) { DevCon.Error("microVU Allocation Error!"); return 0; }
return x;
}
public:
// Stores the VU register file pointer and resets all allocation state.
microRegAlloc(VURegs* vuRegsPtr)
	: vuRegs(vuRegsPtr)
{
	reset();
}
// Zeroes the usage counter and clears every register entry; nothing is
// written back.
void reset() {
	counter = 0;
	for (int idx = 0; idx != xmmTotal; ++idx) clearReg(idx);
}
// Writes back every register. With clearState set, each entry is also cleared
// right after its write-back so no cached copies remain.
void flushAll(bool clearState = 1) {
	for (int idx = 0; idx < xmmTotal; ++idx) {
		writeBackReg(idx);
		if (!clearState) continue;
		clearReg(idx);
	}
}
void clearReg(int reg) {
xmmReg[reg].reg = -1;
xmmReg[reg].count = 0;
xmmReg[reg].xyzw = 0;
xmmReg[reg].isNeeded = 0;
}
// Clears every entry currently associated with VU register 'VFreg'.
void clearRegVF(int VFreg) {
	for (int idx = 0; idx < xmmTotal; ++idx) {
		if (xmmReg[idx].reg != VFreg) continue;
		clearReg(idx);
	}
}
// Stores register 'reg' back to vuRegs if it holds a modified VF/ACC/I value.
// A fully written register (xyzw == 0xf) is then kept as an unmodified
// cached copy; in every other case the entry is cleared. With invalidateRegs
// set, other not-needed entries caching the same VU register are cleared
// after the store.
void writeBackReg(int reg, bool invalidateRegs = 1) {
if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0
if (xmmReg[reg].reg == 33) SSE_MOVSS_XMM_to_M32((uptr)&vuRegs->VI[REG_I].UL, reg);
else if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
if (invalidateRegs) {
for (int i = 0; i < xmmTotal; i++) {
if ((i == reg) || xmmReg[i].isNeeded) continue;
if (xmmReg[i].reg == xmmReg[reg].reg) {
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", xmmReg[i].reg);
clearReg(i); // Invalidate any Cached Regs of same vf Reg
}
}
}
if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
xmmReg[reg].count = counter;
xmmReg[reg].xyzw = 0;
xmmReg[reg].isNeeded = 0;
return;
}
}
clearReg(reg); // Clear Reg
}
// Marks 'reg' as no longer needed by the current instruction (out-of-range
// indices are ignored). If it was modified and holds a VF/ACC/I value, other
// entries for the same VU register are either merged with it (partial write,
// first match only) or cleared; a partial write that found nothing to merge
// into is written back. A modified temp or vf0 entry is simply cleared.
void clearNeeded(int reg) {
if ((reg < 0) || (reg >= xmmTotal)) return;
xmmReg[reg].isNeeded = 0;
if (xmmReg[reg].xyzw) { // Reg was modified
if (xmmReg[reg].reg > 0) {
int mergeRegs = 0; // 0 = full write, 1 = partial write pending merge, 2 = merged
if (xmmReg[reg].xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
if (i == reg) continue;
if (xmmReg[i].reg == xmmReg[reg].reg) {
if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", xmmReg[i].reg);
if (mergeRegs == 1) {
mVUmergeRegs(i, reg, xmmReg[reg].xyzw, 1);
xmmReg[i].xyzw = 0xf;
xmmReg[i].count = counter;
mergeRegs = 2;
}
else clearReg(i);
}
}
if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
}
else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
}
}
// Returns the index of an xmm slot set up for the requested access and marks it as needed.
//   vfLoadReg  - register whose value must be present in the slot (-1: nothing to load);
//                32 and 33 are handled as ACC and the I register respectively
//   vfWriteReg - register the slot will be written back to (-1: slot is read-only)
//   xyzw       - vector mask stored in the slot when it will be modified
//   cloneWrite - when reusing a cached slot for a write, copy it into another slot
//                instead of modifying the cached one in place
// Every call bumps 'counter', which is stored in the slot's 'count' field.
int allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
counter++;
if (vfLoadReg >= 0) { // Search For Cached Regs
for (int i = 0; i < xmmTotal; i++) {
if ((xmmReg[i].reg == vfLoadReg) && (!xmmReg[i].xyzw // Reg Was Not Modified
|| (xmmReg[i].reg && (xmmReg[i].xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
int z = i;
if (vfWriteReg >= 0) { // Reg will be modified
if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
z = findFreeReg();
writeBackReg(z);
// Copy the cached value into the clone. For the single-vector masks 4/2/1 a PSHUFD
// with immediate 1/2/3 is emitted instead of a plain move (presumably to bring the
// selected vector into the low lane for scalar ops -- confirm against the emitter).
if (z!=i && xyzw==8) SSE_MOVAPS_XMM_to_XMM (z, i);
else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i);
xmmReg[i].count = counter; // Reg i was used, so update counter
}
else { // Don't clone reg, but shuffle to adjust for SS ops
// Here z == i, so the shuffles below operate in place on the cached slot
if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(z); }
if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1);
else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2);
else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3);
}
xmmReg[z].reg = vfWriteReg;
xmmReg[z].xyzw = xyzw;
}
xmmReg[z].count = counter;
xmmReg[z].isNeeded = 1;
return z;
}
}
}
// No usable cached slot: take a free one and flush whatever it held
int x = findFreeReg();
writeBackReg(x);
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
// vf0 with the lowest mask bit clear is materialised by zeroing the slot instead of loading
if ((vfLoadReg == 0) && !(xyzw & 1)) { SSE2_PXOR_XMM_to_XMM(x, x); }
else if (vfLoadReg == 33) mVUloadIreg(x, xyzw, vuRegs);
else if (vfLoadReg == 32) mVUloadReg (x, (uptr)&vuRegs->ACC.UL[0], xyzw);
else if (vfLoadReg >= 0) mVUloadReg (x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw);
xmmReg[x].reg = vfWriteReg;
xmmReg[x].xyzw = xyzw;
}
else { // Reg Will Not Be Modified (always load full reg for caching)
if (vfLoadReg == 33) mVUloadIreg(x, 0xf, vuRegs);
else if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]);
else if (vfLoadReg >= 0) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0]);
xmmReg[x].reg = vfLoadReg;
xmmReg[x].xyzw = 0;
}
xmmReg[x].count = counter;
xmmReg[x].isNeeded = 1;
return x;
}
};

View File

@ -1,67 +1,67 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Shared declarations for the new VIF unpacker. Everything below is compiled only when
// 'newVif' is defined.
#pragma once
#ifdef newVif
#include "x86emitter/x86emitter.h"
using namespace x86Emitter;
extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0);
extern void _nVifUnpack(int idx, u8 *data, u32 size);
// Signature of a generated unpack routine; called as fn(dest, data) by the unpack loop
typedef u32 (__fastcall *nVifCall)(void*, void*);
// NOTE(review): these arrays are 'static' in a header, so each translation unit that
// includes it gets its own copy -- presumably the header is included exactly once; confirm.
// Buffer the unpack routines are generated into (16 pages)
static __pagealigned u8 nVifUpkExec[__pagesize*16];
static __aligned16 nVifCall nVifUpk[(2*2*16)*4*4]; // ([USN][Masking][Unpack Type]) [curCycle][CyclesToWrite-1]
static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
// Byte constants 0x00 / 0x55 / 0xaa / 0xff (presumably shuffle immediates that broadcast
// vector 0..3 -- confirm at the use sites)
#define _v0 0
#define _v1 0x55
#define _v2 0xaa
#define _v3 0xff
#define aMax(x, y) std::max(x,y)
#define aMin(x, y) std::min(x,y)
#define _f __forceinline
// Emits a per-dword right shift of regX by n: logical (xPSRL.D) when 'usn' is non-zero,
// arithmetic (xPSRA.D) otherwise. 'usn' is taken from the scope where the macro is expanded.
#define xShiftR(regX, n) { \
if (usn) { xPSRL.D(regX, n); } \
else { xPSRA.D(regX, n); } \
}
// Source bytes consumed per unpacked vector, indexed by the low 4 bits of the unpack
// command (0 marks the unused encodings)
static const u32 nVifT[16] = {
4, // S-32
2, // S-16
1, // S-8
0, // ----
8, // V2-32
4, // V2-16
2, // V2-8
0, // ----
12,// V3-32
6, // V3-16
3, // V3-8
0, // ----
16,// V4-32
8, // V4-16
4, // V4-8
2, // V4-5
};
#include "newVif_BlockBuffer.h"
#include "newVif_OldUnpack.inl"
#include "newVif_UnpackGen.inl"
#include "newVif_Unpack.inl"
#endif
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Shared declarations for the new VIF unpacker. Everything below is compiled only when
// 'newVif' is defined.
#pragma once
#ifdef newVif
#include "x86emitter/x86emitter.h"
using namespace x86Emitter;
extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0);
extern void _nVifUnpack(int idx, u8 *data, u32 size);
// Signature of a generated unpack routine; called as fn(dest, data) by the unpack loop
typedef u32 (__fastcall *nVifCall)(void*, void*);
// NOTE(review): these arrays are 'static' in a header, so each translation unit that
// includes it gets its own copy -- presumably the header is included exactly once; confirm.
// Buffer the unpack routines are generated into (16 pages)
static __pagealigned u8 nVifUpkExec[__pagesize*16];
static __aligned16 nVifCall nVifUpk[(2*2*16)*4*4]; // ([USN][Masking][Unpack Type]) [curCycle][CyclesToWrite-1]
static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
// Byte constants 0x00 / 0x55 / 0xaa / 0xff (presumably shuffle immediates that broadcast
// vector 0..3 -- confirm at the use sites)
#define _v0 0
#define _v1 0x55
#define _v2 0xaa
#define _v3 0xff
#define aMax(x, y) std::max(x,y)
#define aMin(x, y) std::min(x,y)
#define _f __forceinline
// Emits a per-dword right shift of regX by n: logical (xPSRL.D) when 'usn' is non-zero,
// arithmetic (xPSRA.D) otherwise. 'usn' is taken from the scope where the macro is expanded.
#define xShiftR(regX, n) { \
if (usn) { xPSRL.D(regX, n); } \
else { xPSRA.D(regX, n); } \
}
// Source bytes consumed per unpacked vector, indexed by the low 4 bits of the unpack
// command (0 marks the unused encodings)
static const u32 nVifT[16] = {
4, // S-32
2, // S-16
1, // S-8
0, // ----
8, // V2-32
4, // V2-16
2, // V2-8
0, // ----
12,// V3-32
6, // V3-16
3, // V3-8
0, // ----
16,// V4-32
8, // V4-16
4, // V4-8
2, // V4-5
};
#include "newVif_BlockBuffer.h"
#include "newVif_OldUnpack.inl"
#include "newVif_UnpackGen.inl"
#include "newVif_Unpack.inl"
#endif

View File

@ -1,40 +1,40 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
class BlockBuffer {
private:
u32 mSize; // Cur Size
u32 mSizeT; // Total Size
u8* mData; // Data Ptr
void grow(u32 newSize) {
u8* temp = new u8[newSize];
memcpy(temp, mData, mSizeT);
safe_delete( mData );
mData = temp;
}
public:
BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; mData = new u8[mSizeT]; }
virtual ~BlockBuffer() { safe_delete(mData); }
void append(void *addr, u32 size) {
if (mSize + size > mSizeT) grow(mSize*2 + size);
memcpy(&mData[mSize], addr, size);
mSize += size;
}
void clear() { mSize = 0; }
u32 getSize() { return mSize; }
u8* getBlock() { return mData; }
};
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
class BlockBuffer {
private:
u32 mSize; // Cur Size
u32 mSizeT; // Total Size
u8* mData; // Data Ptr
void grow(u32 newSize) {
u8* temp = new u8[newSize];
memcpy(temp, mData, mSizeT);
safe_delete( mData );
mData = temp;
}
public:
BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; mData = new u8[mSizeT]; }
virtual ~BlockBuffer() { safe_delete(mData); }
void append(void *addr, u32 size) {
if (mSize + size > mSizeT) grow(mSize*2 + size);
memcpy(&mData[mSize], addr, size);
mSize += size;
}
void clear() { mSize = 0; }
u32 getSize() { return mSize; }
u8* getBlock() { return mData; }
};

View File

@ -1,167 +1,167 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Old Vif Unpack Code
// Only here for testing/reference
// If newVif is defined and newVif1 isn't, vif1 will use this code
// same goes for vif0...
// Explicit instantiations for VIF0 and VIF1
template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
// Legacy (non-recompiled) VIF unpack.
//   VIFdmanum - 0 selects VU0/vif0 state, anything else VU1/vif1
//   data      - packed source data
//   v         - vif code; v->cmd selects the unpack type, v->addr the destination offset
//   size      - source size; multiplied by 4 on entry (so presumably given in 32-bit words)
// Side effects: sets the globals vifRegs, vifMaskRegs, vif and vifRow, writes to VU memory
// through the selected unpack function, and updates vifRegs->num and vif->cl.
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
UNPACKFUNCTYPE func;
const VIFUnpackFuncTable *ft;
VURegs * VU;
u8 *cdata = (u8*)data;
u32 tempsize = 0;
const u32 memlimit = vif_size(VIFdmanum);
// Bind the global VIF state pointers to the selected unit
if (VIFdmanum == 0) {
VU = &VU0;
vifRegs = vif0Regs;
vifMaskRegs = g_vif0Masks;
vif = &vif0;
vifRow = g_vifmask.Row0;
}
else {
VU = &VU1;
vifRegs = vif1Regs;
vifMaskRegs = g_vif1Masks;
vif = &vif1;
vifRow = g_vifmask.Row1;
}
u32 *dest = (u32*)(VU->Mem + v->addr);
u32 unpackType = v->cmd & 0xf;
ft = &VIFfuncTable[ unpackType ];
// Unsigned or signed variant of the unpack function, depending on vif->usn
func = vif->usn ? ft->funcU : ft->funcS;
size <<= 2;
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
// Wrap a start address that is already past the end of VU memory
if (v->addr >= memlimit) {
DevCon.Warning("Overflown at the start");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
size = min(size, (int)vifRegs->num * ft->gsize); //size will always be the same or smaller
// Predicted end address of the transfer: start + skipped gaps + written vectors.
// NOTE(review): divides by cycle.wl, and this branch is also taken when wl == 0 -- confirm
// wl cannot be zero here.
tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
//Sanity Check (memory overflow)
// All three outcomes below end with the same state (tempsize = size, size = 0); they
// differ only in what is logged.
if (tempsize > memlimit) {
if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
//It's a red herring, so ignore it! SSE unpacks will be much quicker.
DevCon.WriteLn("what!!!!!!!!!");
//tempsize = 0;
tempsize = size;
size = 0;
}
else {
DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
tempsize = size;
size = 0;
}
}
else {
tempsize = size;
size = 0;
}
if (tempsize) {
// Destination step (in u32 units) applied once a write cycle of wl vectors completes
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
size = 0;
int addrstart = v->addr;
//if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
// Unpack one vector per iteration while a whole source group and vectors remain
while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) {
if(v->addr >= memlimit) {
DevCon.Warning("Mem limit overflow");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
func(dest, (u32*)cdata, ft->qsize);
cdata += ft->gsize;
tempsize -= ft->gsize;
vifRegs->num--;
vif->cl++;
if (vif->cl == vifRegs->cycle.wl) {
dest += incdest;
v->addr +=(incdest * 4);
vif->cl = 0;
}
else {
dest += 4;
v->addr += 16;
}
}
if (v->addr >= memlimit) {
v->addr &=(memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
// v->addr was only used as a running cursor; restore the caller's value
v->addr = addrstart;
if(tempsize > 0) size = tempsize;
}
if (size >= ft->dsize && vifRegs->num > 0) { //Else write what we do have
DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
VIF_LOG("warning, end with size = %d", size);
// unpack one qword
//v->addr += (size / ft->dsize) * 4;
func(dest, (u32*)cdata, size / ft->dsize);
size = 0;
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
}
}
else { // filling write
// The nested if pair below guards only the first warning.
// NOTE(review): the second "filling write" warning is unconditional -- confirm that logging
// on every filling write is intended.
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);
while (vifRegs->num > 0) {
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
// unpack one qword
if (vif->cl < vifRegs->cycle.cl) {
// Vector backed by source data: consume one source group
if(size < ft->gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
func(dest, (u32*)cdata, ft->qsize);
cdata += ft->gsize;
size -= ft->gsize;
vif->cl++;
vifRegs->num--;
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
}
else {
// Fill vector: func is called again but the source cursor is not advanced
func(dest, (u32*)cdata, ft->qsize);
v->addr += 16;
vifRegs->num--;
vif->cl++;
}
dest += 4;
if (vifRegs->num == 0) break;
}
}
}
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Old Vif Unpack Code
// Only here for testing/reference
// If newVif is defined and newVif1 isn't, vif1 will use this code
// same goes for vif0...
// Explicit instantiations for VIF0 and VIF1
template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
// Legacy (non-recompiled) VIF unpack.
//   VIFdmanum - 0 selects VU0/vif0 state, anything else VU1/vif1
//   data      - packed source data
//   v         - vif code; v->cmd selects the unpack type, v->addr the destination offset
//   size      - source size; multiplied by 4 on entry (so presumably given in 32-bit words)
// Side effects: sets the globals vifRegs, vifMaskRegs, vif and vifRow, writes to VU memory
// through the selected unpack function, and updates vifRegs->num and vif->cl.
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
UNPACKFUNCTYPE func;
const VIFUnpackFuncTable *ft;
VURegs * VU;
u8 *cdata = (u8*)data;
u32 tempsize = 0;
const u32 memlimit = vif_size(VIFdmanum);
// Bind the global VIF state pointers to the selected unit
if (VIFdmanum == 0) {
VU = &VU0;
vifRegs = vif0Regs;
vifMaskRegs = g_vif0Masks;
vif = &vif0;
vifRow = g_vifmask.Row0;
}
else {
VU = &VU1;
vifRegs = vif1Regs;
vifMaskRegs = g_vif1Masks;
vif = &vif1;
vifRow = g_vifmask.Row1;
}
u32 *dest = (u32*)(VU->Mem + v->addr);
u32 unpackType = v->cmd & 0xf;
ft = &VIFfuncTable[ unpackType ];
// Unsigned or signed variant of the unpack function, depending on vif->usn
func = vif->usn ? ft->funcU : ft->funcS;
size <<= 2;
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
// Wrap a start address that is already past the end of VU memory
if (v->addr >= memlimit) {
DevCon.Warning("Overflown at the start");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
size = min(size, (int)vifRegs->num * ft->gsize); //size will always be the same or smaller
// Predicted end address of the transfer: start + skipped gaps + written vectors.
// NOTE(review): divides by cycle.wl, and this branch is also taken when wl == 0 -- confirm
// wl cannot be zero here.
tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
//Sanity Check (memory overflow)
// All three outcomes below end with the same state (tempsize = size, size = 0); they
// differ only in what is logged.
if (tempsize > memlimit) {
if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
//It's a red herring, so ignore it! SSE unpacks will be much quicker.
DevCon.WriteLn("what!!!!!!!!!");
//tempsize = 0;
tempsize = size;
size = 0;
}
else {
DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
tempsize = size;
size = 0;
}
}
else {
tempsize = size;
size = 0;
}
if (tempsize) {
// Destination step (in u32 units) applied once a write cycle of wl vectors completes
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
size = 0;
int addrstart = v->addr;
//if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
// Unpack one vector per iteration while a whole source group and vectors remain
while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) {
if(v->addr >= memlimit) {
DevCon.Warning("Mem limit overflow");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
func(dest, (u32*)cdata, ft->qsize);
cdata += ft->gsize;
tempsize -= ft->gsize;
vifRegs->num--;
vif->cl++;
if (vif->cl == vifRegs->cycle.wl) {
dest += incdest;
v->addr +=(incdest * 4);
vif->cl = 0;
}
else {
dest += 4;
v->addr += 16;
}
}
if (v->addr >= memlimit) {
v->addr &=(memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
// v->addr was only used as a running cursor; restore the caller's value
v->addr = addrstart;
if(tempsize > 0) size = tempsize;
}
if (size >= ft->dsize && vifRegs->num > 0) { //Else write what we do have
DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
VIF_LOG("warning, end with size = %d", size);
// unpack one qword
//v->addr += (size / ft->dsize) * 4;
func(dest, (u32*)cdata, size / ft->dsize);
size = 0;
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
}
}
else { // filling write
// The nested if pair below guards only the first warning.
// NOTE(review): the second "filling write" warning is unconditional -- confirm that logging
// on every filling write is intended.
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);
while (vifRegs->num > 0) {
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
// unpack one qword
if (vif->cl < vifRegs->cycle.cl) {
// Vector backed by source data: consume one source group
if(size < ft->gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
func(dest, (u32*)cdata, ft->qsize);
cdata += ft->gsize;
size -= ft->gsize;
vif->cl++;
vifRegs->num--;
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
}
else {
// Fill vector: func is called again but the source cursor is not advanced
func(dest, (u32*)cdata, ft->qsize);
v->addr += 16;
vifRegs->num--;
vif->cl++;
}
dest += 4;
if (vifRegs->num == 0) break;
}
}
}

View File

@ -1,261 +1,279 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// newVif! - author: cottonvibes(@gmail.com)
#pragma once
// Per-unit state of the new VIF unpacker; filled in by initNewVif().
struct nVifStruct {
u32 idx; // VIF0 or VIF1
vifStruct* vif; // Vif Struct ptr
VIFregisters* vifRegs; // Vif Regs ptr
VURegs* VU; // VU Regs ptr
u8* vuMemEnd; // End of VU Memory
u32 vuMemLimit; // Use for fast AND
BlockBuffer* vifBlock; // Block Buffer
};
// One entry per VIF unit, indexed by idx (0 or 1)
nVifStruct nVif[2];
void initNewVif(int idx) {
nVif[idx].idx = idx;
nVif[idx].VU = idx ? &VU1 : &VU0;
nVif[idx].vif = idx ? &vif1 : &vif0;
nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
nVif[idx].vifBlock = new BlockBuffer(0x2000); // 8kb Block Buffer
nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
memset8<0xcc>( nVifUpkExec );
xSetPtr( nVifUpkExec );
for (int a = 0; a < 2; a++) {
for (int b = 0; b < 2; b++) {
for (int c = 0; c < 4; c++) {
for (int d = 0; d < 3; d++) {
nVifGen(a, b, c, d);
}}}}
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
}
// Unpacks as much of the current packet as the tag still expects and returns that amount
// (the smaller of vif1.vifpacketsize and vif1.tag.size). The byte count handed to
// _nVifUnpack is that amount times 4. Note that the bookkeeping always uses vif1,
// whatever idx is. XMM registers are frozen for the duration of the call.
int nVifUnpack(int idx, u32 *data) {
	XMMRegisters::Freeze();

	int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
	vif1.tag.size -= ret;
	_nVifUnpack(idx, (u8*)data, ret<<2);

	// Tag fully consumed: clamp the remaining size and retire the command
	if (vif1.tag.size <= 0) {
		vif1.tag.size = 0;
		vif1.cmd = 0;
	}

	XMMRegisters::Thaw();
	return ret;
}
// Returns a pointer into the VU memory of unit idx at 'offset' masked with vuMemLimit.
_f u8* setVUptr(int idx, int offset) {
	const nVifStruct& unit = nVif[idx];
	return (u8*)(unit.VU->Mem + (offset & unit.vuMemLimit));
}
// Advances ptr by 'amount' bytes inside the VU memory of unit idx. When the result reaches
// or passes vuMemEnd it wraps to the start of VU memory plus the overshoot. Logs a message
// if the resulting pointer is not 16-byte aligned.
_f void incVUptr(int idx, u8* &ptr, int amount) {
	const nVifStruct& unit = nVif[idx];
	ptr += amount;
	const int overshoot = ptr - unit.vuMemEnd;
	if (overshoot >= 0) {
		ptr = unit.VU->Mem + overshoot;
	}
	if ((uptr)ptr & 0xf) DevCon.WriteLn("unaligned wtf :(");
}
static void setMasks(const VIFregisters& v) {
for (int i = 0; i < 16; i++) {
int m = (v.mask >> (i*2)) & 3;
switch (m) {
case 0: // Data
nVifMask[0][i/4][i%4] = 0xffffffff;
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = 0;
break;
case 1: // Row
nVifMask[0][i/4][i%4] = 0;
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = ((u32*)&v.r0)[(i%4)*4];
break;
case 2: // Col
nVifMask[0][i/4][i%4] = 0;
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = ((u32*)&v.c0)[(i/4)*4];
break;
case 3: // Write Protect
nVifMask[0][i/4][i%4] = 0;
nVifMask[1][i/4][i%4] = 0xffffffff;
nVifMask[2][i/4][i%4] = 0;
break;
}
}
}
// ----------------------------------------------------------------------------
// Unpacking Optimization notes:
// ----------------------------------------------------------------------------
// Some games send a LOT of small packets. This is a problem because the new VIF unpacker
// has a lot of setup code to establish which unpack function to call. The best way to
// optimize this is to cache the unpack function's base (see fnbase below) and update it
// when the variables it depends on are modified: writes to vif->tag.cmd and vif->usn.
//
// A secondary optimization would be adding special handlers for packets where vifRegs->num==1.
// (which would remove the loop, simplify the incVUptr code, etc). But checking for it has
// to be simple enough that it doesn't offset the benefits (which I'm not sure is possible).
// -- air
// Core unpack loop, specialised at compile time.
//   idx    - VIF unit, forwarded to setVUptr/incVUptr
//   doMode - true: use the table-driven unpack functions (VIFfuncTable);
//            false: call the generated routines through nVifUpk
//   isFill - true for filling writes (cl < wl), false for skipping writes
//   data   - packed source data
//   size   - source size in bytes; only decremented here, never tested on the live paths
// Reads the globals vif and vifRegs, which the caller must have set up.
template< int idx, bool doMode, bool isFill >
__releaseinline void __fastcall _nVifUnpackLoop( u8 *data, u32 size )
{
// Eh... template attempt, tho not sure it helped much. There's too much setup code (see
// optimization note above) -- air
const int usn = !!(vif->usn);
const int doMask = !!(vif->tag.cmd & 0x10);
const int upkNum = vif->tag.cmd & 0xf;
const u32& vift = nVifT[upkNum];
u8* dest = setVUptr(idx, vif->tag.addr);
const VIFUnpackFuncTable& ft = VIFfuncTable[vif->tag.cmd & 0xf];
UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
// Start of the 16-entry group of generated routines for this usn/mask/type combination
const nVifCall* fnbase = &nVifUpk[
((usn*2*16) + (doMask*16) + (upkNum)) * (4*4)
];
// cycleSize: vectors per block that consume source data; blockSize: vectors per block
const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;
if (doMask)
setMasks(*vifRegs);
if (vif->cl >= blockSize) {
vif->cl = 0;
}
while (vifRegs->num > 0) {
if (vif->cl < cycleSize) {
//if (size <= 0) { DbgCon.WriteLn("_nVifUnpack: Out of Data!"); break; }
if (doMode /*|| doMask*/) {
//if (doMask)
//DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum);
func((u32*)dest, (u32*)data, ft.qsize);
data += ft.gsize;
size -= ft.gsize;
vifRegs->num--;
}
else if (1) {
//DevCon.WriteLn("SSE Unpack!");
// NOTE(review): the clamp is 4, so the index can reach 16, one past the 16-entry group
// that fnbase points at -- confirm whether the clamp should be 3.
fnbase[aMin(vif->cl, 4) * 4](dest, data);
data += vift;
size -= vift;
vifRegs->num--;
}
else {
// Unreachable while the branch above is 'else if (1)': multi-vector variant kept for reference.
// 'size' is a u32, so the 'size < 0' test below can never be true.
//DevCon.WriteLn("SSE Unpack!");
int c = aMin((cycleSize - vif->cl), 3);
size -= vift * c;
//if (c>1) { DevCon.WriteLn("C > 1!"); }
if (c<0||c>3) { DbgCon.WriteLn("C wtf!"); }
if (size < 0) { DbgCon.WriteLn("Size Shit"); size+=vift*c;c=1;size-=vift*c;}
fnbase[(aMin(vif->cl, 4) * 4) + c-1](dest, data);
data += vift * c;
vifRegs->num -= c;
}
}
else if (isFill) {
// Fill vector: unpack again without advancing the source pointer
func((u32*)dest, (u32*)data, ft.qsize);
vifRegs->num--;
}
// In a skipping write the vectors with cl >= cycleSize are skipped: only dest advances
incVUptr(idx, dest, 16);
// Removing this modulo was a huge speedup for God of War. (62->73 fps)
// (GoW uses a lot of blockSize==1 packets, resulting in tons of loops -- so the biggest
// factor in performance ends up being the top-level conditionals of the loop, and
// also the loop prep code.) --air
//vif->cl = (vif->cl+1) % blockSize;
if( ++vif->cl == blockSize ) vif->cl = 0;
}
}
// Binds the global vif/vifRegs pointers to unit idx and dispatches to the matching
// _nVifUnpackLoop specialisation. Only unit 1 is handled; unit 0 raises a dev failure.
void _nVifUnpack(int idx, u8 *data, u32 size) {
	// Disabled fallback that routed skipping writes to the old unpacker:
	/*if (nVif[idx].vifRegs->cycle.cl >= nVif[idx].vifRegs->cycle.wl) { // skipping write
		if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);
		else	  VIFunpack<1>((u32*)data, &vif1.tag, size>>2);
		return;
	}*/

	vif     = nVif[idx].vif;
	vifRegs = nVif[idx].vifRegs;

	const bool doMode = !!vifRegs->mode;
	const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);

	//UnpackLoopTable[idx][doMode][isFill]( data, size );

	if( !idx )
	{
		pxFailDev( "No VIF0 support yet, sorry!" );
		return;
	}

	// The template arguments must be compile-time constants, hence the explicit fan-out
	if( doMode && isFill )
		_nVifUnpackLoop<1,true,true>( data, size );
	else if( doMode )
		_nVifUnpackLoop<1,true,false>( data, size );
	else if( isFill )
		_nVifUnpackLoop<1,false,true>( data, size );
	else
		_nVifUnpackLoop<1,false,false>( data, size );

	//if (isFill)
	//DevCon.WriteLn("%s Write! [num = %d][%s]", (isFill?"Filling":"Skipping"), vifRegs->num, (vifRegs->num%3 ? "bad!" : "ok"));
	//DevCon.WriteLn("%s Write! [mask = %08x][type = %02d][num = %d]", (isFill?"Filling":"Skipping"), vifRegs->mask, upkNum, vifRegs->num);
}
//int nVifUnpack(int idx, u32 *data) {
// XMMRegisters::Freeze();
// BlockBuffer* vB = nVif[idx].vifBlock;
// int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
// //vB->append(data, ret<<2);
// vif1.tag.size -= ret;
// //DevCon.WriteLn("2 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
// //if (vif1.tag.size <= 0) {
// //DevCon.WriteLn("3 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
// //VIFunpack<1>(vB->getBlock(), &vif1.tag, vB->getSize()>>2);
// //_nVifUnpack(idx, vB->getBlock(), vB->getSize());
// _nVifUnpack(idx, (u8*)data, ret<<2);
// if (vif1.tag.size <= 0) vif1.tag.size = 0;
// if (vif1.tag.size <= 0) vif1.cmd = 0;
// //vB->clear();
// //}
// //else { vif1.tag.size+=ret; ret = -1; vB->clear(); }
// XMMRegisters::Thaw();
// return ret;
//}
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// newVif! - author: cottonvibes(@gmail.com)
#pragma once
// Per-unit state of the new VIF unpacker; filled in by initNewVif().
struct nVifStruct {
u32 idx; // VIF0 or VIF1
vifStruct* vif; // Vif Struct ptr
VIFregisters* vifRegs; // Vif Regs ptr
VURegs* VU; // VU Regs ptr
u8* vuMemEnd; // End of VU Memory
u32 vuMemLimit; // Use for fast AND
BlockBuffer* vifBlock; // Block Buffer
};
// One entry per VIF unit, indexed by idx (0 or 1)
static __aligned16 nVifStruct nVif[2];
void initNewVif(int idx) {
nVif[idx].idx = idx;
nVif[idx].VU = idx ? &VU1 : &VU0;
nVif[idx].vif = idx ? &vif1 : &vif0;
nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
nVif[idx].vifBlock = new BlockBuffer(0x2000); // 8kb Block Buffer
nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
memset8<0xcc>( nVifUpkExec );
xSetPtr( nVifUpkExec );
for (int a = 0; a < 2; a++) {
for (int b = 0; b < 2; b++) {
for (int c = 0; c < 4; c++) {
for (int d = 0; d < 3; d++) {
nVifGen(a, b, c, d);
}}}}
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
}
// Unpacks as much of the current packet as the tag still expects and returns that amount
// (the smaller of vif1.vifpacketsize and vif1.tag.size). The byte count handed to
// _nVifUnpack is that amount times 4. Note that the bookkeeping always uses vif1,
// whatever idx is. XMM registers are frozen for the duration of the call.
int nVifUnpack(int idx, u32 *data) {
	XMMRegisters::Freeze();

	int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
	vif1.tag.size -= ret;
	_nVifUnpack(idx, (u8*)data, ret<<2);

	// Tag fully consumed: clamp the remaining size and retire the command
	if (vif1.tag.size <= 0) {
		vif1.tag.size = 0;
		vif1.cmd = 0;
	}

	XMMRegisters::Thaw();
	return ret;
}
// Returns a pointer into the VU memory of unit idx at 'offset' masked with vuMemLimit.
_f u8* setVUptr(int idx, int offset) {
	const nVifStruct& unit = nVif[idx];
	return (u8*)(unit.VU->Mem + (offset & unit.vuMemLimit));
}
// Advances ptr by 'amount' bytes inside the VU memory of unit idx. When the result reaches
// or passes vuMemEnd it wraps to the start of VU memory plus the overshoot. Logs a message
// if the resulting pointer is not 16-byte aligned.
_f void incVUptr(int idx, u8* &ptr, int amount) {
	const nVifStruct& unit = nVif[idx];
	ptr += amount;
	const int overshoot = ptr - unit.vuMemEnd;
	if (overshoot >= 0) {
		ptr = unit.VU->Mem + overshoot;
	}
	if ((uptr)ptr & 0xf) DevCon.WriteLn("unaligned wtf :(");
}
static void setMasks(const VIFregisters& v) {
for (int i = 0; i < 16; i++) {
int m = (v.mask >> (i*2)) & 3;
switch (m) {
case 0: // Data
nVifMask[0][i/4][i%4] = 0xffffffff;
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = 0;
break;
case 1: // Row
nVifMask[0][i/4][i%4] = 0;
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = ((u32*)&v.r0)[(i%4)*4];
break;
case 2: // Col
nVifMask[0][i/4][i%4] = 0;
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = ((u32*)&v.c0)[(i/4)*4];
break;
case 3: // Write Protect
nVifMask[0][i/4][i%4] = 0;
nVifMask[1][i/4][i%4] = 0xffffffff;
nVifMask[2][i/4][i%4] = 0;
break;
}
}
}
// ----------------------------------------------------------------------------
// Unpacking Optimization notes:
// ----------------------------------------------------------------------------
// Some games send a LOT of small packets. This is a problem because the new VIF unpacker
// has a lot of setup code to establish which unpack function to call. The best way to
// optimize this is to cache the unpack function's base (see fnbase below) and update it
// when the variables it depends on are modified: writes to vif->tag.cmd and vif->usn.
// Problem: vif->tag.cmd is modified a lot. Like, constantly. So won't work.
//
// A secondary optimization would be adding special handlers for packets where vifRegs->num==1.
// (which would remove the loop, simplify the incVUptr code, etc). But checking for it has
// to be simple enough that it doesn't offset the benefits (which I'm not sure is possible).
// -- air
//template< int idx, bool doMode, bool isFill >
//__releaseinline void __fastcall _nVifUnpackLoop( u8 *data, u32 size )
// Core unpack loop (runtime-flag version).
//   idx  - VIF unit, forwarded to setVUptr/incVUptr
//   data - packed source data
//   size - source size in bytes; only decremented here, never tested on the live paths
// Reads the globals vif and vifRegs, which the caller must have set up.
__releaseinline void __fastcall _nVifUnpackLoop( int idx, u8 *data, u32 size )
{
// comment out the following 2 lines to test templated version...
// doMode: use the table-driven unpack functions instead of the generated routines
// isFill: filling write (cl < wl) as opposed to skipping write
const bool doMode = !!vifRegs->mode;
const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
const int usn = !!(vif->usn);
const int doMask = !!(vif->tag.cmd & 0x10);
const int upkNum = vif->tag.cmd & 0xf;
const u32& vift = nVifT[upkNum];
u8* dest = setVUptr(idx, vif->tag.addr);
const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum];
UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS;
// Did a bunch of work to make it so I could optimize this index lookup to outside
// the main loop but it was for naught -- too often the loop is only 1-2 iterations,
// so this setup code ends up being slower (1 iter) or same speed (2 iters).
// fnbase: start of the 16-entry group of generated routines for this usn/mask/type combination
const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + (doMask*16) + (upkNum)) * (4*4) ];
// cycleSize: vectors per block that consume source data; blockSize: vectors per block
const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;
if (doMask)
setMasks(*vifRegs);
if (vif->cl >= blockSize) {
// This condition doesn't appear to ever occur, and really it never should.
// Normally it wouldn't matter, but even simple setup code matters here (see
// optimization notes above) >_<
vif->cl = 0;
}
while (vifRegs->num > 0) {
if (vif->cl < cycleSize) {
//if (size <= 0) { DbgCon.WriteLn("_nVifUnpack: Out of Data!"); break; }
if (doMode /*|| doMask*/) {
//if (doMask)
//DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum);
func((u32*)dest, (u32*)data, ft.qsize);
data += ft.gsize;
size -= ft.gsize;
vifRegs->num--;
}
else if (1) {
//DevCon.WriteLn("SSE Unpack!");
// NOTE(review): the clamp is 4, so the index can reach 16, one past the 16-entry group
// that fnbase points at -- confirm whether the clamp should be 3.
fnbase[aMin(vif->cl, 4) * 4](dest, data);
data += vift;
size -= vift;
vifRegs->num--;
}
else {
// Unreachable while the branch above is 'else if (1)': multi-vector variant kept for reference.
// 'size' is a u32, so the 'size < 0' test below can never be true.
//DevCon.WriteLn("SSE Unpack!");
int c = aMin((cycleSize - vif->cl), 3);
size -= vift * c;
//if (c>1) { DevCon.WriteLn("C > 1!"); }
if (c<0||c>3) { DbgCon.WriteLn("C wtf!"); }
if (size < 0) { DbgCon.WriteLn("Size Shit"); size+=vift*c;c=1;size-=vift*c;}
fnbase[(aMin(vif->cl, 4) * 4) + c-1](dest, data);
data += vift * c;
vifRegs->num -= c;
}
}
else if (isFill) {
// Fill vector: unpack again without advancing the source pointer
func((u32*)dest, (u32*)data, ft.qsize);
vifRegs->num--;
}
// In a skipping write the vectors with cl >= cycleSize are skipped: only dest advances
incVUptr(idx, dest, 16);
// Removing this modulo was a huge speedup for God of War start menu. (62->73 fps)
// (GoW and tri-ace games both use a lot of blockSize==1 packets, resulting in tons
// of loops -- so the biggest factor in performance ends up being the top-level
// conditionals of the loop, and also the loop prep code.) --air
//vif->cl = (vif->cl+1) % blockSize;
if( ++vif->cl == blockSize ) vif->cl = 0;
}
}
// Runs the new VIF unpacker for VIF unit `idx` over `size` bytes at `data`.
// Points the non-local `vif` / `vifRegs` at the state of the selected unit, then
// hands the packet to _nVifUnpackLoop.
void _nVifUnpack(int idx, u8 *data, u32 size) {
	// Disabled, kept for reference: send "skipping write" packets (cl >= wl) to the
	// older VIFunpack<> path instead of the loop below.
	//   if (nVif[idx].vifRegs->cycle.cl >= nVif[idx].vifRegs->cycle.wl) {
	//       if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);
	//       else      VIFunpack<1>((u32*)data, &vif1.tag, size>>2);
	//       return;
	//   }

	vif = nVif[idx].vif;
	vifRegs = nVif[idx].vifRegs;

#if 1
	_nVifUnpackLoop(idx, data, size);
#else
	// Templated variant of the loop (disabled). It did not help much: there is too
	// much setup code, and the template only optimizes code inside the loop, which
	// often runs just once or twice. Recompilation is the better route, but the
	// dispatch is left here for reference. -- air
	const bool doMode = !!vifRegs->mode;
	const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);

	//UnpackLoopTable[idx][doMode][isFill]( data, size );

	if (!idx) {
		pxFailDev( "No VIF0 support yet, sorry!" );
	}
	else if (doMode) {
		if (isFill) _nVifUnpackLoop<1,true,true>(data, size);
		else        _nVifUnpackLoop<1,true,false>(data, size);
	}
	else {
		if (isFill) _nVifUnpackLoop<1,false,true>(data, size);
		else        _nVifUnpackLoop<1,false,false>(data, size);
	}
#endif

	// Debug traces (disabled):
	//if (isFill)
	//DevCon.WriteLn("%s Write! [num = %d][%s]", (isFill?"Filling":"Skipping"), vifRegs->num, (vifRegs->num%3 ? "bad!" : "ok"));
	//DevCon.WriteLn("%s Write! [mask = %08x][type = %02d][num = %d]", (isFill?"Filling":"Skipping"), vifRegs->mask, upkNum, vifRegs->num);
}
//int nVifUnpack(int idx, u32 *data) {
// XMMRegisters::Freeze();
// BlockBuffer* vB = nVif[idx].vifBlock;
// int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
// //vB->append(data, ret<<2);
// vif1.tag.size -= ret;
// //DevCon.WriteLn("2 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
// //if (vif1.tag.size <= 0) {
// //DevCon.WriteLn("3 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
// //VIFunpack<1>(vB->getBlock(), &vif1.tag, vB->getSize()>>2);
// //_nVifUnpack(idx, vB->getBlock(), vB->getSize());
// _nVifUnpack(idx, (u8*)data, ret<<2);
// if (vif1.tag.size <= 0) vif1.tag.size = 0;
// if (vif1.tag.size <= 0) vif1.cmd = 0;
// //vB->clear();
// //}
// //else { vif1.tag.size+=ret; ret = -1; vB->clear(); }
// XMMRegisters::Thaw();
// return ret;
//}

View File

@ -1,256 +1,255 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Emits a masked store of regX into destination quadword x (x is 0, 1 or 2, selecting
// byte offset 0x00, 0x10 or 0x20 from ecx). The emitted sequence:
//   1. loads the current destination contents into xmm7,
//   2. ANDs regX with nVifMask[0][offX] and xmm7 with nVifMask[1][offX],
//   3. ORs nVifMask[2][offX] and then the masked xmm7 into regX,
//   4. stores regX back to the same destination slot.
// offX is aMin(curCycle+x, 4); curCycle comes from the scope that expands the macro.
// xmm7 is overwritten by the emitted code.
// NOTE(review): offX can reach 4, so this assumes each nVifMask table has at least five
// rows -- confirm against the nVifMask declaration.
#define xMaskWrite(regX, x) { \
if (x==0) xMOVAPS(xmm7, ptr32[ecx]); \
if (x==1) xMOVAPS(xmm7, ptr32[ecx+0x10]); \
if (x==2) xMOVAPS(xmm7, ptr32[ecx+0x20]); \
int offX = aMin(curCycle+x, 4); \
xPAND(regX, ptr32[nVifMask[0][offX]]); \
xPAND(xmm7, ptr32[nVifMask[1][offX]]); \
xPOR (regX, ptr32[nVifMask[2][offX]]); \
xPOR (regX, xmm7); \
if (x==0) xMOVAPS(ptr32[ecx], regX); \
if (x==1) xMOVAPS(ptr32[ecx+0x10], regX); \
if (x==2) xMOVAPS(ptr32[ecx+0x20], regX); \
}
// Emits the stores for up to three unpacked quadwords: reg0 -> [ecx], reg1 -> [ecx+0x10],
// reg2 -> [ecx+0x20]. `cycles` decides how many are emitted (>=0 first, >=1 second,
// >=2 third). When `mask` is 0 plain xMOVAPS stores are emitted; otherwise every store
// goes through xMaskWrite. `mask` and `cycles` come from the scope that expands the macro.
#define xMovDest(reg0, reg1, reg2) { \
if (mask==0) { \
if (cycles>=0) { xMOVAPS (ptr32[ecx], reg0); } \
if (cycles>=1) { xMOVAPS (ptr32[ecx+0x10], reg1); } \
if (cycles>=2) { xMOVAPS (ptr32[ecx+0x20], reg2); } \
} \
else { \
if (cycles>=0) { xMaskWrite(reg0, 0); } \
if (cycles>=1) { xMaskWrite(reg1, 1); } \
if (cycles>=2) { xMaskWrite(reg2, 2); } \
} \
}
// Emits the instruction sequence used by the V4-5 unpack routine to split one packed
// value into four lanes. Going by the per-line comments below, the input in xmm1 is laid
// out as A|B5|G5|R5 and each 5-bit colour ends up shifted left by 3 (A by 7).
// Input:  xmm1.
// Output: xmm2 gets result.
// The emitted code overwrites xmm1, xmm3 and xmm4 as scratch.
// NOTE(review): mVUmergeRegs is defined outside this view; the lane masks (0x3, 0x4, 0x2)
// are documented here only by the existing A|B|G|R comments -- confirm their meaning.
void convertRGB() {
xPSLL.D (xmm1, 3); // ABG|R5.000
xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits)
xPSRL.D (xmm1, 8); // ABG
xPSLL.D (xmm1, 3); // AB|G5.000
xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits)
xPSRL.D (xmm1, 8); // AB
xPSLL.D (xmm1, 3); // A|B5.000
xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits)
xPSRL.D (xmm1, 8); // A
xPSLL.D (xmm1, 7); // A.0000000
xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A
xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G
xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B
mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R
mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R
mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R
// The shift-left / shift-right pair below clears the upper 24 bits of every lane.
xPSLL.D (xmm2, 24); // can optimize to
xPSRL.D (xmm2, 24); // single AND...
}
struct VifUnpackIndexer
{
int usn, mask;
int curCycle, cyclesToWrite;
nVifCall& GetCall( int packType ) const
{
int usnpart = usn*2*16;
int maskpart = mask*16;
int packpart = packType;
int curpart = curCycle*4;
int cycpespart = cyclesToWrite;
return nVifUpk[((usnpart+maskpart+packpart)*(4*4)) + (curpart+cycpespart)];
}
void xSetCall( int packType ) const
{
xAlignPtr(16);
GetCall( packType ) = (nVifCall)xGetPtr();
}
void xSetNullCall( int packType ) const
{
GetCall( packType ) = NULL;
}
};
// Emits the unpack routines for one (usn, mask, curCycle, cycles) combination and records
// each routine's entry point in the nVifUpk table through a VifUnpackIndexer.
// One routine is emitted per pack type 0x0-0xf; types 0x3, 0x7 and 0xb get a NULL table
// entry instead of code. Every routine ends with xRET().
// `cycles` controls how many destination quadwords a routine handles: the (cycles>=0)
// instructions cover the first, (cycles>=1) the second and (cycles>=2) the third.
// Register contract of the emitted code:
// ecx = dest, edx = src
// NOTE(review): xShiftR is defined outside this view; presumably it emits a signed or
// unsigned right shift depending on `usn` -- confirm.
void nVifGen(int usn, int mask, int curCycle, int cycles) {
const VifUnpackIndexer indexer = { usn, mask, curCycle, cycles };
indexer.xSetCall(0x0); // S-32
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
xRET();
indexer.xSetCall(0x1); // S-16
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=0) xShiftR (xmm0, 16);
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
xRET();
indexer.xSetCall(0x2); // S-8
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=0) xShiftR (xmm0, 24);
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
xRET();
indexer.xSetNullCall(0x3); // ----
indexer.xSetCall(0x4); // V2-32
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x10]);
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0x5); // V2-16
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 16);
if (cycles>=2) xShiftR (xmm2, 16);
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0x6); // V2-8
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 24);
if (cycles>=2) xShiftR (xmm2, 24);
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetNullCall(0x7); // ----
indexer.xSetCall(0x8); // V3-32
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+12]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+24]);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0x9); // V3-16
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+6]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+12]);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 16);
if (cycles>=1) xShiftR (xmm1, 16);
if (cycles>=2) xShiftR (xmm2, 16);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0xa); // V3-8
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+3]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+6]);
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 24);
if (cycles>=1) xShiftR (xmm1, 24);
if (cycles>=2) xShiftR (xmm2, 24);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetNullCall(0xb); // ----
indexer.xSetCall(0xc); // V4-32
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0xd); // V4-16
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 16);
if (cycles>=1) xShiftR (xmm1, 16);
if (cycles>=2) xShiftR (xmm2, 16);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0xe); // V4-8
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+4]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+8]);
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 24);
if (cycles>=1) xShiftR (xmm1, 24);
if (cycles>=2) xShiftR (xmm2, 24);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
// A | B5 | G5 | R5
// ..0.. A 0000000 | ..0.. B 000 | ..0.. G 000 | ..0.. R 000
// NOTE(review): unlike the other pack types, V4-5 stores with plain xMOVAPS instead of
// xMovDest, so `mask` has no effect on this routine -- confirm that is intended.
indexer.xSetCall(0xf); // V4-5
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xMOVAPS (xmm1, xmm0);
if (cycles>=0) convertRGB();
if (cycles>=0) xMOVAPS (ptr32[ecx], xmm2);
if (cycles>=1) xMOVAPS (xmm1, xmm0);
if (cycles>=1) xPSRL.D (xmm1, 16);
if (cycles>=1) convertRGB();
if (cycles>=1) xMOVAPS (ptr32[ecx+0x10], xmm2);
if (cycles>=2) xPSHUF.D (xmm1, xmm0, _v1);
if (cycles>=2) convertRGB();
if (cycles>=2) xMOVAPS (ptr32[ecx+0x20], xmm2);
xRET();
// Sanity check: the emitter position must still lie inside the nVifUpkExec buffer.
pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
}
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Emits a masked store of regX into destination quadword x (x is 0, 1 or 2, selecting
// byte offset 0x00, 0x10 or 0x20 from ecx). The emitted sequence:
//   1. loads the current destination contents into xmm7,
//   2. ANDs regX with nVifMask[0][offX] and xmm7 with nVifMask[1][offX],
//   3. ORs nVifMask[2][offX] and then the masked xmm7 into regX,
//   4. stores regX back to the same destination slot.
// offX is aMin(curCycle+x, 4); curCycle comes from the scope that expands the macro.
// xmm7 is overwritten by the emitted code.
// NOTE(review): offX can reach 4, so this assumes each nVifMask table has at least five
// rows -- confirm against the nVifMask declaration.
#define xMaskWrite(regX, x) { \
if (x==0) xMOVAPS(xmm7, ptr32[ecx]); \
if (x==1) xMOVAPS(xmm7, ptr32[ecx+0x10]); \
if (x==2) xMOVAPS(xmm7, ptr32[ecx+0x20]); \
int offX = aMin(curCycle+x, 4); \
xPAND(regX, ptr32[nVifMask[0][offX]]); \
xPAND(xmm7, ptr32[nVifMask[1][offX]]); \
xPOR (regX, ptr32[nVifMask[2][offX]]); \
xPOR (regX, xmm7); \
if (x==0) xMOVAPS(ptr32[ecx], regX); \
if (x==1) xMOVAPS(ptr32[ecx+0x10], regX); \
if (x==2) xMOVAPS(ptr32[ecx+0x20], regX); \
}
// Emits the stores for up to three unpacked quadwords: reg0 -> [ecx], reg1 -> [ecx+0x10],
// reg2 -> [ecx+0x20]. `cycles` decides how many are emitted (>=0 first, >=1 second,
// >=2 third). When `mask` is 0 plain xMOVAPS stores are emitted; otherwise every store
// goes through xMaskWrite. `mask` and `cycles` come from the scope that expands the macro.
#define xMovDest(reg0, reg1, reg2) { \
if (mask==0) { \
if (cycles>=0) { xMOVAPS (ptr32[ecx], reg0); } \
if (cycles>=1) { xMOVAPS (ptr32[ecx+0x10], reg1); } \
if (cycles>=2) { xMOVAPS (ptr32[ecx+0x20], reg2); } \
} \
else { \
if (cycles>=0) { xMaskWrite(reg0, 0); } \
if (cycles>=1) { xMaskWrite(reg1, 1); } \
if (cycles>=2) { xMaskWrite(reg2, 2); } \
} \
}
// Emits the instruction sequence used by the V4-5 unpack routine to split one packed
// value into four lanes. Going by the per-line comments below, the input in xmm1 is laid
// out as A|B5|G5|R5 and each 5-bit colour ends up shifted left by 3 (A by 7).
// Input:  xmm1.
// Output: xmm2 gets result.
// The emitted code overwrites xmm1, xmm3 and xmm4 as scratch.
// NOTE(review): mVUmergeRegs is defined outside this view; the lane masks (0x3, 0x4, 0x2)
// are documented here only by the existing A|B|G|R comments -- confirm their meaning.
void convertRGB() {
xPSLL.D (xmm1, 3); // ABG|R5.000
xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits)
xPSRL.D (xmm1, 8); // ABG
xPSLL.D (xmm1, 3); // AB|G5.000
xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits)
xPSRL.D (xmm1, 8); // AB
xPSLL.D (xmm1, 3); // A|B5.000
xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits)
xPSRL.D (xmm1, 8); // A
xPSLL.D (xmm1, 7); // A.0000000
xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A
xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G
xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B
mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R
mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R
mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R
// The shift-left / shift-right pair below clears the upper 24 bits of every lane.
xPSLL.D (xmm2, 24); // can optimize to
xPSRL.D (xmm2, 24); // single AND...
}
struct VifUnpackIndexer
{
int usn, mask;
int curCycle, cyclesToWrite;
nVifCall& GetCall( int packType ) const
{
int usnpart = usn*2*16;
int maskpart = mask*16;
int packpart = packType;
int curpart = curCycle*4;
int cycpespart = cyclesToWrite;
return nVifUpk[((usnpart+maskpart+packpart)*(4*4)) + (curpart+cycpespart)];
}
void xSetCall( int packType ) const
{
GetCall( packType ) = (nVifCall)xGetAlignedCallTarget();
}
void xSetNullCall( int packType ) const
{
GetCall( packType ) = NULL;
}
};
// Emits the unpack routines for one (usn, mask, curCycle, cycles) combination and records
// each routine's entry point in the nVifUpk table through a VifUnpackIndexer.
// One routine is emitted per pack type 0x0-0xf; types 0x3, 0x7 and 0xb get a NULL table
// entry instead of code. Every routine ends with xRET().
// `cycles` controls how many destination quadwords a routine handles: the (cycles>=0)
// instructions cover the first, (cycles>=1) the second and (cycles>=2) the third.
// Register contract of the emitted code:
// ecx = dest, edx = src
// NOTE(review): xShiftR is defined outside this view; presumably it emits a signed or
// unsigned right shift depending on `usn` -- confirm.
void nVifGen(int usn, int mask, int curCycle, int cycles) {
const VifUnpackIndexer indexer = { usn, mask, curCycle, cycles };
indexer.xSetCall(0x0); // S-32
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
xRET();
indexer.xSetCall(0x1); // S-16
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=0) xShiftR (xmm0, 16);
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
xRET();
indexer.xSetCall(0x2); // S-8
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=0) xShiftR (xmm0, 24);
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
xRET();
indexer.xSetNullCall(0x3); // ----
indexer.xSetCall(0x4); // V2-32
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x10]);
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0x5); // V2-16
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 16);
if (cycles>=2) xShiftR (xmm2, 16);
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0x6); // V2-8
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 24);
if (cycles>=2) xShiftR (xmm2, 24);
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetNullCall(0x7); // ----
indexer.xSetCall(0x8); // V3-32
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+12]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+24]);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0x9); // V3-16
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+6]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+12]);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 16);
if (cycles>=1) xShiftR (xmm1, 16);
if (cycles>=2) xShiftR (xmm2, 16);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0xa); // V3-8
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+3]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+6]);
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 24);
if (cycles>=1) xShiftR (xmm1, 24);
if (cycles>=2) xShiftR (xmm2, 24);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetNullCall(0xb); // ----
indexer.xSetCall(0xc); // V4-32
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0xd); // V4-16
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 16);
if (cycles>=1) xShiftR (xmm1, 16);
if (cycles>=2) xShiftR (xmm2, 16);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
indexer.xSetCall(0xe); // V4-8
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+4]);
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+8]);
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
if (cycles>=0) xShiftR (xmm0, 24);
if (cycles>=1) xShiftR (xmm1, 24);
if (cycles>=2) xShiftR (xmm2, 24);
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
xRET();
// A | B5 | G5 | R5
// ..0.. A 0000000 | ..0.. B 000 | ..0.. G 000 | ..0.. R 000
// NOTE(review): unlike the other pack types, V4-5 stores with plain xMOVAPS instead of
// xMovDest, so `mask` has no effect on this routine -- confirm that is intended.
indexer.xSetCall(0xf); // V4-5
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
if (cycles>=0) xMOVAPS (xmm1, xmm0);
if (cycles>=0) convertRGB();
if (cycles>=0) xMOVAPS (ptr32[ecx], xmm2);
if (cycles>=1) xMOVAPS (xmm1, xmm0);
if (cycles>=1) xPSRL.D (xmm1, 16);
if (cycles>=1) convertRGB();
if (cycles>=1) xMOVAPS (ptr32[ecx+0x10], xmm2);
if (cycles>=2) xPSHUF.D (xmm1, xmm0, _v1);
if (cycles>=2) convertRGB();
if (cycles>=2) xMOVAPS (ptr32[ecx+0x20], xmm2);
xRET();
// Sanity check: the emitter position must still lie inside the nVifUpkExec buffer.
pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
}

File diff suppressed because it is too large Load Diff

View File

@ -1,283 +1,283 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "VUmicro.h"
extern u32 vudump;
#define VU0_MEMSIZE 0x1000
#define VU1_MEMSIZE 0x4000
void recResetVU0();
void recExecuteVU0Block();
void recClearVU0( u32 Addr, u32 Size );
void recVU1Init();
void recVU1Shutdown();
void recResetVU1();
void recExecuteVU1Block();
void recClearVU1( u32 Addr, u32 Size );
u32 GetVIAddr(VURegs * VU, int reg, int read, int info); // returns the correct VI addr
void recUpdateFlags(VURegs * VU, int reg, int info);
void _recvuTestPipes(VURegs * VU);
void _recvuFlushFDIV(VURegs * VU);
void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
#define VUOP_READ 2
#define VUOP_WRITE 4
// save on mem: the flag and q/p fields are single bytes to keep the record small.
// NOTE(review): field meanings are not documented in this header. SuperVUAnalyzeOp()
// takes a pointer to this struct as its `info` argument, so presumably it is the
// per-instruction analysis record -- confirm against the implementation.
struct _vuopinfo {
int cycle;
int cycles;
u8 statusflag;
u8 macflag;
u8 clipflag;
u8 dummy; // NOTE(review): looks like an unused/padding byte -- confirm
u8 q;
u8 p;
u16 pqinst; // bit of instruction specifying index (srec only)
};
void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
extern int vucycle;
typedef void (*vFloat)(int regd, int regTemp);
extern vFloat vFloats1[16];
extern vFloat vFloats1_useEAX[16];
extern vFloat vFloats2[16];
extern vFloat vFloats4[16];
extern vFloat vFloats4_useEAX[16];
extern const __aligned16 float s_fones[8];
extern const __aligned16 u32 s_mask[4];
extern const __aligned16 u32 s_expmask[4];
extern const __aligned16 u32 g_minvals[4];
extern const __aligned16 u32 g_maxvals[4];
extern const __aligned16 u32 const_clip[8];
u32 GetVIAddr(VURegs * VU, int reg, int read, int info);
int _vuGetTempXMMreg(int info);
void vuFloat(int info, int regd, int XYZW);
void vuFloat_useEAX(int regd, int regTemp, int XYZW);
void vuFloat2(int regd, int regTemp, int XYZW);
void vuFloat3(uptr x86ptr);
void vuFloat4(int regd, int regTemp, int XYZW);
void vuFloat4_useEAX(int regd, int regTemp, int XYZW);
void vuFloat5(int regd, int regTemp, int XYZW);
void vuFloat5_useEAX(int regd, int regTemp, int XYZW);
void _vuFlipRegSS(VURegs * VU, int reg);
void _vuFlipRegSS_xyzw(int reg, int xyzw);
void _vuMoveSS(VURegs * VU, int dstreg, int srcreg);
void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw);
void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw);
void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw);
void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw);
// Shorthand for VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W).
// _X_Y_Z_W is not defined in this header and is resolved where the macro is expanded.
// NOTE(review): presumably it is the xyzw field mask of the instruction being
// recompiled -- confirm.
#define VU_MERGE_REGS(dest, src) { \
VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W); \
}
// use for allocating vi regs
//
// These helpers read `info` (ALLOCTEMPX86, ALLOCVI) and `VU` (ALLOCVI, ADD_VI_NEEDED)
// from the scope in which they are expanded.
//   ALLOCTEMPX86(mode)  - calls _allocX86reg for an X86TYPE_TEMP register.
//   ALLOCVI(vi, mode)   - calls _allocX86reg for VI register `vi`; X86TYPE_VU1 is OR-ed
//                         into the type when VU is &VU1.
//   ADD_VI_NEEDED(vi)   - calls _addNeededX86reg for VI register `vi`. The trailing
//                         semicolon is part of the macro and is kept for existing callers.
// MODE_NOFRAME is OR-ed into the mode unless `info` has PROCESS_VU_SUPER set.
// Macro arguments are parenthesized so an argument containing a lower-precedence
// operator (e.g. `cond ? MODE_READ : MODE_WRITE`) is OR-ed in as a whole.
#define ALLOCTEMPX86(mode) _allocX86reg(-1, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|(mode))
#define ALLOCVI(vi, mode) _allocX86reg(-1, X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), (vi), ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|(mode))
#define ADD_VI_NEEDED(vi) _addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), (vi));
// Exchanges the 32-bit contents of two lvalues, accessing both through u32 (so it also
// works on other 32-bit objects at the bit level).
// Uses a temporary rather than a chained XOR swap: the chained form modifies `y` twice
// in one expression without sequencing, and zeroes the value when x and y name the same
// object. Expands to a braced statement, so it can be written with or without a
// trailing semicolon, as before.
#define SWAP(x, y) { const u32 swapTmp_ = *(u32*)&(x); *(u32*)&(x) = *(u32*)&(y); *(u32*)&(y) = swapTmp_; }
/*****************************************
VU Micromode Upper instructions
*****************************************/
void recVUMI_ABS(VURegs *vuRegs, int info);
void recVUMI_ADD(VURegs *vuRegs, int info);
void recVUMI_ADDi(VURegs *vuRegs, int info);
void recVUMI_ADDq(VURegs *vuRegs, int info);
void recVUMI_ADDx(VURegs *vuRegs, int info);
void recVUMI_ADDy(VURegs *vuRegs, int info);
void recVUMI_ADDz(VURegs *vuRegs, int info);
void recVUMI_ADDw(VURegs *vuRegs, int info);
void recVUMI_ADDA(VURegs *vuRegs, int info);
void recVUMI_ADDAi(VURegs *vuRegs, int info);
void recVUMI_ADDAq(VURegs *vuRegs, int info);
void recVUMI_ADDAx(VURegs *vuRegs, int info);
void recVUMI_ADDAy(VURegs *vuRegs, int info);
void recVUMI_ADDAz(VURegs *vuRegs, int info);
void recVUMI_ADDAw(VURegs *vuRegs, int info);
void recVUMI_SUB(VURegs *vuRegs, int info);
void recVUMI_SUBi(VURegs *vuRegs, int info);
void recVUMI_SUBq(VURegs *vuRegs, int info);
void recVUMI_SUBx(VURegs *vuRegs, int info);
void recVUMI_SUBy(VURegs *vuRegs, int info);
void recVUMI_SUBz(VURegs *vuRegs, int info);
void recVUMI_SUBw(VURegs *vuRegs, int info);
void recVUMI_SUBA(VURegs *vuRegs, int info);
void recVUMI_SUBAi(VURegs *vuRegs, int info);
void recVUMI_SUBAq(VURegs *vuRegs, int info);
void recVUMI_SUBAx(VURegs *vuRegs, int info);
void recVUMI_SUBAy(VURegs *vuRegs, int info);
void recVUMI_SUBAz(VURegs *vuRegs, int info);
void recVUMI_SUBAw(VURegs *vuRegs, int info);
void recVUMI_MUL(VURegs *vuRegs, int info);
void recVUMI_MULi(VURegs *vuRegs, int info);
void recVUMI_MULq(VURegs *vuRegs, int info);
void recVUMI_MULx(VURegs *vuRegs, int info);
void recVUMI_MULy(VURegs *vuRegs, int info);
void recVUMI_MULz(VURegs *vuRegs, int info);
void recVUMI_MULw(VURegs *vuRegs, int info);
void recVUMI_MULA(VURegs *vuRegs, int info);
void recVUMI_MULAi(VURegs *vuRegs, int info);
void recVUMI_MULAq(VURegs *vuRegs, int info);
void recVUMI_MULAx(VURegs *vuRegs, int info);
void recVUMI_MULAy(VURegs *vuRegs, int info);
void recVUMI_MULAz(VURegs *vuRegs, int info);
void recVUMI_MULAw(VURegs *vuRegs, int info);
void recVUMI_MADD(VURegs *vuRegs, int info);
void recVUMI_MADDi(VURegs *vuRegs, int info);
void recVUMI_MADDq(VURegs *vuRegs, int info);
void recVUMI_MADDx(VURegs *vuRegs, int info);
void recVUMI_MADDy(VURegs *vuRegs, int info);
void recVUMI_MADDz(VURegs *vuRegs, int info);
void recVUMI_MADDw(VURegs *vuRegs, int info);
void recVUMI_MADDA(VURegs *vuRegs, int info);
void recVUMI_MADDAi(VURegs *vuRegs, int info);
void recVUMI_MADDAq(VURegs *vuRegs, int info);
void recVUMI_MADDAx(VURegs *vuRegs, int info);
void recVUMI_MADDAy(VURegs *vuRegs, int info);
void recVUMI_MADDAz(VURegs *vuRegs, int info);
void recVUMI_MADDAw(VURegs *vuRegs, int info);
void recVUMI_MSUB(VURegs *vuRegs, int info);
void recVUMI_MSUBi(VURegs *vuRegs, int info);
void recVUMI_MSUBq(VURegs *vuRegs, int info);
void recVUMI_MSUBx(VURegs *vuRegs, int info);
void recVUMI_MSUBy(VURegs *vuRegs, int info);
void recVUMI_MSUBz(VURegs *vuRegs, int info);
void recVUMI_MSUBw(VURegs *vuRegs, int info);
void recVUMI_MSUBA(VURegs *vuRegs, int info);
void recVUMI_MSUBAi(VURegs *vuRegs, int info);
void recVUMI_MSUBAq(VURegs *vuRegs, int info);
void recVUMI_MSUBAx(VURegs *vuRegs, int info);
void recVUMI_MSUBAy(VURegs *vuRegs, int info);
void recVUMI_MSUBAz(VURegs *vuRegs, int info);
void recVUMI_MSUBAw(VURegs *vuRegs, int info);
void recVUMI_MAX(VURegs *vuRegs, int info);
void recVUMI_MAXi(VURegs *vuRegs, int info);
void recVUMI_MAXx(VURegs *vuRegs, int info);
void recVUMI_MAXy(VURegs *vuRegs, int info);
void recVUMI_MAXz(VURegs *vuRegs, int info);
void recVUMI_MAXw(VURegs *vuRegs, int info);
void recVUMI_MINI(VURegs *vuRegs, int info);
void recVUMI_MINIi(VURegs *vuRegs, int info);
void recVUMI_MINIx(VURegs *vuRegs, int info);
void recVUMI_MINIy(VURegs *vuRegs, int info);
void recVUMI_MINIz(VURegs *vuRegs, int info);
void recVUMI_MINIw(VURegs *vuRegs, int info);
void recVUMI_OPMULA(VURegs *vuRegs, int info);
void recVUMI_OPMSUB(VURegs *vuRegs, int info);
void recVUMI_NOP(VURegs *vuRegs, int info);
void recVUMI_FTOI0(VURegs *vuRegs, int info);
void recVUMI_FTOI4(VURegs *vuRegs, int info);
void recVUMI_FTOI12(VURegs *vuRegs, int info);
void recVUMI_FTOI15(VURegs *vuRegs, int info);
void recVUMI_ITOF0(VURegs *vuRegs, int info);
void recVUMI_ITOF4(VURegs *vuRegs, int info);
void recVUMI_ITOF12(VURegs *vuRegs, int info);
void recVUMI_ITOF15(VURegs *vuRegs, int info);
void recVUMI_CLIP(VURegs *vuRegs, int info);
/*****************************************
VU Micromode Lower instructions
*****************************************/
void recVUMI_DIV(VURegs *vuRegs, int info);
void recVUMI_SQRT(VURegs *vuRegs, int info);
void recVUMI_RSQRT(VURegs *vuRegs, int info);
void recVUMI_IADD(VURegs *vuRegs, int info);
void recVUMI_IADDI(VURegs *vuRegs, int info);
void recVUMI_IADDIU(VURegs *vuRegs, int info);
void recVUMI_IAND(VURegs *vuRegs, int info);
void recVUMI_IOR(VURegs *vuRegs, int info);
void recVUMI_ISUB(VURegs *vuRegs, int info);
void recVUMI_ISUBIU(VURegs *vuRegs, int info);
void recVUMI_MOVE(VURegs *vuRegs, int info);
void recVUMI_MFIR(VURegs *vuRegs, int info);
void recVUMI_MTIR(VURegs *vuRegs, int info);
void recVUMI_MR32(VURegs *vuRegs, int info);
void recVUMI_LQ(VURegs *vuRegs, int info);
void recVUMI_LQD(VURegs *vuRegs, int info);
void recVUMI_LQI(VURegs *vuRegs, int info);
void recVUMI_SQ(VURegs *vuRegs, int info);
void recVUMI_SQD(VURegs *vuRegs, int info);
void recVUMI_SQI(VURegs *vuRegs, int info);
void recVUMI_ILW(VURegs *vuRegs, int info);
void recVUMI_ISW(VURegs *vuRegs, int info);
void recVUMI_ILWR(VURegs *vuRegs, int info);
void recVUMI_ISWR(VURegs *vuRegs, int info);
void recVUMI_LOI(VURegs *vuRegs, int info);
void recVUMI_RINIT(VURegs *vuRegs, int info);
void recVUMI_RGET(VURegs *vuRegs, int info);
void recVUMI_RNEXT(VURegs *vuRegs, int info);
void recVUMI_RXOR(VURegs *vuRegs, int info);
void recVUMI_WAITQ(VURegs *vuRegs, int info);
void recVUMI_FSAND(VURegs *vuRegs, int info);
void recVUMI_FSEQ(VURegs *vuRegs, int info);
void recVUMI_FSOR(VURegs *vuRegs, int info);
void recVUMI_FSSET(VURegs *vuRegs, int info);
void recVUMI_FMAND(VURegs *vuRegs, int info);
void recVUMI_FMEQ(VURegs *vuRegs, int info);
void recVUMI_FMOR(VURegs *vuRegs, int info);
void recVUMI_FCAND(VURegs *vuRegs, int info);
void recVUMI_FCEQ(VURegs *vuRegs, int info);
void recVUMI_FCOR(VURegs *vuRegs, int info);
void recVUMI_FCSET(VURegs *vuRegs, int info);
void recVUMI_FCGET(VURegs *vuRegs, int info);
void recVUMI_IBEQ(VURegs *vuRegs, int info);
void recVUMI_IBGEZ(VURegs *vuRegs, int info);
void recVUMI_IBGTZ(VURegs *vuRegs, int info);
void recVUMI_IBLTZ(VURegs *vuRegs, int info);
void recVUMI_IBLEZ(VURegs *vuRegs, int info);
void recVUMI_IBNE(VURegs *vuRegs, int info);
void recVUMI_B(VURegs *vuRegs, int info);
void recVUMI_BAL(VURegs *vuRegs, int info);
void recVUMI_JR(VURegs *vuRegs, int info);
void recVUMI_JALR(VURegs *vuRegs, int info);
void recVUMI_MFP(VURegs *vuRegs, int info);
void recVUMI_WAITP(VURegs *vuRegs, int info);
void recVUMI_ESADD(VURegs *vuRegs, int info);
void recVUMI_ERSADD(VURegs *vuRegs, int info);
void recVUMI_ELENG(VURegs *vuRegs, int info);
void recVUMI_ERLENG(VURegs *vuRegs, int info);
void recVUMI_EATANxy(VURegs *vuRegs, int info);
void recVUMI_EATANxz(VURegs *vuRegs, int info);
void recVUMI_ESUM(VURegs *vuRegs, int info);
void recVUMI_ERCPR(VURegs *vuRegs, int info);
void recVUMI_ESQRT(VURegs *vuRegs, int info);
void recVUMI_ERSQRT(VURegs *vuRegs, int info);
void recVUMI_ESIN(VURegs *vuRegs, int info);
void recVUMI_EATAN(VURegs *vuRegs, int info);
void recVUMI_EEXP(VURegs *vuRegs, int info);
void recVUMI_XGKICK(VURegs *vuRegs, int info);
void recVUMI_XTOP(VURegs *vuRegs, int info);
void recVUMI_XITOP(VURegs *vuRegs, int info);
void recVUMI_XTOP( VURegs *VU , int info);
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "VUmicro.h"
extern u32 vudump;
#define VU0_MEMSIZE 0x1000
#define VU1_MEMSIZE 0x4000
void recResetVU0();
void recExecuteVU0Block();
void recClearVU0( u32 Addr, u32 Size );
void recVU1Init();
void recVU1Shutdown();
void recResetVU1();
void recExecuteVU1Block();
void recClearVU1( u32 Addr, u32 Size );
u32 GetVIAddr(VURegs * VU, int reg, int read, int info); // returns the correct VI addr
void recUpdateFlags(VURegs * VU, int reg, int info);
void _recvuTestPipes(VURegs * VU);
void _recvuFlushFDIV(VURegs * VU);
void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
#define VUOP_READ 2
#define VUOP_WRITE 4
// save on mem: the flag and q/p fields are single bytes to keep the record small.
// NOTE(review): field meanings are not documented in this header. SuperVUAnalyzeOp()
// takes a pointer to this struct as its `info` argument, so presumably it is the
// per-instruction analysis record -- confirm against the implementation.
struct _vuopinfo {
int cycle;
int cycles;
u8 statusflag;
u8 macflag;
u8 clipflag;
u8 dummy; // NOTE(review): looks like an unused/padding byte -- confirm
u8 q;
u8 p;
u16 pqinst; // bit of instruction specifying index (srec only)
};
// Takes a _vuopinfo to work on plus the register description of the instruction
// (pCodeRegs). NOTE(review): direction of data flow (in/out) is not visible here -- confirm.
void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
// Declared __fastcall: callers and the definition must agree on that calling convention.
void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
// Defined elsewhere. NOTE(review): presumably the recompiler's running VU cycle counter -- confirm.
extern int vucycle;
// Pointer to a function taking a destination register index and a temp register index.
typedef void (*vFloat)(int regd, int regTemp);
// Dispatch tables of 16 vFloat handlers each (defined elsewhere).
// NOTE(review): 16 entries presumably correspond to the 4-bit XYZW field mask that the
// vuFloat* helpers receive -- confirm against the table definitions.
extern vFloat vFloats1[16];
extern vFloat vFloats1_useEAX[16];
extern vFloat vFloats2[16];
extern vFloat vFloats4[16];
extern vFloat vFloats4_useEAX[16];
// 16-byte aligned constant tables (defined elsewhere); contents are not visible from this header.
extern const __aligned16 float s_fones[8];
extern const __aligned16 u32 s_mask[4];
extern const __aligned16 u32 s_expmask[4];
extern const __aligned16 u32 g_minvals[4];
extern const __aligned16 u32 g_maxvals[4];
extern const __aligned16 u32 const_clip[8];
// NOTE(review): GetVIAddr is already declared earlier in this header with the same
// signature; this second declaration is redundant (harmless) -- consider removing.
u32 GetVIAddr(VURegs * VU, int reg, int read, int info);
int _vuGetTempXMMreg(int info);
// vuFloat* family: variants differ in which scratch resource they take (a temp register
// index, or the *_useEAX flavor). NOTE(review): presumably float clamping helpers that pair
// with the vFloats* tables -- confirm in the implementation.
void vuFloat(int info, int regd, int XYZW);
void vuFloat_useEAX(int regd, int regTemp, int XYZW);
void vuFloat2(int regd, int regTemp, int XYZW);
// Unlike its siblings this one takes an address (uptr) instead of register indices.
void vuFloat3(uptr x86ptr);
void vuFloat4(int regd, int regTemp, int XYZW);
void vuFloat4_useEAX(int regd, int regTemp, int XYZW);
void vuFloat5(int regd, int regTemp, int XYZW);
void vuFloat5_useEAX(int regd, int regTemp, int XYZW);
// Register shuffle/move/unpack helpers; the xyzw argument selects vector fields.
void _vuFlipRegSS(VURegs * VU, int reg);
void _vuFlipRegSS_xyzw(int reg, int xyzw);
void _vuMoveSS(VURegs * VU, int dstreg, int srcreg);
void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw);
void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw);
// Merge helpers taking an explicit xyzw field selection (see also the VU_MERGE_REGS macro).
void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw);
void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw);
// Convenience wrapper: forwards dest/src to VU_MERGE_REGS_CUSTOM, passing _X_Y_Z_W (defined
// elsewhere, resolved at the expansion site) as the xyzw argument.
// Expands to a braced block, so it can only be used as a statement, not inside an expression.
#define VU_MERGE_REGS(dest, src) { \
VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W); \
}
// use for allocating vi regs
// Calls _allocX86reg(-1, X86TYPE_TEMP, 0, <mode>) to obtain a temporary x86 register.
// MODE_NOFRAME is OR'ed into the requested mode unless `info` has PROCESS_VU_SUPER set;
// `info` is not a macro parameter and must be in scope at the expansion site.
// `mode` is parenthesized so that low-precedence arguments (e.g. `c ? a : b`) are OR'ed in
// as a whole instead of being re-associated with the `|`.
#define ALLOCTEMPX86(mode) _allocX86reg(-1, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|(mode))
// Calls _allocX86reg for VI register `vi`. The type is X86TYPE_VI, with X86TYPE_VU1 added
// when the `VU` pointer at the expansion site equals &VU1. MODE_NOFRAME is OR'ed into the
// mode unless `info` has PROCESS_VU_SUPER set. `VU` and `info` are not parameters: both
// must be in scope where the macro is used.
// `vi` and `mode` are parenthesized so compound argument expressions keep their meaning.
#define ALLOCVI(vi, mode) _allocX86reg(-1, X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), (vi), ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|(mode))
// Calls _addNeededX86reg for VI register `vi`, adding X86TYPE_VU1 to the type when the `VU`
// pointer in scope at the expansion site equals &VU1.
// NOTE(review): the expansion already ends in ';', so `ADD_VI_NEEDED(x);` yields an extra
// empty statement (breaks an unbraced if/else). Left as-is because callers may rely on it.
#define ADD_VI_NEEDED(vi) _addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), vi);
// Swaps the 32-bit contents of the lvalues x and y (both are accessed through a u32*, so
// they must be 32-bit objects). Expands to a braced statement block; usable with or
// without a trailing ';' at the call site, like the previous definition.
// Implemented with a temporary instead of the old chained XOR assignment: that form
// modified y twice without sequencing (undefined behavior) and zeroed the value when x and
// y named the same object.
#define SWAP(x, y) { u32 swap_tmp32_ = *(u32*)&(x); *(u32*)&(x) = *(u32*)&(y); *(u32*)&(y) = swap_tmp32_; }
/*****************************************
VU Micromode Upper instructions
*****************************************/
// One prototype per upper-pipeline mnemonic; all share the signature (VURegs*, int info).
// The sub-headings below only group the names by mnemonic prefix. Suffixes i/q/x/y/z/w
// follow the instruction naming (NOTE(review): presumably the I/Q register and per-field
// broadcast variants -- confirm in the implementation).
void recVUMI_ABS(VURegs *vuRegs, int info);
// ADD family
void recVUMI_ADD(VURegs *vuRegs, int info);
void recVUMI_ADDi(VURegs *vuRegs, int info);
void recVUMI_ADDq(VURegs *vuRegs, int info);
void recVUMI_ADDx(VURegs *vuRegs, int info);
void recVUMI_ADDy(VURegs *vuRegs, int info);
void recVUMI_ADDz(VURegs *vuRegs, int info);
void recVUMI_ADDw(VURegs *vuRegs, int info);
// ADDA family
void recVUMI_ADDA(VURegs *vuRegs, int info);
void recVUMI_ADDAi(VURegs *vuRegs, int info);
void recVUMI_ADDAq(VURegs *vuRegs, int info);
void recVUMI_ADDAx(VURegs *vuRegs, int info);
void recVUMI_ADDAy(VURegs *vuRegs, int info);
void recVUMI_ADDAz(VURegs *vuRegs, int info);
void recVUMI_ADDAw(VURegs *vuRegs, int info);
// SUB family
void recVUMI_SUB(VURegs *vuRegs, int info);
void recVUMI_SUBi(VURegs *vuRegs, int info);
void recVUMI_SUBq(VURegs *vuRegs, int info);
void recVUMI_SUBx(VURegs *vuRegs, int info);
void recVUMI_SUBy(VURegs *vuRegs, int info);
void recVUMI_SUBz(VURegs *vuRegs, int info);
void recVUMI_SUBw(VURegs *vuRegs, int info);
// SUBA family
void recVUMI_SUBA(VURegs *vuRegs, int info);
void recVUMI_SUBAi(VURegs *vuRegs, int info);
void recVUMI_SUBAq(VURegs *vuRegs, int info);
void recVUMI_SUBAx(VURegs *vuRegs, int info);
void recVUMI_SUBAy(VURegs *vuRegs, int info);
void recVUMI_SUBAz(VURegs *vuRegs, int info);
void recVUMI_SUBAw(VURegs *vuRegs, int info);
// MUL family
void recVUMI_MUL(VURegs *vuRegs, int info);
void recVUMI_MULi(VURegs *vuRegs, int info);
void recVUMI_MULq(VURegs *vuRegs, int info);
void recVUMI_MULx(VURegs *vuRegs, int info);
void recVUMI_MULy(VURegs *vuRegs, int info);
void recVUMI_MULz(VURegs *vuRegs, int info);
void recVUMI_MULw(VURegs *vuRegs, int info);
// MULA family
void recVUMI_MULA(VURegs *vuRegs, int info);
void recVUMI_MULAi(VURegs *vuRegs, int info);
void recVUMI_MULAq(VURegs *vuRegs, int info);
void recVUMI_MULAx(VURegs *vuRegs, int info);
void recVUMI_MULAy(VURegs *vuRegs, int info);
void recVUMI_MULAz(VURegs *vuRegs, int info);
void recVUMI_MULAw(VURegs *vuRegs, int info);
// MADD family
void recVUMI_MADD(VURegs *vuRegs, int info);
void recVUMI_MADDi(VURegs *vuRegs, int info);
void recVUMI_MADDq(VURegs *vuRegs, int info);
void recVUMI_MADDx(VURegs *vuRegs, int info);
void recVUMI_MADDy(VURegs *vuRegs, int info);
void recVUMI_MADDz(VURegs *vuRegs, int info);
void recVUMI_MADDw(VURegs *vuRegs, int info);
// MADDA family
void recVUMI_MADDA(VURegs *vuRegs, int info);
void recVUMI_MADDAi(VURegs *vuRegs, int info);
void recVUMI_MADDAq(VURegs *vuRegs, int info);
void recVUMI_MADDAx(VURegs *vuRegs, int info);
void recVUMI_MADDAy(VURegs *vuRegs, int info);
void recVUMI_MADDAz(VURegs *vuRegs, int info);
void recVUMI_MADDAw(VURegs *vuRegs, int info);
// MSUB family
void recVUMI_MSUB(VURegs *vuRegs, int info);
void recVUMI_MSUBi(VURegs *vuRegs, int info);
void recVUMI_MSUBq(VURegs *vuRegs, int info);
void recVUMI_MSUBx(VURegs *vuRegs, int info);
void recVUMI_MSUBy(VURegs *vuRegs, int info);
void recVUMI_MSUBz(VURegs *vuRegs, int info);
void recVUMI_MSUBw(VURegs *vuRegs, int info);
// MSUBA family
void recVUMI_MSUBA(VURegs *vuRegs, int info);
void recVUMI_MSUBAi(VURegs *vuRegs, int info);
void recVUMI_MSUBAq(VURegs *vuRegs, int info);
void recVUMI_MSUBAx(VURegs *vuRegs, int info);
void recVUMI_MSUBAy(VURegs *vuRegs, int info);
void recVUMI_MSUBAz(VURegs *vuRegs, int info);
void recVUMI_MSUBAw(VURegs *vuRegs, int info);
// MAX / MINI families (no q variant is declared for these)
void recVUMI_MAX(VURegs *vuRegs, int info);
void recVUMI_MAXi(VURegs *vuRegs, int info);
void recVUMI_MAXx(VURegs *vuRegs, int info);
void recVUMI_MAXy(VURegs *vuRegs, int info);
void recVUMI_MAXz(VURegs *vuRegs, int info);
void recVUMI_MAXw(VURegs *vuRegs, int info);
void recVUMI_MINI(VURegs *vuRegs, int info);
void recVUMI_MINIi(VURegs *vuRegs, int info);
void recVUMI_MINIx(VURegs *vuRegs, int info);
void recVUMI_MINIy(VURegs *vuRegs, int info);
void recVUMI_MINIz(VURegs *vuRegs, int info);
void recVUMI_MINIw(VURegs *vuRegs, int info);
// OPMULA / OPMSUB / NOP
void recVUMI_OPMULA(VURegs *vuRegs, int info);
void recVUMI_OPMSUB(VURegs *vuRegs, int info);
void recVUMI_NOP(VURegs *vuRegs, int info);
// FTOI* / ITOF* conversions (numeric suffixes 0, 4, 12, 15)
void recVUMI_FTOI0(VURegs *vuRegs, int info);
void recVUMI_FTOI4(VURegs *vuRegs, int info);
void recVUMI_FTOI12(VURegs *vuRegs, int info);
void recVUMI_FTOI15(VURegs *vuRegs, int info);
void recVUMI_ITOF0(VURegs *vuRegs, int info);
void recVUMI_ITOF4(VURegs *vuRegs, int info);
void recVUMI_ITOF12(VURegs *vuRegs, int info);
void recVUMI_ITOF15(VURegs *vuRegs, int info);
// CLIP
void recVUMI_CLIP(VURegs *vuRegs, int info);
/*****************************************
VU Micromode Lower instructions
*****************************************/
// One prototype per lower-pipeline mnemonic; all share the signature (VURegs*, int info).
// The sub-headings below only group the names by mnemonic; semantics live in the
// implementations, which are not part of this header.
// DIV / SQRT / RSQRT
void recVUMI_DIV(VURegs *vuRegs, int info);
void recVUMI_SQRT(VURegs *vuRegs, int info);
void recVUMI_RSQRT(VURegs *vuRegs, int info);
// I* integer ops
void recVUMI_IADD(VURegs *vuRegs, int info);
void recVUMI_IADDI(VURegs *vuRegs, int info);
void recVUMI_IADDIU(VURegs *vuRegs, int info);
void recVUMI_IAND(VURegs *vuRegs, int info);
void recVUMI_IOR(VURegs *vuRegs, int info);
void recVUMI_ISUB(VURegs *vuRegs, int info);
void recVUMI_ISUBIU(VURegs *vuRegs, int info);
// MOVE / MFIR / MTIR / MR32
void recVUMI_MOVE(VURegs *vuRegs, int info);
void recVUMI_MFIR(VURegs *vuRegs, int info);
void recVUMI_MTIR(VURegs *vuRegs, int info);
void recVUMI_MR32(VURegs *vuRegs, int info);
// LQ* / SQ* / ILW* / ISW*
void recVUMI_LQ(VURegs *vuRegs, int info);
void recVUMI_LQD(VURegs *vuRegs, int info);
void recVUMI_LQI(VURegs *vuRegs, int info);
void recVUMI_SQ(VURegs *vuRegs, int info);
void recVUMI_SQD(VURegs *vuRegs, int info);
void recVUMI_SQI(VURegs *vuRegs, int info);
void recVUMI_ILW(VURegs *vuRegs, int info);
void recVUMI_ISW(VURegs *vuRegs, int info);
void recVUMI_ILWR(VURegs *vuRegs, int info);
void recVUMI_ISWR(VURegs *vuRegs, int info);
// LOI
void recVUMI_LOI(VURegs *vuRegs, int info);
// R* ops
void recVUMI_RINIT(VURegs *vuRegs, int info);
void recVUMI_RGET(VURegs *vuRegs, int info);
void recVUMI_RNEXT(VURegs *vuRegs, int info);
void recVUMI_RXOR(VURegs *vuRegs, int info);
// WAITQ
void recVUMI_WAITQ(VURegs *vuRegs, int info);
// FS* / FM* / FC* flag ops
void recVUMI_FSAND(VURegs *vuRegs, int info);
void recVUMI_FSEQ(VURegs *vuRegs, int info);
void recVUMI_FSOR(VURegs *vuRegs, int info);
void recVUMI_FSSET(VURegs *vuRegs, int info);
void recVUMI_FMAND(VURegs *vuRegs, int info);
void recVUMI_FMEQ(VURegs *vuRegs, int info);
void recVUMI_FMOR(VURegs *vuRegs, int info);
void recVUMI_FCAND(VURegs *vuRegs, int info);
void recVUMI_FCEQ(VURegs *vuRegs, int info);
void recVUMI_FCOR(VURegs *vuRegs, int info);
void recVUMI_FCSET(VURegs *vuRegs, int info);
void recVUMI_FCGET(VURegs *vuRegs, int info);
// IB* / B / BAL / JR / JALR
void recVUMI_IBEQ(VURegs *vuRegs, int info);
void recVUMI_IBGEZ(VURegs *vuRegs, int info);
void recVUMI_IBGTZ(VURegs *vuRegs, int info);
void recVUMI_IBLTZ(VURegs *vuRegs, int info);
void recVUMI_IBLEZ(VURegs *vuRegs, int info);
void recVUMI_IBNE(VURegs *vuRegs, int info);
void recVUMI_B(VURegs *vuRegs, int info);
void recVUMI_BAL(VURegs *vuRegs, int info);
void recVUMI_JR(VURegs *vuRegs, int info);
void recVUMI_JALR(VURegs *vuRegs, int info);
// MFP / WAITP
void recVUMI_MFP(VURegs *vuRegs, int info);
void recVUMI_WAITP(VURegs *vuRegs, int info);
// E* ops
void recVUMI_ESADD(VURegs *vuRegs, int info);
void recVUMI_ERSADD(VURegs *vuRegs, int info);
void recVUMI_ELENG(VURegs *vuRegs, int info);
void recVUMI_ERLENG(VURegs *vuRegs, int info);
void recVUMI_EATANxy(VURegs *vuRegs, int info);
void recVUMI_EATANxz(VURegs *vuRegs, int info);
void recVUMI_ESUM(VURegs *vuRegs, int info);
void recVUMI_ERCPR(VURegs *vuRegs, int info);
void recVUMI_ESQRT(VURegs *vuRegs, int info);
void recVUMI_ERSQRT(VURegs *vuRegs, int info);
void recVUMI_ESIN(VURegs *vuRegs, int info);
void recVUMI_EATAN(VURegs *vuRegs, int info);
void recVUMI_EEXP(VURegs *vuRegs, int info);
// XGKICK / XTOP / XITOP
void recVUMI_XGKICK(VURegs *vuRegs, int info);
void recVUMI_XTOP(VURegs *vuRegs, int info);
void recVUMI_XITOP(VURegs *vuRegs, int info);
// NOTE(review): redundant re-declaration of recVUMI_XTOP (same parameter types as the
// declaration two lines up). Harmless, but looks like a leftover -- confirm and consider removing.
void recVUMI_XTOP( VURegs *VU , int info);

File diff suppressed because it is too large Load Diff

View File

@ -1,73 +1,73 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Super VU recompiler - author: zerofrog(@gmail.com)
#pragma once
#include "sVU_Micro.h"
//Using assembly code from an external file.
// When __LINUX__ is defined the three symbols below are declared with C linkage
// (extern "C"); on other builds they keep the default C++ linkage.
#ifdef __LINUX__
extern "C" {
#endif
// startpc: program start address; vuindex: which VU to run on (NOTE(review): presumably
// 0 = VU0, 1 = VU1 -- confirm).
extern void SuperVUExecuteProgram(u32 startpc, int vuindex);
extern void SuperVUEndProgram();
extern void svudispfntemp();
#ifdef __LINUX__
}
#endif
// Per-VU destroy/reset entry points (NOTE(review): vuindex presumably selects VU0 or VU1
// -- confirm in the implementation).
extern void SuperVUDestroy(int vuindex);
extern void SuperVUReset(int vuindex);
// Returns a u32 address for VI register `reg`; `read` selects the access kind:
// read = 0, will write to reg
// read = 1, will read from reg
// read = 2, addr of previously written reg (used for status and clip flags)
extern u32 SuperVUGetVIAddr(int reg, int read);
// if p == 0, flush q else flush p; if wait is != 0, waits for p/q
extern void SuperVUFlush(int p, int wait);
// SuperVU recompiler front-end for VU0; publicly derives from BaseVUmicroCPU.
// Only the two name getters are defined inline; the constructor and the remaining
// members are declared here and implemented elsewhere.
class recSuperVU0 : public BaseVUmicroCPU
{
public:
recSuperVU0();
// Short identifier string: "sVU0".
const char* GetShortName() const { return "sVU0"; }
// Descriptive name: the wide literal L"SuperVU0 Recompiler", returned as a wxString.
wxString GetLongName() const { return L"SuperVU0 Recompiler"; }
void Allocate();
// Declared with an empty exception specification: Shutdown must not throw.
void Shutdown() throw();
void Reset();
void ExecuteBlock();
// NOTE(review): Addr/Size presumably describe the VU memory range whose recompiled code
// is to be cleared -- confirm in the implementation.
void Clear(u32 Addr, u32 Size);
};
// SuperVU recompiler front-end for VU1; publicly derives from BaseVUmicroCPU.
// Only the two name getters are defined inline; the constructor and the remaining
// members are declared here and implemented elsewhere.
class recSuperVU1 : public BaseVUmicroCPU
{
public:
recSuperVU1();
// Short identifier string: "sVU1".
const char* GetShortName() const { return "sVU1"; }
// Descriptive name: the wide literal L"SuperVU1 Recompiler", returned as a wxString.
wxString GetLongName() const { return L"SuperVU1 Recompiler"; }
void Allocate();
// Declared with an empty exception specification: Shutdown must not throw.
void Shutdown() throw();
void Reset();
void ExecuteBlock();
// NOTE(review): Addr/Size presumably describe the VU memory range whose recompiled code
// is to be cleared -- confirm in the implementation.
void Clear(u32 Addr, u32 Size);
};
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Super VU recompiler - author: zerofrog(@gmail.com)
#pragma once
#include "sVU_Micro.h"
//Using assembly code from an external file.
// When __LINUX__ is defined the three symbols below are declared with C linkage
// (extern "C"); on other builds they keep the default C++ linkage.
#ifdef __LINUX__
extern "C" {
#endif
// startpc: program start address; vuindex: which VU to run on (NOTE(review): presumably
// 0 = VU0, 1 = VU1 -- confirm).
extern void SuperVUExecuteProgram(u32 startpc, int vuindex);
extern void SuperVUEndProgram();
extern void svudispfntemp();
#ifdef __LINUX__
}
#endif
// Per-VU destroy/reset entry points (NOTE(review): vuindex presumably selects VU0 or VU1
// -- confirm in the implementation).
extern void SuperVUDestroy(int vuindex);
extern void SuperVUReset(int vuindex);
// Returns a u32 address for VI register `reg`; `read` selects the access kind:
// read = 0, will write to reg
// read = 1, will read from reg
// read = 2, addr of previously written reg (used for status and clip flags)
extern u32 SuperVUGetVIAddr(int reg, int read);
// if p == 0, flush q else flush p; if wait is != 0, waits for p/q
extern void SuperVUFlush(int p, int wait);
// SuperVU recompiler front-end for VU0; publicly derives from BaseVUmicroCPU.
// Only the two name getters are defined inline; the constructor and the remaining
// members are declared here and implemented elsewhere.
class recSuperVU0 : public BaseVUmicroCPU
{
public:
recSuperVU0();
// Short identifier string: "sVU0".
const char* GetShortName() const { return "sVU0"; }
// Descriptive name: the wide literal L"SuperVU0 Recompiler", returned as a wxString.
wxString GetLongName() const { return L"SuperVU0 Recompiler"; }
void Allocate();
// Declared with an empty exception specification: Shutdown must not throw.
void Shutdown() throw();
void Reset();
void ExecuteBlock();
// NOTE(review): Addr/Size presumably describe the VU memory range whose recompiled code
// is to be cleared -- confirm in the implementation.
void Clear(u32 Addr, u32 Size);
};
// SuperVU recompiler front-end for VU1; publicly derives from BaseVUmicroCPU.
// Only the two name getters are defined inline; the constructor and the remaining
// members are declared here and implemented elsewhere.
class recSuperVU1 : public BaseVUmicroCPU
{
public:
recSuperVU1();
// Short identifier string: "sVU1".
const char* GetShortName() const { return "sVU1"; }
// Descriptive name: the wide literal L"SuperVU1 Recompiler", returned as a wxString.
wxString GetLongName() const { return L"SuperVU1 Recompiler"; }
void Allocate();
// Declared with an empty exception specification: Shutdown must not throw.
void Shutdown() throw();
void Reset();
void ExecuteBlock();
// NOTE(review): Addr/Size presumably describe the VU memory range whose recompiled code
// is to be cleared -- confirm in the implementation.
void Clear(u32 Addr, u32 Size);
};