diff --git a/common/src/Utilities/AlignedMalloc.cpp b/common/src/Utilities/AlignedMalloc.cpp
index d4bb6a1081..b5f5a05561 100644
--- a/common/src/Utilities/AlignedMalloc.cpp
+++ b/common/src/Utilities/AlignedMalloc.cpp
@@ -28,7 +28,7 @@ static const uint headsize = sizeof(AlignedMallocHeader);
void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
{
- jASSUME( align < 0x10000 );
+ pxAssume( align < 0x10000 );
u8* p = (u8*)malloc(size+align+headsize);
@@ -47,15 +47,16 @@ void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
+// Resizes an aligned allocation: a fresh buffer is obtained from pcsx2_aligned_malloc,
+// the old contents are copied into it, and the old allocation is released.
+// A NULL handle is accepted; in that case nothing is copied or freed and the new
+// buffer is simply returned.
+// NOTE(review): newbuf is used without a NULL check -- confirm how pcsx2_aligned_malloc
+// behaves when the underlying malloc fails.
void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align)
{
- if( handle == NULL ) return NULL;
- jASSUME( align < 0x10000 );
-
- AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
+ pxAssume( align < 0x10000 );
void* newbuf = pcsx2_aligned_malloc( size, align );
- memcpy_fast( newbuf, handle, std::min( size, header->size ) );
- free( header->baseptr );
+ if( handle != NULL )
+ {
+ // The bookkeeping header sits headsize bytes before the address handed to the caller.
+ AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
+ // Copy no more than the smaller of the requested size and the size recorded in the old header.
+ memcpy_fast( newbuf, handle, std::min( size, header->size ) );
+ // The pointer recorded in the header (not the aligned handle) is what goes to free().
+ free( header->baseptr );
+ }
return newbuf;
}
@@ -74,7 +75,7 @@ __forceinline void pcsx2_aligned_free(void* pmem)
// memzero_obj and stuff).
__forceinline void _memset16_unaligned( void* dest, u16 data, size_t size )
{
- jASSUME( (size & 0x1) == 0 );
+ pxAssume( (size & 0x1) == 0 );
u16* dst = (u16*)dest;
for(int i=size; i; --i, ++dst )
diff --git a/pcsx2/x86/VifUnpackSSE_Dynarec.cpp b/pcsx2/x86/VifUnpackSSE_Dynarec.cpp
index dbe6f3e779..b4ee375beb 100644
--- a/pcsx2/x86/VifUnpackSSE_Dynarec.cpp
+++ b/pcsx2/x86/VifUnpackSSE_Dynarec.cpp
@@ -1,282 +1,282 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets
-// authors: cottonvibes(@gmail.com)
-// Jake.Stine (@gmail.com)
-
-#include "PrecompiledHeader.h"
-#include "VifUnpackSSE.h"
-
-#if newVif
-
-static __aligned16 nVifBlock _vBlock = {0};
-static __pagealigned u8 nVifMemCmp[__pagesize];
-
-static void emitCustomCompare() {
- HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
- memset8<0xcc>(nVifMemCmp);
- xSetPtr(nVifMemCmp);
-
- xMOVAPS (xmm0, ptr32[ecx]);
- xPCMP.EQD(xmm0, ptr32[edx]);
- xMOVMSKPS(eax, xmm0);
- xAND (eax, 0x7); // ignore top 4 bytes (recBlock pointer)
-
- xRET();
- HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);
-}
-
-void dVifInit(int idx) {
- nVif[idx].idx = idx;
- nVif[idx].VU = idx ? &VU1 : &VU0;
- nVif[idx].vif = idx ? &vif1 : &vif0;
- nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
- nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
- nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
- nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache
- nVif[idx].vifBlocks = new HashBucket<_tParams>();
- nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
- nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone
- //emitCustomCompare();
-}
-
-// Loads Row/Col Data from vifRegs instead of g_vifmask
-// Useful for testing vifReg and g_vifmask inconsistency.
-static void loadRowCol(nVifStruct& v) {
- xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
- xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
- xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
- xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
- xPSHUF.D(xmm0, xmm0, _v0);
- xPSHUF.D(xmm1, xmm1, _v0);
- xPSHUF.D(xmm2, xmm2, _v0);
- xPSHUF.D(xmm6, xmm6, _v0);
- mVUmergeRegs(XMM6, XMM0, 8);
- mVUmergeRegs(XMM6, XMM1, 4);
- mVUmergeRegs(XMM6, XMM2, 2);
- xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
- xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
- xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
- xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
- xPSHUF.D(xmm2, xmm2, _v0);
- xPSHUF.D(xmm3, xmm3, _v0);
- xPSHUF.D(xmm4, xmm4, _v0);
- xPSHUF.D(xmm5, xmm5, _v0);
-}
-
-VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
- : v(vif_)
- , vB(vifBlock_)
-{
- isFill = (vB.cl < vB.wl);
- usn = (vB.upkType>>5) & 1;
- doMask = (vB.upkType>>4) & 1;
- doMode = vB.mode & 3;
-}
-
-#define makeMergeMask(x) { \
- x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \
-}
-
-_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
- u32 m0 = vB.mask;
- u32 m1 = m0 & 0xaaaaaaaa;
- u32 m2 =(~m1>>1) & m0;
- u32 m3 = (m1>>1) & ~m0;
- u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
- u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
- if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); }
- if (m3&&doMask) {
- xMOVAPS(xmmCol0, ptr32[col]);
- if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
- if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
- if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
- if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0);
- }
- //if (mask||mode) loadRowCol(v);
-}
-
-void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
- pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
- int cc = aMin(vCL, 3);
- u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
- u32 m1 = m0 & 0xaaaa;
- u32 m2 =(~m1>>1) & m0;
- u32 m3 = (m1>>1) & ~m0;
- u32 m4 = (m1>>1) & m0;
- makeMergeMask(m2);
- makeMergeMask(m3);
- makeMergeMask(m4);
- if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
- if (doMask&&m2) { mVUmergeRegs(regX.Id, xmmRow.Id, m2); } // Merge Row
- if (doMask&&m3) { mVUmergeRegs(regX.Id, xmmCol0.Id+cc, m3); } // Merge Col
- if (doMask&&m4) { mVUmergeRegs(regX.Id, xmmTemp.Id, m4); } // Merge Write Protect
- if (doMode) {
- u32 m5 = (~m1>>1) & ~m0;
- if (!doMask) m5 = 0xf;
- else makeMergeMask(m5);
- if (m5 < 0xf) {
- xPXOR(xmmTemp, xmmTemp);
- mVUmergeRegs(xmmTemp.Id, xmmRow.Id, m5);
- xPADD.D(regX, xmmTemp);
- if (doMode==2) mVUmergeRegs(xmmRow.Id, regX.Id, m5);
- }
- else if (m5 == 0xf) {
- xPADD.D(regX, xmmRow);
- if (doMode==2) xMOVAPS(xmmRow, regX);
- }
- }
- xMOVAPS(ptr32[dstIndirect], regX);
-}
-
-void VifUnpackSSE_Dynarec::writeBackRow() const {
- u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
- xMOVAPS(ptr32[row], xmmRow);
- DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
- // ToDo: Do we need to write back to vifregs.rX too!? :/
-}
-
-static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg )
-{
- // Shifts the displacement factor of a given indirect address, so that the address
- // remains in the optimal 0xf0 range (which allows for byte-form displacements when
- // generating instructions).
-
- int addImm = 0;
- while( addr.Displacement >= 0x80 )
- {
- addImm += 0xf0;
- addr -= 0xf0;
- }
- if(addImm) xADD(modReg, addImm);
-}
-
-void VifUnpackSSE_Dynarec::CompileRoutine() {
- const int upkNum = vB.upkType & 0xf;
- const u8& vift = nVifT[upkNum];
- const int cycleSize = isFill ? vB.cl : vB.wl;
- const int blockSize = isFill ? vB.wl : vB.cl;
- const int skipSize = blockSize - cycleSize;
-
- int vNum = vifRegs->num;
- vCL = vif->cl;
-
- SetMasks(cycleSize);
-
- while (vNum) {
-
- ShiftDisplacementWindow( srcIndirect, edx );
- ShiftDisplacementWindow( dstIndirect, ecx );
-
- if (vCL < cycleSize) {
- xUnpack(upkNum);
- srcIndirect += vift;
- dstIndirect += 16;
- vNum--;
- if (++vCL == blockSize) vCL = 0;
- }
- else if (isFill) {
- DevCon.WriteLn("filling mode!");
- VifUnpackSSE_Dynarec::FillingWrite( *this ).xUnpack(upkNum);
- dstIndirect += 16;
- vNum--;
- if (++vCL == blockSize) vCL = 0;
- }
- else {
- dstIndirect += (16 * skipSize);
- vCL = 0;
- }
- }
- if (doMode==2) writeBackRow();
- xMOV(ptr32[&vif->cl], vCL);
- xMOV(ptr32[&vifRegs->num], vNum);
- xRET();
-}
-
-static _f u8* dVifsetVUptr(const nVifStruct& v, int offset) {
- u8* ptr = (u8*)(v.VU->Mem + (offset & v.vuMemLimit));
- u8* endPtr = ptr + _vBlock.num * 16;
- if (endPtr > v.vuMemEnd) {
- DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
- ptr = NULL; // Fall Back to Interpreters which have wrap-around logic
- }
- return ptr;
-}
-
-static _f void dVifRecLimit(int idx) {
- if (nVif[idx].recPtr > nVif[idx].recEnd) {
- DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", nVif[idx].recPtr, nVif[idx].recEnd);
- nVif[idx].vifBlocks->clear();
- nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
- }
-}
-
-_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
-
- const nVifStruct& v = nVif[idx];
- const u8 upkType = vif->cmd & 0x1f | ((!!vif->usn) << 5);
- const int doMask = (upkType>>4) & 1;
-
- const int cycle_cl = vifRegs->cycle.cl;
- const int cycle_wl = vifRegs->cycle.wl;
- const int cycleSize = isFill ? cycle_cl : cycle_wl;
- const int blockSize = isFill ? cycle_wl : cycle_cl;
-
- if (vif->cl >= blockSize) vif->cl = 0;
-
- _vBlock.upkType = upkType;
- _vBlock.num = *(u8*)&vifRegs->num;
- _vBlock.mode = *(u8*)&vifRegs->mode;
- _vBlock.scl = vif->cl;
- _vBlock.cl = cycle_cl;
- _vBlock.wl = cycle_wl;
-
- // Zero out the mask parameter if it's unused -- games leave random junk
- // values here which cause false recblock cache misses.
- _vBlock.mask = doMask ? vifRegs->mask : 0x00;
-
- if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
- if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
- //DevCon.WriteLn("Running Recompiled Block!");
- ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
- }
- else {
- //DevCon.WriteLn("Running Interpreter Block");
- _nVifUnpack(idx, data, size, isFill);
- }
- return;
- }
- static int recBlockNum = 0;
- DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
- DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl/wl=0x%x/0x%x, mask=%s)",
- _vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
- doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
- );
-
- xSetPtr(v.recPtr);
- _vBlock.startPtr = (uptr)xGetAlignedCallTarget();
- v.vifBlocks->add(_vBlock);
- VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine();
- nVif[idx].recPtr = xGetPtr();
-
- dVifRecLimit(idx);
-
- // Run the block we just compiled. Various conditions may force us to still use
- // the interpreter unpacker though, so a recursive call is the safest way here...
- dVifUnpack(idx, data, size, isFill);
-}
-
-#endif
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets
+// authors: cottonvibes(@gmail.com)
+// Jake.Stine (@gmail.com)
+
+#include "PrecompiledHeader.h"
+#include "VifUnpackSSE.h"
+
+#if newVif
+
+static __aligned16 nVifBlock _vBlock = {0};
+static __pagealigned u8 nVifMemCmp[__pagesize];
+
+// Emits a small compare routine into the page-aligned nVifMemCmp buffer.
+// The emitted code loads 16 bytes from [ecx], compares them dword-by-dword against
+// the 16 bytes at [edx], and returns the equality bits of the lower three dwords in
+// eax (so 7 means all three matched).
+// The buffer is switched to Protect_ReadWrite for emission and to Protect_ReadOnly
+// afterwards. The only call visible in this file (in dVifInit) is commented out.
+static void emitCustomCompare() {
+ HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
+ memset8<0xcc>(nVifMemCmp);
+ xSetPtr(nVifMemCmp);
+
+ xMOVAPS (xmm0, ptr32[ecx]);
+ xPCMP.EQD(xmm0, ptr32[edx]);
+ xMOVMSKPS(eax, xmm0);
+ xAND (eax, 0x7); // ignore top 4 bytes (recBlock pointer)
+
+ xRET();
+ HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);
+}
+
+// Sets up the newVif dynarec state for one VIF unit. A non-zero idx selects the
+// VU1 / vif1 objects, zero selects VU0 / vif0. Besides binding the unit to its VU,
+// vif and register structures, this records the end/limit of the VU memory and
+// allocates the recompiler cache plus the block lookup table.
+void dVifInit(int idx) {
+	nVifStruct& unit = nVif[idx];
+
+	unit.idx = idx;
+	if (idx) {
+		unit.VU         = &VU1;
+		unit.vif        = &vif1;
+		unit.vifRegs    = vif1Regs;
+		unit.vuMemEnd   = (u8*)(VU1.Mem + 0x4000);
+		unit.vuMemLimit = 0x3ff0;
+	}
+	else {
+		unit.VU         = &VU0;
+		unit.vif        = &vif0;
+		unit.vifRegs    = vif0Regs;
+		unit.vuMemEnd   = (u8*)(VU0.Mem + 0x1000);
+		unit.vuMemLimit = 0xff0;
+	}
+
+	unit.vifCache  = new BlockBuffer(_1mb*4); // 4mb Rec Cache
+	unit.vifBlocks = new HashBucket<_tParams>();
+	unit.recPtr    = unit.vifCache->getBlock();
+	// recEnd leaves the last quarter megabyte of the cache as a safe zone.
+	unit.recEnd    = &unit.recPtr[unit.vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone
+	//emitCustomCompare();
+}
+
+// Loads Row/Col Data from vifRegs instead of g_vifmask
+// Useful for testing vifReg and g_vifmask inconsistency.
+// Emits code that reads the r0..r3 registers into xmm0/xmm1/xmm2/xmm6, applies the _v0
+// shuffle to each, and merges xmm0/xmm1/xmm2 into xmm6; then reads c0..c3 into
+// xmm2..xmm5 and applies the _v0 shuffle to each of those.
+// The only reference visible in this file is a commented-out call in SetMasks.
+static void loadRowCol(nVifStruct& v) {
+ xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
+ xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
+ xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
+ xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
+ xPSHUF.D(xmm0, xmm0, _v0);
+ xPSHUF.D(xmm1, xmm1, _v0);
+ xPSHUF.D(xmm2, xmm2, _v0);
+ xPSHUF.D(xmm6, xmm6, _v0);
+ // Fold the three shuffled row values into xmm6 using merge masks 8, 4 and 2.
+ mVUmergeRegs(XMM6, XMM0, 8);
+ mVUmergeRegs(XMM6, XMM1, 4);
+ mVUmergeRegs(XMM6, XMM2, 2);
+ // xmm2 is free again after the merge above and is reused for c0.
+ xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
+ xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
+ xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
+ xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
+ xPSHUF.D(xmm2, xmm2, _v0);
+ xPSHUF.D(xmm3, xmm3, _v0);
+ xPSHUF.D(xmm4, xmm4, _v0);
+ xPSHUF.D(xmm5, xmm5, _v0);
+}
+
+// Binds the compiler to a VIF unit and to the block descriptor being compiled, then
+// derives the per-block flags from that descriptor:
+//   usn    - bit 5 of upkType
+//   doMask - bit 4 of upkType
+//   doMode - low two bits of mode
+//   isFill - set when cl is smaller than wl
+VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
+	: v(vif_)
+	, vB(vifBlock_)
+{
+	const int packedType = vB.upkType;
+
+	usn    = (packedType >> 5) & 1;
+	doMask = (packedType >> 4) & 1;
+	doMode = vB.mode & 3;
+	isFill = (vB.cl < vB.wl);
+}
+
+// Rewrites x in place as a 4-bit value built from bits 6, 4, 2 and 0 of its old value,
+// in reversed order: bit 6 -> bit 0, bit 4 -> bit 1, bit 2 -> bit 2, bit 0 -> bit 3.
+// x is evaluated several times, so pass a plain variable without side effects.
+#define makeMergeMask(x) { \
+ x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \
+}
+
+// Emits the loads of the row/column values needed by the block being compiled.
+// vB.mask is treated as a sequence of 2-bit fields:
+//   m2 has a bit set for every field equal to 01 (doMaskWrite merges the row there),
+//   m3 has a bit set for every field equal to 10 (doMaskWrite merges a column there).
+// cS is the cycle size; column registers are only prepared for cycles below cS whose
+// mask byte actually contains a column field.
+_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
+ u32 m0 = vB.mask;
+ u32 m1 = m0 & 0xaaaaaaaa; // upper bit of every 2-bit field
+ u32 m2 =(~m1>>1) & m0;
+ u32 m3 = (m1>>1) & ~m0;
+ u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
+ u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
+ // The row is needed either for masked row fields or whenever a mode is active.
+ if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); }
+ if (m3&&doMask) {
+ xMOVAPS(xmmCol0, ptr32[col]);
+ // xmmCol0 is the source of the Col1..Col3 shuffles, so it is shuffled in place last.
+ if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
+ if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
+ if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
+ if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0);
+ }
+ //if (mask||mode) loadRowCol(v);
+}
+
+// Emits the masked/mode-adjusted store of regX to [dstIndirect] for the current cycle.
+// The mask byte for the cycle (vCL, clamped to 3) is split into four 2-bit fields:
+//   00 (m5) - field keeps the unpacked data; the mode addition applies only to these
+//   01 (m2) - field is replaced by the row register
+//   10 (m3) - field is replaced by the column register for this cycle
+//   11 (m4) - field is write protected: the value already in memory is kept
+// Each mX is converted by makeMergeMask into the 4-bit form passed to mVUmergeRegs.
+void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
+ pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
+ int cc = aMin(vCL, 3);
+ u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
+ u32 m1 = m0 & 0xaaaa; // upper bit of every 2-bit field (m0 only has 8 significant bits)
+ u32 m2 =(~m1>>1) & m0;
+ u32 m3 = (m1>>1) & ~m0;
+ u32 m4 = (m1>>1) & m0;
+ makeMergeMask(m2);
+ makeMergeMask(m3);
+ makeMergeMask(m4);
+ if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
+ if (doMask&&m2) { mVUmergeRegs(regX.Id, xmmRow.Id, m2); } // Merge Row
+ if (doMask&&m3) { mVUmergeRegs(regX.Id, xmmCol0.Id+cc, m3); } // Merge Col
+ if (doMask&&m4) { mVUmergeRegs(regX.Id, xmmTemp.Id, m4); } // Merge Write Protect
+ if (doMode) {
+ u32 m5 = (~m1>>1) & ~m0;
+ // Without masking every field carries unpacked data, so the mode applies to all four.
+ if (!doMask) m5 = 0xf;
+ else makeMergeMask(m5);
+ if (m5 < 0xf) {
+ // Build a vector holding the row only in the m5 fields (zero elsewhere) and add it.
+ xPXOR(xmmTemp, xmmTemp);
+ mVUmergeRegs(xmmTemp.Id, xmmRow.Id, m5);
+ xPADD.D(regX, xmmTemp);
+ // doMode 2 additionally copies the summed fields back into the row register.
+ if (doMode==2) mVUmergeRegs(xmmRow.Id, regX.Id, m5);
+ }
+ else if (m5 == 0xf) {
+ xPADD.D(regX, xmmRow);
+ if (doMode==2) xMOVAPS(xmmRow, regX);
+ }
+ }
+ xMOVAPS(ptr32[dstIndirect], regX);
+}
+
+// Emits a store of xmmRow into this unit's row storage in g_vifmask (Row1 when the
+// unit index is non-zero, Row0 otherwise), and logs a message while compiling.
+void VifUnpackSSE_Dynarec::writeBackRow() const {
+	u32* rowStorage = g_vifmask.Row0;
+	if (v.idx) rowStorage = g_vifmask.Row1;
+
+	xMOVAPS(ptr32[rowStorage], xmmRow);
+	DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
+	// ToDo: Do we need to write back to vifregs.rX too!? :/
+}
+
+// Keeps the displacement of an indirect address small enough for byte-form encoding.
+// While the displacement is 0x80 or more it is lowered in steps of 0xf0; the total
+// amount removed is then added to modReg by a single emitted ADD, so the effective
+// address stays the same.
+static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg )
+{
+	int shifted = 0;
+	for( ; addr.Displacement >= 0x80; addr -= 0xf0 )
+		shifted += 0xf0;
+
+	if( shifted != 0 )
+		xADD(modReg, shifted);
+}
+
+// Emits the fully unrolled unpack routine for the block described by vB.
+// The number of vectors (vifRegs->num) and the starting cycle position (vif->cl) are
+// read at compile time; one write is emitted per vector, advancing the source and
+// destination displacements as it goes. The routine ends by storing the final cycle
+// position and the remaining count (zero once the loop has finished) and returning.
+// NOTE(review): when isFill is false and cycleSize is 0, no branch decrements vNum,
+// so the loop below would not terminate -- confirm callers never pass wl == 0 here.
+void VifUnpackSSE_Dynarec::CompileRoutine() {
+	const int upkNum    = vB.upkType & 0xf;
+	const u8& vift      = nVifT[upkNum];
+	const int cycleSize = isFill ? vB.cl : vB.wl;
+	const int blockSize = isFill ? vB.wl : vB.cl;
+	const int skipSize  = blockSize - cycleSize;
+
+	int vNum = vifRegs->num;
+	vCL      = vif->cl;
+
+	SetMasks(cycleSize);
+
+	while (vNum) {
+
+		ShiftDisplacementWindow( srcIndirect, edx );
+		ShiftDisplacementWindow( dstIndirect, ecx );
+
+		const bool readsSource = (vCL < cycleSize);
+
+		if (!readsSource && !isFill) {
+			// Skipping write: jump over the unwritten part of the block.
+			dstIndirect += (16 * skipSize);
+			vCL = 0;
+			continue;
+		}
+
+		if (readsSource) {
+			// Regular write: consumes source data.
+			xUnpack(upkNum);
+			srcIndirect += vift;
+		}
+		else {
+			// Filling write: produces a vector without consuming source data.
+			DevCon.WriteLn("filling mode!");
+			VifUnpackSSE_Dynarec::FillingWrite( *this ).xUnpack(upkNum);
+		}
+
+		// Both writing paths advance the destination by one 16-byte vector.
+		dstIndirect += 16;
+		vNum--;
+		if (++vCL == blockSize) vCL = 0;
+	}
+
+	if (doMode==2) writeBackRow();
+	xMOV(ptr32[&vif->cl], vCL);
+	xMOV(ptr32[&vifRegs->num], vNum);
+	xRET();
+}
+
+// Computes the VU memory address an unpack should write to: offset is masked with the
+// unit's vuMemLimit and added to the VU memory base. Returns NULL when the block
+// (_vBlock.num vectors of 16 bytes) would run past vuMemEnd, which tells the caller
+// to use the interpreter instead.
+static _f u8* dVifsetVUptr(const nVifStruct& v, int offset) {
+	u8* const dest          = (u8*)(v.VU->Mem + (offset & v.vuMemLimit));
+	const u8* const destEnd = dest + _vBlock.num * 16;
+
+	if (destEnd <= v.vuMemEnd) return dest;
+
+	DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
+	return NULL; // Fall Back to Interpreters which have wrap-around logic
+}
+
+// Resets the recompiler cache of a unit once its write pointer has moved past recEnd:
+// all registered blocks are dropped and emission restarts at the beginning of the cache.
+static _f void dVifRecLimit(int idx) {
+	nVifStruct& unit = nVif[idx];
+
+	if (unit.recPtr <= unit.recEnd) return;
+
+	DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", unit.recPtr, unit.recEnd);
+	unit.vifBlocks->clear();
+	unit.recPtr = unit.vifCache->getBlock();
+}
+
+// Runs a VIF unpack through the dynarec.
+// A lookup key (_vBlock) is built from the current unpack parameters. If a compiled
+// block with that key exists it is executed, unless the destination would overflow VU
+// memory, in which case the interpreter (_nVifUnpack) handles the transfer. If no block
+// exists yet, one is compiled and registered, and the function calls itself again to
+// run it.
+// NOTE(review): vif and vifRegs are used here without going through v; presumably they
+// refer to the unit that is currently active -- confirm.
+_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
+
+ const nVifStruct& v = nVif[idx];
+ // Low five bits of the command plus the usn flag in bit 5 ('&' binds tighter than '|').
+ const u8 upkType = vif->cmd & 0x1f | ((!!vif->usn) << 5);
+ const int doMask = (upkType>>4) & 1;
+
+ const int cycle_cl = vifRegs->cycle.cl;
+ const int cycle_wl = vifRegs->cycle.wl;
+ // NOTE(review): cycleSize is not used anywhere in this function.
+ const int cycleSize = isFill ? cycle_cl : cycle_wl;
+ const int blockSize = isFill ? cycle_wl : cycle_cl;
+
+ // Wrap the cycle position so it always lies inside the block.
+ if (vif->cl >= blockSize) vif->cl = 0;
+
+ _vBlock.upkType = upkType;
+ _vBlock.num = *(u8*)&vifRegs->num;
+ _vBlock.mode = *(u8*)&vifRegs->mode;
+ _vBlock.scl = vif->cl;
+ _vBlock.cl = cycle_cl;
+ _vBlock.wl = cycle_wl;
+
+ // Zero out the mask parameter if it's unused -- games leave random junk
+ // values here which cause false recblock cache misses.
+ _vBlock.mask = doMask ? vifRegs->mask : 0x00;
+
+ if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
+ if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
+ //DevCon.WriteLn("Running Recompiled Block!");
+ ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
+ }
+ else {
+ //DevCon.WriteLn("Running Interpreter Block");
+ _nVifUnpack(idx, data, size, isFill);
+ }
+ return;
+ }
+ static int recBlockNum = 0;
+ DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
+ DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl/wl=0x%x/0x%x, mask=%s)",
+ _vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
+ doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
+ );
+
+ // Compile at the current cache position and register the block under its key.
+ xSetPtr(v.recPtr);
+ _vBlock.startPtr = (uptr)xGetAlignedCallTarget();
+ v.vifBlocks->add(_vBlock);
+ VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine();
+ nVif[idx].recPtr = xGetPtr();
+
+ // NOTE(review): if this resets the cache, the block registered above is dropped and
+ // the recursive call below compiles it again at the start of the cache.
+ dVifRecLimit(idx);
+
+ // Run the block we just compiled. Various conditions may force us to still use
+ // the interpreter unpacker though, so a recursive call is the safest way here...
+ dVifUnpack(idx, data, size, isFill);
+}
+
+#endif
diff --git a/pcsx2/x86/newVif_HashBucket.h b/pcsx2/x86/newVif_HashBucket.h
index 3cea0f8771..d27a15cc4c 100644
--- a/pcsx2/x86/newVif_HashBucket.h
+++ b/pcsx2/x86/newVif_HashBucket.h
@@ -1,86 +1,100 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
-#include "xmmintrin.h"
-#pragma once
-
-template< typename T >
-struct SizeChain
-{
- int Size;
- T* Chain;
-};
-
-// HashBucket is a container which uses a built-in hash function
-// to perform quick searches.
-// T is a struct data type (note: size must be in multiples of 16 bytes!)
-// hSize determines the number of buckets HashBucket will use for sorting.
-// cmpSize is the size of data to consider 2 structs equal (see find())
-// The hash function is determined by taking the first bytes of data and
-// performing a modulus the size of hSize. So the most diverse-data should
-// be in the first bytes of the struct. (hence why nVifBlock is specifically sorted)
-template
-class HashBucket {
-protected:
- SizeChain mBucket[hSize];
-
-public:
- HashBucket() {
- for (int i = 0; i < hSize; i++) {
- mBucket[i].Chain = NULL;
- mBucket[i].Size = 0;
- }
- }
- ~HashBucket() { clear(); }
- int quickFind(u32 data) {
- return mBucket[data % hSize].Size;
- }
- __forceinline T* find(T* dataPtr) {
- u32 d = *((u32*)dataPtr);
- const SizeChain& bucket( mBucket[d % hSize] );
-
- for (int i=bucket.Size; i; --i) {
- // This inline version seems about 1-2% faster in tests of games that average 1
- // program per bucket. Games that average more should see a bigger improvement --air
- int result = _mm_movemask_ps( (__m128&) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
- if( result == 0x7 ) return &bucket.Chain[i];
-
- // Dynamically generated function version, can't be inlined. :(
- //if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i];
-
- //if (!memcmp(&bucket.Chain[i], dataPtr, sizeof(T)-4)) return &c[i]; // old school version! >_<
- }
- if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size );
- return NULL;
- }
- __forceinline void add(const T& dataPtr) {
- u32 d = (u32&)dataPtr;
- SizeChain& bucket( mBucket[d % hSize] );
-
- if( bucket.Chain = (T*)_aligned_realloc( bucket.Chain, sizeof(T)*(bucket.Size+1), 16), bucket.Chain==NULL ) {
- throw Exception::OutOfMemory(
- wxsFormat(L"Out of memory re-allocating hash bucket (bucket size=%d)", bucket.Size+1),
- wxEmptyString
- );
- }
- memcpy_fast(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T));
- }
- void clear() {
- for (int i = 0; i < hSize; i++) {
- safe_aligned_free(mBucket[i].Chain);
- mBucket[i].Size = 0;
- }
- }
-};
+#include "xmmintrin.h"
+#pragma once
+
+// Typecast helpers for reinterpreting one SIMD vector type as another, used as
+// "(cast_m128) expr". MSVC needs the cast to be written as a reference type to avoid
+// an error, while GCC (and presumably other compilers) reject the reference form and
+// need the plain type, so the spelling is selected per compiler. --air
+#ifdef _MSC_VER
+# define cast_m128 __m128&
+# define cast_m128i __m128i&
+# define cast_m128d __m128d&
+#else // defined(__GNUC__)
+# define cast_m128 __m128
+# define cast_m128i __m128i
+# define cast_m128d __m128d
+#endif
+
+// A counted array of T: Chain points at the storage and Size is the number of
+// entries in it.
+template< typename T >
+struct SizeChain
+{
+ int Size; // number of entries in Chain
+ T* Chain; // entry storage
+};
+
+// HashBucket is a container which uses a built-in hash function
+// to perform quick searches.
+// T is a struct data type (note: size must be in multiples of 16 bytes!)
+// hSize determines the number of buckets HashBucket will use for sorting.
+// cmpSize is the size of data to consider 2 structs equal (see find())
+// The hash function is determined by taking the first bytes of data and
+// performing a modulus the size of hSize. So the most diverse-data should
+// be in the first bytes of the struct. (hence why nVifBlock is specifically sorted)
+//
+// Notes:
+//  - Each bucket is a growable array; its valid entries are Chain[0] .. Chain[Size-1].
+//  - find() reads both the stored entries and the key with 16-byte aligned SSE loads,
+//    so the key passed in must be 16-byte aligned.
+//  - find() currently always compares the first three dwords (12 bytes) of an entry,
+//    independent of cmpSize.
+template<typename T, int hSize, int cmpSize>
+class HashBucket {
+protected:
+	SizeChain<T> mBucket[hSize];
+
+public:
+	HashBucket() {
+		for (int i = 0; i < hSize; i++) {
+			mBucket[i].Chain = NULL;
+			mBucket[i].Size = 0;
+		}
+	}
+	~HashBucket() { clear(); }
+
+	// Returns the number of entries stored in the bucket that 'data' hashes to.
+	int quickFind(u32 data) {
+		return mBucket[data % hSize].Size;
+	}
+
+	// Looks up the entry whose first three dwords equal those of *dataPtr.
+	// Returns a pointer to the stored entry, or NULL when there is no match.
+	__forceinline T* find(T* dataPtr) {
+		u32 d = *((u32*)dataPtr);
+		const SizeChain<T>& bucket( mBucket[d % hSize] );
+
+		// The key does not change while scanning, so load it once.
+		const __m128i key = _mm_load_si128((__m128i*)dataPtr);
+
+		// Scan the valid range [0, Size), most recently added entry first.
+		for (int i=bucket.Size-1; i>=0; --i) {
+			// This inline version seems about 1-2% faster in tests of games that average 1
+			// program per bucket. Games that average more should see a bigger improvement --air
+			int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), key ) ) & 0x7;
+			if( result == 0x7 ) return &bucket.Chain[i];
+
+			// Dynamically generated function version, can't be inlined. :(
+			//if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i];
+
+			//if (!memcmp(&bucket.Chain[i], dataPtr, sizeof(T)-4)) return &c[i]; // old school version! >_<
+		}
+		if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size );
+		return NULL;
+	}
+
+	// Appends a copy of dataPtr to the bucket it hashes to.
+	// Throws Exception::OutOfMemory when the bucket cannot be grown; the bucket keeps
+	// its previous contents in that case.
+	__forceinline void add(const T& dataPtr) {
+		u32 d = (u32&)dataPtr;
+		SizeChain<T>& bucket( mBucket[d % hSize] );
+
+		// Grow through a temporary so a failed reallocation does not overwrite (and lose)
+		// the existing chain pointer.
+		T* grown = (T*)_aligned_realloc( bucket.Chain, sizeof(T)*(bucket.Size+1), 16);
+		if( grown == NULL ) {
+			throw Exception::OutOfMemory(
+				wxsFormat(L"Out of memory re-allocating hash bucket (bucket size=%d)", bucket.Size+1),
+				wxEmptyString
+			);
+		}
+		bucket.Chain = grown;
+		memcpy_fast(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T));
+	}
+
+	// Releases every bucket's storage and resets all entry counts to zero.
+	void clear() {
+		for (int i = 0; i < hSize; i++) {
+			safe_aligned_free(mBucket[i].Chain);
+			mBucket[i].Size = 0;
+		}
+	}
+};