From 2b3b60511b316ff76fcba2ab5e2ae4768863c407 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 16 Dec 2009 23:24:42 +0000 Subject: [PATCH] newVif: some minor changes and cleanups... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2351 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/newVif.h | 28 +++++++++--- pcsx2/x86/newVif_BlockBuffer.h | 23 +++++++--- pcsx2/x86/newVif_Unpack.inl | 33 +++----------- pcsx2/x86/newVif_UnpackGen.inl | 78 +++++++++++++++------------------- 4 files changed, 79 insertions(+), 83 deletions(-) diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h index e814738cd9..db1ad9fed0 100644 --- a/pcsx2/x86/newVif.h +++ b/pcsx2/x86/newVif.h @@ -16,6 +16,7 @@ #pragma once #ifdef newVif +#include "newVif_BlockBuffer.h" #include "x86emitter/x86emitter.h" using namespace x86Emitter; extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0); @@ -27,6 +28,7 @@ static __pagealigned u8 nVifUpkExec[__pagesize*16]; static __aligned16 nVifCall nVifUpk[(2*2*16)*4]; // ([USN][Masking][Unpack Type]) [curCycle] static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector] +#define _1mb (0x100000) #define _v0 0 #define _v1 0x55 #define _v2 0xaa @@ -35,10 +37,25 @@ static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Ve #define aMin(x, y) std::min(x,y) #define _f __forceinline -#define xShiftR(regX, n) { \ - if (usn) { xPSRL.D(regX, n); } \ - else { xPSRA.D(regX, n); } \ -} +struct nVifBlock { + u8 upkType; // Unpack Type + u8 num; // Num Field + u8 mode; // Mode Field + u8 cl; // CL Field + u8 wl; // WL Field + u32 mask; // Mask Field + u8* startPtr; // Start Ptr of RecGen Code +}; + +struct nVifStruct { + u32 idx; // VIF0 or VIF1 + vifStruct* vif; // Vif Struct ptr + VIFregisters* vifRegs; // Vif Regs ptr + VURegs* VU; // VU Regs ptr + u8* vuMemEnd; // End of VU Memory + u32 vuMemLimit; // Use for fast AND + BlockBuffer* vifCache; // Block Buffer +}; static const u32 nVifT[16] = { 4, // S-32 @@ -59,9 +76,10 @@ static const u32 nVifT[16] = { 2, // V4-5 }; -#include "newVif_BlockBuffer.h" #include "newVif_OldUnpack.inl" #include "newVif_UnpackGen.inl" #include "newVif_Unpack.inl" +//#include "newVif_Dynarec.inl" + #endif diff --git a/pcsx2/x86/newVif_BlockBuffer.h b/pcsx2/x86/newVif_BlockBuffer.h index cb378c74e5..b540319c7f 100644 --- a/pcsx2/x86/newVif_BlockBuffer.h +++ b/pcsx2/x86/newVif_BlockBuffer.h @@ -20,20 +20,29 @@ private: u32 mSize; // Cur Size u32 mSizeT; // Total Size u8* mData; // Data Ptr - void grow(u32 newSize) { - u8* temp = new u8[newSize]; - memcpy(temp, mData, mSizeT); - safe_delete( mData ); - mData = temp; + void alloc(int size) { + mData = SysMmapEx(NULL, size, 0, "nVif_BlockBuffer"); + if (!mData) throw Exception::OutOfMemory("nVif Error: Failed to allocate recompiler memory!"); + memset(mData, 0xcc, size); + } + void dealloc(u8* &dPtr, int size) { + if (dPtr) { HostSys::Munmap(dPtr, size); dPtr = NULL; } } public: - BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; mData = new u8[mSizeT]; } - virtual ~BlockBuffer() { safe_delete(mData); } + BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; alloc(mSizeT); } + ~BlockBuffer() { dealloc(mData, mSizeT); } void append(void *addr, u32 size) { if (mSize + size > mSizeT) grow(mSize*2 + size); memcpy(&mData[mSize], addr, size); mSize += size; } + void grow(u32 newSize) { + u8* temp = mData; + alloc (newSize); + memcpy (mData, temp, mSize); + dealloc(temp, mSizeT); + mSizeT = newSize; + } void clear() { mSize = 0; } u32 getSize() { return mSize; } u8* getBlock() { return mData; } diff --git a/pcsx2/x86/newVif_Unpack.inl b/pcsx2/x86/newVif_Unpack.inl index 54f36e3d87..8ae66f019b 100644 --- a/pcsx2/x86/newVif_Unpack.inl +++ b/pcsx2/x86/newVif_Unpack.inl @@ -13,20 +13,12 @@ * If not, see . */ -// newVif! - author: cottonvibes(@gmail.com) +// newVif! +// authors: cottonvibes(@gmail.com) +// Jake.Stine (@gmail.com) #pragma once -struct nVifStruct { - u32 idx; // VIF0 or VIF1 - vifStruct* vif; // Vif Struct ptr - VIFregisters* vifRegs; // Vif Regs ptr - VURegs* VU; // VU Regs ptr - u8* vuMemEnd; // End of VU Memory - u32 vuMemLimit; // Use for fast AND - BlockBuffer* vifBlock; // Block Buffer -}; - static __aligned16 nVifStruct nVif[2]; void initNewVif(int idx) { @@ -34,9 +26,9 @@ void initNewVif(int idx) { nVif[idx].VU = idx ? &VU1 : &VU0; nVif[idx].vif = idx ? &vif1 : &vif0; nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs; - nVif[idx].vifBlock = new BlockBuffer(0x2000); // 8kb Block Buffer nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000)); nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0; + nVif[idx].vifCache = NULL; HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false); memset8<0xcc>( nVifUpkExec ); @@ -197,7 +189,7 @@ void _nVifUnpack(int idx, u8 *data, u32 size) { vif = nVif[idx].vif; vifRegs = nVif[idx].vifRegs; - const bool doMode = !!vifRegs->mode; + const bool doMode = vifRegs->mode && !(vif->tag.cmd & 0x10); const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl); //UnpackLoopTable[idx][doMode][isFill]( data, size ); @@ -219,18 +211,3 @@ void _nVifUnpack(int idx, u8 *data, u32 size) { //DevCon.WriteLn("%s Write! [mask = %08x][type = %02d][num = %d]", (isFill?"Filling":"Skipping"), vifRegs->mask, upkNum, vifRegs->num); } } - -//data += ft.gsize; -//size -= ft.gsize; -//vifRegs->num--; -//else { -// //DevCon.WriteLn("SSE Unpack!"); -// int c = aMin((cycleSize - vif->cl), 3); -// size -= vift * c; -// //if (c>1) { DevCon.WriteLn("C > 1!"); } -// if (c<0||c>3) { DbgCon.WriteLn("C wtf!"); } -// if (size < 0) { DbgCon.WriteLn("Size Shit"); size+=vift*c;c=1;size-=vift*c;} -// fnbase[(aMin(vif->cl, 4) * 4) + c-1](dest, data); -// data += vift * c; -// vifRegs->num -= c; -//} \ No newline at end of file diff --git a/pcsx2/x86/newVif_UnpackGen.inl b/pcsx2/x86/newVif_UnpackGen.inl index b3dad6655b..6f24a7aea5 100644 --- a/pcsx2/x86/newVif_UnpackGen.inl +++ b/pcsx2/x86/newVif_UnpackGen.inl @@ -15,47 +15,22 @@ #pragma once -#define xMaskWrite(regX, x) { \ - if (x==0) xMOVAPS(xmm7, ptr32[ecx]); \ - if (x==1) xMOVAPS(xmm7, ptr32[ecx+0x10]); \ - if (x==2) xMOVAPS(xmm7, ptr32[ecx+0x20]); \ - int offX = aMin(curCycle+x, 4); \ - xPAND(regX, ptr32[nVifMask[0][offX]]); \ - xPAND(xmm7, ptr32[nVifMask[1][offX]]); \ - xPOR (regX, ptr32[nVifMask[2][offX]]); \ - xPOR (regX, xmm7); \ - if (x==0) xMOVAPS(ptr32[ecx], regX); \ - if (x==1) xMOVAPS(ptr32[ecx+0x10], regX); \ - if (x==2) xMOVAPS(ptr32[ecx+0x20], regX); \ +#define xMaskWrite(regX) { \ + xMOVAPS(xmm7, ptr32[ecx]); \ + int offX = aMin(curCycle, 4); \ + xPAND(regX, ptr32[nVifMask[0][offX]]); \ + xPAND(xmm7, ptr32[nVifMask[1][offX]]); \ + xPOR (regX, ptr32[nVifMask[2][offX]]); \ + xPOR (regX, xmm7); \ + xMOVAPS(ptr32[ecx], regX); \ } - -#define xMovDest(reg0) { \ - if (mask==0) { xMOVAPS (ptr32[ecx], reg0); } \ - else { xMaskWrite(reg0, 0); } \ +#define xMovDest(regX) { \ + if (mask==0) { xMOVAPS (ptr32[ecx], regX); } \ + else { xMaskWrite(regX); } \ } - -// xmm2 gets result -void convertRGB() { - xPSLL.D (xmm1, 3); // ABG|R5.000 - xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits) - xPSRL.D (xmm1, 8); // ABG - xPSLL.D (xmm1, 3); // AB|G5.000 - xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits) - xPSRL.D (xmm1, 8); // AB - xPSLL.D (xmm1, 3); // A|B5.000 - xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits) - xPSRL.D (xmm1, 8); // A - xPSLL.D (xmm1, 7); // A.0000000 - - xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A - xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G - xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B - mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R - mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R - mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R - - xPSLL.D (xmm2, 24); // can optimize to - xPSRL.D (xmm2, 24); // single AND... +#define xShiftR(regX, n) { \ + if (usn) { xPSRL.D(regX, n); } \ + else { xPSRA.D(regX, n); } \ } struct VifUnpackIndexer { @@ -184,10 +159,27 @@ void nVifGen(int usn, int mask, int curCycle) { // A | B5 | G5 | R5 // ..0.. A 0000000 | ..0.. B 000 | ..0.. G 000 | ..0.. R 000 indexer.xSetCall(0xf); // V4-5 - xMOV16 (xmm0, ptr32[edx]); - xMOVAPS (xmm1, xmm0); - convertRGB(); - xMovDest (xmm2); + xMOV16 (xmm0, ptr32[edx]); + xMOVAPS (xmm1, xmm0); + xPSLL.D (xmm1, 3); // ABG|R5.000 + xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits) + xPSRL.D (xmm1, 8); // ABG + xPSLL.D (xmm1, 3); // AB|G5.000 + xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits) + xPSRL.D (xmm1, 8); // AB + xPSLL.D (xmm1, 3); // A|B5.000 + xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits) + xPSRL.D (xmm1, 8); // A + xPSLL.D (xmm1, 7); // A.0000000 + xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A + xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G + xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B + mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R + mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R + mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R + xPSLL.D (xmm2, 24); // can optimize to + xPSRL.D (xmm2, 24); // single AND... + xMovDest (xmm2); xRET(); pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );