From c5b47530f908bcc9c8b5d5131e7588a7d3b028b7 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 22 Dec 2009 14:21:21 +0000 Subject: [PATCH] newVif: minor optimizations. * Improved hashing slightly by ignoring the garbage values in the 'mask' parameter when doMask is false. * Wrote an inlineable version of the hash compare function, using x86 intrinsics. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2384 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 4 +-- pcsx2/x86/newVif_Dynarec.inl | 26 +++++++++----- pcsx2/x86/newVif_HashBucket.h | 66 +++++++++++++++++++++-------------- pcsx2/x86/newVif_Tables.inl | 8 ++--- 4 files changed, 61 insertions(+), 43 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 3818c49e83..c43d5cb205 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -230,7 +230,7 @@ microVUf(int) mVUfindLeastUsedProg() { mVU->prog.prog[i].isOld = 0; mVU->prog.prog[i].used = 1; mVUsortProg(mVU, i); - Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1); + Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1); return i; } } @@ -247,7 +247,7 @@ microVUf(int) mVUfindLeastUsedProg() { mVU->prog.prog[pIdx].isOld = 0; mVU->prog.prog[pIdx].used = 1; mVUsortProg(mVU, pIdx); - Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1); + Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1); return pIdx; } diff --git a/pcsx2/x86/newVif_Dynarec.inl b/pcsx2/x86/newVif_Dynarec.inl index 7a88e30268..4d2b0a648a 100644 --- a/pcsx2/x86/newVif_Dynarec.inl +++ b/pcsx2/x86/newVif_Dynarec.inl @@ -123,8 +123,9 @@ _f u8* dVifsetVUptr(nVifStruct& v, int offset) { void dVifUnpack(int idx, u8 *data, u32 size) { - nVifStruct& v = nVif[idx]; - const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5); + nVifStruct& v = nVif[idx]; + const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5); + const int doMask = (upkType>>4)&1; _vBlock.upkType = upkType; _vBlock.num = *(u8*)&vifRegs->num; @@ -132,22 +133,29 @@ void dVifUnpack(int idx, u8 *data, u32 size) { _vBlock.scl = vif->cl; _vBlock.cl = vifRegs->cycle.cl; _vBlock.wl = vifRegs->cycle.wl; - _vBlock.mask = vifRegs->mask; + + // Zero out the mask parameter if it's unused -- games leave random junk + // values here which cause false recblock cache misses. + _vBlock.mask = doMask ? vifRegs->mask : 0x00; if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) { - u8* dest = dVifsetVUptr(v, vif->tag.addr); - if (!dest) { - //DevCon.WriteLn("Running Interpreter Block"); - _nVifUnpack(idx, data, size); - } - else { + if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) { //DevCon.WriteLn("Running Recompiled Block!"); ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data); } + else { + //DevCon.WriteLn("Running Interpreter Block"); + _nVifUnpack(idx, data, size); + } return; } static int recBlockNum = 0; DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++); + DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl=0x%x, wl=0x%x, mask=%s)", + _vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl, + doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored" + ); + dVifRecompile(v, &_vBlock); v.vifBlocks->add(&_vBlock); dVifRecLimit(idx); diff --git a/pcsx2/x86/newVif_HashBucket.h b/pcsx2/x86/newVif_HashBucket.h index e77798410a..51ad7ae8c8 100644 --- a/pcsx2/x86/newVif_HashBucket.h +++ b/pcsx2/x86/newVif_HashBucket.h @@ -15,7 +15,14 @@ #pragma once -extern __pagealigned u8 nVifMemCmp[__pagesize]; +static __pagealigned u8 nVifMemCmp[__pagesize]; + +template< typename T > +struct SizeChain +{ + int Size; + T* Chain; +}; // HashBucket is a container which uses a built-in hash function // to perform quick searches. @@ -27,49 +34,54 @@ extern __pagealigned u8 nVifMemCmp[__pagesize]; // be in the first bytes of the struct. (hence why nVifBlock is specifically sorted) template class HashBucket { -private: - T* mChain[hSize]; - int mSize [hSize]; +protected: + SizeChain mBucket[hSize]; + public: HashBucket() { for (int i = 0; i < hSize; i++) { - mChain[i] = NULL; - mSize [i] = 0; + mBucket[i].Chain = NULL; + mBucket[i].Size = 0; } } ~HashBucket() { clear(); } int quickFind(u32 data) { - int o = data % hSize; - return mSize[o]; + return mBucket[data % hSize].Size; } - T* find(T* dataPtr) { + __forceinline T* find(T* dataPtr) { u32 d = *((u32*)dataPtr); - int o = d % hSize; - int s = mSize[o]; - T* c = mChain[o]; - for (int i = 0; i < s; i++) { - //if (!memcmp(&c[i], dataPtr, cmpSize)) return &c[i]; - if ((((nVifCall)((void*)nVifMemCmp))(&c[i], dataPtr))==7) return &c[i]; + const SizeChain& bucket( mBucket[d % hSize] ); + + for (int i=bucket.Size; i; --i) { + // This inline version seems about 1-2% faster in tests of games that average 1 + // program per bucket. Games that average more should see a bigger improvement --air + int result = _mm_movemask_ps( (__m128&)_mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7; + if( result == 0x7 ) return &bucket.Chain[i]; + + // Dynamically generated function version, can't be inlined. :( + //if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i]; + + //if (!memcmp(&bucket.Chain[i], dataPtr, sizeof(T)-4)) return &c[i]; // old school version! >_< } + if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size ); return NULL; } - void add(T* dataPtr) { + __forceinline void add(T* dataPtr) { u32 d = *(u32*)dataPtr; - int o = d % hSize; - int s = mSize[o]++; - T* c = mChain[o]; - T* n = (T*)_aligned_malloc(sizeof(T)*(s+1), 16); - if (s) { - memcpy(n, c, sizeof(T) * s); - safe_aligned_free(c); + SizeChain& bucket( mBucket[d % hSize] ); + + if( bucket.Chain = (T*)_aligned_realloc( bucket.Chain, sizeof(T)*(bucket.Size+1), 16), bucket.Chain==NULL ) { + throw Exception::OutOfMemory( + wxsFormat(L"Out of memory re-allocating hash bucket (bucket size=%d)", bucket.Size+1), + wxEmptyString + ); } - memcpy(&n[s], dataPtr, sizeof(T)); - mChain[o] = n; + memcpy_fast(&bucket.Chain[bucket.Size++], dataPtr, sizeof(T)); } void clear() { for (int i = 0; i < hSize; i++) { - safe_aligned_free(mChain[i]); - mSize[i] = 0; + safe_aligned_free(mBucket[i].Chain); + mBucket[i].Size = 0; } } }; diff --git a/pcsx2/x86/newVif_Tables.inl b/pcsx2/x86/newVif_Tables.inl index 9457938866..1350fb591c 100644 --- a/pcsx2/x86/newVif_Tables.inl +++ b/pcsx2/x86/newVif_Tables.inl @@ -272,18 +272,16 @@ void writeBackRow(nVifStruct& v) { // ToDo: Do we need to write back to vifregs.rX too!? :/ } - __pagealigned u8 nVifMemCmp[__pagesize]; - void emitCustomCompare() { HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false); - memset_8<0xcc,__pagesize>(nVifMemCmp); + memset8<0xcc>(nVifMemCmp); xSetPtr(nVifMemCmp); xMOVAPS (xmm0, ptr32[ecx]); xPCMP.EQD(xmm0, ptr32[edx]); xMOVMSKPS(eax, xmm0); - xAND (eax, 0x7); + xAND (eax, 0x7); // ignore top 4 bytes (recBlock pointer) xRET(); HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true); -} +}