From 10b3d429fed8bb836481a9bbcc44b7776efd9fa9 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 13 Dec 2016 19:47:31 +0100 Subject: [PATCH] vif: new implementation of the hash bucket Previous implementation saved both the chain pointer and the chain size. Rationale: the size is useful to add a new element and to detect the end of the chain. The Vif cache rarely misses, so 'add' is rarely called and the end of a chain is rarely reached. New implementation will add a null cell at the end of the chain. As a cell contains an x86 pointer, if it is null you can conclude that you have reached the end of the chain. The 'add' function will traverse the chain to get the current size. It is a cold path; besides, the chain is often short (< 4). The 'find' function only needs to check the startPtr bytes to detect the end of the loop. Note: SizeChain was replaced with a std::array --- pcsx2/x86/newVif.h | 3 +- pcsx2/x86/newVif_Dynarec.cpp | 3 +- pcsx2/x86/newVif_HashBucket.h | 103 +++++++++++++++++++++------------- 3 files changed, 67 insertions(+), 42 deletions(-) diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h index cd491fa941..1a1b1760fb 100644 --- a/pcsx2/x86/newVif.h +++ b/pcsx2/x86/newVif.h @@ -55,7 +55,6 @@ _vifT extern void dVifUnpack (const u8* data, bool isFill); #define xmmRow xmm6 #define xmmTemp xmm7 -#define _hSize 0x4000 // [usn*1:mask*1:upk*4:num*8] hash... 
struct nVifStruct { __aligned16 nVifBlock block; @@ -72,7 +71,7 @@ struct nVifStruct { RecompiledCodeReserve* recReserve; u8* recWritePtr; // current write pos into the reserve - HashBucket<_hSize> vifBlocks; // Vif Blocks + HashBucket vifBlocks; // Vif Blocks nVifStruct(); }; diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index 4e06268a9c..65084d355d 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -23,7 +23,7 @@ #include "Utilities/Perf.h" static void recReset(int idx) { - nVif[idx].vifBlocks.clear(); + nVif[idx].vifBlocks.reset(); nVif[idx].recReserve->Reset(); @@ -341,6 +341,7 @@ _vifT __fi void dVifUnpack(const u8* data, bool isFill) { v.block.cl = vifRegs.cycle.cl; v.block.wl = vifRegs.cycle.wl ? vifRegs.cycle.wl : 256; v.block.aligned = vif.start_aligned; //MTVU doesn't have a packet size! + v.block.startPtr = 0; // Ease the detection of the end of the hash bucket if ((upkType & 0xf) != 9) v.block.aligned &= 0x1; diff --git a/pcsx2/x86/newVif_HashBucket.h b/pcsx2/x86/newVif_HashBucket.h index 34998e960d..e4f6fe79a0 100644 --- a/pcsx2/x86/newVif_HashBucket.h +++ b/pcsx2/x86/newVif_HashBucket.h @@ -13,9 +13,10 @@ * If not, see . */ -#include "x86emitter/x86_intrin.h" #pragma once +#include + // nVifBlock - Ordered for Hashing; the 'num' field and the lower 6 bits of upkType are // used as the hash bucket selector. struct __aligned16 nVifBlock { @@ -29,72 +30,96 @@ struct __aligned16 nVifBlock { uptr startPtr; // [12] Start Ptr of RecGen Code }; // 16 bytes -template< typename T > -struct SizeChain -{ - int Size; - T* Chain; -}; +#define hSize 0x4000 // [usn*1:mask*1:upk*4:num*8] hash... // HashBucket is a container which uses a built-in hash function -// to perform quick searches. -// hSize determines the number of buckets HashBucket will use for sorting. +// to perform quick searches. 
It is designed around the nVifBlock structure +// // The hash function is determined by taking the first bytes of data and // performing a modulus the size of hSize. So the most diverse-data should // be in the first bytes of the struct. (hence why nVifBlock is specifically sorted) -template class HashBucket { protected: - SizeChain mBucket[hSize]; + std::array m_bucket; public: HashBucket() { - for (int i = 0; i < hSize; i++) { - mBucket[i].Chain = NULL; - mBucket[i].Size = 0; - } + m_bucket.fill(nullptr); } - virtual ~HashBucket() throw() { clear(); } - int quickFind(u32 data) { - return mBucket[data % hSize].Size; - } + ~HashBucket() throw() { clear(); } __fi nVifBlock* find(nVifBlock* dataPtr) { u32 d = *((u32*)dataPtr); - const SizeChain& bucket( mBucket[d % hSize] ); + const __m128i* chainpos = (__m128i*)m_bucket[d % m_bucket.size()]; - const __m128i* endpos = (__m128i*)&bucket.Chain[bucket.Size]; const __m128i data128( _mm_load_si128((__m128i*)dataPtr) ); - for( const __m128i* chainpos = (__m128i*)bucket.Chain; chainpos& bucket( mBucket[d % hSize] ); + u32 b = d % m_bucket.size(); - if( (bucket.Chain = (nVifBlock*)pcsx2_aligned_realloc( bucket.Chain, sizeof(nVifBlock)*(bucket.Size+1), 16, sizeof(nVifBlock)*bucket.Size)) == NULL ) { + u32 size = bucket_size( dataPtr ); + + // Warning there is an extra +1 due to the empty cell + if( (m_bucket[b] = (nVifBlock*)pcsx2_aligned_realloc( m_bucket[b], sizeof(nVifBlock)*(size+2), 16, sizeof(nVifBlock)*(size+1) )) == NULL ) { throw Exception::OutOfMemory( - wxsFormat(L"HashBucket Chain (bucket size=%d)", bucket.Size+1) + wxsFormat(L"HashBucket Chain (bucket size=%d)", size+2) ); } - memcpy(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(nVifBlock)); - if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size ); + + // Replace the empty cell by the new block and create a new empty cell + memcpy(&m_bucket[b][size++], &dataPtr, sizeof(nVifBlock)); + 
memset(&m_bucket[b][size], 0, sizeof(nVifBlock)); + + if( size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", b, size ); } + + u32 bucket_size(const nVifBlock& dataPtr) { + u32 d = (u32&)dataPtr; + nVifBlock* chainpos = m_bucket[d % m_bucket.size()]; + + u32 size = 0; + + while (chainpos->startPtr != 0) { + size++; + chainpos++; + } + + return size; + } + void clear() { - for (int i = 0; i < hSize; i++) { - safe_aligned_free(mBucket[i].Chain); - mBucket[i].Size = 0; + for (auto& bucket : m_bucket) + safe_aligned_free(bucket); + } + + void reset() { + clear(); + + // Allocate an empty cell for all buckets + for (auto& bucket : m_bucket) { + if( (bucket = (nVifBlock*)_aligned_malloc( sizeof(nVifBlock), 16 )) == nullptr ) { + throw Exception::OutOfMemory( + wxsFormat(L"HashBucket Chain (bucket size=%d)", 1) + ); + } + + memset(bucket, 0, sizeof(nVifBlock)); } } };