newVif: minor optimizations.

* Improved hashing slightly by ignoring the garbage values in the 'mask' parameter when doMask is false.
* Wrote an inlineable version of the hash compare function, using x86 intrinsics.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2384 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-12-22 14:21:21 +00:00
parent 3c03e15dc1
commit c5b47530f9
4 changed files with 61 additions and 43 deletions

View File

@ -230,7 +230,7 @@ microVUf(int) mVUfindLeastUsedProg() {
mVU->prog.prog[i].isOld = 0; mVU->prog.prog[i].isOld = 0;
mVU->prog.prog[i].used = 1; mVU->prog.prog[i].used = 1;
mVUsortProg(mVU, i); mVUsortProg(mVU, i);
Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1); Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1);
return i; return i;
} }
} }
@ -247,7 +247,7 @@ microVUf(int) mVUfindLeastUsedProg() {
mVU->prog.prog[pIdx].isOld = 0; mVU->prog.prog[pIdx].isOld = 0;
mVU->prog.prog[pIdx].used = 1; mVU->prog.prog[pIdx].used = 1;
mVUsortProg(mVU, pIdx); mVUsortProg(mVU, pIdx);
Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1); Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1);
return pIdx; return pIdx;
} }

View File

@ -123,8 +123,9 @@ _f u8* dVifsetVUptr(nVifStruct& v, int offset) {
void dVifUnpack(int idx, u8 *data, u32 size) { void dVifUnpack(int idx, u8 *data, u32 size) {
nVifStruct& v = nVif[idx]; nVifStruct& v = nVif[idx];
const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5); const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5);
const int doMask = (upkType>>4)&1;
_vBlock.upkType = upkType; _vBlock.upkType = upkType;
_vBlock.num = *(u8*)&vifRegs->num; _vBlock.num = *(u8*)&vifRegs->num;
@ -132,22 +133,29 @@ void dVifUnpack(int idx, u8 *data, u32 size) {
_vBlock.scl = vif->cl; _vBlock.scl = vif->cl;
_vBlock.cl = vifRegs->cycle.cl; _vBlock.cl = vifRegs->cycle.cl;
_vBlock.wl = vifRegs->cycle.wl; _vBlock.wl = vifRegs->cycle.wl;
_vBlock.mask = vifRegs->mask;
// Zero out the mask parameter if it's unused -- games leave random junk
// values here which cause false recblock cache misses.
_vBlock.mask = doMask ? vifRegs->mask : 0x00;
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) { if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
u8* dest = dVifsetVUptr(v, vif->tag.addr); if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
if (!dest) {
//DevCon.WriteLn("Running Interpreter Block");
_nVifUnpack(idx, data, size);
}
else {
//DevCon.WriteLn("Running Recompiled Block!"); //DevCon.WriteLn("Running Recompiled Block!");
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data); ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
} }
else {
//DevCon.WriteLn("Running Interpreter Block");
_nVifUnpack(idx, data, size);
}
return; return;
} }
static int recBlockNum = 0; static int recBlockNum = 0;
DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++); DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl=0x%x, wl=0x%x, mask=%s)",
_vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
);
dVifRecompile(v, &_vBlock); dVifRecompile(v, &_vBlock);
v.vifBlocks->add(&_vBlock); v.vifBlocks->add(&_vBlock);
dVifRecLimit(idx); dVifRecLimit(idx);

View File

@ -15,7 +15,14 @@
#pragma once #pragma once
extern __pagealigned u8 nVifMemCmp[__pagesize]; static __pagealigned u8 nVifMemCmp[__pagesize];
template< typename T >
struct SizeChain
{
int Size;
T* Chain;
};
// HashBucket is a container which uses a built-in hash function // HashBucket is a container which uses a built-in hash function
// to perform quick searches. // to perform quick searches.
@ -27,49 +34,54 @@ extern __pagealigned u8 nVifMemCmp[__pagesize];
// be in the first bytes of the struct. (hence why nVifBlock is specifically sorted) // be in the first bytes of the struct. (hence why nVifBlock is specifically sorted)
template<typename T, int hSize, int cmpSize> template<typename T, int hSize, int cmpSize>
class HashBucket { class HashBucket {
private: protected:
T* mChain[hSize]; SizeChain<T> mBucket[hSize];
int mSize [hSize];
public: public:
HashBucket() { HashBucket() {
for (int i = 0; i < hSize; i++) { for (int i = 0; i < hSize; i++) {
mChain[i] = NULL; mBucket[i].Chain = NULL;
mSize [i] = 0; mBucket[i].Size = 0;
} }
} }
~HashBucket() { clear(); } ~HashBucket() { clear(); }
int quickFind(u32 data) { int quickFind(u32 data) {
int o = data % hSize; return mBucket[data % hSize].Size;
return mSize[o];
} }
T* find(T* dataPtr) { __forceinline T* find(T* dataPtr) {
u32 d = *((u32*)dataPtr); u32 d = *((u32*)dataPtr);
int o = d % hSize; const SizeChain<T>& bucket( mBucket[d % hSize] );
int s = mSize[o];
T* c = mChain[o]; for (int i=bucket.Size; i; --i) {
for (int i = 0; i < s; i++) { // This inline version seems about 1-2% faster in tests of games that average 1
//if (!memcmp(&c[i], dataPtr, cmpSize)) return &c[i]; // program per bucket. Games that average more should see a bigger improvement --air
if ((((nVifCall)((void*)nVifMemCmp))(&c[i], dataPtr))==7) return &c[i]; int result = _mm_movemask_ps( (__m128&)_mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
if( result == 0x7 ) return &bucket.Chain[i];
// Dynamically generated function version, can't be inlined. :(
//if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i];
//if (!memcmp(&bucket.Chain[i], dataPtr, sizeof(T)-4)) return &c[i]; // old school version! >_<
} }
if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size );
return NULL; return NULL;
} }
void add(T* dataPtr) { __forceinline void add(T* dataPtr) {
u32 d = *(u32*)dataPtr; u32 d = *(u32*)dataPtr;
int o = d % hSize; SizeChain<T>& bucket( mBucket[d % hSize] );
int s = mSize[o]++;
T* c = mChain[o]; if( bucket.Chain = (T*)_aligned_realloc( bucket.Chain, sizeof(T)*(bucket.Size+1), 16), bucket.Chain==NULL ) {
T* n = (T*)_aligned_malloc(sizeof(T)*(s+1), 16); throw Exception::OutOfMemory(
if (s) { wxsFormat(L"Out of memory re-allocating hash bucket (bucket size=%d)", bucket.Size+1),
memcpy(n, c, sizeof(T) * s); wxEmptyString
safe_aligned_free(c); );
} }
memcpy(&n[s], dataPtr, sizeof(T)); memcpy_fast(&bucket.Chain[bucket.Size++], dataPtr, sizeof(T));
mChain[o] = n;
} }
void clear() { void clear() {
for (int i = 0; i < hSize; i++) { for (int i = 0; i < hSize; i++) {
safe_aligned_free(mChain[i]); safe_aligned_free(mBucket[i].Chain);
mSize[i] = 0; mBucket[i].Size = 0;
} }
} }
}; };

View File

@ -272,18 +272,16 @@ void writeBackRow(nVifStruct& v) {
// ToDo: Do we need to write back to vifregs.rX too!? :/ // ToDo: Do we need to write back to vifregs.rX too!? :/
} }
__pagealigned u8 nVifMemCmp[__pagesize];
void emitCustomCompare() { void emitCustomCompare() {
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false); HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
memset_8<0xcc,__pagesize>(nVifMemCmp); memset8<0xcc>(nVifMemCmp);
xSetPtr(nVifMemCmp); xSetPtr(nVifMemCmp);
xMOVAPS (xmm0, ptr32[ecx]); xMOVAPS (xmm0, ptr32[ecx]);
xPCMP.EQD(xmm0, ptr32[edx]); xPCMP.EQD(xmm0, ptr32[edx]);
xMOVMSKPS(eax, xmm0); xMOVMSKPS(eax, xmm0);
xAND (eax, 0x7); xAND (eax, 0x7); // ignore top 4 bytes (recBlock pointer)
xRET(); xRET();
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true); HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);
} }