newVif: minor optimizations.

* Improved hashing slightly by ignoring the garbage values in the 'mask' parameter when doMask is false.
 * Wrote an inlineable version of the hash compare function, using x86 intrinsics.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2384 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-12-22 14:21:21 +00:00
parent 3c03e15dc1
commit c5b47530f9
4 changed files with 61 additions and 43 deletions

View File

@ -230,7 +230,7 @@ microVUf(int) mVUfindLeastUsedProg() {
mVU->prog.prog[i].isOld = 0;
mVU->prog.prog[i].used = 1;
mVUsortProg(mVU, i);
Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1);
Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1);
return i;
}
}
@ -247,7 +247,7 @@ microVUf(int) mVUfindLeastUsedProg() {
mVU->prog.prog[pIdx].isOld = 0;
mVU->prog.prog[pIdx].used = 1;
mVUsortProg(mVU, pIdx);
Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1);
Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1);
return pIdx;
}

View File

@ -125,6 +125,7 @@ void dVifUnpack(int idx, u8 *data, u32 size) {
nVifStruct& v = nVif[idx];
const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5);
const int doMask = (upkType>>4)&1;
_vBlock.upkType = upkType;
_vBlock.num = *(u8*)&vifRegs->num;
@ -132,22 +133,29 @@ void dVifUnpack(int idx, u8 *data, u32 size) {
_vBlock.scl = vif->cl;
_vBlock.cl = vifRegs->cycle.cl;
_vBlock.wl = vifRegs->cycle.wl;
_vBlock.mask = vifRegs->mask;
// Zero out the mask parameter if it's unused -- games leave random junk
// values here which cause false recblock cache misses.
_vBlock.mask = doMask ? vifRegs->mask : 0x00;
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
u8* dest = dVifsetVUptr(v, vif->tag.addr);
if (!dest) {
//DevCon.WriteLn("Running Interpreter Block");
_nVifUnpack(idx, data, size);
}
else {
if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
//DevCon.WriteLn("Running Recompiled Block!");
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
}
else {
//DevCon.WriteLn("Running Interpreter Block");
_nVifUnpack(idx, data, size);
}
return;
}
static int recBlockNum = 0;
DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl=0x%x, wl=0x%x, mask=%s)",
_vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
);
dVifRecompile(v, &_vBlock);
v.vifBlocks->add(&_vBlock);
dVifRecLimit(idx);

View File

@ -15,7 +15,14 @@
#pragma once
extern __pagealigned u8 nVifMemCmp[__pagesize];
static __pagealigned u8 nVifMemCmp[__pagesize];
// Pairs an entry count with the array holding those entries, so that both
// live side by side. HashBucket keeps one of these per hash slot.
template< class T >
struct SizeChain {
	int Size;	// number of T entries currently stored in Chain
	T*  Chain;	// array of entries (NULL while empty in HashBucket's usage)
};
// HashBucket is a container which uses a built-in hash function
// to perform quick searches.
@ -27,49 +34,54 @@ extern __pagealigned u8 nVifMemCmp[__pagesize];
// be in the first bytes of the struct. (hence why nVifBlock is specifically sorted)
template<typename T, int hSize, int cmpSize>
class HashBucket {
private:
T* mChain[hSize];
int mSize [hSize];
protected:
SizeChain<T> mBucket[hSize];
public:
HashBucket() {
for (int i = 0; i < hSize; i++) {
mChain[i] = NULL;
mSize [i] = 0;
mBucket[i].Chain = NULL;
mBucket[i].Size = 0;
}
}
~HashBucket() { clear(); }
int quickFind(u32 data) {
int o = data % hSize;
return mSize[o];
return mBucket[data % hSize].Size;
}
T* find(T* dataPtr) {
__forceinline T* find(T* dataPtr) {
u32 d = *((u32*)dataPtr);
int o = d % hSize;
int s = mSize[o];
T* c = mChain[o];
for (int i = 0; i < s; i++) {
//if (!memcmp(&c[i], dataPtr, cmpSize)) return &c[i];
if ((((nVifCall)((void*)nVifMemCmp))(&c[i], dataPtr))==7) return &c[i];
const SizeChain<T>& bucket( mBucket[d % hSize] );
for (int i=bucket.Size; i; --i) {
// This inline version seems about 1-2% faster in tests of games that average 1
// program per bucket. Games that average more should see a bigger improvement --air
int result = _mm_movemask_ps( (__m128&)_mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
if( result == 0x7 ) return &bucket.Chain[i];
// Dynamically generated function version, can't be inlined. :(
//if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i];
//if (!memcmp(&bucket.Chain[i], dataPtr, sizeof(T)-4)) return &c[i]; // old school version! >_<
}
if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size );
return NULL;
}
void add(T* dataPtr) {
__forceinline void add(T* dataPtr) {
u32 d = *(u32*)dataPtr;
int o = d % hSize;
int s = mSize[o]++;
T* c = mChain[o];
T* n = (T*)_aligned_malloc(sizeof(T)*(s+1), 16);
if (s) {
memcpy(n, c, sizeof(T) * s);
safe_aligned_free(c);
SizeChain<T>& bucket( mBucket[d % hSize] );
if( bucket.Chain = (T*)_aligned_realloc( bucket.Chain, sizeof(T)*(bucket.Size+1), 16), bucket.Chain==NULL ) {
throw Exception::OutOfMemory(
wxsFormat(L"Out of memory re-allocating hash bucket (bucket size=%d)", bucket.Size+1),
wxEmptyString
);
}
memcpy(&n[s], dataPtr, sizeof(T));
mChain[o] = n;
memcpy_fast(&bucket.Chain[bucket.Size++], dataPtr, sizeof(T));
}
void clear() {
for (int i = 0; i < hSize; i++) {
safe_aligned_free(mChain[i]);
mSize[i] = 0;
safe_aligned_free(mBucket[i].Chain);
mBucket[i].Size = 0;
}
}
};

View File

@ -272,17 +272,15 @@ void writeBackRow(nVifStruct& v) {
// ToDo: Do we need to write back to vifregs.rX too!? :/
}
__pagealigned u8 nVifMemCmp[__pagesize];
void emitCustomCompare() {
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
memset_8<0xcc,__pagesize>(nVifMemCmp);
memset8<0xcc>(nVifMemCmp);
xSetPtr(nVifMemCmp);
xMOVAPS (xmm0, ptr32[ecx]);
xPCMP.EQD(xmm0, ptr32[edx]);
xMOVMSKPS(eax, xmm0);
xAND (eax, 0x7);
xAND (eax, 0x7); // ignore top 4 bytes (recBlock pointer)
xRET();
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);