mirror of https://github.com/PCSX2/pcsx2.git
newVif: minor optimizations.
* Improved hashing slightly by ignoring the garbage values in the 'mask' parameter when doMask is false.
* Wrote an inlineable version of the hash compare function, using x86 intrinsics.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2384 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
3c03e15dc1
commit
c5b47530f9
|
@ -230,7 +230,7 @@ microVUf(int) mVUfindLeastUsedProg() {
|
|||
mVU->prog.prog[i].isOld = 0;
|
||||
mVU->prog.prog[i].used = 1;
|
||||
mVUsortProg(mVU, i);
|
||||
Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1);
|
||||
Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
@ -247,7 +247,7 @@ microVUf(int) mVUfindLeastUsedProg() {
|
|||
mVU->prog.prog[pIdx].isOld = 0;
|
||||
mVU->prog.prog[pIdx].used = 1;
|
||||
mVUsortProg(mVU, pIdx);
|
||||
Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1);
|
||||
Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1);
|
||||
return pIdx;
|
||||
}
|
||||
|
||||
|
|
|
@ -123,8 +123,9 @@ _f u8* dVifsetVUptr(nVifStruct& v, int offset) {
|
|||
|
||||
void dVifUnpack(int idx, u8 *data, u32 size) {
|
||||
|
||||
nVifStruct& v = nVif[idx];
|
||||
const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5);
|
||||
nVifStruct& v = nVif[idx];
|
||||
const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5);
|
||||
const int doMask = (upkType>>4)&1;
|
||||
|
||||
_vBlock.upkType = upkType;
|
||||
_vBlock.num = *(u8*)&vifRegs->num;
|
||||
|
@ -132,22 +133,29 @@ void dVifUnpack(int idx, u8 *data, u32 size) {
|
|||
_vBlock.scl = vif->cl;
|
||||
_vBlock.cl = vifRegs->cycle.cl;
|
||||
_vBlock.wl = vifRegs->cycle.wl;
|
||||
_vBlock.mask = vifRegs->mask;
|
||||
|
||||
// Zero out the mask parameter if it's unused -- games leave random junk
|
||||
// values here which cause false recblock cache misses.
|
||||
_vBlock.mask = doMask ? vifRegs->mask : 0x00;
|
||||
|
||||
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
|
||||
u8* dest = dVifsetVUptr(v, vif->tag.addr);
|
||||
if (!dest) {
|
||||
//DevCon.WriteLn("Running Interpreter Block");
|
||||
_nVifUnpack(idx, data, size);
|
||||
}
|
||||
else {
|
||||
if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
|
||||
//DevCon.WriteLn("Running Recompiled Block!");
|
||||
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
|
||||
}
|
||||
else {
|
||||
//DevCon.WriteLn("Running Interpreter Block");
|
||||
_nVifUnpack(idx, data, size);
|
||||
}
|
||||
return;
|
||||
}
|
||||
static int recBlockNum = 0;
|
||||
DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
|
||||
DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl=0x%x, wl=0x%x, mask=%s)",
|
||||
_vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
|
||||
doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
|
||||
);
|
||||
|
||||
dVifRecompile(v, &_vBlock);
|
||||
v.vifBlocks->add(&_vBlock);
|
||||
dVifRecLimit(idx);
|
||||
|
|
|
@ -15,7 +15,14 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
extern __pagealigned u8 nVifMemCmp[__pagesize];
|
||||
static __pagealigned u8 nVifMemCmp[__pagesize];
|
||||
|
||||
template< typename T >
|
||||
struct SizeChain
|
||||
{
|
||||
int Size;
|
||||
T* Chain;
|
||||
};
|
||||
|
||||
// HashBucket is a container which uses a built-in hash function
|
||||
// to perform quick searches.
|
||||
|
@ -27,49 +34,54 @@ extern __pagealigned u8 nVifMemCmp[__pagesize];
|
|||
// be in the first bytes of the struct. (hence why nVifBlock is specifically sorted)
|
||||
template<typename T, int hSize, int cmpSize>
|
||||
class HashBucket {
|
||||
private:
|
||||
T* mChain[hSize];
|
||||
int mSize [hSize];
|
||||
protected:
|
||||
SizeChain<T> mBucket[hSize];
|
||||
|
||||
public:
|
||||
HashBucket() {
|
||||
for (int i = 0; i < hSize; i++) {
|
||||
mChain[i] = NULL;
|
||||
mSize [i] = 0;
|
||||
mBucket[i].Chain = NULL;
|
||||
mBucket[i].Size = 0;
|
||||
}
|
||||
}
|
||||
~HashBucket() { clear(); }
|
||||
int quickFind(u32 data) {
|
||||
int o = data % hSize;
|
||||
return mSize[o];
|
||||
return mBucket[data % hSize].Size;
|
||||
}
|
||||
T* find(T* dataPtr) {
|
||||
__forceinline T* find(T* dataPtr) {
|
||||
u32 d = *((u32*)dataPtr);
|
||||
int o = d % hSize;
|
||||
int s = mSize[o];
|
||||
T* c = mChain[o];
|
||||
for (int i = 0; i < s; i++) {
|
||||
//if (!memcmp(&c[i], dataPtr, cmpSize)) return &c[i];
|
||||
if ((((nVifCall)((void*)nVifMemCmp))(&c[i], dataPtr))==7) return &c[i];
|
||||
const SizeChain<T>& bucket( mBucket[d % hSize] );
|
||||
|
||||
for (int i=bucket.Size; i; --i) {
|
||||
// This inline version seems about 1-2% faster in tests of games that average 1
|
||||
// program per bucket. Games that average more should see a bigger improvement --air
|
||||
int result = _mm_movemask_ps( (__m128&)_mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
|
||||
if( result == 0x7 ) return &bucket.Chain[i];
|
||||
|
||||
// Dynamically generated function version, can't be inlined. :(
|
||||
//if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i];
|
||||
|
||||
//if (!memcmp(&bucket.Chain[i], dataPtr, sizeof(T)-4)) return &c[i]; // old school version! >_<
|
||||
}
|
||||
if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size );
|
||||
return NULL;
|
||||
}
|
||||
void add(T* dataPtr) {
|
||||
__forceinline void add(T* dataPtr) {
|
||||
u32 d = *(u32*)dataPtr;
|
||||
int o = d % hSize;
|
||||
int s = mSize[o]++;
|
||||
T* c = mChain[o];
|
||||
T* n = (T*)_aligned_malloc(sizeof(T)*(s+1), 16);
|
||||
if (s) {
|
||||
memcpy(n, c, sizeof(T) * s);
|
||||
safe_aligned_free(c);
|
||||
SizeChain<T>& bucket( mBucket[d % hSize] );
|
||||
|
||||
if( bucket.Chain = (T*)_aligned_realloc( bucket.Chain, sizeof(T)*(bucket.Size+1), 16), bucket.Chain==NULL ) {
|
||||
throw Exception::OutOfMemory(
|
||||
wxsFormat(L"Out of memory re-allocating hash bucket (bucket size=%d)", bucket.Size+1),
|
||||
wxEmptyString
|
||||
);
|
||||
}
|
||||
memcpy(&n[s], dataPtr, sizeof(T));
|
||||
mChain[o] = n;
|
||||
memcpy_fast(&bucket.Chain[bucket.Size++], dataPtr, sizeof(T));
|
||||
}
|
||||
void clear() {
|
||||
for (int i = 0; i < hSize; i++) {
|
||||
safe_aligned_free(mChain[i]);
|
||||
mSize[i] = 0;
|
||||
safe_aligned_free(mBucket[i].Chain);
|
||||
mBucket[i].Size = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -272,18 +272,16 @@ void writeBackRow(nVifStruct& v) {
|
|||
// ToDo: Do we need to write back to vifregs.rX too!? :/
|
||||
}
|
||||
|
||||
__pagealigned u8 nVifMemCmp[__pagesize];
|
||||
|
||||
void emitCustomCompare() {
|
||||
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
|
||||
memset_8<0xcc,__pagesize>(nVifMemCmp);
|
||||
memset8<0xcc>(nVifMemCmp);
|
||||
xSetPtr(nVifMemCmp);
|
||||
|
||||
xMOVAPS (xmm0, ptr32[ecx]);
|
||||
xPCMP.EQD(xmm0, ptr32[edx]);
|
||||
xMOVMSKPS(eax, xmm0);
|
||||
xAND (eax, 0x7);
|
||||
xAND (eax, 0x7); // ignore top 4 bytes (recBlock pointer)
|
||||
|
||||
xRET();
|
||||
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue