mirror of https://github.com/PCSX2/pcsx2.git
newVif: minor optimizations.
* Improved hashing slightly by ignoring the garbage values in the 'mask' parameter when doMask is false.
* Wrote an inlineable version of the hash compare function, using x86 intrinsics.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2384 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
3c03e15dc1
commit
c5b47530f9
|
@ -230,7 +230,7 @@ microVUf(int) mVUfindLeastUsedProg() {
|
||||||
mVU->prog.prog[i].isOld = 0;
|
mVU->prog.prog[i].isOld = 0;
|
||||||
mVU->prog.prog[i].used = 1;
|
mVU->prog.prog[i].used = 1;
|
||||||
mVUsortProg(mVU, i);
|
mVUsortProg(mVU, i);
|
||||||
Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1);
|
Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, i+1, mVU->prog.total+1);
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -247,7 +247,7 @@ microVUf(int) mVUfindLeastUsedProg() {
|
||||||
mVU->prog.prog[pIdx].isOld = 0;
|
mVU->prog.prog[pIdx].isOld = 0;
|
||||||
mVU->prog.prog[pIdx].used = 1;
|
mVU->prog.prog[pIdx].used = 1;
|
||||||
mVUsortProg(mVU, pIdx);
|
mVUsortProg(mVU, pIdx);
|
||||||
Console.Warning("microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1);
|
Console.WriteLn( Color_Orange, "microVU%d: Cached MicroPrograms = [%03d] [%03d]", vuIndex, pIdx+1, mVU->prog.total+1);
|
||||||
return pIdx;
|
return pIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -123,8 +123,9 @@ _f u8* dVifsetVUptr(nVifStruct& v, int offset) {
|
||||||
|
|
||||||
void dVifUnpack(int idx, u8 *data, u32 size) {
|
void dVifUnpack(int idx, u8 *data, u32 size) {
|
||||||
|
|
||||||
nVifStruct& v = nVif[idx];
|
nVifStruct& v = nVif[idx];
|
||||||
const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5);
|
const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5);
|
||||||
|
const int doMask = (upkType>>4)&1;
|
||||||
|
|
||||||
_vBlock.upkType = upkType;
|
_vBlock.upkType = upkType;
|
||||||
_vBlock.num = *(u8*)&vifRegs->num;
|
_vBlock.num = *(u8*)&vifRegs->num;
|
||||||
|
@ -132,22 +133,29 @@ void dVifUnpack(int idx, u8 *data, u32 size) {
|
||||||
_vBlock.scl = vif->cl;
|
_vBlock.scl = vif->cl;
|
||||||
_vBlock.cl = vifRegs->cycle.cl;
|
_vBlock.cl = vifRegs->cycle.cl;
|
||||||
_vBlock.wl = vifRegs->cycle.wl;
|
_vBlock.wl = vifRegs->cycle.wl;
|
||||||
_vBlock.mask = vifRegs->mask;
|
|
||||||
|
// Zero out the mask parameter if it's unused -- games leave random junk
|
||||||
|
// values here which cause false recblock cache misses.
|
||||||
|
_vBlock.mask = doMask ? vifRegs->mask : 0x00;
|
||||||
|
|
||||||
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
|
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
|
||||||
u8* dest = dVifsetVUptr(v, vif->tag.addr);
|
if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
|
||||||
if (!dest) {
|
|
||||||
//DevCon.WriteLn("Running Interpreter Block");
|
|
||||||
_nVifUnpack(idx, data, size);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
//DevCon.WriteLn("Running Recompiled Block!");
|
//DevCon.WriteLn("Running Recompiled Block!");
|
||||||
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
|
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
//DevCon.WriteLn("Running Interpreter Block");
|
||||||
|
_nVifUnpack(idx, data, size);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
static int recBlockNum = 0;
|
static int recBlockNum = 0;
|
||||||
DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
|
DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
|
||||||
|
DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl=0x%x, wl=0x%x, mask=%s)",
|
||||||
|
_vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
|
||||||
|
doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
|
||||||
|
);
|
||||||
|
|
||||||
dVifRecompile(v, &_vBlock);
|
dVifRecompile(v, &_vBlock);
|
||||||
v.vifBlocks->add(&_vBlock);
|
v.vifBlocks->add(&_vBlock);
|
||||||
dVifRecLimit(idx);
|
dVifRecLimit(idx);
|
||||||
|
|
|
@ -15,7 +15,14 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
extern __pagealigned u8 nVifMemCmp[__pagesize];
|
static __pagealigned u8 nVifMemCmp[__pagesize];
|
||||||
|
|
||||||
|
template< typename T >
|
||||||
|
struct SizeChain
|
||||||
|
{
|
||||||
|
int Size;
|
||||||
|
T* Chain;
|
||||||
|
};
|
||||||
|
|
||||||
// HashBucket is a container which uses a built-in hash function
|
// HashBucket is a container which uses a built-in hash function
|
||||||
// to perform quick searches.
|
// to perform quick searches.
|
||||||
|
@ -27,49 +34,54 @@ extern __pagealigned u8 nVifMemCmp[__pagesize];
|
||||||
// be in the first bytes of the struct. (hence why nVifBlock is specifically sorted)
|
// be in the first bytes of the struct. (hence why nVifBlock is specifically sorted)
|
||||||
template<typename T, int hSize, int cmpSize>
|
template<typename T, int hSize, int cmpSize>
|
||||||
class HashBucket {
|
class HashBucket {
|
||||||
private:
|
protected:
|
||||||
T* mChain[hSize];
|
SizeChain<T> mBucket[hSize];
|
||||||
int mSize [hSize];
|
|
||||||
public:
|
public:
|
||||||
HashBucket() {
|
HashBucket() {
|
||||||
for (int i = 0; i < hSize; i++) {
|
for (int i = 0; i < hSize; i++) {
|
||||||
mChain[i] = NULL;
|
mBucket[i].Chain = NULL;
|
||||||
mSize [i] = 0;
|
mBucket[i].Size = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
~HashBucket() { clear(); }
|
~HashBucket() { clear(); }
|
||||||
int quickFind(u32 data) {
|
int quickFind(u32 data) {
|
||||||
int o = data % hSize;
|
return mBucket[data % hSize].Size;
|
||||||
return mSize[o];
|
|
||||||
}
|
}
|
||||||
T* find(T* dataPtr) {
|
__forceinline T* find(T* dataPtr) {
|
||||||
u32 d = *((u32*)dataPtr);
|
u32 d = *((u32*)dataPtr);
|
||||||
int o = d % hSize;
|
const SizeChain<T>& bucket( mBucket[d % hSize] );
|
||||||
int s = mSize[o];
|
|
||||||
T* c = mChain[o];
|
for (int i=bucket.Size; i; --i) {
|
||||||
for (int i = 0; i < s; i++) {
|
// This inline version seems about 1-2% faster in tests of games that average 1
|
||||||
//if (!memcmp(&c[i], dataPtr, cmpSize)) return &c[i];
|
// program per bucket. Games that average more should see a bigger improvement --air
|
||||||
if ((((nVifCall)((void*)nVifMemCmp))(&c[i], dataPtr))==7) return &c[i];
|
int result = _mm_movemask_ps( (__m128&)_mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
|
||||||
|
if( result == 0x7 ) return &bucket.Chain[i];
|
||||||
|
|
||||||
|
// Dynamically generated function version, can't be inlined. :(
|
||||||
|
//if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i];
|
||||||
|
|
||||||
|
//if (!memcmp(&bucket.Chain[i], dataPtr, sizeof(T)-4)) return &c[i]; // old school version! >_<
|
||||||
}
|
}
|
||||||
|
if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size );
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
void add(T* dataPtr) {
|
__forceinline void add(T* dataPtr) {
|
||||||
u32 d = *(u32*)dataPtr;
|
u32 d = *(u32*)dataPtr;
|
||||||
int o = d % hSize;
|
SizeChain<T>& bucket( mBucket[d % hSize] );
|
||||||
int s = mSize[o]++;
|
|
||||||
T* c = mChain[o];
|
if( bucket.Chain = (T*)_aligned_realloc( bucket.Chain, sizeof(T)*(bucket.Size+1), 16), bucket.Chain==NULL ) {
|
||||||
T* n = (T*)_aligned_malloc(sizeof(T)*(s+1), 16);
|
throw Exception::OutOfMemory(
|
||||||
if (s) {
|
wxsFormat(L"Out of memory re-allocating hash bucket (bucket size=%d)", bucket.Size+1),
|
||||||
memcpy(n, c, sizeof(T) * s);
|
wxEmptyString
|
||||||
safe_aligned_free(c);
|
);
|
||||||
}
|
}
|
||||||
memcpy(&n[s], dataPtr, sizeof(T));
|
memcpy_fast(&bucket.Chain[bucket.Size++], dataPtr, sizeof(T));
|
||||||
mChain[o] = n;
|
|
||||||
}
|
}
|
||||||
void clear() {
|
void clear() {
|
||||||
for (int i = 0; i < hSize; i++) {
|
for (int i = 0; i < hSize; i++) {
|
||||||
safe_aligned_free(mChain[i]);
|
safe_aligned_free(mBucket[i].Chain);
|
||||||
mSize[i] = 0;
|
mBucket[i].Size = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -272,18 +272,16 @@ void writeBackRow(nVifStruct& v) {
|
||||||
// ToDo: Do we need to write back to vifregs.rX too!? :/
|
// ToDo: Do we need to write back to vifregs.rX too!? :/
|
||||||
}
|
}
|
||||||
|
|
||||||
__pagealigned u8 nVifMemCmp[__pagesize];
|
|
||||||
|
|
||||||
void emitCustomCompare() {
|
void emitCustomCompare() {
|
||||||
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
|
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
|
||||||
memset_8<0xcc,__pagesize>(nVifMemCmp);
|
memset8<0xcc>(nVifMemCmp);
|
||||||
xSetPtr(nVifMemCmp);
|
xSetPtr(nVifMemCmp);
|
||||||
|
|
||||||
xMOVAPS (xmm0, ptr32[ecx]);
|
xMOVAPS (xmm0, ptr32[ecx]);
|
||||||
xPCMP.EQD(xmm0, ptr32[edx]);
|
xPCMP.EQD(xmm0, ptr32[edx]);
|
||||||
xMOVMSKPS(eax, xmm0);
|
xMOVMSKPS(eax, xmm0);
|
||||||
xAND (eax, 0x7);
|
xAND (eax, 0x7); // ignore top 4 bytes (recBlock pointer)
|
||||||
|
|
||||||
xRET();
|
xRET();
|
||||||
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);
|
HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue