mirror of https://github.com/PCSX2/pcsx2.git
vif: replace sse cmp code with standard cmp
Standard instructions are faster to execute; besides, the CPU can optimize the cmp/jne pair.

SSE:
    e0: add ecx,0x10
    e3: cmp eax,0x7
    e6: jg 1b0 <void dVifUnpack<0>(unsigned char const*, bool)+0x1b0>
  enter_loop:
    ec: vpcmpeqd xmm0,xmm1,XMMWORD PTR [ecx]
    f0: vmovmskps eax,xmm0
    f4: cmp eax,0x7
    f7: jne e0 <void dVifUnpack<0>(unsigned char const*, bool)+0xe0>

Standard cmp:
    d8: add eax,0x10
    db: mov esi,DWORD PTR [eax+0xc]
    de: test esi,esi
    e0: je 190 <void dVifUnpack<0>(unsigned char const*, bool)+0x190>
  enter_loop:
    e6: cmp ecx,DWORD PTR [eax+0x4]
    e9: jne d8 <void dVifUnpack<0>(unsigned char const*, bool)+0xd8>
    eb: cmp DWORD PTR [eax+0x8],ebx
    ee: jne d8 <void dVifUnpack<0>(unsigned char const*, bool)+0xd8>

v2: use a reference instead of a pointer for the find parameter
This commit is contained in:
parent
2320efeb55
commit
7a33cda122
|
@ -286,7 +286,7 @@ _vifT static __fi u8* dVifsetVUptr(uint cl, uint wl, u8 num, bool isFill) {
|
||||||
|
|
||||||
_vifT __fi uptr dVifCompile(nVifBlock& key) {
|
_vifT __fi uptr dVifCompile(nVifBlock& key) {
|
||||||
nVifStruct& v = nVif[idx];
|
nVifStruct& v = nVif[idx];
|
||||||
nVifBlock* block = v.vifBlocks.find(&key);
|
nVifBlock* block = v.vifBlocks.find(key);
|
||||||
|
|
||||||
// Cache hit
|
// Cache hit
|
||||||
if (likely(block != nullptr))
|
if (likely(block != nullptr))
|
||||||
|
|
|
@ -64,23 +64,18 @@ public:
|
||||||
|
|
||||||
~HashBucket() throw() { clear(); }
|
~HashBucket() throw() { clear(); }
|
||||||
|
|
||||||
__fi nVifBlock* find(nVifBlock* dataPtr) {
|
__fi nVifBlock* find(const nVifBlock& dataPtr) {
|
||||||
const __m128i* chainpos = (__m128i*)m_bucket[dataPtr->hash_key];
|
nVifBlock* chainpos = m_bucket[dataPtr.hash_key];
|
||||||
|
|
||||||
const __m128i data128( _mm_load_si128((__m128i*)dataPtr) );
|
while (true) {
|
||||||
|
if (chainpos->key0 == dataPtr.key0 && chainpos->key1 == dataPtr.key1)
|
||||||
|
return chainpos;
|
||||||
|
|
||||||
int result;
|
if (chainpos->startPtr == 0)
|
||||||
do {
|
return nullptr;
|
||||||
// This inline SSE code is generally faster than using emitter code, since it inlines nicely. --air
|
|
||||||
result = _mm_movemask_ps( _mm_castsi128_ps( _mm_cmpeq_epi32( data128, _mm_load_si128(chainpos) ) ) );
|
|
||||||
// startPtr doesn't match (aka not nullptr) hence 4th bit must be 0
|
|
||||||
if (result == 0x7) return (nVifBlock*)chainpos;
|
|
||||||
|
|
||||||
chainpos += sizeof(nVifBlock) / sizeof(__m128i);
|
chainpos++;
|
||||||
|
}
|
||||||
} while(result < 0x8);
|
|
||||||
|
|
||||||
return nullptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void add(const nVifBlock& dataPtr) {
|
void add(const nVifBlock& dataPtr) {
|
||||||
|
|
Loading…
Reference in New Issue