mirror of https://github.com/PCSX2/pcsx2.git
GregMiscellaneous: zzogl-pg:
* Finish GAS removal. I used SSE2 instead of MMX because it was easier to write (and it is faster anyway). The code generated from the intrinsics seems equivalent to the GAS version, but the control flow will need thorough testing (and some explanation). Note: I also added a basic C version for reference/debugging (not tested yet). git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3837 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
8036106b07
commit
8435ad3f01
|
@ -32,6 +32,9 @@
|
|||
#include "targets.h"
|
||||
#include "GLWin.h"
|
||||
#include "ZZoglShaders.h"
|
||||
#ifdef ZEROGS_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
//----------------------- Defines
|
||||
|
||||
|
@ -851,6 +854,55 @@ bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
|
|||
|
||||
bool bRet = false;
|
||||
|
||||
// FIXME: the code generated by the intrinsics is the same as the Linux asm.
|
||||
// However, there is no equivalent of "cmp %%esi, 0x90" in the Windows asm!
|
||||
// So the control flow must be checked.
|
||||
#define TEST_THIS
|
||||
#ifdef TEST_THIS
|
||||
while(entries != 0) {
|
||||
#ifdef ZEROGS_SSE2
|
||||
__m128i result = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src), _mm_load_si128((__m128i*)dst));
|
||||
|
||||
__m128i result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+1), _mm_load_si128((__m128i*)dst+1));
|
||||
result = _mm_and_si128(result, result_tmp);
|
||||
|
||||
result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+2), _mm_load_si128((__m128i*)dst+2));
|
||||
result = _mm_and_si128(result, result_tmp);
|
||||
|
||||
result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+3), _mm_load_si128((__m128i*)dst+3));
|
||||
result = _mm_and_si128(result, result_tmp);
|
||||
|
||||
u32 result_int = _mm_movemask_epi8(result);
|
||||
if (result_int != 0xFF) {
|
||||
bRet = true;
|
||||
break;
|
||||
}
|
||||
#else
|
||||
// I see no point in keeping an MMX version. The SSE2 version is probably faster.
|
||||
// Keep a slow portable C version for reference/debug
|
||||
for (int i=0; i < 16 ; i++) {
|
||||
if (*((u32*)src+i) != *((u32*)dst+i)) {
|
||||
bRet = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (entries & 0x10) {
|
||||
src -= 56; // go back and down one column
|
||||
}
|
||||
|
||||
src += 32; // go to the right block
|
||||
|
||||
if (entries == 0x90) {
|
||||
src += 32; // skip whole block
|
||||
}
|
||||
|
||||
dst += 8;
|
||||
entries -= 16;
|
||||
}
|
||||
#else
|
||||
|
||||
// do a fast test with MMX
|
||||
#ifdef _MSC_VER
|
||||
int storeebx;
|
||||
|
@ -977,6 +1029,7 @@ Return:
|
|||
".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "S"(entries) : "eax", "memory");
|
||||
|
||||
#endif // _WIN32
|
||||
#endif
|
||||
return bRet;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue