mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg:
* Rework the IsDirty intrinsic. I missed the swizzle handling on the first try :p git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3918 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
33214df091
commit
8c58c8ee3c
|
@ -167,6 +167,7 @@ inline bool PSMT_IS16Z(int psm) {return ((psm & 0x32) == 0x32);}
|
|||
|
||||
// Check to see if it is 32 bits. According to code comments, anyways.
|
||||
// I'll have to look closer at it, because it'd seem like it'd return true for 24 bits.
|
||||
// Note: the function only works for the CLUT format. The CLUT PSM is 4 bits only. The possible values are PSMCT32, PSMCT16, PSMCT16S
|
||||
inline bool PSMT_IS32BIT(int psm)
{
	// True when psm is at most 1; every larger value yields false.
	return psm < 2;
}
|
||||
|
||||
// When color format is RGB24 (PSMCT24) or RGBA16 (PSMCT16 & 16S) alpha value expanded, based on
|
||||
|
@ -544,9 +545,7 @@ typedef struct
|
|||
|
||||
extern GSinternal gs;
|
||||
|
||||
// Note the function is used in a template parameter so it must be declared extern
|
||||
// Note2: In this case extern is not compatible with __forceinline so just inline it...
|
||||
extern inline u16 RGBA32to16(u32 c)
|
||||
static __forceinline u16 RGBA32to16(u32 c)
|
||||
{
|
||||
return (u16)((((c) & 0x000000f8) >> 3) |
|
||||
(((c) & 0x0000f800) >> 6) |
|
||||
|
|
|
@ -861,31 +861,42 @@ bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
|
|||
#ifdef TEST_THIS
|
||||
while(entries != 0) {
|
||||
#ifdef ZEROGS_SSE2
|
||||
__m128i result = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src), _mm_load_si128((__m128i*)dst));
|
||||
// Note: local memory data is swizzled
|
||||
__m128i src_0 = _mm_load_si128((__m128i*)src); // 9 8 1 0
|
||||
__m128i src_1 = _mm_load_si128((__m128i*)src+1); // 11 10 3 2
|
||||
__m128i src_2 = _mm_load_si128((__m128i*)src+2); // 13 12 5 4
|
||||
__m128i src_3 = _mm_load_si128((__m128i*)src+3); // 15 14 7 6
|
||||
|
||||
__m128i result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+1), _mm_load_si128((__m128i*)dst+1));
|
||||
__m128i dst_0 = _mm_load_si128((__m128i*)dst);
|
||||
__m128i dst_1 = _mm_load_si128((__m128i*)dst+1);
|
||||
__m128i dst_2 = _mm_load_si128((__m128i*)dst+2);
|
||||
__m128i dst_3 = _mm_load_si128((__m128i*)dst+3);
|
||||
|
||||
__m128i result = _mm_cmpeq_epi32(_mm_unpacklo_epi64(src_0, src_1), dst_0);
|
||||
|
||||
__m128i result_tmp = _mm_cmpeq_epi32(_mm_unpacklo_epi64(src_2, src_3), dst_1);
|
||||
result = _mm_and_si128(result, result_tmp);
|
||||
|
||||
result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+2), _mm_load_si128((__m128i*)dst+2));
|
||||
result_tmp = _mm_cmpeq_epi32(_mm_unpackhi_epi64(src_0, src_1), dst_2);
|
||||
result = _mm_and_si128(result, result_tmp);
|
||||
|
||||
result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+3), _mm_load_si128((__m128i*)dst+3));
|
||||
result_tmp = _mm_cmpeq_epi32(_mm_unpackhi_epi64(src_2, src_3), dst_3);
|
||||
result = _mm_and_si128(result, result_tmp);
|
||||
|
||||
u32 result_int = _mm_movemask_epi8(result);
|
||||
if (result_int != 0xFF) {
|
||||
if (result_int != 0xFFFF) {
|
||||
bRet = true;
|
||||
break;
|
||||
}
|
||||
#else
|
||||
// I see no point in keeping an MMX version. The SSE2 version is probably faster.
|
||||
// Keep a slow portable C version for reference/debug
|
||||
for (int i=0; i < 16 ; i++) {
|
||||
if (*((u32*)src+i) != *((u32*)dst+i)) {
|
||||
// Note: local memory data is swizzled
|
||||
if (dst[0] != src[0] || dst[1] != src[2] || dst[2] != src[4] || dst[3] != src[6]
|
||||
|| dst[4] != src[1] || dst[5] != src[3] || dst[6] != src[5] || dst[7] != src[7]) {
|
||||
bRet = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (entries & 0x10) {
|
||||
|
|
Loading…
Reference in New Issue