Linux: Use memcmp_mmx instead of memcmp; add a workaround for a segfault in iVif in some games.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1350 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2009-06-08 21:24:18 +00:00
parent e538ecd7d8
commit 70cd55b332
2 changed files with 35 additions and 45 deletions

View File

@ -25,8 +25,6 @@
#include <xmmintrin.h> #include <xmmintrin.h>
#include <emmintrin.h> #include <emmintrin.h>
//#define USE_OLD_IVIF_CODE
// sse2 highly optimized vif (~200 separate functions are built) zerofrog(@gmail.com) // sse2 highly optimized vif (~200 separate functions are built) zerofrog(@gmail.com)
extern u32 g_vif1Masks[48], g_vif0Masks[48]; extern u32 g_vif1Masks[48], g_vif0Masks[48];
extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4]; extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4];
@ -55,7 +53,10 @@ extern u8 s_maskwrite[256];
extern "C" PCSX2_ALIGNED16(u32 s_TempDecompress[4]) = {0}; extern "C" PCSX2_ALIGNED16(u32 s_TempDecompress[4]) = {0};
#if defined(_MSC_VER) || !defined(USE_OLD_IVIF_CODE) #ifdef __LINUX__
static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask);
#endif
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask) void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{ {
u32 i; u32 i;
@ -66,7 +67,13 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
prev |= s_maskwrite[mask&0xff]; prev |= s_maskwrite[mask&0xff];
hasmask[i] = prev; hasmask[i] = prev;
if( (mask&0xff) != (oldmask&0xff) ) { if ((mask&0xff) != (oldmask&0xff))
#ifdef __LINUX__
if (mask == 0) // Temporary workaround for a bug causing a segfault.
UseOldMaskCode(vif1masks, mask);
else
#endif
{
__m128i r0, r1, r2, r3; __m128i r0, r1, r2, r3;
r0 = _mm_load_si128((__m128i*)&s_maskarr[mask&15][0]); r0 = _mm_load_si128((__m128i*)&s_maskarr[mask&15][0]);
r2 = _mm_unpackhi_epi16(r0, r0); r2 = _mm_unpackhi_epi16(r0, r0);
@ -90,47 +97,29 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
FreezeXMMRegs(0); FreezeXMMRegs(0);
} }
#else // gcc #ifdef __LINUX__
// After some experimentation, I'm putting the old code back in for now for testing purposes, as the static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask)
// other version reliably SegFaults when loading YuGiOh: Duelist of the Roses on Linux (when setting
// r0 to _mm_load_si128, when vif1masks=0x846f670, hasmask=0x846f454, mask=0, and oldmask=5).
// Seems to work everywhere else. It'll stay disabled for now, but it's easier for me to fiddle with if it's in here. -arcum42
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{ {
u32 i; u8* p0 = (u8*)&s_maskarr[mask&15][0];
u32 prev = 0; u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0];
FreezeXMMRegs(1);
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) { __asm__(".intel_syntax noprefix\n"
"movaps xmm0, [%0]\n"
prev |= s_maskwrite[mask&0xff];//((mask&3)==3)||((mask&0xc)==0xc)||((mask&0x30)==0x30)||((mask&0xc0)==0xc0); "movaps xmm1, [%1]\n"
hasmask[i] = prev; "movaps xmm2, xmm0\n"
"punpcklwd xmm0, xmm0\n"
if( (mask&0xff) != (oldmask&0xff) ) { "punpckhwd xmm2, xmm2\n"
u8* p0 = (u8*)&s_maskarr[mask&15][0]; "movaps xmm3, xmm1\n"
u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0]; "punpcklwd xmm1, xmm1\n"
"punpckhwd xmm3, xmm3\n"
__asm__(".intel_syntax noprefix\n" "movq [%2], xmm0\n"
"movaps xmm0, [%0]\n" "movq [%2+8], xmm1\n"
"movaps xmm1, [%1]\n" "movhps [%2+16], xmm0\n"
"movaps xmm2, xmm0\n" "movhps [%2+24], xmm1\n"
"punpcklwd xmm0, xmm0\n" "movq [%2+32], xmm2\n"
"punpckhwd xmm2, xmm2\n" "movq [%2+40], xmm3\n"
"movaps xmm3, xmm1\n" "movhps [%2+48], xmm2\n"
"punpcklwd xmm1, xmm1\n" "movhps [%2+56], xmm3\n"
"punpckhwd xmm3, xmm3\n" ".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) );
"movq [%2], xmm0\n"
"movq [%2+8], xmm1\n"
"movhps [%2+16], xmm0\n"
"movhps [%2+24], xmm1\n"
"movq [%2+32], xmm2\n"
"movq [%2+40], xmm3\n"
"movhps [%2+48], xmm2\n"
"movhps [%2+56], xmm3\n"
".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) );
}
}
FreezeXMMRegs(0);
} }
#endif #endif

View File

@ -169,7 +169,8 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
// that they're not guaranteeing 16-byte alignment on the structs // that they're not guaranteeing 16-byte alignment on the structs
// being compared. So use this function instead: // being compared. So use this function instead:
#else #else
#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size)) //#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
#define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
#endif #endif
// Misc Macros... // Misc Macros...