diff --git a/common/include/x86emitter/tools.h b/common/include/x86emitter/tools.h
index b09c86cef6..4ab623b109 100644
--- a/common/include/x86emitter/tools.h
+++ b/common/include/x86emitter/tools.h
@@ -15,6 +15,8 @@
 
 #pragma once
 
+#include "x86emitter.h"
+
 // this is all that needs to be called and will fill up the below structs
 extern void cpudetectInit();
 
@@ -104,6 +106,7 @@ namespace MMXRegisters
 	extern void Freeze();
 	extern void Thaw();
 	extern bool Saved();
+	extern __aligned16 u64 data[8];
 };
 
 namespace XMMRegisters
@@ -111,6 +114,7 @@ namespace XMMRegisters
 	extern void Freeze();
 	extern void Thaw();
 	extern bool Saved();
+	extern __aligned16 u64 data[2*iREGCNT_XMM];
 };
 
 namespace Registers
diff --git a/common/src/x86emitter/tools.cpp b/common/src/x86emitter/tools.cpp
index 9bedf8e553..9afa00e338 100644
--- a/common/src/x86emitter/tools.cpp
+++ b/common/src/x86emitter/tools.cpp
@@ -14,14 +14,11 @@
  */
 
 #include "PrecompiledHeader.h"
-#include "internal.h"
 #include "tools.h"
 
 // To make sure regs don't get changed while in the recompiler,
 // use Freeze/Thaw in MMXRegisters, XMMRegisters, & Registers.
 
-__aligned16 u64 g_globalMMXData[8];
-__aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
 
 /////////////////////////////////////////////////////////////////////
 // MMX Register Freezing
@@ -30,6 +27,7 @@ __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
 namespace MMXRegisters
 {
 	u8 stack_depth = 0;
+	__aligned16 u64 data[8];
 
 	__forceinline bool Saved()
 	{
@@ -51,7 +49,7 @@ namespace MMXRegisters
 #ifdef _MSC_VER
 		__asm
 		{
-			mov ecx, offset g_globalMMXData
+			mov ecx, offset data
 			movntq mmword ptr [ecx+0], mm0
 			movntq mmword ptr [ecx+8], mm1
 			movntq mmword ptr [ecx+16], mm2
@@ -65,16 +63,16 @@ namespace MMXRegisters
 #else
 		__asm__ volatile(
 			".intel_syntax noprefix\n"
-			"movq [%[g_globalMMXData]+0x00], mm0\n"
-			"movq [%[g_globalMMXData]+0x08], mm1\n"
-			"movq [%[g_globalMMXData]+0x10], mm2\n"
-			"movq [%[g_globalMMXData]+0x18], mm3\n"
-			"movq [%[g_globalMMXData]+0x20], mm4\n"
-			"movq [%[g_globalMMXData]+0x28], mm5\n"
-			"movq [%[g_globalMMXData]+0x30], mm6\n"
-			"movq [%[g_globalMMXData]+0x38], mm7\n"
+			"movq [%[data]+0x00], mm0\n"
+			"movq [%[data]+0x08], mm1\n"
+			"movq [%[data]+0x10], mm2\n"
+			"movq [%[data]+0x18], mm3\n"
+			"movq [%[data]+0x20], mm4\n"
+			"movq [%[data]+0x28], mm5\n"
+			"movq [%[data]+0x30], mm6\n"
+			"movq [%[data]+0x38], mm7\n"
 			"emms\n"
-			".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory"
+			".att_syntax\n" : : [data]"r"(data) : "memory"
 		);
 #endif
 	}
@@ -96,7 +94,7 @@ namespace MMXRegisters
 #ifdef _MSC_VER
 		__asm
 		{
-			mov ecx, offset g_globalMMXData
+			mov ecx, offset data
 			movq mm0, mmword ptr [ecx+0]
 			movq mm1, mmword ptr [ecx+8]
 			movq mm2, mmword ptr [ecx+16]
@@ -110,16 +108,16 @@ namespace MMXRegisters
 #else
 		__asm__ volatile(
 			".intel_syntax noprefix\n"
-			"movq mm0, [%[g_globalMMXData]+0x00]\n"
-			"movq mm1, [%[g_globalMMXData]+0x08]\n"
-			"movq mm2, [%[g_globalMMXData]+0x10]\n"
-			"movq mm3, [%[g_globalMMXData]+0x18]\n"
-			"movq mm4, [%[g_globalMMXData]+0x20]\n"
-			"movq mm5, [%[g_globalMMXData]+0x28]\n"
-			"movq mm6, [%[g_globalMMXData]+0x30]\n"
-			"movq mm7, [%[g_globalMMXData]+0x38]\n"
+			"movq mm0, [%[data]+0x00]\n"
+			"movq mm1, [%[data]+0x08]\n"
+			"movq mm2, [%[data]+0x10]\n"
+			"movq mm3, [%[data]+0x18]\n"
+			"movq mm4, [%[data]+0x20]\n"
+			"movq mm5, [%[data]+0x28]\n"
+			"movq mm6, [%[data]+0x30]\n"
+			"movq mm7, [%[data]+0x38]\n"
 			"emms\n"
-			".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory"
+			".att_syntax\n" : : [data]"r"(data) : "memory"
 		);
 #endif
 	}
@@ -132,6 +130,7 @@ namespace MMXRegisters
 namespace XMMRegisters
 {
 	u8 stack_depth = 0;
+	__aligned16 u64 data[2*iREGCNT_XMM];
 
 	__forceinline bool Saved()
 	{
@@ -154,7 +153,7 @@ namespace XMMRegisters
 #ifdef _MSC_VER
 		__asm
 		{
-			mov ecx, offset g_globalXMMData
+			mov ecx, offset data
 			movaps xmmword ptr [ecx+0x00], xmm0
 			movaps xmmword ptr [ecx+0x10], xmm1
 			movaps xmmword ptr [ecx+0x20], xmm2
@@ -167,15 +166,15 @@ namespace XMMRegisters
 #else
 		__asm__ volatile(
 			".intel_syntax noprefix\n"
-			"movaps [%[g_globalXMMData]+0x00], xmm0\n"
-			"movaps [%[g_globalXMMData]+0x10], xmm1\n"
-			"movaps [%[g_globalXMMData]+0x20], xmm2\n"
-			"movaps [%[g_globalXMMData]+0x30], xmm3\n"
-			"movaps [%[g_globalXMMData]+0x40], xmm4\n"
-			"movaps [%[g_globalXMMData]+0x50], xmm5\n"
-			"movaps [%[g_globalXMMData]+0x60], xmm6\n"
-			"movaps [%[g_globalXMMData]+0x70], xmm7\n"
-			".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory"
+			"movaps [%[data]+0x00], xmm0\n"
+			"movaps [%[data]+0x10], xmm1\n"
+			"movaps [%[data]+0x20], xmm2\n"
+			"movaps [%[data]+0x30], xmm3\n"
+			"movaps [%[data]+0x40], xmm4\n"
+			"movaps [%[data]+0x50], xmm5\n"
+			"movaps [%[data]+0x60], xmm6\n"
+			"movaps [%[data]+0x70], xmm7\n"
+			".att_syntax\n" : : [data]"r"(data) : "memory"
 		);
 #endif // _MSC_VER
 	}
@@ -199,7 +198,7 @@ namespace XMMRegisters
 #ifdef _MSC_VER
 		__asm
 		{
-			mov ecx, offset g_globalXMMData
+			mov ecx, offset data
 			movaps xmm0, xmmword ptr [ecx+0x00]
 			movaps xmm1, xmmword ptr [ecx+0x10]
 			movaps xmm2, xmmword ptr [ecx+0x20]
@@ -212,15 +211,15 @@ namespace XMMRegisters
 #else
 		__asm__ volatile(
 			".intel_syntax noprefix\n"
-			"movaps xmm0, [%[g_globalXMMData]+0x00]\n"
-			"movaps xmm1, [%[g_globalXMMData]+0x10]\n"
-			"movaps xmm2, [%[g_globalXMMData]+0x20]\n"
-			"movaps xmm3, [%[g_globalXMMData]+0x30]\n"
-			"movaps xmm4, [%[g_globalXMMData]+0x40]\n"
-			"movaps xmm5, [%[g_globalXMMData]+0x50]\n"
-			"movaps xmm6, [%[g_globalXMMData]+0x60]\n"
-			"movaps xmm7, [%[g_globalXMMData]+0x70]\n"
-			".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory"
+			"movaps xmm0, [%[data]+0x00]\n"
+			"movaps xmm1, [%[data]+0x10]\n"
+			"movaps xmm2, [%[data]+0x20]\n"
+			"movaps xmm3, [%[data]+0x30]\n"
+			"movaps xmm4, [%[data]+0x40]\n"
+			"movaps xmm5, [%[data]+0x50]\n"
+			"movaps xmm6, [%[data]+0x60]\n"
+			"movaps xmm7, [%[data]+0x70]\n"
+			".att_syntax\n" : : [data]"r"(data) : "memory"
 		);
 #endif // _MSC_VER
 	}
diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp
index a3527125ce..d4b06ddee5 100644
--- a/common/src/x86emitter/x86emitter.cpp
+++ b/common/src/x86emitter/x86emitter.cpp
@@ -32,7 +32,8 @@
 #include "internal.h"
 
 // defined in tools.cpp
-extern __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
+//extern __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
+#include "tools.h"
 
 // ------------------------------------------------------------------------
 // Notes on Thread Local Storage:
@@ -753,12 +754,12 @@ __emitinline void xBSWAP( const xRegister32& to )
 
 __emitinline void xStoreReg( const xRegisterSSE& src )
 {
-	xMOVDQA( &g_globalXMMData[src.Id*2], src );
+	xMOVDQA( &XMMRegisters::data[src.Id*2], src );
 }
 
 __emitinline void xRestoreReg( const xRegisterSSE& dest )
 {
-	xMOVDQA( dest, &g_globalXMMData[dest.Id*2] );
+	xMOVDQA( dest, &XMMRegisters::data[dest.Id*2] );
 }
 
 }
diff --git a/pcsx2/x86/iVif.cpp b/pcsx2/x86/iVif.cpp
index 5f58da560d..624fb3bcbe 100644
--- a/pcsx2/x86/iVif.cpp
+++ b/pcsx2/x86/iVif.cpp
@@ -53,10 +53,8 @@ extern u8 s_maskwrite[256];
 extern "C" __aligned16 u32 s_TempDecompress[4];
 __aligned16 u32 s_TempDecompress[4] = {0};
 
-/*#ifdef __LINUX__
-static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask);
-#endif*/
-
+// Note: this function used to break regularly on Linux due to stack alignment.
+// Refer to old revisions of this code if it breaks again for workarounds.
 void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
 {
 	u32 i;
@@ -69,19 +67,6 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
 		hasmask[i] = prev;
 
 		if ((mask&0xff) != (oldmask&0xff))
-
-//#ifdef __LINUX__
-// This seems to now be hitting several games, and not just in that spot,
-// so until we can work out something better, I'm reverting to using the
-// old Linux mask code all the time. --arcum42
-
-// Note: not neccessary if we set -mpreferred-stack-boundary=2, so it is now
-// disabled.
-
-		//if (mask == 0) // Temporary workaround for a bug causing a segfault.
-		//UseOldMaskCode(vif1masks, mask);
-		//else
-//#else
 		{
 			__m128i r0, r1, r2, r3;
 			r0 = _mm_load_si128((__m128i*)&s_maskarr[mask&15][0]); // Tends to crash Linux,
@@ -102,34 +87,6 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
 			_mm_storeh_pi((__m64*)&vif1masks[12], *(__m128*)&r2);
 			_mm_storeh_pi((__m64*)&vif1masks[14], *(__m128*)&r3);
 		}
-//#endif
 	}
 	XMMRegisters::Thaw();
 }
-
-/*#ifdef __LINUX__
-static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask)
-{
-	u8* p0 = (u8*)&s_maskarr[mask&15][0];
-	u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0];
-
-	__asm__ __volatile__(".intel_syntax noprefix\n"
-	"movaps xmm0, [%0]\n"
-	"movaps xmm1, [%1]\n"
-	"movaps xmm2, xmm0\n"
-	"punpcklwd xmm0, xmm0\n"
-	"punpckhwd xmm2, xmm2\n"
-	"movaps xmm3, xmm1\n"
-	"punpcklwd xmm1, xmm1\n"
-	"punpckhwd xmm3, xmm3\n"
-	"movq [%2], xmm0\n"
-	"movq [%2+8], xmm1\n"
-	"movhps [%2+16], xmm0\n"
-	"movhps [%2+24], xmm1\n"
-	"movq [%2+32], xmm2\n"
-	"movq [%2+40], xmm3\n"
-	"movhps [%2+48], xmm2\n"
-	"movhps [%2+56], xmm3\n"
-	".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) : "memory" );
-}
-#endif*/
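
A minimal usage sketch of the Freeze/Thaw API this patch reorganizes, assuming only what the headers above declare; DoSseWork() and its body comments are hypothetical stand-ins for illustration, not code from the patch. The pattern mirrors SetNewMask() in iVif.cpp: bracket any block that clobbers the XMM registers so their contents are spilled into the now namespace-local XMMRegisters::data and restored afterwards.

    #include "tools.h"  // declares XMMRegisters::{Freeze, Thaw, Saved, data}

    static void DoSseWork()  // hypothetical caller, for illustration only
    {
    	XMMRegisters::Freeze();  // spill xmm0-xmm7 into XMMRegisters::data
    	// ... work that may clobber xmm0-xmm7 (intrinsics, inline asm) ...
    	XMMRegisters::Thaw();    // reload xmm0-xmm7 from XMMRegisters::data
    }

The stack_depth byte in each namespace suggests Freeze/Thaw pairs are meant to nest, with only the outermost pair actually saving and restoring, and Saved() reporting whether a save is currently live; that is an inference from the declarations rather than something this diff shows directly.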