Make one last pass over the register freezing code for now, and remove the remnants of the iVif workaround code.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2060 96395faa-99c1-11dd-bbfe-3dabce05a288
arcum42 2009-10-22 13:00:59 +00:00
parent 1a09a7792f
commit 06d3e01efe
4 changed files with 52 additions and 91 deletions

View File

@@ -15,6 +15,8 @@
 #pragma once

+#include "x86emitter.h"
+
 // this is all that needs to be called and will fill up the below structs
 extern void cpudetectInit();
@@ -104,6 +106,7 @@ namespace MMXRegisters
 	extern void Freeze();
 	extern void Thaw();
 	extern bool Saved();
+	extern __aligned16 u64 data[8];
 };

 namespace XMMRegisters
@@ -111,6 +114,7 @@ namespace XMMRegisters
 	extern void Freeze();
 	extern void Thaw();
 	extern bool Saved();
+	extern __aligned16 u64 data[2*iREGCNT_XMM];
 };

 namespace Registers
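
The header now exposes the per-namespace save buffers alongside Freeze/Thaw. For context, a minimal caller-side sketch of how this API is meant to be used (the call site below is an assumption for illustration, not part of this diff):

	// Hypothetical call site: preserve the host's MMX/XMM registers around
	// code that clobbers them. Freeze/Thaw calls must come in balanced pairs.
	XMMRegisters::Freeze();
	MMXRegisters::Freeze();
	ExecuteRecompiledBlock();	// hypothetical function that trashes mm0-mm7/xmm0-xmm7
	MMXRegisters::Thaw();
	XMMRegisters::Thaw();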

View File

@@ -14,14 +14,11 @@
 */

 #include "PrecompiledHeader.h"
-#include "internal.h"
 #include "tools.h"

 // To make sure regs don't get changed while in the recompiler,
 // use Freeze/Thaw in MMXRegisters, XMMRegisters, & Registers.
-__aligned16 u64 g_globalMMXData[8];
-__aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];

 /////////////////////////////////////////////////////////////////////
 // MMX Register Freezing
@@ -30,6 +27,7 @@ __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
 namespace MMXRegisters
 {
 	u8 stack_depth = 0;
+	__aligned16 u64 data[8];

 	__forceinline bool Saved()
 	{
@@ -51,7 +49,7 @@ namespace MMXRegisters
 	#ifdef _MSC_VER
 	__asm {
-		mov ecx, offset g_globalMMXData
+		mov ecx, offset data
 		movntq mmword ptr [ecx+0], mm0
 		movntq mmword ptr [ecx+8], mm1
 		movntq mmword ptr [ecx+16], mm2
@@ -65,16 +63,16 @@ namespace MMXRegisters
 	#else
 	__asm__ volatile(
 		".intel_syntax noprefix\n"
-		"movq [%[g_globalMMXData]+0x00], mm0\n"
-		"movq [%[g_globalMMXData]+0x08], mm1\n"
-		"movq [%[g_globalMMXData]+0x10], mm2\n"
-		"movq [%[g_globalMMXData]+0x18], mm3\n"
-		"movq [%[g_globalMMXData]+0x20], mm4\n"
-		"movq [%[g_globalMMXData]+0x28], mm5\n"
-		"movq [%[g_globalMMXData]+0x30], mm6\n"
-		"movq [%[g_globalMMXData]+0x38], mm7\n"
+		"movq [%[data]+0x00], mm0\n"
+		"movq [%[data]+0x08], mm1\n"
+		"movq [%[data]+0x10], mm2\n"
+		"movq [%[data]+0x18], mm3\n"
+		"movq [%[data]+0x20], mm4\n"
+		"movq [%[data]+0x28], mm5\n"
+		"movq [%[data]+0x30], mm6\n"
+		"movq [%[data]+0x38], mm7\n"
 		"emms\n"
-		".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory"
+		".att_syntax\n" : : [data]"r"(data) : "memory"
 	);
 	#endif
 }
@@ -96,7 +94,7 @@ namespace MMXRegisters
 	#ifdef _MSC_VER
 	__asm {
-		mov ecx, offset g_globalMMXData
+		mov ecx, offset data
 		movq mm0, mmword ptr [ecx+0]
 		movq mm1, mmword ptr [ecx+8]
 		movq mm2, mmword ptr [ecx+16]
@@ -110,16 +108,16 @@ namespace MMXRegisters
 	#else
 	__asm__ volatile(
 		".intel_syntax noprefix\n"
-		"movq mm0, [%[g_globalMMXData]+0x00]\n"
-		"movq mm1, [%[g_globalMMXData]+0x08]\n"
-		"movq mm2, [%[g_globalMMXData]+0x10]\n"
-		"movq mm3, [%[g_globalMMXData]+0x18]\n"
-		"movq mm4, [%[g_globalMMXData]+0x20]\n"
-		"movq mm5, [%[g_globalMMXData]+0x28]\n"
-		"movq mm6, [%[g_globalMMXData]+0x30]\n"
-		"movq mm7, [%[g_globalMMXData]+0x38]\n"
+		"movq mm0, [%[data]+0x00]\n"
+		"movq mm1, [%[data]+0x08]\n"
+		"movq mm2, [%[data]+0x10]\n"
+		"movq mm3, [%[data]+0x18]\n"
+		"movq mm4, [%[data]+0x20]\n"
+		"movq mm5, [%[data]+0x28]\n"
+		"movq mm6, [%[data]+0x30]\n"
+		"movq mm7, [%[data]+0x38]\n"
 		"emms\n"
-		".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory"
+		".att_syntax\n" : : [data]"r"(data) : "memory"
 	);
 	#endif
 }
@@ -132,6 +130,7 @@ namespace MMXRegisters
 namespace XMMRegisters
 {
 	u8 stack_depth = 0;
+	__aligned16 u64 data[2*iREGCNT_XMM];

 	__forceinline bool Saved()
 	{
@@ -154,7 +153,7 @@ namespace XMMRegisters
 	#ifdef _MSC_VER
 	__asm {
-		mov ecx, offset g_globalXMMData
+		mov ecx, offset data
 		movaps xmmword ptr [ecx+0x00], xmm0
 		movaps xmmword ptr [ecx+0x10], xmm1
 		movaps xmmword ptr [ecx+0x20], xmm2
@@ -167,15 +166,15 @@ namespace XMMRegisters
 	#else
 	__asm__ volatile(
 		".intel_syntax noprefix\n"
-		"movaps [%[g_globalXMMData]+0x00], xmm0\n"
-		"movaps [%[g_globalXMMData]+0x10], xmm1\n"
-		"movaps [%[g_globalXMMData]+0x20], xmm2\n"
-		"movaps [%[g_globalXMMData]+0x30], xmm3\n"
-		"movaps [%[g_globalXMMData]+0x40], xmm4\n"
-		"movaps [%[g_globalXMMData]+0x50], xmm5\n"
-		"movaps [%[g_globalXMMData]+0x60], xmm6\n"
-		"movaps [%[g_globalXMMData]+0x70], xmm7\n"
-		".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory"
+		"movaps [%[data]+0x00], xmm0\n"
+		"movaps [%[data]+0x10], xmm1\n"
+		"movaps [%[data]+0x20], xmm2\n"
+		"movaps [%[data]+0x30], xmm3\n"
+		"movaps [%[data]+0x40], xmm4\n"
+		"movaps [%[data]+0x50], xmm5\n"
+		"movaps [%[data]+0x60], xmm6\n"
+		"movaps [%[data]+0x70], xmm7\n"
+		".att_syntax\n" : : [data]"r"(data) : "memory"
 	);
 	#endif // _MSC_VER
 }
@@ -199,7 +198,7 @@ namespace XMMRegisters
 	#ifdef _MSC_VER
 	__asm
 	{
-		mov ecx, offset g_globalXMMData
+		mov ecx, offset data
 		movaps xmm0, xmmword ptr [ecx+0x00]
 		movaps xmm1, xmmword ptr [ecx+0x10]
 		movaps xmm2, xmmword ptr [ecx+0x20]
@@ -212,15 +211,15 @@ namespace XMMRegisters
 	#else
 	__asm__ volatile(
 		".intel_syntax noprefix\n"
-		"movaps xmm0, [%[g_globalXMMData]+0x00]\n"
-		"movaps xmm1, [%[g_globalXMMData]+0x10]\n"
-		"movaps xmm2, [%[g_globalXMMData]+0x20]\n"
-		"movaps xmm3, [%[g_globalXMMData]+0x30]\n"
-		"movaps xmm4, [%[g_globalXMMData]+0x40]\n"
-		"movaps xmm5, [%[g_globalXMMData]+0x50]\n"
-		"movaps xmm6, [%[g_globalXMMData]+0x60]\n"
-		"movaps xmm7, [%[g_globalXMMData]+0x70]\n"
-		".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory"
+		"movaps xmm0, [%[data]+0x00]\n"
+		"movaps xmm1, [%[data]+0x10]\n"
+		"movaps xmm2, [%[data]+0x20]\n"
+		"movaps xmm3, [%[data]+0x30]\n"
+		"movaps xmm4, [%[data]+0x40]\n"
+		"movaps xmm5, [%[data]+0x50]\n"
+		"movaps xmm6, [%[data]+0x60]\n"
+		"movaps xmm7, [%[data]+0x70]\n"
+		".att_syntax\n" : : [data]"r"(data) : "memory"
 	);
 	#endif // _MSC_VER
 }
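
The stack_depth counters above imply that Freeze/Thaw may nest. A minimal sketch of that guard pattern, assuming only the outermost pair actually stores and reloads (hypothetical names; the real save/restore bodies are the asm blocks in this file):

	namespace ExampleRegisters	// hypothetical namespace, for illustration only
	{
		static u8 stack_depth = 0;	// u8 is the codebase's byte typedef

		static void StoreAll()   { /* spill registers into data[] here */ }
		static void RestoreAll() { /* reload registers from data[] here */ }

		bool Saved() { return stack_depth > 0; }

		void Freeze()
		{
			if (stack_depth++ == 0) StoreAll();	// only the outermost Freeze saves
		}

		void Thaw()
		{
			if (--stack_depth == 0) RestoreAll();	// only the matching outermost Thaw restores
		}
	}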

View File

@@ -32,7 +32,8 @@
 #include "internal.h"

 // defined in tools.cpp
-extern __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
+//extern __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
+#include "tools.h"

 // ------------------------------------------------------------------------
 // Notes on Thread Local Storage:
@@ -753,12 +754,12 @@ __emitinline void xBSWAP( const xRegister32& to )
 __emitinline void xStoreReg( const xRegisterSSE& src )
 {
-	xMOVDQA( &g_globalXMMData[src.Id*2], src );
+	xMOVDQA( &XMMRegisters::data[src.Id*2], src );
 }

 __emitinline void xRestoreReg( const xRegisterSSE& dest )
 {
-	xMOVDQA( dest, &g_globalXMMData[dest.Id*2] );
+	xMOVDQA( dest, &XMMRegisters::data[dest.Id*2] );
 }
 }
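
xStoreReg and xRestoreReg spill and reload a single 128-bit register instead of freezing the whole set; because data is an array of u64, each register occupies two slots, which is why the index is Id*2. A usage sketch (the surrounding emitter calls are assumed, not shown in this diff):

	// Hypothetical JIT-time emission sequence:
	xStoreReg( xmm0 );	// emits: movdqa [&XMMRegisters::data[0]], xmm0
	// ... emit instructions that are free to clobber xmm0 here ...
	xRestoreReg( xmm0 );	// emits: movdqa xmm0, [&XMMRegisters::data[0]]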

View File

@@ -53,10 +53,8 @@ extern u8 s_maskwrite[256];
 extern "C" __aligned16 u32 s_TempDecompress[4];
 __aligned16 u32 s_TempDecompress[4] = {0};

-/*#ifdef __LINUX__
-static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask);
-#endif*/
+// Note: this function used to break regularly on Linux due to stack alignment.
+// Refer to old revisions of this code if it breaks again for workarounds.

 void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
 {
 	u32 i;
@@ -69,19 +67,6 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
 		hasmask[i] = prev;

 		if ((mask&0xff) != (oldmask&0xff))
-//#ifdef __LINUX__
-		// This seems to now be hitting several games, and not just in that spot,
-		// so until we can work out something better, I'm reverting to using the
-		// old Linux mask code all the time. --arcum42
-		// Note: not neccessary if we set -mpreferred-stack-boundary=2, so it is now
-		// disabled.
-		//if (mask == 0) // Temporary workaround for a bug causing a segfault.
-		//UseOldMaskCode(vif1masks, mask);
-		//else
-//#else
 		{
 			__m128i r0, r1, r2, r3;
 			r0 = _mm_load_si128((__m128i*)&s_maskarr[mask&15][0]); // Tends to crash Linux,
@@ -102,34 +87,6 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
 			_mm_storeh_pi((__m64*)&vif1masks[12], *(__m128*)&r2);
 			_mm_storeh_pi((__m64*)&vif1masks[14], *(__m128*)&r3);
 		}
-//#endif
 	}

 	XMMRegisters::Thaw();
 }

-/*#ifdef __LINUX__
-static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask)
-{
-	u8* p0 = (u8*)&s_maskarr[mask&15][0];
-	u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0];
-
-	__asm__ __volatile__(".intel_syntax noprefix\n"
-	"movaps xmm0, [%0]\n"
-	"movaps xmm1, [%1]\n"
-	"movaps xmm2, xmm0\n"
-	"punpcklwd xmm0, xmm0\n"
-	"punpckhwd xmm2, xmm2\n"
-	"movaps xmm3, xmm1\n"
-	"punpcklwd xmm1, xmm1\n"
-	"punpckhwd xmm3, xmm3\n"
-	"movq [%2], xmm0\n"
-	"movq [%2+8], xmm1\n"
-	"movhps [%2+16], xmm0\n"
-	"movhps [%2+24], xmm1\n"
-	"movq [%2+32], xmm2\n"
-	"movq [%2+40], xmm3\n"
-	"movhps [%2+48], xmm2\n"
-	"movhps [%2+56], xmm3\n"
-	".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) : "memory" );
-}
-#endif*/
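
For reference, the SSE path that remains in SetNewMask performs the same expansion the deleted asm did: each 16-bit entry of a mask table row is duplicated into adjacent output positions via the unpack instructions. A minimal intrinsics sketch of that word-doubling step (the helper name and row layout are assumptions, not code from this commit):

	#include <emmintrin.h>	// SSE2 intrinsics

	// Hypothetical helper: expand one 16-byte row of eight 16-bit entries into
	// 32 bytes of output, duplicating each entry, as punpcklwd/punpckhwd do.
	static void ExpandMaskRow(const void* row, u32* dest)
	{
		__m128i r  = _mm_load_si128((const __m128i*)row);	// row must be 16-byte aligned
		__m128i lo = _mm_unpacklo_epi16(r, r);	// entries 0-3, each doubled
		__m128i hi = _mm_unpackhi_epi16(r, r);	// entries 4-7, each doubled
		_mm_storeu_si128((__m128i*)(dest + 0), lo);	// unaligned stores, like the
		_mm_storeu_si128((__m128i*)(dest + 4), hi);	// movq/movhps pairs above
	}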