One last pass over the register freezing code for now; also remove the remnants of the iVif workaround code.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2060 96395faa-99c1-11dd-bbfe-3dabce05a288
arcum42 2009-10-22 13:00:59 +00:00
parent 1a09a7792f
commit 06d3e01efe
4 changed files with 52 additions and 91 deletions


@ -15,6 +15,8 @@
#pragma once
#include "x86emitter.h"
// This is all that needs to be called; it fills in the structs below.
extern void cpudetectInit();
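The comment above is the whole contract for cpudetectInit(); a hypothetical call-site sketch (main() and the surrounding startup order are assumptions, not part of this diff):

// Hypothetical startup sketch: run detection once, early, so the capability
// structs that cpudetectInit() fills in are valid before any emitter code
// consults them.
int main()
{
    cpudetectInit();
    // ... recompiler/emitter setup can now query the detected CPU features ...
    return 0;
}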
@ -104,6 +106,7 @@ namespace MMXRegisters
extern void Freeze();
extern void Thaw();
extern bool Saved();
extern __aligned16 u64 data[8];
};
namespace XMMRegisters
@ -111,6 +114,7 @@ namespace XMMRegisters
extern void Freeze();
extern void Thaw();
extern bool Saved();
extern __aligned16 u64 data[2*iREGCNT_XMM];
};
namespace Registers


@ -14,14 +14,11 @@
*/
#include "PrecompiledHeader.h"
#include "internal.h"
#include "tools.h"
// To make sure regs don't get changed while in the recompiler,
// use Freeze/Thaw in MMXRegisters, XMMRegisters, & Registers.
__aligned16 u64 g_globalMMXData[8];
__aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
/////////////////////////////////////////////////////////////////////
// MMX Register Freezing
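As the Freeze/Thaw comment above prescribes, callers bracket register-unsafe work with paired calls; a minimal usage sketch (the call site itself is hypothetical, only the Freeze/Thaw entry points come from this code):

// Minimal sketch, assuming the namespaces declared in the header above:
// freeze before running code that may clobber SIMD state, thaw afterwards.
void CallHostCodeFromRecompiler()
{
    MMXRegisters::Freeze();   // spills mm0..mm7 into MMXRegisters::data
    XMMRegisters::Freeze();   // spills xmm0..xmm7 into XMMRegisters::data

    // ... code that is free to clobber the MMX/XMM registers ...

    XMMRegisters::Thaw();     // reloads xmm0..xmm7
    MMXRegisters::Thaw();     // reloads mm0..mm7
}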
@ -30,6 +27,7 @@ __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
namespace MMXRegisters
{
u8 stack_depth = 0;
__aligned16 u64 data[8];
__forceinline bool Saved()
{
@ -51,7 +49,7 @@ namespace MMXRegisters
#ifdef _MSC_VER
__asm {
mov ecx, offset g_globalMMXData
mov ecx, offset data
movntq mmword ptr [ecx+0], mm0
movntq mmword ptr [ecx+8], mm1
movntq mmword ptr [ecx+16], mm2
@ -65,16 +63,16 @@ namespace MMXRegisters
#else
__asm__ volatile(
".intel_syntax noprefix\n"
"movq [%[g_globalMMXData]+0x00], mm0\n"
"movq [%[g_globalMMXData]+0x08], mm1\n"
"movq [%[g_globalMMXData]+0x10], mm2\n"
"movq [%[g_globalMMXData]+0x18], mm3\n"
"movq [%[g_globalMMXData]+0x20], mm4\n"
"movq [%[g_globalMMXData]+0x28], mm5\n"
"movq [%[g_globalMMXData]+0x30], mm6\n"
"movq [%[g_globalMMXData]+0x38], mm7\n"
"movq [%[data]+0x00], mm0\n"
"movq [%[data]+0x08], mm1\n"
"movq [%[data]+0x10], mm2\n"
"movq [%[data]+0x18], mm3\n"
"movq [%[data]+0x20], mm4\n"
"movq [%[data]+0x28], mm5\n"
"movq [%[data]+0x30], mm6\n"
"movq [%[data]+0x38], mm7\n"
"emms\n"
".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory"
".att_syntax\n" : : [data]"r"(data) : "memory"
);
#endif
}
@ -96,7 +94,7 @@ namespace MMXRegisters
#ifdef _MSC_VER
__asm {
mov ecx, offset g_globalMMXData
mov ecx, offset data
movq mm0, mmword ptr [ecx+0]
movq mm1, mmword ptr [ecx+8]
movq mm2, mmword ptr [ecx+16]
@ -110,16 +108,16 @@ namespace MMXRegisters
#else
__asm__ volatile(
".intel_syntax noprefix\n"
"movq mm0, [%[g_globalMMXData]+0x00]\n"
"movq mm1, [%[g_globalMMXData]+0x08]\n"
"movq mm2, [%[g_globalMMXData]+0x10]\n"
"movq mm3, [%[g_globalMMXData]+0x18]\n"
"movq mm4, [%[g_globalMMXData]+0x20]\n"
"movq mm5, [%[g_globalMMXData]+0x28]\n"
"movq mm6, [%[g_globalMMXData]+0x30]\n"
"movq mm7, [%[g_globalMMXData]+0x38]\n"
"movq mm0, [%[data]+0x00]\n"
"movq mm1, [%[data]+0x08]\n"
"movq mm2, [%[data]+0x10]\n"
"movq mm3, [%[data]+0x18]\n"
"movq mm4, [%[data]+0x20]\n"
"movq mm5, [%[data]+0x28]\n"
"movq mm6, [%[data]+0x30]\n"
"movq mm7, [%[data]+0x38]\n"
"emms\n"
".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory"
".att_syntax\n" : : [data]"r"(data) : "memory"
);
#endif
}
@ -132,6 +130,7 @@ namespace MMXRegisters
namespace XMMRegisters
{
u8 stack_depth = 0;
__aligned16 u64 data[2*iREGCNT_XMM];
__forceinline bool Saved()
{
@ -154,7 +153,7 @@ namespace XMMRegisters
#ifdef _MSC_VER
__asm {
mov ecx, offset g_globalXMMData
mov ecx, offset data
movaps xmmword ptr [ecx+0x00], xmm0
movaps xmmword ptr [ecx+0x10], xmm1
movaps xmmword ptr [ecx+0x20], xmm2
@ -167,15 +166,15 @@ namespace XMMRegisters
#else
__asm__ volatile(
".intel_syntax noprefix\n"
"movaps [%[g_globalXMMData]+0x00], xmm0\n"
"movaps [%[g_globalXMMData]+0x10], xmm1\n"
"movaps [%[g_globalXMMData]+0x20], xmm2\n"
"movaps [%[g_globalXMMData]+0x30], xmm3\n"
"movaps [%[g_globalXMMData]+0x40], xmm4\n"
"movaps [%[g_globalXMMData]+0x50], xmm5\n"
"movaps [%[g_globalXMMData]+0x60], xmm6\n"
"movaps [%[g_globalXMMData]+0x70], xmm7\n"
".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory"
"movaps [%[data]+0x00], xmm0\n"
"movaps [%[data]+0x10], xmm1\n"
"movaps [%[data]+0x20], xmm2\n"
"movaps [%[data]+0x30], xmm3\n"
"movaps [%[data]+0x40], xmm4\n"
"movaps [%[data]+0x50], xmm5\n"
"movaps [%[data]+0x60], xmm6\n"
"movaps [%[data]+0x70], xmm7\n"
".att_syntax\n" : : [data]"r"(data) : "memory"
);
#endif // _MSC_VER
}
@ -199,7 +198,7 @@ namespace XMMRegisters
#ifdef _MSC_VER
__asm
{
mov ecx, offset g_globalXMMData
mov ecx, offset data
movaps xmm0, xmmword ptr [ecx+0x00]
movaps xmm1, xmmword ptr [ecx+0x10]
movaps xmm2, xmmword ptr [ecx+0x20]
@ -212,15 +211,15 @@ namespace XMMRegisters
#else
__asm__ volatile(
".intel_syntax noprefix\n"
"movaps xmm0, [%[g_globalXMMData]+0x00]\n"
"movaps xmm1, [%[g_globalXMMData]+0x10]\n"
"movaps xmm2, [%[g_globalXMMData]+0x20]\n"
"movaps xmm3, [%[g_globalXMMData]+0x30]\n"
"movaps xmm4, [%[g_globalXMMData]+0x40]\n"
"movaps xmm5, [%[g_globalXMMData]+0x50]\n"
"movaps xmm6, [%[g_globalXMMData]+0x60]\n"
"movaps xmm7, [%[g_globalXMMData]+0x70]\n"
".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory"
"movaps xmm0, [%[data]+0x00]\n"
"movaps xmm1, [%[data]+0x10]\n"
"movaps xmm2, [%[data]+0x20]\n"
"movaps xmm3, [%[data]+0x30]\n"
"movaps xmm4, [%[data]+0x40]\n"
"movaps xmm5, [%[data]+0x50]\n"
"movaps xmm6, [%[data]+0x60]\n"
"movaps xmm7, [%[data]+0x70]\n"
".att_syntax\n" : : [data]"r"(data) : "memory"
);
#endif // _MSC_VER
}


@ -32,7 +32,8 @@
#include "internal.h"
// defined in tools.cpp
extern __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
//extern __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
#include "tools.h"
// ------------------------------------------------------------------------
// Notes on Thread Local Storage:
@ -753,12 +754,12 @@ __emitinline void xBSWAP( const xRegister32& to )
__emitinline void xStoreReg( const xRegisterSSE& src )
{
xMOVDQA( &g_globalXMMData[src.Id*2], src );
xMOVDQA( &XMMRegisters::data[src.Id*2], src );
}
__emitinline void xRestoreReg( const xRegisterSSE& dest )
{
xMOVDQA( dest, &g_globalXMMData[dest.Id*2] );
xMOVDQA( dest, &XMMRegisters::data[dest.Id*2] );
}
}
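Since each 128-bit XMM register occupies two u64 slots, the src.Id*2 indexing above places register N at byte offset N*16 in the data array; a small standalone check of that layout (the local array here is a stand-in for XMMRegisters::data):

#include <cstdint>
#include <cstdio>

int main()
{
    const int iREGCNT_XMM = 8;        // assumed: 8 XMM registers on 32-bit x86
    uint64_t data[2 * iREGCNT_XMM];   // same shape as XMMRegisters::data

    for (int id = 0; id < iREGCNT_XMM; ++id)
    {
        // xStoreReg writes register 'id' at data[id*2], i.e. byte offset id*16.
        long offset = (long)((char*)&data[id * 2] - (char*)&data[0]);
        printf("xmm%d -> data[%d] (byte offset 0x%02lx)\n", id, id * 2, offset);
    }
    return 0;
}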


@ -53,10 +53,8 @@ extern u8 s_maskwrite[256];
extern "C" __aligned16 u32 s_TempDecompress[4];
__aligned16 u32 s_TempDecompress[4] = {0};
/*#ifdef __LINUX__
static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask);
#endif*/
// Note: this function used to break regularly on Linux due to stack alignment.
// Refer to old revisions of this code if it breaks again for workarounds.
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{
u32 i;
@ -69,19 +67,6 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
hasmask[i] = prev;
if ((mask&0xff) != (oldmask&0xff))
//#ifdef __LINUX__
// This seems to now be hitting several games, and not just in that spot,
// so until we can work out something better, I'm reverting to using the
// old Linux mask code all the time. --arcum42
// Note: not necessary if we set -mpreferred-stack-boundary=2, so it is now
// disabled.
//if (mask == 0) // Temporary workaround for a bug causing a segfault.
//UseOldMaskCode(vif1masks, mask);
//else
//#else
{
__m128i r0, r1, r2, r3;
r0 = _mm_load_si128((__m128i*)&s_maskarr[mask&15][0]); // Tends to crash Linux,
@ -102,34 +87,6 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
_mm_storeh_pi((__m64*)&vif1masks[12], *(__m128*)&r2);
_mm_storeh_pi((__m64*)&vif1masks[14], *(__m128*)&r3);
}
//#endif
}
XMMRegisters::Thaw();
}
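The SSE path above (like the commented-out asm below, via punpcklwd/punpckhwd) widens each 16-bit mask word into a 32-bit lane by interleaving a register with itself; a standalone sketch of just that widening step with intrinsics (sample inputs assumed; the real tables come from s_maskarr):

#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main()
{
    // Assumed sample mask words; interleaving each word with itself doubles
    // it into a 32-bit lane, e.g. 0x00FF becomes 0x00FF00FF.
    alignas(16) uint16_t words[8] = {0x00FF, 0x0F0F, 0x3333, 0x5555,
                                     0xAAAA, 0xCCCC, 0xF0F0, 0xFF00};
    __m128i r0 = _mm_load_si128((const __m128i*)words);
    __m128i lo = _mm_unpacklo_epi16(r0, r0);   // words 0..3, each duplicated
    __m128i hi = _mm_unpackhi_epi16(r0, r0);   // words 4..7, each duplicated

    alignas(16) uint32_t out[8];
    _mm_store_si128((__m128i*)&out[0], lo);
    _mm_store_si128((__m128i*)&out[4], hi);

    for (int i = 0; i < 8; ++i)
        printf("out[%d] = 0x%08X\n", i, (unsigned)out[i]);
    return 0;
}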
/*#ifdef __LINUX__
static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask)
{
u8* p0 = (u8*)&s_maskarr[mask&15][0];
u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0];
__asm__ __volatile__(".intel_syntax noprefix\n"
"movaps xmm0, [%0]\n"
"movaps xmm1, [%1]\n"
"movaps xmm2, xmm0\n"
"punpcklwd xmm0, xmm0\n"
"punpckhwd xmm2, xmm2\n"
"movaps xmm3, xmm1\n"
"punpcklwd xmm1, xmm1\n"
"punpckhwd xmm3, xmm3\n"
"movq [%2], xmm0\n"
"movq [%2+8], xmm1\n"
"movhps [%2+16], xmm0\n"
"movhps [%2+24], xmm1\n"
"movq [%2+32], xmm2\n"
"movq [%2+40], xmm3\n"
"movhps [%2+48], xmm2\n"
"movhps [%2+56], xmm3\n"
".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) : "memory" );
}
#endif*/