diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h index 379d34f645..2dc100e0c1 100644 --- a/common/include/x86emitter/x86types.h +++ b/common/include/x86emitter/x86types.h @@ -28,8 +28,6 @@ enum XMMSSEType }; extern __threadlocal u8 *x86Ptr; -extern __threadlocal u8 *j8Ptr[32]; // depreciated item. use local u8* vars instead. -extern __threadlocal u32 *j32Ptr[32]; // depreciated item. use local u32* vars instead. extern __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM]; diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp index e316bce630..5daba6e590 100644 --- a/common/src/x86emitter/x86emitter.cpp +++ b/common/src/x86emitter/x86emitter.cpp @@ -43,22 +43,27 @@ extern __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM]; // generally identical. // // Performance Considerations: -// * GCC's implementation involves an extra dereference from normal storage. +// * GCC's implementation involves an extra dereference from normal storage (possibly +// applies to x86-32 only -- x86-64 is untested). // // * MSVC's implementation involves *two* extra dereferences from normal storage because // it has to look up the TLS heap pointer from the Windows Thread Storage Area. (in -// generated ASM code, this dereference is denoted by access to the fs:[2ch] address). +// generated ASM code, this dereference is denoted by access to the fs:[2ch] address), // // * However, in either case, the optimizer usually optimizes it to a register so the -// extra overhead is minimal over a series of instructions. (Note!! the Full Opt- -// imization [/Ox] option effectively disables TLS optimizations in MSVC, causing -// generally significant code bloat). +// extra overhead is minimal over a series of instructions. +// +// MSVC Notes: +// * Important!! the Full Optimization [/Ox] option effectively disables TLS optimizations +// in MSVC 2008 and earlier, causing generally significant code bloat. Not tested in +// VC2010 yet. 
+// +// * VC2010 generally does a superior job of optimizing TLS across inlined functions and +// class methods, compared to predecessors. // __threadlocal u8 *x86Ptr; -__threadlocal u8 *j8Ptr[32]; -__threadlocal u32 *j32Ptr[32]; __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index bf02009c25..ba43b5ad85 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -22,6 +22,9 @@ #include "VU.h" #include "R3000A.h" +__threadlocal u8 *j8Ptr[32]; +__threadlocal u32 *j32Ptr[32]; + u16 g_x86AllocCounter = 0; u16 g_xmmAllocCounter = 0; diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 01718a4b45..94fe01c586 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -303,6 +303,9 @@ extern u32 g_cpuRegHasSignExt, g_cpuPrevRegHasSignExt; extern _xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM]; +extern __threadlocal u8 *j8Ptr[32]; // deprecated item. use local u8* vars instead. +extern __threadlocal u32 *j32Ptr[32]; // deprecated item. use local u32* vars instead. + extern u16 g_x86AllocCounter; extern u16 g_xmmAllocCounter; diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 8755b91985..525a8df980 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -616,8 +616,6 @@ static void recShutdown() s_nInstCacheSize = 0; } -#pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code - u32 g_psxlastpc = 0; static void iopClearRecLUT(BASEBLOCK* base, int count) @@ -647,11 +645,9 @@ static __forceinline s32 recExecuteBlock( s32 eeCycles ) push ebx push esi push edi - push ebp call iopDispatcherReg - pop ebp pop edi pop esi pop ebx @@ -661,20 +657,14 @@ static __forceinline s32 recExecuteBlock( s32 eeCycles ) ( // We should be able to rely on GAS syntax (the register clobber list) as a // replacement for manual push/pop of unpreserved registers. 
- // - // EBP note: As I feared, EBP is "required" for C++ exception handling in Linux, and trying - // to issue a clobber specifier for it causes an error. We really need to find a way to - // disable EBP regalloc in iCore. --air ".intel_syntax noprefix\n" //"push ebx\n" //"push esi\n" //"push edi\n" - "push ebp\n" "call iopDispatcherReg\n" - "pop ebp\n" //"pop edi\n" //"pop esi\n" //"pop ebx\n" diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 5d64f48c1f..a9c349138d 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -475,10 +475,6 @@ static void recShutdown( void ) s_nInstCacheSize = 0; } -#ifdef _MSC_VER -#pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code -#endif - void recStep( void ) { } @@ -509,7 +505,6 @@ void recEventTest() //////////////////////////////////////////////////// static u32 g_lastpc = 0; -u32 g_EEDispatchTemp; #ifdef _MSC_VER @@ -570,30 +565,28 @@ static void recExecute() // invoking DispatcherReg. These things are code bits which are called infrequently, // such as dyna_block_discard and dyna_page_reset. - // Optimization note: - // Compared pushad against manually pushing the regs one-by-one. - // Manually pushing is faster, especially on Core2's and such. :) - - g_EEFreezeRegs = true; - try { while( true ) { + // Note: make sure the FreezeRegs boolean is reset to true here, because + // it might be set to false, depending on if the rec exits from the context of + // an EventTest or not. 
+ + g_EEFreezeRegs = true; + try { - #ifdef _MSC_VER + #ifdef _MSC_VER __asm { push ebx push esi push edi - push ebp call DispatcherReg - pop ebp pop edi pop esi pop ebx diff --git a/pcsx2/x86/sVU_Lower.cpp b/pcsx2/x86/sVU_Lower.cpp index 8f6620767a..f51c6f9fb7 100644 --- a/pcsx2/x86/sVU_Lower.cpp +++ b/pcsx2/x86/sVU_Lower.cpp @@ -800,12 +800,13 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) // (this is one of my test cases for the new emitter --air) using namespace x86Emitter; - xAddressReg thisreg( x86reg ); + xAddressInfo indexer( offset ); + if( x86reg != -1 ) indexer.Add( xAddressReg( x86reg ) ); - if ( _X ) xMOV(ptr32[thisreg+offset], 0x00000000); - if ( _Y ) xMOV(ptr32[thisreg+offset+4], 0x00000000); - if ( _Z ) xMOV(ptr32[thisreg+offset+8], 0x00000000); - if ( _W ) xMOV(ptr32[thisreg+offset+12], 0x3f800000); + if ( _X ) xMOV(ptr32[indexer], 0x00000000); + if ( _Y ) xMOV(ptr32[indexer+4], 0x00000000); + if ( _Z ) xMOV(ptr32[indexer+8], 0x00000000); + if ( _W ) xMOV(ptr32[indexer+12], 0x3f800000); } return; }