Set up the x86 emitter so that it always uses MOVAPS over MOVDQA. This generates slightly faster code, since movaps is 1 byte shorter than movdqa (the two instructions are functionally identical). The optimization is optional, controlled by the AlwaysUseMovaps constant defined in ix86_sse.inl.
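
For reference, a minimal sketch of the encoding difference (not part of the commit; the array names are illustrative only). It shows an aligned 128-bit load from [eax] into xmm0 using the standard IA-32 opcode bytes:

    // Illustrative only: raw byte sequences the emitter would produce for each form.
    static const unsigned char movdqa_xmm0_from_eax[] = { 0x66, 0x0F, 0x6F, 0x00 };  // movdqa xmm0,[eax]: 0x66 prefix + 0F 6F opcode + ModRM = 4 bytes
    static const unsigned char movaps_xmm0_from_eax[] = { 0x0F, 0x28, 0x00 };        // movaps xmm0,[eax]: 0F 28 opcode + ModRM = 3 bytes

Both sequences load the same 128 bits into xmm0; only the encoded length differs, which is where the code-size savings come from.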

Enabled compiler optimizations for the zlib and bzip2 projects (Release/Devel build targets).
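
(For orientation, the vcproj attributes added in the hunks below correspond to the usual MSVC compiler switches: Optimization="2" is /O2, InlineFunctionExpansion="2" is /Ob2, EnableIntrinsicFunctions is /Oi, FavorSizeOrSpeed="1" is /Ot, OmitFramePointers is /Oy, StringPooling is /GF, RuntimeLibrary="0" is /MT, BufferSecurityCheck="false" is /GS-, and EnableFunctionLevelLinking is /Gy.)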

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@658 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-03-02 18:39:29 +00:00
parent 63fe0a5fdc
commit 79cde4c09e
4 changed files with 77 additions and 29 deletions


@@ -98,8 +98,13 @@
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
StringPooling="true"
RuntimeLibrary="0"
BufferSecurityCheck="false"
EnableFunctionLevelLinking="true"
WarningLevel="3"
DebugInformationFormat="3"
@@ -157,8 +162,13 @@
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
StringPooling="true"
RuntimeLibrary="0"
BufferSecurityCheck="false"
EnableFunctionLevelLinking="true"
WarningLevel="3"
DebugInformationFormat="3"


@@ -98,9 +98,12 @@
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
WarningLevel="3"
DebugInformationFormat="3"
/>
@@ -157,9 +160,12 @@
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
WarningLevel="3"
DebugInformationFormat="3"
/>


@@ -22,6 +22,14 @@
// SSE instructions
//------------------------------------------------------------------
// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions
// do the exact same thing, but movaps is 1 byte shorter, which shrinks the generated code, eases
// pressure on the L1 instruction cache, and yields some marginal speed gains. (It's possible the
// relative performance of the two instructions could change someday, so this constant is provided
// to make it easy to restore MOVDQA use at a later time, if needed.)
static const bool AlwaysUseMovaps = true;
#define SSEMtoR( code, overb ) \
assert( to < XMMREGS ) ; \
RexR(0, to); \
@@ -203,19 +211,29 @@ emitterT void eSSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
// movdqa [r32+offset] to xmm
emitterT void eSSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
write8<I>(0x66);
RexRB(0, to, from);
write16<I>( 0x6f0f );
WriteRmOffsetFrom<I>(to, from, offset);
if( AlwaysUseMovaps )
eSSE_MOVAPSRmtoROffset<I>( to, from, offset );
else
{
write8<I>(0x66);
RexRB(0, to, from);
write16<I>( 0x6f0f );
WriteRmOffsetFrom<I>(to, from, offset);
}
}
// movdqa xmm to [r32+offset]
emitterT void eSSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
write8<I>(0x66);
RexRB(0, from, to);
write16<I>( 0x7f0f );
WriteRmOffsetFrom<I>(from, to, offset);
if( AlwaysUseMovaps )
eSSE_MOVAPSRtoRmOffset<I>( to, from, offset );
else
{
write8<I>(0x66);
RexRB(0, from, to);
write16<I>( 0x7f0f );
WriteRmOffsetFrom<I>(from, to, offset);
}
}
// movups [r32+offset] to xmm
@@ -833,13 +851,30 @@ emitterT void eSSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEF0F ); }
emitterT void eSSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ) };
///////////////////////////////////////////////////////////////////////////////////////
emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0x6F0F); }
emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM66(0x7F0F); }
emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if (to != from) { SSERtoR66(0x6F0F); } }
emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_M128_to_XMM<I>( to, from ); else SSEMtoR66(0x6F0F); }
emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_M128<I>( to, from ); else SSERtoM66(0x7F0F); }
emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if (to != from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_XMM<I>( to, from ); else SSERtoR66(0x6F0F); } }
emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { write8<I>(0xF3); SSEMtoR(0x6F0F, 0); }
emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { write8<I>(0xF3); SSERtoM(0x7F0F, 0); }
emitterT void eSSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { write8<I>(0xF3); SSERtoR(0x6F0F); }
emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from)
{
if( AlwaysUseMovaps )
eSSE_MOVUPS_M128_to_XMM<I>( to, from );
else
{
write8<I>(0xF3);
SSEMtoR(0x6F0F, 0);
}
}
emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from)
{
if( AlwaysUseMovaps )
eSSE_MOVUPS_XMM_to_M128<I>( to, from );
else
{
write8<I>(0xF3);
SSERtoM(0x7F0F, 0);
}
}
// shift right logical


@@ -26,54 +26,51 @@
#error Dependency fail: Please define _EmitterId_ and include ix86.h first.
#endif
// Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the
// overhead of dynarec instructions that use these.
static __forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
else SSE_MOVAPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
else SSE_MOVAPS_XMM_to_M128(to, from);
}
static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
else SSE_MOVAPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
else SSE_MOVAPSRmtoROffset(to, from, offset);
}
static __forceinline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
else SSE_MOVAPSRtoRmOffset(to, from, offset);
}
static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
else SSE_MOVAPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
else SSE_MOVAPS_XMM_to_M128(to, from);
}
static __forceinline void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
else SSE_MOVAPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);