Set up the x86 emitter so that it always uses MOVAPS over MOVDQA. This generates slightly faster code, since movaps is 1 byte shorter than movdqa (the two instructions are functionally identical). The optimization is optional, controlled by the AlwaysUseMovaps constant defined in ix86_sse.inl.
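
For reference, a minimal sketch of the encoding difference (not part of the commit; the array names are illustrative only). It shows an aligned 128-bit load from [eax] into xmm0 using the standard IA-32 opcode bytes:

    // Illustrative only: raw byte sequences the emitter would produce for each form.
    static const unsigned char movdqa_xmm0_from_eax[] = { 0x66, 0x0F, 0x6F, 0x00 };  // movdqa xmm0,[eax]: 0x66 prefix + 0F 6F opcode + ModRM = 4 bytes
    static const unsigned char movaps_xmm0_from_eax[] = { 0x0F, 0x28, 0x00 };        // movaps xmm0,[eax]: 0F 28 opcode + ModRM = 3 bytes

Both sequences load the same 128 bits into xmm0; only the encoded length differs, which is where the code-size savings come from.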

Enabled compiler optimizations for the zlib and bzip2 projects (Release/Devel build targets).
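
(For orientation, the vcproj attributes added in the hunks below correspond to the usual MSVC compiler switches: Optimization="2" is /O2, InlineFunctionExpansion="2" is /Ob2, EnableIntrinsicFunctions is /Oi, FavorSizeOrSpeed="1" is /Ot, OmitFramePointers is /Oy, StringPooling is /GF, RuntimeLibrary="0" is /MT, BufferSecurityCheck="false" is /GS-, and EnableFunctionLevelLinking is /Gy.)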

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@658 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-03-02 18:39:29 +00:00
parent 63fe0a5fdc
commit 79cde4c09e
4 changed files with 77 additions and 29 deletions


@@ -98,8 +98,13 @@
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
StringPooling="true"
RuntimeLibrary="0"
BufferSecurityCheck="false"
EnableFunctionLevelLinking="true"
WarningLevel="3"
DebugInformationFormat="3"
@@ -157,8 +162,13 @@
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
StringPooling="true"
RuntimeLibrary="0"
BufferSecurityCheck="false"
EnableFunctionLevelLinking="true"
WarningLevel="3"
DebugInformationFormat="3"


@@ -98,9 +98,12 @@
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
WarningLevel="3"
DebugInformationFormat="3"
/>
@@ -157,9 +160,12 @@
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
WarningLevel="3"
DebugInformationFormat="3"
/>


@@ -22,6 +22,14 @@
// SSE instructions
//------------------------------------------------------------------
// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions
// do the exact same thing, but movaps is 1 byte shorter, which shrinks the generated code, eases
// pressure on the L1 instruction cache, and yields some marginal speed gains. (It's possible the
// relative performance of the two instructions could change someday, so this constant is provided
// to make it easy to restore MOVDQA use at a later time, if needed.)
static const bool AlwaysUseMovaps = true;
#define SSEMtoR( code, overb ) \
assert( to < XMMREGS ) ; \
RexR(0, to); \
@@ -203,19 +211,29 @@ emitterT void eSSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
// movdqa [r32+offset] to xmm
emitterT void eSSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
write8<I>(0x66);
RexRB(0, to, from);
write16<I>( 0x6f0f );
WriteRmOffsetFrom<I>(to, from, offset);
if( AlwaysUseMovaps )
eSSE_MOVAPSRmtoROffset<I>( to, from, offset );
else
{
write8<I>(0x66);
RexRB(0, to, from);
write16<I>( 0x6f0f );
WriteRmOffsetFrom<I>(to, from, offset);
}
}
// movdqa xmm to [r32+offset]
emitterT void eSSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
write8<I>(0x66);
RexRB(0, from, to);
write16<I>( 0x7f0f );
WriteRmOffsetFrom<I>(from, to, offset);
if( AlwaysUseMovaps )
eSSE_MOVAPSRtoRmOffset<I>( to, from, offset );
else
{
write8<I>(0x66);
RexRB(0, from, to);
write16<I>( 0x7f0f );
WriteRmOffsetFrom<I>(from, to, offset);
}
}
// movups [r32+offset] to xmm
@@ -833,13 +851,30 @@ emitterT void eSSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEF0F ); }
emitterT void eSSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ) };
///////////////////////////////////////////////////////////////////////////////////////
emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0x6F0F); }
emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM66(0x7F0F); }
emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if (to != from) { SSERtoR66(0x6F0F); } }
emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_M128_to_XMM<I>( to, from ); else SSEMtoR66(0x6F0F); }
emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_M128<I>( to, from ); else SSERtoM66(0x7F0F); }
emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if (to != from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_XMM<I>( to, from ); else SSERtoR66(0x6F0F); } }
emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { write8<I>(0xF3); SSEMtoR(0x6F0F, 0); }
emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { write8<I>(0xF3); SSERtoM(0x7F0F, 0); }
emitterT void eSSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { write8<I>(0xF3); SSERtoR(0x6F0F); }
emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from)
{
if( AlwaysUseMovaps )
eSSE_MOVUPS_M128_to_XMM<I>( to, from );
else
{
write8<I>(0xF3);
SSEMtoR(0x6F0F, 0);
}
}
emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from)
{
if( AlwaysUseMovaps )
eSSE_MOVUPS_XMM_to_M128<I>( to, from );
else
{
write8<I>(0xF3);
SSERtoM(0x7F0F, 0);
}
}
// shift right logical


@@ -26,54 +26,51 @@
#error Dependency fail: Please define _EmitterId_ and include ix86.h first.
#endif
// Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the
// overhead of dynarec instructions that use these.
static __forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
else SSE_MOVAPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
else SSE_MOVAPS_XMM_to_M128(to, from);
}
static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
else SSE_MOVAPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
else SSE_MOVAPSRmtoROffset(to, from, offset);
}
static __forceinline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
else SSE_MOVAPSRtoRmOffset(to, from, offset);
}
static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
else SSE_MOVAPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
else SSE_MOVAPS_XMM_to_M128(to, from);
}
static __forceinline void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
else SSE_MOVAPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);