mirror of https://github.com/PCSX2/pcsx2.git
Set up the x86 emitter so that it always uses MOVAPS over MOVDQA. This generates slightly faster code, since movaps is 1 byte shorter than movdqa (the two instructions are functionally identical). The optimization is optional via AlwaysUseMovaps, defined in ix86_sse.inl.
Enabled optimization settings for the zlib and bzip2 projects (Release/Devel build targets).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@658 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent 63fe0a5fdc
commit 79cde4c09e
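For context on the size claim: movdqa requires a 0x66 operand-size prefix that movaps does not, so the register-to-register forms differ by exactly one byte. A minimal standalone sketch (not PCSX2 code; the xmm0/xmm1 register pair is an arbitrary example) comparing the raw encodings:

#include <cstdint>
#include <cstdio>

int main()
{
	// movdqa xmm0, xmm1 -> 66 0F 6F C1 (0x66 prefix + 0F 6F opcode + ModRM byte)
	const uint8_t movdqa[] = { 0x66, 0x0F, 0x6F, 0xC1 };
	// movaps xmm0, xmm1 -> 0F 28 C1 (same ModRM, no prefix needed)
	const uint8_t movaps[] = { 0x0F, 0x28, 0xC1 };
	printf("movdqa: %zu bytes, movaps: %zu bytes\n", sizeof(movdqa), sizeof(movaps));
	return 0;
}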
@@ -98,8 +98,13 @@
 <Tool
 	Name="VCCLCompilerTool"
+	Optimization="2"
+	InlineFunctionExpansion="2"
+	EnableIntrinsicFunctions="true"
+	FavorSizeOrSpeed="1"
+	OmitFramePointers="true"
 	StringPooling="true"
 	RuntimeLibrary="0"
 	BufferSecurityCheck="false"
 	EnableFunctionLevelLinking="true"
 	WarningLevel="3"
 	DebugInformationFormat="3"

@@ -157,8 +162,13 @@
 <Tool
 	Name="VCCLCompilerTool"
+	Optimization="2"
+	InlineFunctionExpansion="2"
+	EnableIntrinsicFunctions="true"
+	FavorSizeOrSpeed="1"
+	OmitFramePointers="true"
 	StringPooling="true"
 	RuntimeLibrary="0"
 	BufferSecurityCheck="false"
 	EnableFunctionLevelLinking="true"
 	WarningLevel="3"
 	DebugInformationFormat="3"
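For reference, the five attributes added above map to these cl.exe switches (standard Visual Studio project-file semantics):

	Optimization="2"                /O2  (maximize speed)
	InlineFunctionExpansion="2"     /Ob2 (inline any suitable function)
	EnableIntrinsicFunctions="true" /Oi  (enable intrinsic functions)
	FavorSizeOrSpeed="1"            /Ot  (favor fast code)
	OmitFramePointers="true"        /Oy  (omit frame pointers)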
@@ -98,9 +98,12 @@
<Tool
	Name="VCCLCompilerTool"
	Optimization="2"
	InlineFunctionExpansion="2"
	EnableIntrinsicFunctions="true"
	FavorSizeOrSpeed="1"
	OmitFramePointers="true"
	StringPooling="true"
	RuntimeLibrary="0"
	EnableFunctionLevelLinking="true"
	WarningLevel="3"
	DebugInformationFormat="3"
/>

@@ -157,9 +160,12 @@
<Tool
	Name="VCCLCompilerTool"
	Optimization="2"
	InlineFunctionExpansion="2"
	EnableIntrinsicFunctions="true"
	FavorSizeOrSpeed="1"
	OmitFramePointers="true"
	StringPooling="true"
	RuntimeLibrary="0"
	EnableFunctionLevelLinking="true"
	WarningLevel="3"
	DebugInformationFormat="3"
/>
@@ -22,6 +22,14 @@
 // SSE instructions
 //------------------------------------------------------------------
 
+// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions
+// do the exact same thing, but movaps is 1 byte shorter, which keeps the L1 code cache a bit
+// cleaner and yields some marginal speed gains. (It's possible the relative performance of the
+// two instructions could change someday, so this constant is provided to restore MOVDQA use
+// easily at a later time, if needed.)
+
+static const bool AlwaysUseMovaps = true;
+
 #define SSEMtoR( code, overb ) \
 	assert( to < XMMREGS ) ; \
 	RexR(0, to); \
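Since AlwaysUseMovaps is a compile-time constant, the if( AlwaysUseMovaps ) guards added below cost nothing at runtime; the compiler keeps one arm and discards the other. A trivial sketch of the pattern (EmitMovaps/EmitMovdqa are hypothetical stand-ins, not the real emitter entry points):

#include <cstdio>

static const bool AlwaysUseMovaps = true;

static void EmitMovaps() { puts("emit 0F 28 /r"); }    // hypothetical stand-in
static void EmitMovdqa() { puts("emit 66 0F 6F /r"); } // hypothetical stand-in

static void EmitIntegerMove128()
{
	// Constant condition: the dead branch is eliminated at compile time,
	// so the toggle adds no per-instruction overhead to the emitter.
	if( AlwaysUseMovaps )
		EmitMovaps();
	else
		EmitMovdqa();
}

int main() { EmitIntegerMove128(); return 0; }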
@@ -203,19 +211,29 @@ emitterT void eSSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int
 // movdqa [r32+offset] to r32
 emitterT void eSSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
 {
-	write8<I>(0x66);
-	RexRB(0, to, from);
-	write16<I>( 0x6f0f );
-	WriteRmOffsetFrom<I>(to, from, offset);
+	if( AlwaysUseMovaps )
+		eSSE_MOVAPSRmtoROffset<I>( to, from, offset );
+	else
+	{
+		write8<I>(0x66);
+		RexRB(0, to, from);
+		write16<I>( 0x6f0f );
+		WriteRmOffsetFrom<I>(to, from, offset);
+	}
 }
 
 // movdqa r32 to [r32+offset]
 emitterT void eSSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
 {
-	write8<I>(0x66);
-	RexRB(0, from, to);
-	write16<I>( 0x7f0f );
-	WriteRmOffsetFrom<I>(from, to, offset);
+	if( AlwaysUseMovaps )
+		eSSE_MOVAPSRtoRmOffset<I>( to, from, offset );
+	else
+	{
+		write8<I>(0x66);
+		RexRB(0, from, to);
+		write16<I>( 0x7f0f );
+		WriteRmOffsetFrom<I>(from, to, offset);
+	}
 }
 
 // movups [r32+offset] to r32
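One detail in the raw-byte fallback paths above: write16<I>( 0x6f0f ) leans on x86's little-endian byte order, so the 16-bit value 0x6f0f lands in the code stream as 0F 6F, the two opcode bytes of movdqa in instruction order. A standalone sketch of the idea (this write16 is a simplified stand-in for the emitter's templated version):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Simplified stand-in: store a 16-bit value into the instruction
// stream in native (little-endian on x86) byte order.
static void write16(uint8_t* stream, uint16_t val) { memcpy(stream, &val, sizeof(val)); }

int main()
{
	uint8_t buf[2];
	write16(buf, 0x6f0f);
	printf("%02X %02X\n", buf[0], buf[1]); // prints "0F 6F" on a little-endian host
	return 0;
}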
@@ -833,13 +851,30 @@ emitterT void eSSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S
 emitterT void eSSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ) };
 ///////////////////////////////////////////////////////////////////////////////////////
 
-emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0x6F0F); }
-emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM66(0x7F0F); }
-emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if (to != from) { SSERtoR66(0x6F0F); } }
+emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_M128_to_XMM<I>( to, from ); else SSEMtoR66(0x6F0F); }
+emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_M128<I>( to, from ); else SSERtoM66(0x7F0F); }
+emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if (to != from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_XMM<I>( to, from ); else SSERtoR66(0x6F0F); } }
 
-emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { write8<I>(0xF3); SSEMtoR(0x6F0F, 0); }
-emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { write8<I>(0xF3); SSERtoM(0x7F0F, 0); }
+emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from)
+{
+	if( AlwaysUseMovaps )
+		eSSE_MOVUPS_M128_to_XMM<I>( to, from );
+	else
+	{
+		write8<I>(0xF3);
+		SSEMtoR(0x6F0F, 0);
+	}
+}
+emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from)
+{
+	if( AlwaysUseMovaps )
+		eSSE_MOVUPS_XMM_to_M128<I>( to, from );
+	else
+	{
+		write8<I>(0xF3);
+		SSERtoM(0x7F0F, 0);
+	}
+}
 emitterT void eSSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { write8<I>(0xF3); SSERtoR(0x6F0F); }
 
 // shift right logical
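The unaligned pair follows the same logic as the aligned one: movdqu carries a mandatory F3 prefix that movups avoids, so the swap saves a byte here as well. A standalone sketch of the encodings (register pair arbitrary; compiles as-is):

#include <cstdint>

// movdqu xmm0, xmm1 -> F3 0F 6F C1 (F3 prefix + 0F 6F opcode + ModRM)
static const uint8_t movdqu_bytes[] = { 0xF3, 0x0F, 0x6F, 0xC1 };
// movups xmm0, xmm1 -> 0F 10 C1 (no prefix, one byte shorter)
static const uint8_t movups_bytes[] = { 0x0F, 0x10, 0xC1 };

static_assert(sizeof(movups_bytes) + 1 == sizeof(movdqu_bytes),
              "movups saves exactly one byte over movdqu");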
@@ -26,54 +26,51 @@
 #error Dependency fail: Please define _EmitterId_ and include ix86.h first.
 #endif
 
+// Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the
+// overhead of dynarec instructions that use these.
+
 static __forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
 {
-	if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
+	if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
 	else SSE_MOVAPS_M128_to_XMM(to, from);
 }
 
 static __forceinline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
 {
-	if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
+	if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
 	else SSE_MOVAPS_XMM_to_M128(to, from);
 }
 
 static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
 {
-	if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
+	if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
 	else SSE_MOVAPS_XMM_to_XMM(to, from);
 }
 
 static __forceinline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
 {
-	if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
+	if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
 	else SSE_MOVAPSRmtoROffset(to, from, offset);
 }
 
 static __forceinline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
 {
-	if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
+	if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
 	else SSE_MOVAPSRtoRmOffset(to, from, offset);
 }
 
 static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
 {
-	if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
+	if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
 	else SSE_MOVAPS_M128_to_XMM(to, from);
 }
 
 static __forceinline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
 {
-	if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
+	if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
 	else SSE_MOVAPS_XMM_to_M128(to, from);
 }
 
 static __forceinline void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
 {
 	if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
 	else SSE_MOVAPS_XMM_to_XMM(to, from);
 }
 
 static __forceinline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
 {
 	if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
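Distilled, the rule the rewritten SSEX_ wrappers implement: the integer-typed MOVDQA/MOVDQU special case now applies only when the global toggle is off. A sketch under assumed declarations (the enum values and array size mirror how g_xmmtypes is used above, but are not copied from the real headers):

static const bool AlwaysUseMovaps = true;

enum XMMSSEType { XMMT_INT, XMMT_FPS }; // assumed definition
extern XMMSSEType g_xmmtypes[8];        // assumed declaration

// True only when the emitter should still produce an integer move
// (movdqa/movdqu); otherwise the shorter movaps/movups form is used.
static bool UseIntegerMove(int xmmreg)
{
	return !AlwaysUseMovaps && g_xmmtypes[xmmreg] == XMMT_INT;
}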