MMI: Added recompiled version of PPAC5/PEXT5, and optimized PADDUW a bit.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@651 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
tmkkmac 2009-03-01 16:01:53 +00:00
parent 8fa5c65884
commit b6f3f8e415
1 changed file with 84 additions and 41 deletions

View File

@ -737,12 +737,78 @@ CPU_SSE_XMMCACHE_END
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPEXT5() void recPEXT5()
{ {
if ( ! _Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); // for bit 0..4
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); // for bit 10..14
SSE2_PSLLD_I8_to_XMM(EEREC_D, 27);
SSE2_PSLLD_I8_to_XMM(t0reg, 17);
SSE2_PSRLD_I8_to_XMM(EEREC_D, 27);
SSE2_PSRLW_I8_to_XMM(t0reg, 11);
SSE2_POR_XMM_to_XMM(EEREC_D, t0reg);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); // for bit 5..9
SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_T); // for bit 15
SSE2_PSLLD_I8_to_XMM(t0reg, 22);
SSE2_PSRLW_I8_to_XMM(t1reg, 15);
SSE2_PSRLD_I8_to_XMM(t0reg, 27);
SSE2_PSLLD_I8_to_XMM(t1reg, 20);
SSE2_POR_XMM_to_XMM(t0reg, t1reg);
SSE2_PSLLW_I8_to_XMM(EEREC_D, 3);
SSE2_PSLLW_I8_to_XMM(t0reg, 11);
SSE2_POR_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
CPU_SSE_XMMCACHE_END
recCall( Interp::PEXT5, _Rd_ ); recCall( Interp::PEXT5, _Rd_ );
} }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPPAC5() void recPPAC5()
{ {
if ( ! _Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); // for bit 0..4
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); // for bit 5..9
SSE2_PSLLD_I8_to_XMM(EEREC_D, 24);
SSE2_PSRLD_I8_to_XMM(t0reg, 11);
SSE2_PSRLD_I8_to_XMM(EEREC_D, 27);
SSE2_PSLLD_I8_to_XMM(t0reg, 5);
SSE2_POR_XMM_to_XMM(EEREC_D, t0reg);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); // for bit 10..14
SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_T); // for bit 15
SSE2_PSLLD_I8_to_XMM(t0reg, 8);
SSE2_PSRLD_I8_to_XMM(t1reg, 31);
SSE2_PSRLD_I8_to_XMM(t0reg, 17);
SSE2_PSLLD_I8_to_XMM(t1reg, 15);
SSE2_POR_XMM_to_XMM(t0reg, t1reg);
SSE2_PCMPEQD_XMM_to_XMM(t1reg, t1reg);
SSE2_PSRLD_I8_to_XMM(t1reg, 22);
SSE2_PAND_XMM_to_XMM(EEREC_D, t1reg);
SSE2_PANDN_XMM_to_XMM(t1reg, t0reg);
SSE2_POR_XMM_to_XMM(EEREC_D, t1reg);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
CPU_SSE_XMMCACHE_END
recCall( Interp::PPAC5, _Rd_ ); recCall( Interp::PPAC5, _Rd_ );
} }
@ -1795,54 +1861,31 @@ CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WR
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
} }
else { else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1);
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
if( _hasFreeXMMreg() ) { SSE2_PCMPEQB_XMM_to_XMM(t0reg, t0reg);
int t3reg = _allocTempXMMreg(XMMT_INT, -1); SSE2_PSLLD_I8_to_XMM(t0reg, 31); // 0x80000000
SSEX_PXOR_XMM_to_XMM(t0reg, t0reg); SSEX_MOVDQA_XMM_to_XMM(t1reg, t0reg);
SSE2_MOVQ_XMM_to_XMM(t1reg, EEREC_S); SSE2_PXOR_XMM_to_XMM(t0reg, EEREC_S); // invert MSB of Rs (for unsigned comparison)
SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_S);
if( EEREC_D != EEREC_T ) SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T); // normal 32-bit addition
SSEX_MOVDQA_XMM_to_XMM(t3reg, EEREC_T); if( EEREC_D == EEREC_S ) SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PUNPCKLDQ_XMM_to_XMM(t1reg, t0reg); else if( EEREC_D == EEREC_T ) SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_S);
SSE2_PUNPCKHDQ_XMM_to_XMM(t2reg, t0reg);
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
SSE2_PUNPCKHDQ_XMM_to_XMM(t3reg, t0reg);
SSE2_PADDQ_XMM_to_XMM(t1reg, EEREC_D);
SSE2_PADDQ_XMM_to_XMM(t2reg, t3reg);
_freeXMMreg(t3reg);
}
else { else {
SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_S); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_MOVQ_XMM_to_XMM(t1reg, EEREC_S);
SSE2_PSRLDQ_I8_to_XMM(t2reg, 8);
if( EEREC_D != EEREC_T ) SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PSRLDQ_I8_to_XMM(t0reg, 8);
SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0xE8);
SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0xE8);
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_D, 0xE8);
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xE8);
SSE2_PADDQ_XMM_to_XMM(t1reg, EEREC_D);
SSE2_PADDQ_XMM_to_XMM(t2reg, t0reg);
SSEX_PXOR_XMM_to_XMM(t0reg, t0reg);
} }
SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0xd8); // unsigned 32-bit comparison
SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0xd8); SSE2_PXOR_XMM_to_XMM(t1reg, EEREC_D); // invert MSB of Rd (for unsigned comparison)
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, t1reg); SSE2_PCMPGTD_XMM_to_XMM(t0reg, t1reg);
SSE2_PUNPCKHQDQ_XMM_to_XMM(t1reg, t2reg);
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, t2reg); // saturate
SSE2_PCMPGTD_XMM_to_XMM(t1reg, t0reg); SSE2_POR_XMM_to_XMM(EEREC_D, t0reg); // clear word with 0xFFFFFFFF if (Rd < Rs)
SSEX_POR_XMM_to_XMM(EEREC_D, t1reg);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
_freeXMMreg(t2reg);
} }
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
@ -1930,7 +1973,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
SSE2_PXOR_XMM_to_XMM(t1reg, EEREC_T); SSE2_PXOR_XMM_to_XMM(t1reg, EEREC_T);
} }
// ungigned 32-bit comparison // unsigned 32-bit comparison
SSE2_PCMPGTD_XMM_to_XMM(t0reg, t1reg); SSE2_PCMPGTD_XMM_to_XMM(t0reg, t1reg);
// saturate // saturate