MMI: Added recompiled version of PPAC5/PEXT5, and optimized PADDUW a bit.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@651 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
tmkkmac 2009-03-01 16:01:53 +00:00
parent 8fa5c65884
commit b6f3f8e415
1 changed file with 84 additions and 41 deletions

View File

@ -737,12 +737,78 @@ CPU_SSE_XMMCACHE_END
////////////////////////////////////////////////////
// Emits recompiled code for the MMI PEXT5 instruction using SSE2 emitters,
// followed by a call into the interpreter implementation (Interp::PEXT5).
//
// Reading each SSE2_* emitter as the same-named x86 instruction (PSLLD/PSRLD
// shift 32-bit lanes, PSLLW/PSRLW shift 16-bit lanes independently), the
// emitted sequence computes, for every 32-bit lane of Rt:
//     Rt bits  0..4   ->  Rd bits  3..7
//     Rt bits  5..9   ->  Rd bits 11..15
//     Rt bits 10..14  ->  Rd bits 19..23
//     Rt bit  15      ->  Rd bit  31
// All other Rd bits end up zero. The "for bit ..." notes below name the
// source (Rt) bits each register copy is used for.
//
// NOTE(review): CPU_SSE2_XMMCACHE_START / CPU_SSE_XMMCACHE_END are defined
// outside this view. Presumably they bracket the SSE2 path and return from
// this function when it is taken, so the recCall below only acts as the
// fallback; confirm against the macro definitions.
void recPEXT5()
{
// No code is emitted when _Rd_ is 0.
if ( ! _Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
// Two scratch XMM registers; both are released before the section ends.
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); // for bit 0..4
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); // for bit 10..14
// D: shift left then right by 27 keeps only lane bits 0..4 (Rt bits 0..4).
SSE2_PSLLD_I8_to_XMM(EEREC_D, 27);
// t0: Rt bits 0..14 move to lane bits 17..31; bit 15 and above are dropped
// and the low 16-bit half becomes zero.
SSE2_PSLLD_I8_to_XMM(t0reg, 17);
SSE2_PSRLD_I8_to_XMM(EEREC_D, 27);
// t0: 16-bit shift keeps the top 5 bits of each half, so Rt bits 10..14
// land in lane bits 16..20 (the low half stays zero).
SSE2_PSRLW_I8_to_XMM(t0reg, 11);
// D now holds Rt bits 0..4 at lane bits 0..4 and Rt bits 10..14 at 16..20.
SSE2_POR_XMM_to_XMM(EEREC_D, t0reg);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); // for bit 5..9
SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_T); // for bit 15
// t0: Rt bits 0..9 move to lane bits 22..31.
SSE2_PSLLD_I8_to_XMM(t0reg, 22);
// t1: bit 15 of each 16-bit half moves to bit 0 of that half
// (lane bits 0 and 16).
SSE2_PSRLW_I8_to_XMM(t1reg, 15);
// t0: Rt bits 5..9 end up at lane bits 0..4.
SSE2_PSRLD_I8_to_XMM(t0reg, 27);
// t1: Rt bit 15 moves from lane bit 0 to lane bit 20; the copy taken from
// the upper half (lane bit 16) is shifted out of the lane.
SSE2_PSLLD_I8_to_XMM(t1reg, 20);
// t0 now holds Rt bits 5..9 at lane bits 0..4 and Rt bit 15 at lane bit 20.
SSE2_POR_XMM_to_XMM(t0reg, t1reg);
// Final placement with 16-bit shifts, so each half moves independently:
// D: lane bits 0..4 -> 3..7 and 16..20 -> 19..23.
SSE2_PSLLW_I8_to_XMM(EEREC_D, 3);
// t0: lane bits 0..4 -> 11..15 and lane bit 20 -> 31.
SSE2_PSLLW_I8_to_XMM(t0reg, 11);
SSE2_POR_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
CPU_SSE_XMMCACHE_END
// Interpreter implementation of PEXT5 (see NOTE(review) in the header).
recCall( Interp::PEXT5, _Rd_ );
}
////////////////////////////////////////////////////
// Emits recompiled code for the MMI PPAC5 instruction using SSE2 emitters,
// followed by a call into the interpreter implementation (Interp::PPAC5).
//
// Reading each SSE2_* emitter as the same-named x86 instruction (PSLLD/PSRLD
// shift 32-bit lanes), the emitted sequence computes, for every 32-bit lane
// of Rt:
//     Rt bits  3..7   ->  Rd bits  0..4
//     Rt bits 11..15  ->  Rd bits  5..9
//     Rt bits 19..23  ->  Rd bits 10..14
//     Rt bit  31      ->  Rd bit  15
// Rd bits 16..31 end up zero. The "for bit ..." notes below name the
// destination (Rd) bits each register copy is used for.
//
// NOTE(review): CPU_SSE2_XMMCACHE_START / CPU_SSE_XMMCACHE_END are defined
// outside this view. Presumably they bracket the SSE2 path and return from
// this function when it is taken, so the recCall below only acts as the
// fallback; confirm against the macro definitions.
void recPPAC5()
{
// No code is emitted when _Rd_ is 0.
if ( ! _Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
// Two scratch XMM registers; both are released before the section ends.
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); // for bit 0..4
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); // for bit 5..9
// D: Rt bits 0..7 move to lane bits 24..31.
SSE2_PSLLD_I8_to_XMM(EEREC_D, 24);
// t0: Rt bits 11..31 move down to lane bits 0..20.
SSE2_PSRLD_I8_to_XMM(t0reg, 11);
// D: Rt bits 3..7 end up at lane bits 0..4.
SSE2_PSRLD_I8_to_XMM(EEREC_D, 27);
// t0: Rt bits 11..15 land at lane bits 5..9; lane bits 10..25 still carry
// unwanted Rt bits 16..31, which the 0x3FF mask below removes.
SSE2_PSLLD_I8_to_XMM(t0reg, 5);
SSE2_POR_XMM_to_XMM(EEREC_D, t0reg);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); // for bit 10..14
SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_T); // for bit 15
// t0: shift left 8 drops Rt bits 24..31.
SSE2_PSLLD_I8_to_XMM(t0reg, 8);
// t1: Rt bit 31 moves to lane bit 0.
SSE2_PSRLD_I8_to_XMM(t1reg, 31);
// t0: Rt bits 9..23 end up at lane bits 0..14, so Rt bits 19..23 sit at
// lane bits 10..14; lane bits 0..9 are unwanted and are masked off below.
SSE2_PSRLD_I8_to_XMM(t0reg, 17);
// t1: Rt bit 31 moves to lane bit 15.
SSE2_PSLLD_I8_to_XMM(t1reg, 15);
SSE2_POR_XMM_to_XMM(t0reg, t1reg);
// Build the mask 0x000003FF in every lane: all ones, then shift right 22.
SSE2_PCMPEQD_XMM_to_XMM(t1reg, t1reg);
SSE2_PSRLD_I8_to_XMM(t1reg, 22);
// D keeps only lane bits 0..9.
SSE2_PAND_XMM_to_XMM(EEREC_D, t1reg);
// t1 = ~mask & t0, i.e. t0 with lane bits 0..9 cleared (bits 10..15 remain).
SSE2_PANDN_XMM_to_XMM(t1reg, t0reg);
SSE2_POR_XMM_to_XMM(EEREC_D, t1reg);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
CPU_SSE_XMMCACHE_END
// Interpreter implementation of PPAC5 (see NOTE(review) in the header).
recCall( Interp::PPAC5, _Rd_ );
}
@ -1795,54 +1861,31 @@ CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WR
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
if( _hasFreeXMMreg() ) {
int t3reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_PXOR_XMM_to_XMM(t0reg, t0reg);
SSE2_MOVQ_XMM_to_XMM(t1reg, EEREC_S);
SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_S);
if( EEREC_D != EEREC_T ) SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T);
SSEX_MOVDQA_XMM_to_XMM(t3reg, EEREC_T);
SSE2_PUNPCKLDQ_XMM_to_XMM(t1reg, t0reg);
SSE2_PUNPCKHDQ_XMM_to_XMM(t2reg, t0reg);
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
SSE2_PUNPCKHDQ_XMM_to_XMM(t3reg, t0reg);
SSE2_PADDQ_XMM_to_XMM(t1reg, EEREC_D);
SSE2_PADDQ_XMM_to_XMM(t2reg, t3reg);
_freeXMMreg(t3reg);
}
SSE2_PCMPEQB_XMM_to_XMM(t0reg, t0reg);
SSE2_PSLLD_I8_to_XMM(t0reg, 31); // 0x80000000
SSEX_MOVDQA_XMM_to_XMM(t1reg, t0reg);
SSE2_PXOR_XMM_to_XMM(t0reg, EEREC_S); // invert MSB of Rs (for unsigned comparison)
// normal 32-bit addition
if( EEREC_D == EEREC_S ) SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_S);
else {
SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_S);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
SSE2_MOVQ_XMM_to_XMM(t1reg, EEREC_S);
SSE2_PSRLDQ_I8_to_XMM(t2reg, 8);
if( EEREC_D != EEREC_T ) SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PSRLDQ_I8_to_XMM(t0reg, 8);
SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0xE8);
SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0xE8);
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_D, 0xE8);
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xE8);
SSE2_PADDQ_XMM_to_XMM(t1reg, EEREC_D);
SSE2_PADDQ_XMM_to_XMM(t2reg, t0reg);
SSEX_PXOR_XMM_to_XMM(t0reg, t0reg);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_T);
}
SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0xd8);
SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0xd8);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, t1reg);
SSE2_PUNPCKHQDQ_XMM_to_XMM(t1reg, t2reg);
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, t2reg);
SSE2_PCMPGTD_XMM_to_XMM(t1reg, t0reg);
SSEX_POR_XMM_to_XMM(EEREC_D, t1reg);
// unsigned 32-bit comparison
SSE2_PXOR_XMM_to_XMM(t1reg, EEREC_D); // invert MSB of Rd (for unsigned comparison)
SSE2_PCMPGTD_XMM_to_XMM(t0reg, t1reg);
// saturate
SSE2_POR_XMM_to_XMM(EEREC_D, t0reg); // clear word with 0xFFFFFFFF if (Rd < Rs)
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
_freeXMMreg(t2reg);
}
CPU_SSE_XMMCACHE_END
@ -1930,7 +1973,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
SSE2_PXOR_XMM_to_XMM(t1reg, EEREC_T);
}
// ungigned 32-bit comparison
// unsigned 32-bit comparison
SSE2_PCMPGTD_XMM_to_XMM(t0reg, t1reg);
// saturate