mirror of https://github.com/PCSX2/pcsx2.git
Lots of work from tmkk. This update adds recompiling for several MMI opcodes, fixes bugs and adds SSSE3 detection.
Thanks again, tmkk! :) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@522 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
af89808f8f
commit
33d5c66ac7
|
@ -1058,9 +1058,9 @@ void PSLLVW() {
|
|||
/*
 * PSRLVW (interpreter): variable logical right shift of two 32-bit words.
 *
 * Words UL[0] and UL[2] of Rt are shifted right by the low 5 bits of the
 * matching word of Rs.  Each 32-bit result goes through an (s32) cast before
 * being stored into the 64-bit halves UD[0] / UD[1] of Rd, so the stored
 * value is the sign-extended 32-bit result (assuming s32 is a signed 32-bit
 * type and UD a 64-bit lane, as the names suggest).
 *
 * Does nothing when the destination index _Rd_ is zero.
 */
void PSRLVW() {
	if (!_Rd_) return;

	// Lower doubleword: Rt word 0 shifted by (Rs word 0 & 0x1F), sign-extended.
	cpuRegs.GPR.r[_Rd_].UD[0] = (s32)(cpuRegs.GPR.r[_Rt_].UL[0] >>
									  (cpuRegs.GPR.r[_Rs_].UL[0] & 0x1F));

	// Upper doubleword: Rt word 2 shifted by (Rs word 2 & 0x1F), sign-extended.
	cpuRegs.GPR.r[_Rd_].UD[1] = (s32)(cpuRegs.GPR.r[_Rt_].UL[2] >>
									  (cpuRegs.GPR.r[_Rs_].UL[2] & 0x1F));
}
|
||||
|
||||
|
|
|
@ -134,11 +134,13 @@ void SysDetect()
|
|||
"\t%sDetected SSE\n"
|
||||
"\t%sDetected SSE2\n"
|
||||
"\t%sDetected SSE3\n"
|
||||
"\t%sDetected SSSE3\n"
|
||||
"\t%sDetected SSE4.1\n", params
|
||||
cpucaps.hasMultimediaExtensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMDExtensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMD2Extensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMD3Extensions ? "" : "Not ",
|
||||
cpucaps.hasSupplementalStreamingSIMD3Extensions ? "" : "Not ",
|
||||
cpucaps.hasStreamingSIMD4Extensions ? "" : "Not "
|
||||
);
|
||||
|
||||
|
|
|
@ -208,6 +208,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READLO|XMMINFO_READHI)
|
|||
|
||||
case 0x02: // SLW
|
||||
// fall to interp
|
||||
EEINST_SETSIGNEXT(_Rd_);
|
||||
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
|
||||
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
|
||||
_flushCachedRegs();
|
||||
|
@ -307,11 +308,11 @@ void recPSRLH( void )
|
|||
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
if( (_Sa_&0xf) == 0 ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
SSE2_PSRLW_I8_to_XMM(EEREC_D,_Sa_&0xf );
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
_flushCachedRegs();
|
||||
|
@ -336,11 +337,11 @@ void recPSRLW( void )
|
|||
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
if( _Sa_ == 0 ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
SSE2_PSRLD_I8_to_XMM(EEREC_D,_Sa_ );
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
_flushCachedRegs();
|
||||
|
@ -365,11 +366,11 @@ void recPSRAH( void )
|
|||
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
if( (_Sa_&0xf) == 0 ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
SSE2_PSRAW_I8_to_XMM(EEREC_D,_Sa_&0xf );
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
_flushCachedRegs();
|
||||
|
@ -394,11 +395,11 @@ void recPSRAW( void )
|
|||
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
if( _Sa_ == 0 ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
SSE2_PSRAD_I8_to_XMM(EEREC_D,_Sa_ );
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
_flushCachedRegs();
|
||||
|
@ -423,11 +424,11 @@ void recPSLLH( void )
|
|||
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
if( (_Sa_&0xf) == 0 ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
SSE2_PSLLW_I8_to_XMM(EEREC_D,_Sa_&0xf );
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
_flushCachedRegs();
|
||||
|
@ -452,11 +453,11 @@ void recPSLLW( void )
|
|||
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
if( _Sa_ == 0 ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
SSE2_PSLLD_I8_to_XMM(EEREC_D,_Sa_ );
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
_flushCachedRegs();
|
||||
|
@ -533,13 +534,22 @@ void recPMAXW()
|
|||
if ( ! _Rd_ ) return;
|
||||
|
||||
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
if( EEREC_S == EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else if( EEREC_D == EEREC_S ) SSE4_PMAXSD_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
else if ( EEREC_D == EEREC_T ) SSE4_PMAXSD_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE4_PMAXSD_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
}
|
||||
else {
|
||||
int t0reg;
|
||||
|
||||
if( EEREC_S == EEREC_T ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
|
||||
SSE2_PCMPGTD_XMM_to_XMM(t0reg, EEREC_T);
|
||||
|
@ -564,6 +574,8 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
|
|||
|
||||
SSEX_POR_XMM_to_XMM(EEREC_D, t0reg);
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
recCall( Interp::PMAXW, _Rd_ );
|
||||
|
@ -1602,6 +1614,10 @@ void recPABSW()
|
|||
if( !_Rd_ ) return;
|
||||
|
||||
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
if( cpucaps.hasSupplementalStreamingSIMD3Extensions ) {
|
||||
SSSE3_PABSD_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
|
@ -1609,6 +1625,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
|||
SSEX_PXOR_XMM_to_XMM(EEREC_D, t0reg);
|
||||
SSE2_PSUBD_XMM_to_XMM(EEREC_D, t0reg);
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
_deleteEEreg(_Rt_, 1);
|
||||
|
@ -1626,6 +1643,10 @@ void recPABSH()
|
|||
if( !_Rd_ ) return;
|
||||
|
||||
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
if( cpucaps.hasSupplementalStreamingSIMD3Extensions ) {
|
||||
SSSE3_PABSW_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
|
@ -1633,6 +1654,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
|||
SSEX_PXOR_XMM_to_XMM(EEREC_D, t0reg);
|
||||
SSE2_PSUBW_XMM_to_XMM(EEREC_D, t0reg);
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
_deleteEEreg(_Rt_, 1);
|
||||
|
@ -1650,13 +1672,22 @@ void recPMINW()
|
|||
if ( ! _Rd_ ) return;
|
||||
|
||||
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
if( EEREC_S == EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else if( EEREC_D == EEREC_S ) SSE4_PMINSD_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
else if ( EEREC_D == EEREC_T ) SSE4_PMINSD_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE4_PMINSD_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
}
|
||||
else {
|
||||
int t0reg;
|
||||
|
||||
if( EEREC_S == EEREC_T ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSE2_PCMPGTD_XMM_to_XMM(t0reg, EEREC_S);
|
||||
|
@ -1681,6 +1712,8 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
|
|||
|
||||
SSEX_POR_XMM_to_XMM(EEREC_D, t0reg);
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
recCall( Interp::PMINW, _Rd_ );
|
||||
|
@ -1689,6 +1722,8 @@ CPU_SSE_XMMCACHE_END
|
|||
////////////////////////////////////////////////////
|
||||
void recPADSBH()
|
||||
{
|
||||
if ( ! _Rd_ ) return;
|
||||
|
||||
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
|
||||
int t0reg;
|
||||
|
||||
|
@ -1698,9 +1733,8 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
|
|||
// reset lower bits to 0s
|
||||
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 8);
|
||||
SSE2_PSLLDQ_I8_to_XMM(EEREC_D, 8);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
|
||||
|
@ -1719,6 +1753,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
|
|||
SSE2_PSRLDQ_I8_to_XMM(t0reg, 8);
|
||||
SSE_MOVLHPS_XMM_to_XMM(EEREC_D, t0reg);
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
|
@ -1728,6 +1763,8 @@ CPU_SSE_XMMCACHE_END
|
|||
////////////////////////////////////////////////////
|
||||
void recPADDUW()
|
||||
{
|
||||
if ( ! _Rd_ ) return;
|
||||
|
||||
CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED)
|
||||
|
||||
if( _Rt_ == 0 ) {
|
||||
|
@ -2249,18 +2286,182 @@ void recPMADDW()
|
|||
EEINST_SETSIGNEXT(_Rs_);
|
||||
EEINST_SETSIGNEXT(_Rt_);
|
||||
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
|
||||
if( !cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
recCall( Interp::PMADDW, _Rd_ );
|
||||
return;
|
||||
}
|
||||
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
|
||||
SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
|
||||
if( _Rd_ ) {
|
||||
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
else if( EEREC_D == EEREC_S ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
else if( EEREC_D == EEREC_T ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
|
||||
SSE4_PMULDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
|
||||
}
|
||||
}
|
||||
|
||||
// add from LO/HI
|
||||
if ( _Rd_ ) SSE2_PADDQ_XMM_to_XMM(EEREC_D, EEREC_LO);
|
||||
else SSE2_PADDQ_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
|
||||
// interleave & sign extend
|
||||
if ( _Rd_ ) {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_D, 0xdd);
|
||||
}
|
||||
else {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
|
||||
}
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
CPU_SSE_XMMCACHE_END
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recPSLLVW()
|
||||
{
|
||||
if ( ! _Rd_ ) return;
|
||||
|
||||
EEINST_SETSIGNEXT(_Rd_);
|
||||
CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED)
|
||||
if( _Rs_ == 0 ) {
|
||||
if( _Rt_ == 0 ) {
|
||||
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31);
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( _Rt_ == 0 ) {
|
||||
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
|
||||
// shamt is 5-bit
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
|
||||
SSE2_PSLLQ_I8_to_XMM(t0reg, 27);
|
||||
SSE2_PSRLQ_I8_to_XMM(t0reg, 27);
|
||||
|
||||
// EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2]
|
||||
SSE_MOVHLPS_XMM_to_XMM(t1reg, EEREC_T);
|
||||
if( EEREC_D != EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
|
||||
// shift (left) Rt[0]
|
||||
SSE2_PSLLD_XMM_to_XMM(EEREC_D, t0reg);
|
||||
|
||||
// shift (left) Rt[2]
|
||||
SSE_MOVHLPS_XMM_to_XMM(t0reg, t0reg);
|
||||
SSE2_PSLLD_XMM_to_XMM(t1reg, t0reg);
|
||||
|
||||
// merge & sign extend
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
|
||||
}
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
recCall( Interp::PSLLVW, _Rd_ );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recPSRLVW()
|
||||
{
|
||||
if ( ! _Rd_ ) return;
|
||||
|
||||
EEINST_SETSIGNEXT(_Rd_);
|
||||
CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED)
|
||||
if( _Rs_ == 0 ) {
|
||||
if( _Rt_ == 0 ) {
|
||||
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31);
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( _Rt_ == 0 ) {
|
||||
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
|
||||
// shamt is 5-bit
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
|
||||
SSE2_PSLLQ_I8_to_XMM(t0reg, 27);
|
||||
SSE2_PSRLQ_I8_to_XMM(t0reg, 27);
|
||||
|
||||
// EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2]
|
||||
SSE_MOVHLPS_XMM_to_XMM(t1reg, EEREC_T);
|
||||
if( EEREC_D != EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
|
||||
// shift (right logical) Rt[0]
|
||||
SSE2_PSRLD_XMM_to_XMM(EEREC_D, t0reg);
|
||||
|
||||
// shift (right logical) Rt[2]
|
||||
SSE_MOVHLPS_XMM_to_XMM(t0reg, t0reg);
|
||||
SSE2_PSRLD_XMM_to_XMM(t1reg, t0reg);
|
||||
|
||||
// merge & sign extend
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
|
||||
}
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
|
||||
CPU_SSE_XMMCACHE_END
|
||||
recCall( Interp::PSRLVW, _Rd_ );
|
||||
}
|
||||
|
||||
|
@ -2270,38 +2471,52 @@ void recPMSUBW()
|
|||
EEINST_SETSIGNEXT(_Rs_);
|
||||
EEINST_SETSIGNEXT(_Rt_);
|
||||
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
|
||||
//CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
|
||||
// int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
//
|
||||
// if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
// else if( EEREC_D == EEREC_T ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
// else {
|
||||
// SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
// SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
// }
|
||||
//
|
||||
// // add from LO/HI
|
||||
// SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
// SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8);
|
||||
// SSE2_PSUBQ_XMM_to_XMM(EEREC_LO, EEREC_D);
|
||||
//
|
||||
// // get the signs
|
||||
// SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_LO);
|
||||
// SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
|
||||
// SSE2_PSRAD_I8_to_XMM(t0reg, 31);
|
||||
//
|
||||
// // interleave
|
||||
// SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8);
|
||||
// SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8);
|
||||
// SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
//
|
||||
// SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
// SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg);
|
||||
//
|
||||
// _freeXMMreg(t0reg);
|
||||
//CPU_SSE_XMMCACHE_END
|
||||
|
||||
if( !cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
recCall( Interp::PMSUBW, _Rd_ );
|
||||
return;
|
||||
}
|
||||
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
|
||||
SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
|
||||
if( _Rd_ ) {
|
||||
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
else if( EEREC_D == EEREC_S ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
else if( EEREC_D == EEREC_T ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
|
||||
SSE4_PMULDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
|
||||
}
|
||||
}
|
||||
|
||||
// sub from LO/HI
|
||||
if ( _Rd_ ) {
|
||||
SSE2_PSUBQ_XMM_to_XMM(EEREC_LO, EEREC_D);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
|
||||
}
|
||||
else {
|
||||
SSE2_PSUBQ_XMM_to_XMM(EEREC_LO, EEREC_HI);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
}
|
||||
|
||||
// interleave & sign extend
|
||||
if ( _Rd_ ) {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_D, 0xdd);
|
||||
}
|
||||
else {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
|
||||
}
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
CPU_SSE_XMMCACHE_END
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
|
@ -2310,7 +2525,43 @@ void recPMULTW()
|
|||
EEINST_SETSIGNEXT(_Rs_);
|
||||
EEINST_SETSIGNEXT(_Rt_);
|
||||
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
|
||||
if( !cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
recCall( Interp::PMULTW, _Rd_ );
|
||||
return;
|
||||
}
|
||||
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI)
|
||||
if( !_Rs_ || !_Rt_ ) {
|
||||
if( _Rd_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
SSE2_PXOR_XMM_to_XMM(EEREC_LO, EEREC_LO);
|
||||
SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
}
|
||||
else {
|
||||
if( _Rd_ ) {
|
||||
if( EEREC_D == EEREC_S ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
else if( EEREC_D == EEREC_T ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
}
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
|
||||
SSE4_PMULDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
|
||||
}
|
||||
|
||||
// interleave & sign extend
|
||||
if ( _Rd_ ) {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_D, 0xdd);
|
||||
}
|
||||
else {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
|
||||
}
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
}
|
||||
////////////////////////////////////////////////////
|
||||
void recPDIVW()
|
||||
|
@ -2332,55 +2583,26 @@ PCSX2_ALIGNED16(int s_mask1[4]) = {~0, 0, ~0, 0};
|
|||
void recPHMADH()
|
||||
{
|
||||
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI)
|
||||
int t0reg = _Rd_ ? EEREC_D : _allocTempXMMreg(XMMT_INT, -1);
|
||||
|
||||
if( t0reg == EEREC_S ) {
|
||||
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_S);
|
||||
|
||||
if( t0reg == EEREC_T ) {
|
||||
SSE2_PMULHW_XMM_to_XMM(EEREC_LO, EEREC_T);
|
||||
SSE2_PMULLW_XMM_to_XMM(t0reg, EEREC_T);
|
||||
}
|
||||
else {
|
||||
SSE2_PMULLW_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSE2_PMULHW_XMM_to_XMM(EEREC_LO, EEREC_T);
|
||||
}
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
|
||||
}
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_T);
|
||||
|
||||
SSE2_PMULLW_XMM_to_XMM(t0reg, EEREC_S);
|
||||
SSE2_PMULHW_XMM_to_XMM(EEREC_LO, EEREC_S);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
|
||||
}
|
||||
|
||||
// 0-3
|
||||
SSE2_PUNPCKLWD_XMM_to_XMM(t0reg, EEREC_LO);
|
||||
// 4-7
|
||||
SSE2_PUNPCKHWD_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
|
||||
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8); // 0,2,1,3, L->H
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xd8); // 4,6,5,7, L->H
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
|
||||
SSE2_PUNPCKLQDQ_XMM_to_XMM(t0reg, EEREC_HI);
|
||||
SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_LO, EEREC_HI);
|
||||
|
||||
SSE2_PADDD_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
|
||||
if( _Rd_ ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
|
||||
if( EEREC_D == EEREC_S ) {
|
||||
SSE2_PMADDWD_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
else if( EEREC_D == EEREC_T ) {
|
||||
SSE2_PMADDWD_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
}
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
SSE2_PMADDWD_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
}
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_D);
|
||||
}
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_T);
|
||||
SSE2_PMADDWD_XMM_to_XMM(EEREC_LO, EEREC_S);
|
||||
}
|
||||
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_LO, 0xf5);
|
||||
|
||||
SSE2_PAND_M128_to_XMM(EEREC_LO, (uptr)s_mask1);
|
||||
SSE2_PAND_M128_to_XMM(EEREC_HI, (uptr)s_mask1);
|
||||
|
||||
if( !_Rd_ ) _freeXMMreg(t0reg);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
SSE2_PSRLQ_I8_to_XMM(EEREC_HI, 32);
|
||||
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
|
@ -2451,38 +2673,20 @@ CPU_SSE_XMMCACHE_END
|
|||
////////////////////////////////////////////////////
|
||||
void recPHMSBH()
|
||||
{
|
||||
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI)
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_S);
|
||||
|
||||
SSE2_PMULLW_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSE2_PMULHW_XMM_to_XMM(EEREC_LO, EEREC_T);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
|
||||
|
||||
// 0-3
|
||||
SSE2_PUNPCKLWD_XMM_to_XMM(t0reg, EEREC_LO);
|
||||
// 4-7
|
||||
SSE2_PUNPCKHWD_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
|
||||
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8); // 0,2,1,3, L->H
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xd8); // 4,6,5,7, L->H
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(t0reg, EEREC_HI);
|
||||
SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_LO, EEREC_HI);
|
||||
|
||||
SSE2_PSUBD_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
|
||||
if( _Rd_ ) {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
|
||||
}
|
||||
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_LO, 0xf5);
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI)
|
||||
SSE2_PCMPEQD_XMM_to_XMM(EEREC_LO, EEREC_LO);
|
||||
SSE2_PSRLD_XMM_to_XMM(EEREC_LO, 16);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
|
||||
SSE2_PAND_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
SSE2_PMADDWD_XMM_to_XMM(EEREC_HI, EEREC_T);
|
||||
SSE2_PSLLD_XMM_to_XMM(EEREC_LO, 16);
|
||||
SSE2_PAND_XMM_to_XMM(EEREC_LO, EEREC_S);
|
||||
SSE2_PMADDWD_XMM_to_XMM(EEREC_LO, EEREC_T);
|
||||
SSE2_PSUBD_XMM_to_XMM(EEREC_LO, EEREC_HI);
|
||||
if( _Rd_ ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
|
||||
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
SSE2_PSRLQ_I8_to_XMM(EEREC_HI, 32);
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
recCall( Interp::PHMSBH, _Rd_ );
|
||||
|
@ -2585,7 +2789,7 @@ void recPROT3W( void )
|
|||
{
|
||||
if (!_Rd_) return;
|
||||
|
||||
CPU_SSE_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
|
||||
CPU_SSE_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0xc9);
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
|
@ -3001,8 +3205,72 @@ REC_FUNC_DEL( PEXCH, _Rd_);
|
|||
////////////////////////////////////////////////////
|
||||
//REC_FUNC( PSRAVW, _Rd_ );
|
||||
|
||||
void recPSRAVW( void )
|
||||
void recPSRAVW()
|
||||
{
|
||||
if ( ! _Rd_ ) return;
|
||||
|
||||
EEINST_SETSIGNEXT(_Rd_);
|
||||
CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED)
|
||||
if( _Rs_ == 0 ) {
|
||||
if( _Rt_ == 0 ) {
|
||||
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31);
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( _Rt_ == 0 ) {
|
||||
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
|
||||
// shamt is 5-bit
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
|
||||
SSE2_PSLLQ_I8_to_XMM(t0reg, 27);
|
||||
SSE2_PSRLQ_I8_to_XMM(t0reg, 27);
|
||||
|
||||
// EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2]
|
||||
SSE_MOVHLPS_XMM_to_XMM(t1reg, EEREC_T);
|
||||
if( EEREC_D != EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
|
||||
// shift (right arithmetic) Rt[0]
|
||||
SSE2_PSRAD_XMM_to_XMM(EEREC_D, t0reg);
|
||||
|
||||
// shift (right arithmetic) Rt[2]
|
||||
SSE_MOVHLPS_XMM_to_XMM(t0reg, t0reg);
|
||||
SSE2_PSRAD_XMM_to_XMM(t1reg, t0reg);
|
||||
|
||||
// merge & sign extend
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
}
|
||||
else {
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
|
||||
}
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code );
|
||||
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc );
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
@ -3069,32 +3337,49 @@ CPU_SSE_XMMCACHE_END
|
|||
////////////////////////////////////////////////////
|
||||
void recPMULTUW()
|
||||
{
|
||||
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI)
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
|
||||
EEINST_SETSIGNEXT(_Rs_);
|
||||
EEINST_SETSIGNEXT(_Rt_);
|
||||
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
|
||||
|
||||
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI)
|
||||
if( !_Rs_ || !_Rt_ ) {
|
||||
if( _Rd_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
SSE2_PXOR_XMM_to_XMM(EEREC_LO, EEREC_LO);
|
||||
SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
}
|
||||
else {
|
||||
if( _Rd_ ) {
|
||||
if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
else if( EEREC_D == EEREC_T ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_D);
|
||||
}
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
|
||||
SSE2_PMULUDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
|
||||
}
|
||||
|
||||
// get the signs
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31);
|
||||
|
||||
// interleave
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0xd8);
|
||||
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
// interleave & sign extend
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSE2_PSHUFD_XMM_to_XMM(t0reg, EEREC_HI, 0xd8);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
|
||||
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg);
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
recCall( Interp::PMULTUW, _Rd_ );
|
||||
}
|
||||
|
@ -3102,37 +3387,52 @@ CPU_SSE_XMMCACHE_END
|
|||
////////////////////////////////////////////////////
|
||||
void recPMADDUW()
|
||||
{
|
||||
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
|
||||
EEINST_SETSIGNEXT(_Rs_);
|
||||
EEINST_SETSIGNEXT(_Rt_);
|
||||
|
||||
if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
|
||||
SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
|
||||
if( _Rd_ ) {
|
||||
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
else if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
else if( EEREC_D == EEREC_T ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_D);
|
||||
}
|
||||
else {
|
||||
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
else {
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
|
||||
SSE2_PMULUDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
|
||||
}
|
||||
}
|
||||
|
||||
// add from LO/HI
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0x88);
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, EEREC_HI);
|
||||
SSE2_PADDQ_XMM_to_XMM(EEREC_D, EEREC_LO);
|
||||
if ( _Rd_ ) SSE2_PADDQ_XMM_to_XMM(EEREC_D, EEREC_LO);
|
||||
else SSE2_PADDQ_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
|
||||
// get the signs
|
||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31);
|
||||
|
||||
// interleave
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0xd8);
|
||||
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
|
||||
// interleave & sign extend
|
||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
|
||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
|
||||
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
SSE2_PSHUFD_XMM_to_XMM(t0reg, EEREC_HI, 0xd8);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
|
||||
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
|
||||
|
||||
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, t0reg);
|
||||
SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg);
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
CPU_SSE_XMMCACHE_END
|
||||
|
||||
recCall( Interp::PMADDUW, _Rd_ );
|
||||
|
@ -3142,6 +3442,8 @@ CPU_SSE_XMMCACHE_END
|
|||
//do EEINST_SETSIGNEXT
|
||||
// Recompiler handler for the PDIVUW opcode.
// No native translation is generated here: the source registers Rs and Rt are
// flagged through EEINST_SETSIGNEXT and the instruction is then handed to
// recCall together with the interpreter routine Interp::PDIVUW and the
// destination register index _Rd_.
// NOTE(review): recCall presumably flushes cached registers and emits a call
// into the interpreter -- its definition is outside this view; confirm.
void recPDIVUW()
|
||||
{
|
||||
EEINST_SETSIGNEXT(_Rs_);
|
||||
EEINST_SETSIGNEXT(_Rt_);
|
||||
recCall( Interp::PDIVUW, _Rd_ );
|
||||
}
|
||||
|
||||
|
|
|
@ -126,6 +126,7 @@ struct CAPABILITIES {
|
|||
u32 hasThermalMonitor;
|
||||
u32 hasIntel64BitArchitecture;
|
||||
u32 hasStreamingSIMD3Extensions;
|
||||
u32 hasSupplementalStreamingSIMD3Extensions;
|
||||
u32 hasStreamingSIMD4Extensions;
|
||||
|
||||
// AMD-specific CPU Features
|
||||
|
@ -1413,6 +1414,9 @@ extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
|
|||
extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
|
||||
extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
|
||||
|
||||
// SHUFPD emitters: `to` is the destination XMM register, `from` is either a
// source XMM register or a memory address, and `imm8` is the immediate byte
// appended after the instruction.
extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
|
||||
extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
|
||||
|
||||
extern void SSE_STMXCSR( uptr from );
|
||||
extern void SSE_LDMXCSR( uptr from );
|
||||
|
||||
|
@ -1610,6 +1614,13 @@ extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from);
|
|||
extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from);
|
||||
|
||||
// SSSE3
|
||||
|
||||
// Register-to-register SSSE3 emitters. `to` is the destination XMM register and
// `from` the source XMM register; PALIGNR additionally takes an immediate byte.
// NOTE(review): callers presumably need to check
// cpucaps.hasSupplementalStreamingSIMD3Extensions before using these -- confirm
// at the call sites.
extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8);
|
||||
|
||||
// SSE4.1
|
||||
|
||||
#ifndef _MM_MK_INSERTPS_NDX
|
||||
|
@ -1633,6 +1644,7 @@ extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from);
|
|||
extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from);
|
||||
extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from);
|
||||
extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from);
|
||||
// PMULDQ emitter (register-to-register form): `to` is the destination XMM
// register, `from` the source XMM register.
extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
|
||||
//*********************
|
||||
// SSE-X - uses both SSE,SSE2 code and tries to keep consistency between the data
|
||||
|
|
|
@ -376,6 +376,10 @@ void cpudetectInit()
|
|||
|
||||
cpucaps.hasStreamingSIMD4Extensions = ( cpuinfo.x86Flags2 >> 19 ) & 1; //sse4.1
|
||||
|
||||
// --> SSSE3 detection <--
|
||||
|
||||
cpucaps.hasSupplementalStreamingSIMD3Extensions = ( cpuinfo.x86Flags2 >> 9 ) & 1; //ssse3
|
||||
|
||||
// --> SSE3 detection <--
|
||||
// These instructions may not be recognized by some compilers, or may not have
|
||||
// intrinsic equivalents available. So we use our own ix86 emitter to generate
|
||||
|
|
|
@ -661,6 +661,13 @@ __forceinline void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType f
|
|||
write8(imm8);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//SHUFPD: Shuffle Packed Double-Precision FP Values *
|
||||
//**********************************************************************************
|
||||
// Emits SHUFPD to, from, imm8 (register source). The architectural encoding is
// 66 0F C6 /r ib; the 0x66 prefix and ModRM byte are presumably produced by the
// SSERtoR66 macro (defined elsewhere), after which the immediate is appended.
__forceinline void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); }
|
||||
// Emits SHUFPD to, [from], imm8 (128-bit memory source), then appends the
// immediate byte.
// NOTE(review): SSEMtoR66 is defined outside this view. If it encodes the
// address relative to the end of the instruction (other emitters in this file
// pass an offset to MEMADDR), confirm that the trailing imm8 byte is accounted
// for in that offset.
__forceinline void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//PSHUFD: Shuffle Packed DoubleWords *
|
||||
|
@ -1076,6 +1083,41 @@ __forceinline void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { writ
|
|||
// Emits MOVSHDUP to, from (register source): writes the 0xF3 prefix byte, then
// lets SSERtoR emit the opcode value 0x160f and, presumably, the ModRM byte.
__forceinline void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); }
|
||||
// Emits MOVSHDUP to, [from] (memory source): writes the 0xF3 prefix byte, then
// lets SSEMtoR emit the opcode value 0x160f with an extra-byte argument of 0
// (no immediate follows the address).
__forceinline void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); }
|
||||
|
||||
// SSSE3
|
||||
|
||||
// Emits PABSB to, from (packed absolute value of signed bytes, register form).
// Architectural encoding: 66 0F 38 1C /r. The emission order below is
// significant: prefix, optional REX, three opcode bytes, ModRM.
__forceinline void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66); // mandatory operand-size prefix
|
||||
RexRB(0, to, from); // REX handling for the two registers (helper defined elsewhere)
|
||||
write24(0x1C380F); // opcode bytes; presumably written low byte first -> 0F 38 1C
|
||||
ModRM(3, to, from); // mod field 3: both operands are registers
|
||||
}
|
||||
|
||||
// Emits PABSW to, from (packed absolute value of signed words, register form).
// Architectural encoding: 66 0F 38 1D /r. The emission order below is
// significant: prefix, optional REX, three opcode bytes, ModRM.
__forceinline void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66); // mandatory operand-size prefix
|
||||
RexRB(0, to, from); // REX handling for the two registers (helper defined elsewhere)
|
||||
write24(0x1D380F); // opcode bytes; presumably written low byte first -> 0F 38 1D
|
||||
ModRM(3, to, from); // mod field 3: both operands are registers
|
||||
}
|
||||
|
||||
// Emits PABSD to, from (packed absolute value of signed doublewords, register
// form). Architectural encoding: 66 0F 38 1E /r. The emission order below is
// significant: prefix, optional REX, three opcode bytes, ModRM.
__forceinline void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66); // mandatory operand-size prefix
|
||||
RexRB(0, to, from); // REX handling for the two registers (helper defined elsewhere)
|
||||
write24(0x1E380F); // opcode bytes; presumably written low byte first -> 0F 38 1E
|
||||
ModRM(3, to, from); // mod field 3: both operands are registers
|
||||
}
|
||||
|
||||
// Emits PALIGNR to, from, imm8 (register form).
// Architectural encoding: 66 0F 3A 0F /r ib. The emission order below is
// significant: prefix, optional REX, three opcode bytes, ModRM, immediate.
__forceinline void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
|
||||
{
|
||||
write8(0x66); // mandatory operand-size prefix
|
||||
RexRB(0, to, from); // REX handling for the two registers (helper defined elsewhere)
|
||||
write24(0x0F3A0F); // opcode bytes; presumably written low byte first -> 0F 3A 0F
|
||||
ModRM(3, to, from); // mod field 3: both operands are registers
|
||||
write8(imm8); // immediate operand, emitted last
|
||||
}
|
||||
|
||||
// SSE4.1
|
||||
|
||||
__forceinline void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
|
||||
|
@ -1224,6 +1266,14 @@ __forceinline void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from)
|
|||
write32(MEMADDR(from, 4));
|
||||
}
|
||||
|
||||
// Emits PMULDQ to, from (signed 32x32 -> 64-bit packed multiply, register form).
// Architectural encoding: 66 0F 38 28 /r. The emission order below is
// significant: prefix, optional REX, three opcode bytes, ModRM.
__forceinline void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66); // mandatory operand-size prefix
|
||||
RexRB(0, to, from); // REX handling for the two registers (helper defined elsewhere)
|
||||
write24(0x28380F); // opcode bytes; presumably written low byte first -> 0F 38 28
|
||||
ModRM(3, to, from); // mod field 3: both operands are registers
|
||||
}
|
||||
|
||||
// SSE-X
|
||||
__forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue