Lots of work from tmkk. This update adds recompiler implementations for several MMI opcodes, fixes bugs, and adds SSSE3 detection.

Thanks again, tmkk! :)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@522 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
ramapcsx2 2009-02-17 23:29:47 +00:00
parent af89808f8f
commit 33d5c66ac7
6 changed files with 643 additions and 273 deletions

View File

@ -1058,9 +1058,9 @@ void PSLLVW() {
void PSRLVW() { void PSRLVW() {
if (!_Rd_) return; if (!_Rd_) return;
cpuRegs.GPR.r[_Rd_].UD[0] = (cpuRegs.GPR.r[_Rt_].UL[0] >> cpuRegs.GPR.r[_Rd_].UD[0] = (s32)(cpuRegs.GPR.r[_Rt_].UL[0] >>
(cpuRegs.GPR.r[_Rs_].UL[0] & 0x1F)); (cpuRegs.GPR.r[_Rs_].UL[0] & 0x1F));
cpuRegs.GPR.r[_Rd_].UD[1] = (cpuRegs.GPR.r[_Rt_].UL[2] >> cpuRegs.GPR.r[_Rd_].UD[1] = (s32)(cpuRegs.GPR.r[_Rt_].UL[2] >>
(cpuRegs.GPR.r[_Rs_].UL[2] & 0x1F)); (cpuRegs.GPR.r[_Rs_].UL[2] & 0x1F));
} }

View File

@ -134,11 +134,13 @@ void SysDetect()
"\t%sDetected SSE\n" "\t%sDetected SSE\n"
"\t%sDetected SSE2\n" "\t%sDetected SSE2\n"
"\t%sDetected SSE3\n" "\t%sDetected SSE3\n"
"\t%sDetected SSSE3\n"
"\t%sDetected SSE4.1\n", params "\t%sDetected SSE4.1\n", params
cpucaps.hasMultimediaExtensions ? "" : "Not ", cpucaps.hasMultimediaExtensions ? "" : "Not ",
cpucaps.hasStreamingSIMDExtensions ? "" : "Not ", cpucaps.hasStreamingSIMDExtensions ? "" : "Not ",
cpucaps.hasStreamingSIMD2Extensions ? "" : "Not ", cpucaps.hasStreamingSIMD2Extensions ? "" : "Not ",
cpucaps.hasStreamingSIMD3Extensions ? "" : "Not ", cpucaps.hasStreamingSIMD3Extensions ? "" : "Not ",
cpucaps.hasSupplementalStreamingSIMD3Extensions ? "" : "Not ",
cpucaps.hasStreamingSIMD4Extensions ? "" : "Not " cpucaps.hasStreamingSIMD4Extensions ? "" : "Not "
); );

View File

@ -208,6 +208,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READLO|XMMINFO_READHI)
case 0x02: // SLW case 0x02: // SLW
// fall to interp // fall to interp
EEINST_SETSIGNEXT(_Rd_);
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc ); MOV32ItoM( (uptr)&cpuRegs.pc, pc );
_flushCachedRegs(); _flushCachedRegs();
@ -307,11 +308,11 @@ void recPSRLH( void )
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
if( (_Sa_&0xf) == 0 ) { if( (_Sa_&0xf) == 0 ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
return;
} }
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PSRLW_I8_to_XMM(EEREC_D,_Sa_&0xf ); SSE2_PSRLW_I8_to_XMM(EEREC_D,_Sa_&0xf );
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
_flushCachedRegs(); _flushCachedRegs();
@ -336,11 +337,11 @@ void recPSRLW( void )
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
if( _Sa_ == 0 ) { if( _Sa_ == 0 ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
return;
} }
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PSRLD_I8_to_XMM(EEREC_D,_Sa_ ); SSE2_PSRLD_I8_to_XMM(EEREC_D,_Sa_ );
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
_flushCachedRegs(); _flushCachedRegs();
@ -365,11 +366,11 @@ void recPSRAH( void )
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
if( (_Sa_&0xf) == 0 ) { if( (_Sa_&0xf) == 0 ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
return;
} }
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PSRAW_I8_to_XMM(EEREC_D,_Sa_&0xf ); SSE2_PSRAW_I8_to_XMM(EEREC_D,_Sa_&0xf );
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
_flushCachedRegs(); _flushCachedRegs();
@ -394,11 +395,11 @@ void recPSRAW( void )
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
if( _Sa_ == 0 ) { if( _Sa_ == 0 ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
return;
} }
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PSRAD_I8_to_XMM(EEREC_D,_Sa_ ); SSE2_PSRAD_I8_to_XMM(EEREC_D,_Sa_ );
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
_flushCachedRegs(); _flushCachedRegs();
@ -423,11 +424,11 @@ void recPSLLH( void )
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
if( (_Sa_&0xf) == 0 ) { if( (_Sa_&0xf) == 0 ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
return;
} }
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PSLLW_I8_to_XMM(EEREC_D,_Sa_&0xf ); SSE2_PSLLW_I8_to_XMM(EEREC_D,_Sa_&0xf );
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
_flushCachedRegs(); _flushCachedRegs();
@ -452,11 +453,11 @@ void recPSLLW( void )
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
if( _Sa_ == 0 ) { if( _Sa_ == 0 ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
return;
} }
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PSLLD_I8_to_XMM(EEREC_D,_Sa_ ); SSE2_PSLLD_I8_to_XMM(EEREC_D,_Sa_ );
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
_flushCachedRegs(); _flushCachedRegs();
@ -533,13 +534,22 @@ void recPMAXW()
if ( ! _Rd_ ) return; if ( ! _Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
if ( cpucaps.hasStreamingSIMD4Extensions ) {
if( EEREC_S == EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
else if( EEREC_D == EEREC_S ) SSE4_PMAXSD_XMM_to_XMM(EEREC_D, EEREC_T);
else if ( EEREC_D == EEREC_T ) SSE4_PMAXSD_XMM_to_XMM(EEREC_D, EEREC_S);
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSE4_PMAXSD_XMM_to_XMM(EEREC_D, EEREC_T);
}
}
else {
int t0reg; int t0reg;
if( EEREC_S == EEREC_T ) { if( EEREC_S == EEREC_T ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
return;
} }
else {
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
SSE2_PCMPGTD_XMM_to_XMM(t0reg, EEREC_T); SSE2_PCMPGTD_XMM_to_XMM(t0reg, EEREC_T);
@ -564,6 +574,8 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
SSEX_POR_XMM_to_XMM(EEREC_D, t0reg); SSEX_POR_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
}
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
recCall( Interp::PMAXW, _Rd_ ); recCall( Interp::PMAXW, _Rd_ );
@ -1602,6 +1614,10 @@ void recPABSW()
if( !_Rd_ ) return; if( !_Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
if( cpucaps.hasSupplementalStreamingSIMD3Extensions ) {
SSSE3_PABSD_XMM_to_XMM(EEREC_D, EEREC_T);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
@ -1609,6 +1625,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
SSEX_PXOR_XMM_to_XMM(EEREC_D, t0reg); SSEX_PXOR_XMM_to_XMM(EEREC_D, t0reg);
SSE2_PSUBD_XMM_to_XMM(EEREC_D, t0reg); SSE2_PSUBD_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
_deleteEEreg(_Rt_, 1); _deleteEEreg(_Rt_, 1);
@ -1626,6 +1643,10 @@ void recPABSH()
if( !_Rd_ ) return; if( !_Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
if( cpucaps.hasSupplementalStreamingSIMD3Extensions ) {
SSSE3_PABSW_XMM_to_XMM(EEREC_D, EEREC_T);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
@ -1633,6 +1654,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
SSEX_PXOR_XMM_to_XMM(EEREC_D, t0reg); SSEX_PXOR_XMM_to_XMM(EEREC_D, t0reg);
SSE2_PSUBW_XMM_to_XMM(EEREC_D, t0reg); SSE2_PSUBW_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
_deleteEEreg(_Rt_, 1); _deleteEEreg(_Rt_, 1);
@ -1650,13 +1672,22 @@ void recPMINW()
if ( ! _Rd_ ) return; if ( ! _Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
if ( cpucaps.hasStreamingSIMD4Extensions ) {
if( EEREC_S == EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
else if( EEREC_D == EEREC_S ) SSE4_PMINSD_XMM_to_XMM(EEREC_D, EEREC_T);
else if ( EEREC_D == EEREC_T ) SSE4_PMINSD_XMM_to_XMM(EEREC_D, EEREC_S);
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSE4_PMINSD_XMM_to_XMM(EEREC_D, EEREC_T);
}
}
else {
int t0reg; int t0reg;
if( EEREC_S == EEREC_T ) { if( EEREC_S == EEREC_T ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
return;
} }
else {
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
SSE2_PCMPGTD_XMM_to_XMM(t0reg, EEREC_S); SSE2_PCMPGTD_XMM_to_XMM(t0reg, EEREC_S);
@ -1681,6 +1712,8 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
SSEX_POR_XMM_to_XMM(EEREC_D, t0reg); SSEX_POR_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
}
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
recCall( Interp::PMINW, _Rd_ ); recCall( Interp::PMINW, _Rd_ );
@ -1689,6 +1722,8 @@ CPU_SSE_XMMCACHE_END
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPADSBH() void recPADSBH()
{ {
if ( ! _Rd_ ) return;
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
int t0reg; int t0reg;
@ -1698,9 +1733,8 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
// reset lower bits to 0s // reset lower bits to 0s
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 8); SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 8);
SSE2_PSLLDQ_I8_to_XMM(EEREC_D, 8); SSE2_PSLLDQ_I8_to_XMM(EEREC_D, 8);
return;
} }
else {
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
@ -1719,6 +1753,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED)
SSE2_PSRLDQ_I8_to_XMM(t0reg, 8); SSE2_PSRLDQ_I8_to_XMM(t0reg, 8);
SSE_MOVLHPS_XMM_to_XMM(EEREC_D, t0reg); SSE_MOVLHPS_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
@ -1728,6 +1763,8 @@ CPU_SSE_XMMCACHE_END
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPADDUW() void recPADDUW()
{ {
if ( ! _Rd_ ) return;
CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED) CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED)
if( _Rt_ == 0 ) { if( _Rt_ == 0 ) {
@ -2249,18 +2286,182 @@ void recPMADDW()
EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_); EEINST_SETSIGNEXT(_Rt_);
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_); if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
if( !cpucaps.hasStreamingSIMD4Extensions ) {
recCall( Interp::PMADDW, _Rd_ ); recCall( Interp::PMADDW, _Rd_ );
return;
}
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
if( _Rd_ ) {
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
else if( EEREC_D == EEREC_S ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_S);
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
}
}
else {
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
SSE4_PMULDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
}
}
// add from LO/HI
if ( _Rd_ ) SSE2_PADDQ_XMM_to_XMM(EEREC_D, EEREC_LO);
else SSE2_PADDQ_XMM_to_XMM(EEREC_HI, EEREC_LO);
// interleave & sign extend
if ( _Rd_ ) {
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_D, 0xdd);
}
else {
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
}
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
CPU_SSE_XMMCACHE_END
} }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPSLLVW() void recPSLLVW()
{ {
if ( ! _Rd_ ) return;
EEINST_SETSIGNEXT(_Rd_);
CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED)
if( _Rs_ == 0 ) {
if( _Rt_ == 0 ) {
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
SSE2_PSRAD_I8_to_XMM(t0reg, 31);
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg);
}
}
}
else if( _Rt_ == 0 ) {
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
// shamt is 5-bit
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
SSE2_PSLLQ_I8_to_XMM(t0reg, 27);
SSE2_PSRLQ_I8_to_XMM(t0reg, 27);
// EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2]
SSE_MOVHLPS_XMM_to_XMM(t1reg, EEREC_T);
if( EEREC_D != EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
// shift (left) Rt[0]
SSE2_PSLLD_XMM_to_XMM(EEREC_D, t0reg);
// shift (left) Rt[2]
SSE_MOVHLPS_XMM_to_XMM(t0reg, t0reg);
SSE2_PSLLD_XMM_to_XMM(t1reg, t0reg);
// merge & sign extend
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
}
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
}
CPU_SSE_XMMCACHE_END
recCall( Interp::PSLLVW, _Rd_ ); recCall( Interp::PSLLVW, _Rd_ );
} }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPSRLVW() void recPSRLVW()
{ {
if ( ! _Rd_ ) return;
EEINST_SETSIGNEXT(_Rd_);
CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED)
if( _Rs_ == 0 ) {
if( _Rt_ == 0 ) {
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
SSE2_PSRAD_I8_to_XMM(t0reg, 31);
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg);
}
}
}
else if( _Rt_ == 0 ) {
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
// shamt is 5-bit
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
SSE2_PSLLQ_I8_to_XMM(t0reg, 27);
SSE2_PSRLQ_I8_to_XMM(t0reg, 27);
// EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2]
SSE_MOVHLPS_XMM_to_XMM(t1reg, EEREC_T);
if( EEREC_D != EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
// shift (right logical) Rt[0]
SSE2_PSRLD_XMM_to_XMM(EEREC_D, t0reg);
// shift (right logical) Rt[2]
SSE_MOVHLPS_XMM_to_XMM(t0reg, t0reg);
SSE2_PSRLD_XMM_to_XMM(t1reg, t0reg);
// merge & sign extend
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
}
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
}
CPU_SSE_XMMCACHE_END
recCall( Interp::PSRLVW, _Rd_ ); recCall( Interp::PSRLVW, _Rd_ );
} }
@ -2270,38 +2471,52 @@ void recPMSUBW()
EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_); EEINST_SETSIGNEXT(_Rt_);
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_); if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
//CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI) if( !cpucaps.hasStreamingSIMD4Extensions ) {
// int t0reg = _allocTempXMMreg(XMMT_INT, -1);
//
// if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
// else if( EEREC_D == EEREC_T ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_S);
// else {
// SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
// SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
// }
//
// // add from LO/HI
// SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
// SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8);
// SSE2_PSUBQ_XMM_to_XMM(EEREC_LO, EEREC_D);
//
// // get the signs
// SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_LO);
// SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
// SSE2_PSRAD_I8_to_XMM(t0reg, 31);
//
// // interleave
// SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8);
// SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8);
// SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
//
// SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, t0reg);
// SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg);
//
// _freeXMMreg(t0reg);
//CPU_SSE_XMMCACHE_END
recCall( Interp::PMSUBW, _Rd_ ); recCall( Interp::PMSUBW, _Rd_ );
return;
}
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
if( _Rd_ ) {
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
else if( EEREC_D == EEREC_S ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_S);
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
}
}
else {
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
SSE4_PMULDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
}
}
// sub from LO/HI
if ( _Rd_ ) {
SSE2_PSUBQ_XMM_to_XMM(EEREC_LO, EEREC_D);
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
}
else {
SSE2_PSUBQ_XMM_to_XMM(EEREC_LO, EEREC_HI);
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
}
// interleave & sign extend
if ( _Rd_ ) {
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_D, 0xdd);
}
else {
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
}
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
CPU_SSE_XMMCACHE_END
} }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
@ -2310,7 +2525,43 @@ void recPMULTW()
EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_); EEINST_SETSIGNEXT(_Rt_);
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_); if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
if( !cpucaps.hasStreamingSIMD4Extensions ) {
recCall( Interp::PMULTW, _Rd_ ); recCall( Interp::PMULTW, _Rd_ );
return;
}
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI)
if( !_Rs_ || !_Rt_ ) {
if( _Rd_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
SSE2_PXOR_XMM_to_XMM(EEREC_LO, EEREC_LO);
SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
}
else {
if( _Rd_ ) {
if( EEREC_D == EEREC_S ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_S);
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSE4_PMULDQ_XMM_to_XMM(EEREC_D, EEREC_T);
}
}
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
SSE4_PMULDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
}
// interleave & sign extend
if ( _Rd_ ) {
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_D, 0xdd);
}
else {
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
}
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
}
CPU_SSE_XMMCACHE_END
} }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPDIVW() void recPDIVW()
@ -2332,55 +2583,26 @@ PCSX2_ALIGNED16(int s_mask1[4]) = {~0, 0, ~0, 0};
void recPHMADH() void recPHMADH()
{ {
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI) CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI)
int t0reg = _Rd_ ? EEREC_D : _allocTempXMMreg(XMMT_INT, -1);
if( t0reg == EEREC_S ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_S);
if( t0reg == EEREC_T ) {
SSE2_PMULHW_XMM_to_XMM(EEREC_LO, EEREC_T);
SSE2_PMULLW_XMM_to_XMM(t0reg, EEREC_T);
}
else {
SSE2_PMULLW_XMM_to_XMM(t0reg, EEREC_T);
SSE2_PMULHW_XMM_to_XMM(EEREC_LO, EEREC_T);
}
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
}
else {
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T);
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_T);
SSE2_PMULLW_XMM_to_XMM(t0reg, EEREC_S);
SSE2_PMULHW_XMM_to_XMM(EEREC_LO, EEREC_S);
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
}
// 0-3
SSE2_PUNPCKLWD_XMM_to_XMM(t0reg, EEREC_LO);
// 4-7
SSE2_PUNPCKHWD_XMM_to_XMM(EEREC_HI, EEREC_LO);
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8); // 0,2,1,3, L->H
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xd8); // 4,6,5,7, L->H
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, t0reg);
SSE2_PUNPCKLQDQ_XMM_to_XMM(t0reg, EEREC_HI);
SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_LO, EEREC_HI);
SSE2_PADDD_XMM_to_XMM(EEREC_LO, t0reg);
if( _Rd_ ) { if( _Rd_ ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO); if( EEREC_D == EEREC_S ) {
SSE2_PMADDWD_XMM_to_XMM(EEREC_D, EEREC_T);
}
else if( EEREC_D == EEREC_T ) {
SSE2_PMADDWD_XMM_to_XMM(EEREC_D, EEREC_S);
}
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE2_PMADDWD_XMM_to_XMM(EEREC_D, EEREC_S);
}
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_D);
}
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_T);
SSE2_PMADDWD_XMM_to_XMM(EEREC_LO, EEREC_S);
} }
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_LO, 0xf5); SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
SSE2_PSRLQ_I8_to_XMM(EEREC_HI, 32);
SSE2_PAND_M128_to_XMM(EEREC_LO, (uptr)s_mask1);
SSE2_PAND_M128_to_XMM(EEREC_HI, (uptr)s_mask1);
if( !_Rd_ ) _freeXMMreg(t0reg);
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
@ -2451,38 +2673,20 @@ CPU_SSE_XMMCACHE_END
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPHMSBH() void recPHMSBH()
{ {
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI)
int t0reg = _allocTempXMMreg(XMMT_INT, -1); SSE2_PCMPEQD_XMM_to_XMM(EEREC_LO, EEREC_LO);
SSE2_PSRLD_XMM_to_XMM(EEREC_LO, 16);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_S); SSE2_PAND_XMM_to_XMM(EEREC_HI, EEREC_LO);
SSE2_PMADDWD_XMM_to_XMM(EEREC_HI, EEREC_T);
SSE2_PMULLW_XMM_to_XMM(t0reg, EEREC_T); SSE2_PSLLD_XMM_to_XMM(EEREC_LO, 16);
SSE2_PMULHW_XMM_to_XMM(EEREC_LO, EEREC_T); SSE2_PAND_XMM_to_XMM(EEREC_LO, EEREC_S);
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg); SSE2_PMADDWD_XMM_to_XMM(EEREC_LO, EEREC_T);
SSE2_PSUBD_XMM_to_XMM(EEREC_LO, EEREC_HI);
// 0-3 if( _Rd_ ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
SSE2_PUNPCKLWD_XMM_to_XMM(t0reg, EEREC_LO);
// 4-7
SSE2_PUNPCKHWD_XMM_to_XMM(EEREC_HI, EEREC_LO);
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8); // 0,2,1,3, L->H
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xd8); // 4,6,5,7, L->H
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, t0reg);
SSE2_PUNPCKLDQ_XMM_to_XMM(t0reg, EEREC_HI);
SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_LO, EEREC_HI);
SSE2_PSUBD_XMM_to_XMM(EEREC_LO, t0reg);
if( _Rd_ ) {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO);
}
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_LO, 0xf5);
_freeXMMreg(t0reg);
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO);
SSE2_PSRLQ_I8_to_XMM(EEREC_HI, 32);
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
recCall( Interp::PHMSBH, _Rd_ ); recCall( Interp::PHMSBH, _Rd_ );
@ -2585,7 +2789,7 @@ void recPROT3W( void )
{ {
if (!_Rd_) return; if (!_Rd_) return;
CPU_SSE_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) CPU_SSE_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED)
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0xc9); SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0xc9);
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
@ -3001,8 +3205,72 @@ REC_FUNC_DEL( PEXCH, _Rd_);
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
//REC_FUNC( PSRAVW, _Rd_ ); //REC_FUNC( PSRAVW, _Rd_ );
void recPSRAVW( void ) void recPSRAVW()
{ {
if ( ! _Rd_ ) return;
EEINST_SETSIGNEXT(_Rd_);
CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED)
if( _Rs_ == 0 ) {
if( _Rt_ == 0 ) {
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
SSE2_PSRAD_I8_to_XMM(t0reg, 31);
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg);
}
}
}
else if( _Rt_ == 0 ) {
SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
// shamt is 5-bit
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
SSE2_PSLLQ_I8_to_XMM(t0reg, 27);
SSE2_PSRLQ_I8_to_XMM(t0reg, 27);
// EEREC_D[0] <- Rt[0], t1reg[0] <- Rt[2]
SSE_MOVHLPS_XMM_to_XMM(t1reg, EEREC_T);
if( EEREC_D != EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
// shift (right arithmetic) Rt[0]
SSE2_PSRAD_XMM_to_XMM(EEREC_D, t0reg);
// shift (right arithmetic) Rt[2]
SSE_MOVHLPS_XMM_to_XMM(t0reg, t0reg);
SSE2_PSRAD_XMM_to_XMM(t1reg, t0reg);
// merge & sign extend
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_D, EEREC_D);
}
else {
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t1reg);
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D);
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, t0reg);
}
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
}
CPU_SSE_XMMCACHE_END
MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code ); MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc );
iFlushCall(FLUSH_EVERYTHING); iFlushCall(FLUSH_EVERYTHING);
@ -3069,32 +3337,49 @@ CPU_SSE_XMMCACHE_END
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPMULTUW() void recPMULTUW()
{ {
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI) if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_); EEINST_SETSIGNEXT(_Rt_);
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_); CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI)
if( !_Rs_ || !_Rt_ ) {
if( _Rd_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
SSE2_PXOR_XMM_to_XMM(EEREC_LO, EEREC_LO);
SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
}
else {
if( _Rd_ ) {
if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T); if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_S); else if( EEREC_D == EEREC_T ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_S);
else { else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
} }
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_D);
}
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
SSE2_PMULUDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
}
// get the signs // interleave & sign extend
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D); if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_PSRAD_I8_to_XMM(t0reg, 31); SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
// interleave SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0xd8); SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8); }
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO); else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSE2_PSHUFD_XMM_to_XMM(t0reg, EEREC_HI, 0xd8);
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, t0reg);
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, t0reg); SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, t0reg);
SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg); SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
}
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
recCall( Interp::PMULTUW, _Rd_ ); recCall( Interp::PMULTUW, _Rd_ );
} }
@ -3102,37 +3387,52 @@ CPU_SSE_XMMCACHE_END
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recPMADDUW() void recPMADDUW()
{ {
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI) if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_); EEINST_SETSIGNEXT(_Rt_);
CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T); SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
if( _Rd_ ) {
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D);
else if( EEREC_D == EEREC_S ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_S); else if( EEREC_D == EEREC_T ) SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_S);
else { else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S);
SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PMULUDQ_XMM_to_XMM(EEREC_D, EEREC_T);
} }
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_D);
}
else {
if( !_Rs_ || !_Rt_ ) SSE2_PXOR_XMM_to_XMM(EEREC_HI, EEREC_HI);
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S);
SSE2_PMULUDQ_XMM_to_XMM(EEREC_HI, EEREC_T);
}
}
// add from LO/HI // add from LO/HI
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0x88); if ( _Rd_ ) SSE2_PADDQ_XMM_to_XMM(EEREC_D, EEREC_LO);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0x88); else SSE2_PADDQ_XMM_to_XMM(EEREC_HI, EEREC_LO);
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, EEREC_HI);
SSE2_PADDQ_XMM_to_XMM(EEREC_D, EEREC_LO);
// get the signs // interleave & sign extend
SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_D); if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_PSRAD_I8_to_XMM(t0reg, 31); SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88);
SSE2_PSHUFD_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xdd);
// interleave SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO);
SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_D, 0xd8); SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI);
SSE2_PSHUFD_XMM_to_XMM(t0reg, t0reg, 0xd8); }
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_LO); else {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSE2_PSHUFD_XMM_to_XMM(t0reg, EEREC_HI, 0xd8);
SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, t0reg);
SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, t0reg);
SSE2_PSRAD_I8_to_XMM(t0reg, 31); // get the signs
SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, t0reg); SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_LO, t0reg);
SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg); SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
}
CPU_SSE_XMMCACHE_END CPU_SSE_XMMCACHE_END
recCall( Interp::PMADDUW, _Rd_ ); recCall( Interp::PMADDUW, _Rd_ );
@ -3142,6 +3442,8 @@ CPU_SSE_XMMCACHE_END
//do EEINST_SETSIGNEXT //do EEINST_SETSIGNEXT
void recPDIVUW() void recPDIVUW()
{ {
EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_);
recCall( Interp::PDIVUW, _Rd_ ); recCall( Interp::PDIVUW, _Rd_ );
} }

View File

@ -126,6 +126,7 @@ struct CAPABILITIES {
u32 hasThermalMonitor; u32 hasThermalMonitor;
u32 hasIntel64BitArchitecture; u32 hasIntel64BitArchitecture;
u32 hasStreamingSIMD3Extensions; u32 hasStreamingSIMD3Extensions;
u32 hasSupplementalStreamingSIMD3Extensions;
u32 hasStreamingSIMD4Extensions; u32 hasStreamingSIMD4Extensions;
// AMD-specific CPU Features // AMD-specific CPU Features
@ -1413,6 +1414,9 @@ extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
// SHUFPD emitters (SSE2): shuffle packed double-precision values from `to`
// and `from` into `to`, selected by imm8. One prototype for a register
// source, one for a 128-bit memory source given by address.
extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
extern void SSE_STMXCSR( uptr from ); extern void SSE_STMXCSR( uptr from );
extern void SSE_LDMXCSR( uptr from ); extern void SSE_LDMXCSR( uptr from );
@ -1610,6 +1614,13 @@ extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from);
extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from);
// SSSE3 (Supplemental SSE3) emitters. Only register-to-register forms are
// declared here. PABSB/PABSW/PABSD take the packed absolute value of
// bytes/words/dwords; PALIGNR additionally takes an immediate byte count.
extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8);
// SSE4.1 // SSE4.1
#ifndef _MM_MK_INSERTPS_NDX #ifndef _MM_MK_INSERTPS_NDX
@ -1633,6 +1644,7 @@ extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from);
extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from);
extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from);
extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from);
// PMULDQ emitter (SSE4.1), register-to-register form: signed 32x32->64
// multiply of the even dwords of `to` and `from`.
extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
//********************* //*********************
// SSE-X - uses both SSE,SSE2 code and tries to keep consistensies between the data // SSE-X - uses both SSE,SSE2 code and tries to keep consistensies between the data

View File

@ -376,6 +376,10 @@ void cpudetectInit()
cpucaps.hasStreamingSIMD4Extensions = ( cpuinfo.x86Flags2 >> 19 ) & 1; //sse4.1 cpucaps.hasStreamingSIMD4Extensions = ( cpuinfo.x86Flags2 >> 19 ) & 1; //sse4.1
// --> SSSE3 detection <--
cpucaps.hasSupplementalStreamingSIMD3Extensions = ( cpuinfo.x86Flags2 >> 9 ) & 1; //ssse3
// --> SSE3 detection <-- // --> SSE3 detection <--
// These instructions may not be recognized by some compilers, or may not have // These instructions may not be recognized by some compilers, or may not have
// intrinsic equivalents available. So we use our own ix86 emitter to generate // intrinsic equivalents available. So we use our own ix86 emitter to generate

View File

@ -661,6 +661,13 @@ __forceinline void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType f
write8(imm8); write8(imm8);
} }
//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SHUFPD: Shuffle Packed Double-Precision FP Values *
//**********************************************************************************
// Emits SHUFPD xmm(to), xmm(from), imm8 (encoding 66 0F C6 /r ib).
// NOTE(review): SSERtoR66 is a macro defined outside this view; presumably it
// writes the 0x66 prefix, the two opcode bytes and a register-direct ModRM
// built from the `to`/`from` parameters by name -- confirm. The immediate
// selector byte is appended afterwards.
__forceinline void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); }
// Emits SHUFPD xmm(to), m128, imm8 (encoding 66 0F C6 /r ib) where `from` is
// the address of the 128-bit memory operand.
// NOTE(review): SSEMtoR66 is a macro defined outside this view; presumably it
// writes the 0x66 prefix, the opcode, the ModRM byte and the address taken
// from `from` -- confirm. The immediate selector byte is appended afterwards.
__forceinline void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); }
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/ //**********************************************************************************/
//PSHUFD: Shuffle Packed DoubleWords * //PSHUFD: Shuffle Packed DoubleWords *
@ -1076,6 +1083,41 @@ __forceinline void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { writ
__forceinline void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } __forceinline void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); }
__forceinline void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } __forceinline void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); }
// SSSE3
// Emits PABSB xmm(to), xmm(from) (SSSE3, encoding 66 0F 38 1C /r):
// packed absolute value of signed bytes, register-to-register form.
__forceinline void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
// Mandatory 0x66 prefix selecting the XMM form of the instruction.
write8(0x66);
// NOTE(review): RexRB is defined outside this view; presumably it emits a REX
// prefix only when `to`/`from` require one -- confirm.
RexRB(0, to, from);
// Three opcode bytes packed into one constant; assumes write24 stores the
// low byte first, giving 0F 38 1C -- confirm against write24.
write24(0x1C380F);
// Register-direct operands (first argument 3 is presumably the mod field).
ModRM(3, to, from);
}
// Emits PABSW xmm(to), xmm(from) (SSSE3, encoding 66 0F 38 1D /r):
// packed absolute value of signed 16-bit words, register-to-register form.
__forceinline void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
// Mandatory 0x66 prefix selecting the XMM form of the instruction.
write8(0x66);
// NOTE(review): RexRB is defined outside this view; presumably it emits a REX
// prefix only when `to`/`from` require one -- confirm.
RexRB(0, to, from);
// Three opcode bytes packed into one constant; assumes write24 stores the
// low byte first, giving 0F 38 1D -- confirm against write24.
write24(0x1D380F);
// Register-direct operands (first argument 3 is presumably the mod field).
ModRM(3, to, from);
}
// Emits PABSD xmm(to), xmm(from) (SSSE3, encoding 66 0F 38 1E /r):
// packed absolute value of signed 32-bit dwords, register-to-register form.
__forceinline void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
// Mandatory 0x66 prefix selecting the XMM form of the instruction.
write8(0x66);
// NOTE(review): RexRB is defined outside this view; presumably it emits a REX
// prefix only when `to`/`from` require one -- confirm.
RexRB(0, to, from);
// Three opcode bytes packed into one constant; assumes write24 stores the
// low byte first, giving 0F 38 1E -- confirm against write24.
write24(0x1E380F);
// Register-direct operands (first argument 3 is presumably the mod field).
ModRM(3, to, from);
}
// Emits PALIGNR xmm(to), xmm(from), imm8 (SSSE3, encoding 66 0F 3A 0F /r ib):
// concatenates `to` (high) and `from` (low), shifts the pair right by imm8
// bytes and keeps the low 128 bits in `to`. Register-to-register form.
__forceinline void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
// Mandatory 0x66 prefix selecting the XMM form of the instruction.
write8(0x66);
// NOTE(review): RexRB is defined outside this view; presumably it emits a REX
// prefix only when `to`/`from` require one -- confirm.
RexRB(0, to, from);
// Three opcode bytes packed into one constant; assumes write24 stores the
// low byte first, giving 0F 3A 0F -- confirm against write24.
write24(0x0F3A0F);
// Register-direct operands (first argument 3 is presumably the mod field).
ModRM(3, to, from);
// The immediate byte count must follow the ModRM byte.
write8(imm8);
}
// SSE4.1 // SSE4.1
__forceinline void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) __forceinline void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
@ -1224,6 +1266,14 @@ __forceinline void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from)
write32(MEMADDR(from, 4)); write32(MEMADDR(from, 4));
} }
// Emits PMULDQ xmm(to), xmm(from) (SSE4.1, encoding 66 0F 38 28 /r):
// signed multiply of dwords 0 and 2 of each operand, producing two signed
// 64-bit products in `to`. Register-to-register form.
__forceinline void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
// Mandatory 0x66 prefix selecting the XMM form of the instruction.
write8(0x66);
// NOTE(review): RexRB is defined outside this view; presumably it emits a REX
// prefix only when `to`/`from` require one -- confirm.
RexRB(0, to, from);
// Three opcode bytes packed into one constant; assumes write24 stores the
// low byte first, giving 0F 38 28 -- confirm against write24.
write24(0x28380F);
// Register-direct operands (first argument 3 is presumably the mod field).
ModRM(3, to, from);
}
// SSE-X // SSE-X
__forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ) __forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
{ {