Removed support for CPUs without SSE2. Now I know there's the occasional AthlonXP user out there... sorry. The SSE2 (and SSE1 too!) checks were very frequent and could've hurt performance a bit.

Note: This move was discussed with the team, we all support it.
Note 2: I tried very hard to avoid mistakes, but due to the many changes I could've overlooked something (especially in iR5900tables.c).
I'd appreciate it if someone could look over the changes a bit ;)

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@281 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
ramapcsx2 2008-11-03 14:10:45 +00:00 committed by Gregory Hainaut
parent 1fbeeeb678
commit b70563a9fa
17 changed files with 169 additions and 406 deletions

View File

@ -1497,31 +1497,6 @@ void ipu_csc(struct macroblock_8 *mb8, struct macroblock_rgb32 *rgb32, int sgn){
convert_init.start(convert_init.id, (u8*)rgb32, CONVERT_FRAME);
convert_init.copy(convert_init.id, (u8*)mb8->Y, (u8*)mb8->Cr, (u8*)mb8->Cb, 0);
// do alpha processing
// if( cpucaps.hasStreamingSIMD2Extensions ) {
// int i;
// u8* p = (u8*)rgb32;
//
// __asm {
// movaps xmm6, s_thresh
// pshufd xmm7, xmm6, 0xee
// pshufd xmm6, xmm6, 0x44
// pxor xmm5, xmm5
// }
//
// for(i = 0; i < 64; i += 4, p += 64) {
// // process 2 qws at a time
// __asm {
// // extract 8 dwords
// mov edi, p
// movaps xmm0, qword ptr [edi]
// movaps xmm1, qword ptr [edi+16]
// movaps xmm2, qword ptr [edi+32]
// movaps xmm3, qword ptr [edi+48]
//
//
// }
// fixes suikoden5
if( s_thresh[0] > 0 ) {
for(i = 0; i < 64*4; i++, p += 4) {
if( p[0] < s_thresh[0] && p[1] < s_thresh[0] && p[2] < s_thresh[0] )

View File

@ -546,7 +546,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
#if !defined(PCSX2_NORECBUILD)
if( size >= ft->gsize && !(v->addr&0xf) && cpucaps.hasStreamingSIMD2Extensions) {
if( size >= ft->gsize && !(v->addr&0xf)) {
const UNPACKPARTFUNCTYPESSE* pfn;
int writemask;
//static LARGE_INTEGER lbase, lfinal;

View File

@ -267,7 +267,7 @@ int _checkXMMreg(int type, int reg, int mode)
SSEX_MOVDQA_M128_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
}
else if (mode & MODE_READHALF) {
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[i] == XMMT_INT )
if( g_xmmtypes[i] == XMMT_INT )
SSE2_MOVQ_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
else
SSE_MOVLPS_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
@ -446,31 +446,25 @@ int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode)
#ifndef __x86_64__
if( (mmxreg = _checkMMXreg(MMX_GPR+gprreg, 0)) >= 0 ) {
// transfer
if (cpucaps.hasStreamingSIMD2Extensions ) {
// transfer
SetMMXstate();
SSE2_MOVQ2DQ_MM_to_XMM(xmmreg, mmxreg);
SSE2_PUNPCKLQDQ_XMM_to_XMM(xmmreg, xmmreg);
SSE2_PUNPCKHQDQ_M128_to_XMM(xmmreg, (u32)&cpuRegs.GPR.r[gprreg].UL[0]);
SetMMXstate();
SSE2_MOVQ2DQ_MM_to_XMM(xmmreg, mmxreg);
SSE2_PUNPCKLQDQ_XMM_to_XMM(xmmreg, xmmreg);
SSE2_PUNPCKHQDQ_M128_to_XMM(xmmreg, (u32)&cpuRegs.GPR.r[gprreg].UL[0]);
if( mmxregs[mmxreg].mode & MODE_WRITE ) {
if( mmxregs[mmxreg].mode & MODE_WRITE ) {
// instead of setting to write, just flush to mem
if( !(mode & MODE_WRITE) ) {
SetMMXstate();
MOVQRtoM((u32)&cpuRegs.GPR.r[gprreg].UL[0], mmxreg);
}
//xmmregs[xmmreg].mode |= MODE_WRITE;
// instead of setting to write, just flush to mem
if( !(mode & MODE_WRITE) ) {
SetMMXstate();
MOVQRtoM((u32)&cpuRegs.GPR.r[gprreg].UL[0], mmxreg);
}
//xmmregs[xmmreg].mode |= MODE_WRITE;
}
// don't flush
mmxregs[mmxreg].inuse = 0;
}
else {
_freeMMXreg(mmxreg);
SSEX_MOVDQA_M128_to_XMM(xmmreg, (u32)&cpuRegs.GPR.r[gprreg].UL[0]);
}
// don't flush
mmxregs[mmxreg].inuse = 0;
}
#else
if( (mmxreg = _checkX86reg(X86TYPE_GPR, gprreg, 0)) >= 0 ) {
@ -1131,49 +1125,36 @@ int _signExtendXMMtoM(u32 to, x86SSERegType from, int candestroy)
int t0reg;
g_xmmtypes[from] = XMMT_INT;
if( candestroy ) {
if( g_xmmtypes[from] == XMMT_FPS || !cpucaps.hasStreamingSIMD2Extensions ) SSE_MOVSS_XMM_to_M32(to, from);
if( g_xmmtypes[from] == XMMT_FPS ) SSE_MOVSS_XMM_to_M32(to, from);
else SSE2_MOVD_XMM_to_M32(to, from);
if( cpucaps.hasStreamingSIMD2Extensions ) {
SSE2_PSRAD_I8_to_XMM(from, 31);
SSE2_MOVD_XMM_to_M32(to+4, from);
return 1;
}
else {
SSE_MOVSS_XMM_to_M32(to+4, from);
SAR32ItoM(to+4, 31);
return 0;
}
SSE2_PSRAD_I8_to_XMM(from, 31);
SSE2_MOVD_XMM_to_M32(to+4, from);
return 1;
}
else {
// can't destroy and type is int
assert( g_xmmtypes[from] == XMMT_INT );
if( cpucaps.hasStreamingSIMD2Extensions ) {
if( _hasFreeXMMreg() ) {
xmmregs[from].needed = 1;
t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(t0reg, from);
SSE2_PSRAD_I8_to_XMM(from, 31);
SSE2_MOVD_XMM_to_M32(to, t0reg);
SSE2_MOVD_XMM_to_M32(to+4, from);
if( _hasFreeXMMreg() ) {
xmmregs[from].needed = 1;
t0reg = _allocTempXMMreg(XMMT_INT, -1);
SSEX_MOVDQA_XMM_to_XMM(t0reg, from);
SSE2_PSRAD_I8_to_XMM(from, 31);
SSE2_MOVD_XMM_to_M32(to, t0reg);
SSE2_MOVD_XMM_to_M32(to+4, from);
// swap xmm regs.. don't ask
xmmregs[t0reg] = xmmregs[from];
xmmregs[from].inuse = 0;
}
else {
SSE2_MOVD_XMM_to_M32(to+4, from);
SSE2_MOVD_XMM_to_M32(to, from);
SAR32ItoM(to+4, 31);
}
// swap xmm regs.. don't ask
xmmregs[t0reg] = xmmregs[from];
xmmregs[from].inuse = 0;
}
else {
SSE_MOVSS_XMM_to_M32(to+4, from);
SSE_MOVSS_XMM_to_M32(to, from);
SSE2_MOVD_XMM_to_M32(to+4, from);
SSE2_MOVD_XMM_to_M32(to, from);
SAR32ItoM(to+4, 31);
}
return 0;
}

View File

@ -113,12 +113,6 @@ void LoadCW( void ) {
}
void recCOP1_S( void ) {
#ifndef __x86_64__
if( !cpucaps.hasStreamingSIMD2Extensions ) { // fixme - Not sure if this is needed anymore... (cottonvibes)
_freeMMXreg(6);
_freeMMXreg(7);
}
#endif
recCP1S[ _Funct_ ]( );
}
@ -332,22 +326,16 @@ void recMTC1(void)
#ifndef __x86_64__
else if( (mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ)) >= 0 ) {
if( cpucaps.hasStreamingSIMD2Extensions ) {
int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
if( mmreg2 >= 0 ) {
SetMMXstate();
SSE2_MOVQ2DQ_MM_to_XMM(mmreg2, mmreg);
}
else {
SetMMXstate();
MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg);
}
int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
if( mmreg2 >= 0 ) {
SetMMXstate();
SSE2_MOVQ2DQ_MM_to_XMM(mmreg2, mmreg);
}
else {
_deleteFPtoXMMreg(_Fs_, 0);
SetMMXstate();
MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg);
}
}
}
#endif
else {
@ -801,21 +789,7 @@ void recCVT_S_xmm(int info)
SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
}
else {
if( cpucaps.hasStreamingSIMD2Extensions ) {
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_D, EEREC_S);
}
else {
if( info&PROCESS_EE_MODEWRITES ) {
if( xmmregs[EEREC_S].reg == _Fs_ )
_deleteFPtoXMMreg(_Fs_, 1);
else {
// force sync
SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[_Fs_], EEREC_S);
}
}
SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
xmmregs[EEREC_D].mode |= MODE_WRITE; // in the case that _Fs_ == _Fd_
}
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_D, EEREC_S);
}
}

View File

@ -685,8 +685,7 @@ void recPPACW()
{
if ( ! _Rd_ ) return;
CPU_SSE_XMMCACHE_START(((_Rs_!=0||!cpucaps.hasStreamingSIMD2Extensions)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED)
if( cpucaps.hasStreamingSIMD2Extensions ) {
CPU_SSE_XMMCACHE_START(((_Rs_!=0)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED)
if( _Rs_ == 0 ) {
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 8);
@ -709,17 +708,6 @@ CPU_SSE_XMMCACHE_START(((_Rs_!=0||!cpucaps.hasStreamingSIMD2Extensions)?XMMINFO_
xmmregs[EEREC_D].inuse = 0;
}
}
}
else {
if( EEREC_D != EEREC_S ) {
if( EEREC_D != EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_S, 0x88 );
}
else {
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_T, 0x88 );
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_D, 0x4e);
}
}
CPU_SSE_XMMCACHE_END
_flushCachedRegs();
@ -755,14 +743,10 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED)
SSE2_PSHUFHW_XMM_to_XMM(t0reg, t0reg, 0x88);
SSE2_PSHUFHW_XMM_to_XMM(EEREC_D, EEREC_D, 0x88);
if( cpucaps.hasStreamingSIMD2Extensions ) {
SSE2_PSRLDQ_I8_to_XMM(t0reg, 4);
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 4);
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, t0reg);
}
else {
SSE_SHUFPS_XMM_to_XMM(EEREC_D, t0reg, 0x88);
}
SSE2_PSRLDQ_I8_to_XMM(t0reg, 4);
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 4);
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, t0reg);
_freeXMMreg(t0reg);
}
CPU_SSE_XMMCACHE_END
@ -2791,8 +2775,7 @@ void recPCPYLD( void )
{
if ( ! _Rd_ ) return;
CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|((cpucaps.hasStreamingSIMD2Extensions&&_Rs_==0)?0:XMMINFO_READS)|XMMINFO_READT)
if( cpucaps.hasStreamingSIMD2Extensions ) {
CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|(( _Rs_== 0) ? 0:XMMINFO_READS)|XMMINFO_READT)
if( _Rs_ == 0 ) {
SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T);
}
@ -2808,18 +2791,6 @@ CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|((cpucaps.hasStreamingSIMD2Extensions&&_Rs
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_S);
}
}
}
else {
if( EEREC_D == EEREC_T ) SSE_MOVLHPS_XMM_to_XMM(EEREC_D, EEREC_S);
else if( EEREC_D == EEREC_S ) {
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_T, 0x44);
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x4e);
}
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_S, 0x44);
}
}
CPU_SSE_XMMCACHE_END
_flushCachedRegs();
@ -3232,9 +3203,8 @@ void recPCPYUD( void )
{
if ( ! _Rd_ ) return;
CPU_SSE_XMMCACHE_START(XMMINFO_READS|((cpucaps.hasStreamingSIMD2Extensions&&_Rs_==0)?0:XMMINFO_READT)|XMMINFO_WRITED)
CPU_SSE_XMMCACHE_START(XMMINFO_READS|(( _Rs_ == 0) ? 0:XMMINFO_READT)|XMMINFO_WRITED)
if( cpucaps.hasStreamingSIMD2Extensions ) {
if( _Rt_ == 0 ) {
if( EEREC_D == EEREC_S ) {
SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_S);
@ -3262,19 +3232,6 @@ CPU_SSE_XMMCACHE_START(XMMINFO_READS|((cpucaps.hasStreamingSIMD2Extensions&&_Rs_
}
}
}
}
else {
if( EEREC_D == EEREC_S ) {
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_T, 0xee);
}
else if( EEREC_D == EEREC_T ) {
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_S);
}
else {
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_S);
}
}
CPU_SSE_XMMCACHE_END
_flushCachedRegs();

View File

@ -224,7 +224,6 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode
int info = eeRecompileCodeXMM(xmminfo); \
#define CPU_SSE2_XMMCACHE_START(xmminfo) \
if (cpucaps.hasStreamingSIMD2Extensions) \
{ \
int info = eeRecompileCodeXMM(xmminfo); \

View File

@ -3564,13 +3564,11 @@ void recVUMI_FTOI0(VURegs *VU, int info)
if ( _Ft_ == 0 ) return;
if (_X_Y_Z_W != 0xf) {
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
}
else {
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
}
}
@ -3581,16 +3579,14 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
if (_X_Y_Z_W != 0xf) {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
}
else {
if (EEREC_T != EEREC_S) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
}
}
@ -3603,27 +3599,15 @@ void recVUMI_ITOF0( VURegs *VU, int info )
if ( _Ft_ == 0 ) return;
if (_X_Y_Z_W != 0xf) {
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
else {
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_TEMP, VU_VFx_ADDR( _Fs_ ));
}
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
xmmregs[EEREC_T].mode |= MODE_WRITE;
vuFloat2(EEREC_T, EEREC_TEMP, _X_Y_Z_W); // Clamp infinities
}
else {
if( cpucaps.hasStreamingSIMD2Extensions ) {
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
}
else {
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_T, VU_VFx_ADDR( _Fs_ ));
xmmregs[EEREC_T].mode |= MODE_WRITE;
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
}
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
}
}
@ -3632,24 +3616,14 @@ void recVUMI_ITOFX(VURegs *VU, int addr, int info)
if ( _Ft_ == 0 ) return;
if (_X_Y_Z_W != 0xf) {
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
else {
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_TEMP, VU_VFx_ADDR( _Fs_ ));
}
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
xmmregs[EEREC_T].mode |= MODE_WRITE;
vuFloat2(EEREC_T, EEREC_TEMP, _X_Y_Z_W); // Clamp infinities
} else {
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
else {
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_T, VU_VFx_ADDR( _Fs_ ));
xmmregs[EEREC_T].mode |= MODE_WRITE;
}
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
}
@ -4166,38 +4140,22 @@ void recVUMI_MFIR( VURegs *VU, int info )
_deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Fs_, 1);
if( cpucaps.hasStreamingSIMD2Extensions ) {
if( _XYZW_SS ) {
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
_vuFlipRegSS(VU, EEREC_T);
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
_vuFlipRegSS(VU, EEREC_T);
}
else if (_X_Y_Z_W != 0xf) {
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
} else {
SSE2_MOVD_M32_to_XMM(EEREC_T, VU_VI_ADDR(_Fs_, 1)-2);
SSE2_PSRAD_I8_to_XMM(EEREC_T, 16);
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
}
if( _XYZW_SS ) {
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
_vuFlipRegSS(VU, EEREC_T);
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
_vuFlipRegSS(VU, EEREC_T);
}
else {
MOVSX32M16toR(EAX, VU_VI_ADDR(_Fs_, 1));
MOV32RtoM((uptr)&s_temp, EAX);
if( _X_Y_Z_W != 0xf ) {
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&s_temp);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
}
else {
SSE_MOVSS_M32_to_XMM(EEREC_T, (uptr)&s_temp);
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
}
else if (_X_Y_Z_W != 0xf) {
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
} else {
SSE2_MOVD_M32_to_XMM(EEREC_T, VU_VI_ADDR(_Fs_, 1)-2);
SSE2_PSRAD_I8_to_XMM(EEREC_T, 16);
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
}
}

View File

@ -78,7 +78,6 @@ void SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{
u32 i;
u32 prev = 0;
if( !cpucaps.hasStreamingSIMD2Extensions ) return;
FreezeXMMRegs(1);
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
@ -116,7 +115,6 @@ void SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{
u32 i;
u32 prev = 0;
if( !cpucaps.hasStreamingSIMD2Extensions ) return;
FreezeXMMRegs(1);
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {

View File

@ -459,7 +459,7 @@ void rpropBSC(EEINST* prev, EEINST* pinst)
case 5: // bne
rpropSetRead(_Rs_, EEINST_LIVE1);
rpropSetRead(_Rt_, EEINST_LIVE1);
pinst->info |= (cpucaps.hasStreamingSIMD2Extensions?(EEINST_REALXMM|EEINST_MMX):0);
pinst->info |= EEINST_REALXMM|EEINST_MMX;
break;
case 20: // beql
@ -469,7 +469,7 @@ void rpropBSC(EEINST* prev, EEINST* pinst)
prev->info = 0;
rpropSetRead(_Rs_, EEINST_LIVE1);
rpropSetRead(_Rt_, EEINST_LIVE1);
pinst->info |= (cpucaps.hasStreamingSIMD2Extensions?(EEINST_REALXMM|EEINST_MMX):0);
pinst->info |= EEINST_REALXMM|EEINST_MMX;
break;
case 6: // blez
@ -488,7 +488,7 @@ void rpropBSC(EEINST* prev, EEINST* pinst)
case 24: // daddi
case 25: // daddiu
rpropSetWrite(_Rt_, EEINST_LIVE1);
rpropSetRead(_Rs_, EEINST_LIVE1|((_Rs_!=0&&cpucaps.hasStreamingSIMD2Extensions)?EEINST_MMX:0));
rpropSetRead(_Rs_, EEINST_LIVE1 | (_Rs_!=0 && EEINST_MMX));
break;
case 8: // addi
@ -682,7 +682,7 @@ void rpropSPECIAL(EEINST* prev, EEINST* pinst)
case 24: // mult
// can do unsigned mult only if HI isn't used
//temp = (pinst->regs[XMMGPR_HI]&(EEINST_LIVE0|EEINST_LIVE1))?0:(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0);
//temp = (pinst->regs[XMMGPR_HI]&(EEINST_LIVE0|EEINST_LIVE1))?0:EEINST_MMX;
temp = 0;
rpropSetWrite(XMMGPR_LO, EEINST_LIVE1);
rpropSetWrite(XMMGPR_HI, EEINST_LIVE1);
@ -697,9 +697,9 @@ void rpropSPECIAL(EEINST* prev, EEINST* pinst)
rpropSetWrite(XMMGPR_LO, EEINST_LIVE1);
rpropSetWrite(XMMGPR_HI, EEINST_LIVE1);
rpropSetWrite(_Rd_, EEINST_LIVE1);
rpropSetRead(_Rs_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
rpropSetRead(_Rt_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
if( cpucaps.hasStreamingSIMD2Extensions ) pinst->info |= EEINST_MMX;
rpropSetRead(_Rs_, EEINST_MMX);
rpropSetRead(_Rt_, EEINST_MMX);
pinst->info |= EEINST_MMX;
break;
case 26: // div
@ -763,10 +763,10 @@ void rpropSPECIAL(EEINST* prev, EEINST* pinst)
rpropSetRead(_Rt_, (pinst->regs[_Rd_]&EEINST_LIVE1));
}
else {
rpropSetRead(_Rs_, (pinst->regs[_Rd_]&EEINST_LIVE1)|(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
rpropSetRead(_Rt_, (pinst->regs[_Rd_]&EEINST_LIVE1)|(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
rpropSetRead(_Rs_, (pinst->regs[_Rd_]&EEINST_LIVE1)|EEINST_MMX);
rpropSetRead(_Rt_, (pinst->regs[_Rd_]&EEINST_LIVE1)|EEINST_MMX);
}
if( cpucaps.hasStreamingSIMD2Extensions ) pinst->info |= EEINST_MMX;
pinst->info |= EEINST_MMX;
break;
// traps
@ -781,13 +781,13 @@ void rpropSPECIAL(EEINST* prev, EEINST* pinst)
case 62: // dsrl32
case 63: // dsra32
rpropSetWrite(_Rd_, EEINST_LIVE1);
rpropSetRead(_Rt_, EEINST_LIVE1|(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
rpropSetRead(_Rt_, EEINST_LIVE1|EEINST_MMX);
pinst->info |= EEINST_MMX;
break;
case 60: // dsll32
rpropSetWrite(_Rd_, EEINST_LIVE1);
rpropSetRead(_Rt_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
rpropSetRead(_Rt_, EEINST_MMX);
pinst->info |= EEINST_MMX;
break;
@ -1060,7 +1060,7 @@ void rpropMMI(EEINST* prev, EEINST* pinst)
break;
case 24: // mult1
temp = (pinst->regs[XMMGPR_HI]&(EEINST_LIVE2))?0:(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0);
temp = (pinst->regs[XMMGPR_HI]&(EEINST_LIVE2))?0:EEINST_MMX;
rpropSetWrite0(XMMGPR_LO, EEINST_LIVE2, 0);
rpropSetWrite0(XMMGPR_HI, EEINST_LIVE2, 0);
rpropSetWrite(_Rd_, EEINST_LIVE1);
@ -1072,9 +1072,9 @@ void rpropMMI(EEINST* prev, EEINST* pinst)
rpropSetWrite0(XMMGPR_LO, EEINST_LIVE2, 0);
rpropSetWrite0(XMMGPR_HI, EEINST_LIVE2, 0);
rpropSetWrite(_Rd_, EEINST_LIVE1);
rpropSetRead(_Rs_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
rpropSetRead(_Rt_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
if( cpucaps.hasStreamingSIMD2Extensions ) pinst->info |= EEINST_MMX;
rpropSetRead(_Rs_, EEINST_MMX);
rpropSetRead(_Rt_, EEINST_MMX);
pinst->info |= EEINST_MMX;
break;
case 26: // div1

View File

@ -515,28 +515,15 @@ int _allocMMXreg(int mmxreg, int reg, int mode)
else {
int xmmreg;
if( MMX_ISGPR(reg) && (xmmreg = _checkXMMreg(XMMTYPE_GPRREG, reg-MMX_GPR, 0)) >= 0 ) {
if (cpucaps.hasStreamingSIMD2Extensions) {
SSE_MOVHPS_XMM_to_M64((u32)_MMXGetAddr(reg)+8, xmmreg);
if( mode & MODE_READ )
SSE2_MOVDQ2Q_XMM_to_MM(mmxreg, xmmreg);
SSE_MOVHPS_XMM_to_M64((u32)_MMXGetAddr(reg)+8, xmmreg);
if( mode & MODE_READ )
SSE2_MOVDQ2Q_XMM_to_MM(mmxreg, xmmreg);
if( xmmregs[xmmreg].mode & MODE_WRITE )
mmxregs[mmxreg].mode |= MODE_WRITE;
if( xmmregs[xmmreg].mode & MODE_WRITE )
mmxregs[mmxreg].mode |= MODE_WRITE;
// don't flush
xmmregs[xmmreg].inuse = 0;
}
else {
_freeXMMreg(xmmreg);
if( (mode & MODE_READHALF) || (MMX_IS32BITS(reg)&&(mode&MODE_READ)) ) {
MOVDMtoMMX(mmxreg, (u32)_MMXGetAddr(reg));
}
else if( mode & MODE_READ ) {
MOVQMtoR(mmxreg, (u32)_MMXGetAddr(reg));
}
}
// don't flush
xmmregs[xmmreg].inuse = 0;
}
else {
if( MMX_ISGPR(reg) ) {

View File

@ -1575,6 +1575,11 @@ int recInit( void )
SysMessage( _( "Processor doesn't supports MMX, can't run recompiler without that" ) );
return -1;
}
if ( !( cpucaps.hasStreamingSIMD2Extensions ) )
{
SysMessage( _( "Processor doesn't supports SSE2, can't run recompiler without that" ) );
return -1;
}
x86FpuState = FPU_STATE;

View File

@ -254,7 +254,6 @@ void recDADD_constv(int info, int creg, int vreg)
if( info & PROCESS_EE_MMX ) {
int mmreg = vreg == _Rt_ ? EEREC_T : EEREC_S;
assert( cpucaps.hasStreamingSIMD2Extensions );
if( g_cpuConstRegs[ creg ].UD[0] ) {
@ -267,7 +266,7 @@ void recDADD_constv(int info, int creg, int vreg)
}
else {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
SetMMXstate();
@ -337,7 +336,6 @@ void recDADD_(int info)
assert( !(info&PROCESS_EE_XMM) );
if( info & PROCESS_EE_MMX ) {
assert( cpucaps.hasStreamingSIMD2Extensions );
if( EEREC_D == EEREC_S ) PADDQRtoR(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) PADDQRtoR(EEREC_D, EEREC_S);
@ -347,7 +345,7 @@ void recDADD_(int info)
}
}
else {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
@ -648,7 +646,6 @@ void recDSUB_consts(int info)
assert( !(info&PROCESS_EE_XMM) );
if( info & PROCESS_EE_MMX ) {
assert( cpucaps.hasStreamingSIMD2Extensions );
if( g_cpuConstRegs[ _Rs_ ].UD[0] ) {
@ -685,7 +682,7 @@ void recDSUB_consts(int info)
}
}
else {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
SetMMXstate();
MOVQMtoR(mmreg, (u32)_eeGetConstReg(_Rs_));
@ -751,7 +748,6 @@ void recDSUB_constt(int info)
assert( !(info&PROCESS_EE_XMM) );
if( info & PROCESS_EE_MMX ) {
assert( cpucaps.hasStreamingSIMD2Extensions );
if( g_cpuConstRegs[ _Rt_ ].UD[0] ) {
@ -763,7 +759,7 @@ void recDSUB_constt(int info)
}
}
else {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
SetMMXstate();
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
@ -807,7 +803,6 @@ void recDSUB_(int info)
assert( !(info&PROCESS_EE_XMM) );
if( info & PROCESS_EE_MMX ) {
assert( cpucaps.hasStreamingSIMD2Extensions );
if( EEREC_D == EEREC_S ) PSUBQRtoR(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) {
@ -825,7 +820,7 @@ void recDSUB_(int info)
}
}
else {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
SetMMXstate();
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[_Rs_].UL[ 0 ]);

View File

@ -156,7 +156,6 @@ void recDADDI_(int info)
assert( !(info&PROCESS_EE_XMM) );
if( info & PROCESS_EE_MMX ) {
assert( cpucaps.hasStreamingSIMD2Extensions );
if( _Imm_ != 0 ) {
@ -173,7 +172,7 @@ void recDADDI_(int info)
return;
}
if( (g_pCurInstInfo->regs[_Rt_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
if( (g_pCurInstInfo->regs[_Rt_]&EEINST_MMX) ) {
int rtreg;
u32* ptempmem = recAllocStackMem(8, 8);
ptempmem[0] = _Imm_;

View File

@ -85,7 +85,7 @@ void _eeOnLoadWrite(int reg)
if( regt >= 0 ) {
if( xmmregs[regt].mode & MODE_WRITE ) {
if( cpucaps.hasStreamingSIMD2Extensions && (reg != _Rs_) ) {
if( reg != _Rs_ ) {
SSE2_PUNPCKHQDQ_XMM_to_XMM(regt, regt);
SSE2_MOVQ_XMM_to_M64((u32)&cpuRegs.GPR.r[reg].UL[2], regt);
}

View File

@ -101,12 +101,7 @@ void recMFHILO(int hi)
xmmregs[regd].inuse = 0;
if( cpucaps.hasStreamingSIMD2Extensions ) {
SSE2_MOVQ_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[0], reghi);
}
else {
SSE_MOVLPS_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[0], reghi);
}
SSE2_MOVQ_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[0], reghi);
if( xmmregs[regd].mode & MODE_WRITE ) {
SSE_MOVHPS_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[2], regd);
@ -199,20 +194,15 @@ void recMTHILO(int hi)
if( regs >= 0 ) {
assert( reghi != regs );
if( cpucaps.hasStreamingSIMD2Extensions ) {
_deleteGPRtoXMMreg(_Rs_, 0);
SSE2_PUNPCKHQDQ_XMM_to_XMM(reghi, reghi);
SSE2_PUNPCKLQDQ_XMM_to_XMM(regs, reghi);
_deleteGPRtoXMMreg(_Rs_, 0);
SSE2_PUNPCKHQDQ_XMM_to_XMM(reghi, reghi);
SSE2_PUNPCKLQDQ_XMM_to_XMM(regs, reghi);
// swap regs
xmmregs[regs] = xmmregs[reghi];
xmmregs[reghi].inuse = 0;
xmmregs[regs].mode |= MODE_WRITE;
}
else {
SSE2EMU_MOVSD_XMM_to_XMM(reghi, regs);
xmmregs[reghi].mode |= MODE_WRITE;
}
// swap regs
xmmregs[regs] = xmmregs[reghi];
xmmregs[reghi].inuse = 0;
xmmregs[regs].mode |= MODE_WRITE;
}
else {
regs = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ);
@ -336,13 +326,8 @@ void recMFHILO1(int hi)
else {
if( regd >= 0 ) {
if( EEINST_ISLIVE2(_Rd_) ) {
if( cpucaps.hasStreamingSIMD2Extensions ) {
SSE2_PUNPCKHQDQ_M128_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 0 ] : (int)&cpuRegs.LO.UD[ 0 ]);
SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0x4e);
}
else {
SSE_MOVLPS_M64_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 1 ] : (int)&cpuRegs.LO.UD[ 1 ]);
}
SSE2_PUNPCKHQDQ_M128_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 0 ] : (int)&cpuRegs.LO.UD[ 0 ]);
SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0x4e);
}
else {
SSE2_MOVQ_M64_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 1 ] : (int)&cpuRegs.LO.UD[ 1 ]);
@ -381,14 +366,11 @@ void recMTHILO1(int hi)
if( reghi >= 0 ) {
if( regs >= 0 ) {
if( cpucaps.hasStreamingSIMD2Extensions ) SSE2_PUNPCKLQDQ_XMM_to_XMM(reghi, regs);
else SSE_MOVLHPS_XMM_to_XMM(reghi, regs);
SSE2_PUNPCKLQDQ_XMM_to_XMM(reghi, regs);
}
else {
_deleteEEreg(_Rs_, 1);
if( cpucaps.hasStreamingSIMD2Extensions ) SSE2_PUNPCKLQDQ_M128_to_XMM(reghi, (int)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ]);
else SSE_MOVHPS_M64_to_XMM(reghi, (int)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ]);
SSE2_PUNPCKLQDQ_M128_to_XMM(reghi, (int)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ]);
}
}
else {

View File

@ -440,7 +440,7 @@ void recMULTUsuper(int info, int upper, int process)
EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_);
if( (info & PROCESS_EE_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
if( (info & PROCESS_EE_MMX) ) {
if( !_Rd_ ) {
// need some temp reg

View File

@ -281,53 +281,35 @@ _inline void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SS
_inline void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVSD_XMM_to_XMM(to, from);
else {
write8(0xf2);
SSERtoR( 0x100f);
}
write8(0xf2);
SSERtoR( 0x100f);
}
_inline void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_M64_to_XMM(to, from);
else {
write8(0xf3); SSEMtoR( 0x7e0f, 0);
}
write8(0xf3); SSEMtoR( 0x7e0f, 0);
}
_inline void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_XMM_to_XMM(to, from);
else {
write8(0xf3); SSERtoR( 0x7e0f);
}
write8(0xf3); SSERtoR( 0x7e0f);
}
_inline void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE_MOVLPS_XMM_to_M64(to, from);
else {
SSERtoM66(0xd60f);
}
SSERtoM66(0xd60f);
}
#ifndef __x86_64__
_inline void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
{
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVDQ2Q_XMM_to_MM(to, from);
else {
write8(0xf2);
SSERtoR( 0xd60f);
}
write8(0xf2);
SSERtoR( 0xd60f);
}
_inline void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
{
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ2DQ_MM_to_XMM(to, from);
else {
write8(0xf3);
SSERtoR( 0xd60f);
}
write8(0xf3);
SSERtoR( 0xd60f);
}
#endif
@ -710,13 +692,8 @@ _inline void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, i
//**********************************************************************************
_inline void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) {
SSE2EMU_PSHUFD_XMM_to_XMM(to, from, imm8);
}
else {
SSERtoR66( 0x700F );
write8( imm8 );
}
SSERtoR66( 0x700F );
write8( imm8 );
}
_inline void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); }
@ -803,22 +780,12 @@ _inline void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSE
_inline void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); }
_inline void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) {
SSE_CMPEQPS_XMM_to_XMM(to, from);
}
else {
SSERtoR66( 0x760F );
}
SSERtoR66( 0x760F );
}
_inline void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) {
SSE_CMPEQPS_M128_to_XMM(to, from);
}
else {
SSEMtoR66( 0x760F );
}
SSEMtoR66( 0x760F );
}
////////////////////////////////////////////////////////////////////////////////////////////
@ -848,12 +815,7 @@ _inline void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x
_inline void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); }
_inline void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) {
SSE2EMU_MOVD_R_to_XMM(to, from);
}
else {
SSERtoR66(0x6E0F);
}
SSERtoR66(0x6E0F);
}
_inline void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from )
@ -873,13 +835,9 @@ _inline void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, in
}
_inline void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); }
_inline void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) {
if( !cpucaps.hasStreamingSIMD2Extensions ) {
SSE2EMU_MOVD_XMM_to_R(to, from);
}
else {
_SSERtoR66(0x7E0F);
}
_inline void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from )
{
_SSERtoR66(0x7E0F);
}
_inline void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
@ -892,15 +850,10 @@ _inline void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
_inline void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
if( !cpucaps.hasStreamingSIMD2Extensions ) {
SSE2EMU_MOVD_XMM_to_RmOffset(to, from, offset);
}
else {
write8(0x66);
RexRB(0, from, to);
write16( 0x7e0f );
WriteRmOffsetFrom(from, to, offset);
}
write8(0x66);
RexRB(0, from, to);
write16( 0x7e0f );
WriteRmOffsetFrom(from, to, offset);
}
#ifdef __x86_64__
@ -1237,157 +1190,157 @@ _inline void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from)
// SSE-X
_inline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
else SSE_MOVAPS_M128_to_XMM(to, from);
}
_inline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
else SSE_MOVAPS_XMM_to_M128(to, from);
}
_inline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
else SSE_MOVAPS_XMM_to_XMM(to, from);
}
_inline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
else SSE_MOVAPSRmtoROffset(to, from, offset);
}
_inline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
else SSE_MOVAPSRtoRmOffset(to, from, offset);
}
_inline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
else SSE_MOVAPS_M128_to_XMM(to, from);
}
_inline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
else SSE_MOVAPS_XMM_to_M128(to, from);
}
_inline void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
else SSE_MOVAPS_XMM_to_XMM(to, from);
}
_inline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
else SSE_MOVSS_M32_to_XMM(to, from);
}
_inline void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
else SSE_MOVSS_XMM_to_M32(to, from);
}
_inline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
else SSE_MOVSS_XMM_to_Rm(to, from);
}
_inline void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
else SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
}
_inline void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
else SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
}
_inline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
else SSE_ORPS_M128_to_XMM(to, from);
}
_inline void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
else SSE_ORPS_XMM_to_XMM(to, from);
}
_inline void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
else SSE_XORPS_M128_to_XMM(to, from);
}
_inline void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
else SSE_XORPS_XMM_to_XMM(to, from);
}
_inline void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
else SSE_ANDPS_M128_to_XMM(to, from);
}
_inline void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
else SSE_ANDPS_XMM_to_XMM(to, from);
}
_inline void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
else SSE_ANDNPS_M128_to_XMM(to, from);
}
_inline void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from);
else SSE_ANDNPS_XMM_to_XMM(to, from);
}
_inline void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
else SSE_UNPCKLPS_M128_to_XMM(to, from);
}
_inline void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
else SSE_UNPCKLPS_XMM_to_XMM(to, from);
}
_inline void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
else SSE_UNPCKHPS_M128_to_XMM(to, from);
}
_inline void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
else SSE_UNPCKHPS_XMM_to_XMM(to, from);
}
_inline void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) {
if( g_xmmtypes[from] == XMMT_INT ) {
SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
}