mirror of https://github.com/PCSX2/pcsx2.git
Removed support for CPUs without SSE2. I know there's the occasional Athlon XP user out there... sorry. The SSE2 (and SSE1 too!) checks were very frequent and could've hurt performance a bit.
Note: This move was discussed with the team, and we all support it. Note 2: I tried very hard to avoid mistakes, but due to the many changes I could've overlooked something (especially in iR5900tables.c). I'd appreciate it if someone could look over the changes a bit ;) git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@281 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
1fbeeeb678
commit
b70563a9fa
|
@ -1497,31 +1497,6 @@ void ipu_csc(struct macroblock_8 *mb8, struct macroblock_rgb32 *rgb32, int sgn){
|
||||||
convert_init.start(convert_init.id, (u8*)rgb32, CONVERT_FRAME);
|
convert_init.start(convert_init.id, (u8*)rgb32, CONVERT_FRAME);
|
||||||
convert_init.copy(convert_init.id, (u8*)mb8->Y, (u8*)mb8->Cr, (u8*)mb8->Cb, 0);
|
convert_init.copy(convert_init.id, (u8*)mb8->Y, (u8*)mb8->Cr, (u8*)mb8->Cb, 0);
|
||||||
|
|
||||||
// do alpha processing
|
|
||||||
// if( cpucaps.hasStreamingSIMD2Extensions ) {
|
|
||||||
// int i;
|
|
||||||
// u8* p = (u8*)rgb32;
|
|
||||||
//
|
|
||||||
// __asm {
|
|
||||||
// movaps xmm6, s_thresh
|
|
||||||
// pshufd xmm7, xmm6, 0xee
|
|
||||||
// pshufd xmm6, xmm6, 0x44
|
|
||||||
// pxor xmm5, xmm5
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// for(i = 0; i < 64; i += 4, p += 64) {
|
|
||||||
// // process 2 qws at a time
|
|
||||||
// __asm {
|
|
||||||
// // extract 8 dwords
|
|
||||||
// mov edi, p
|
|
||||||
// movaps xmm0, qword ptr [edi]
|
|
||||||
// movaps xmm1, qword ptr [edi+16]
|
|
||||||
// movaps xmm2, qword ptr [edi+32]
|
|
||||||
// movaps xmm3, qword ptr [edi+48]
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
// fixes suikoden5
|
|
||||||
if( s_thresh[0] > 0 ) {
|
if( s_thresh[0] > 0 ) {
|
||||||
for(i = 0; i < 64*4; i++, p += 4) {
|
for(i = 0; i < 64*4; i++, p += 4) {
|
||||||
if( p[0] < s_thresh[0] && p[1] < s_thresh[0] && p[2] < s_thresh[0] )
|
if( p[0] < s_thresh[0] && p[1] < s_thresh[0] && p[2] < s_thresh[0] )
|
||||||
|
|
|
@ -546,7 +546,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
|
||||||
|
|
||||||
#if !defined(PCSX2_NORECBUILD)
|
#if !defined(PCSX2_NORECBUILD)
|
||||||
|
|
||||||
if( size >= ft->gsize && !(v->addr&0xf) && cpucaps.hasStreamingSIMD2Extensions) {
|
if( size >= ft->gsize && !(v->addr&0xf)) {
|
||||||
const UNPACKPARTFUNCTYPESSE* pfn;
|
const UNPACKPARTFUNCTYPESSE* pfn;
|
||||||
int writemask;
|
int writemask;
|
||||||
//static LARGE_INTEGER lbase, lfinal;
|
//static LARGE_INTEGER lbase, lfinal;
|
||||||
|
|
|
@ -267,7 +267,7 @@ int _checkXMMreg(int type, int reg, int mode)
|
||||||
SSEX_MOVDQA_M128_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
|
SSEX_MOVDQA_M128_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
|
||||||
}
|
}
|
||||||
else if (mode & MODE_READHALF) {
|
else if (mode & MODE_READHALF) {
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[i] == XMMT_INT )
|
if( g_xmmtypes[i] == XMMT_INT )
|
||||||
SSE2_MOVQ_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
|
SSE2_MOVQ_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
|
||||||
else
|
else
|
||||||
SSE_MOVLPS_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
|
SSE_MOVLPS_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
|
||||||
|
@ -446,31 +446,25 @@ int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode)
|
||||||
|
|
||||||
#ifndef __x86_64__
|
#ifndef __x86_64__
|
||||||
if( (mmxreg = _checkMMXreg(MMX_GPR+gprreg, 0)) >= 0 ) {
|
if( (mmxreg = _checkMMXreg(MMX_GPR+gprreg, 0)) >= 0 ) {
|
||||||
// transfer
|
// transfer
|
||||||
if (cpucaps.hasStreamingSIMD2Extensions ) {
|
|
||||||
|
|
||||||
SetMMXstate();
|
SetMMXstate();
|
||||||
SSE2_MOVQ2DQ_MM_to_XMM(xmmreg, mmxreg);
|
SSE2_MOVQ2DQ_MM_to_XMM(xmmreg, mmxreg);
|
||||||
SSE2_PUNPCKLQDQ_XMM_to_XMM(xmmreg, xmmreg);
|
SSE2_PUNPCKLQDQ_XMM_to_XMM(xmmreg, xmmreg);
|
||||||
SSE2_PUNPCKHQDQ_M128_to_XMM(xmmreg, (u32)&cpuRegs.GPR.r[gprreg].UL[0]);
|
SSE2_PUNPCKHQDQ_M128_to_XMM(xmmreg, (u32)&cpuRegs.GPR.r[gprreg].UL[0]);
|
||||||
|
|
||||||
if( mmxregs[mmxreg].mode & MODE_WRITE ) {
|
if( mmxregs[mmxreg].mode & MODE_WRITE ) {
|
||||||
|
|
||||||
// instead of setting to write, just flush to mem
|
// instead of setting to write, just flush to mem
|
||||||
if( !(mode & MODE_WRITE) ) {
|
if( !(mode & MODE_WRITE) ) {
|
||||||
SetMMXstate();
|
SetMMXstate();
|
||||||
MOVQRtoM((u32)&cpuRegs.GPR.r[gprreg].UL[0], mmxreg);
|
MOVQRtoM((u32)&cpuRegs.GPR.r[gprreg].UL[0], mmxreg);
|
||||||
}
|
|
||||||
//xmmregs[xmmreg].mode |= MODE_WRITE;
|
|
||||||
}
|
}
|
||||||
|
//xmmregs[xmmreg].mode |= MODE_WRITE;
|
||||||
|
}
|
||||||
|
|
||||||
// don't flush
|
// don't flush
|
||||||
mmxregs[mmxreg].inuse = 0;
|
mmxregs[mmxreg].inuse = 0;
|
||||||
}
|
|
||||||
else {
|
|
||||||
_freeMMXreg(mmxreg);
|
|
||||||
SSEX_MOVDQA_M128_to_XMM(xmmreg, (u32)&cpuRegs.GPR.r[gprreg].UL[0]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if( (mmxreg = _checkX86reg(X86TYPE_GPR, gprreg, 0)) >= 0 ) {
|
if( (mmxreg = _checkX86reg(X86TYPE_GPR, gprreg, 0)) >= 0 ) {
|
||||||
|
@ -1131,49 +1125,36 @@ int _signExtendXMMtoM(u32 to, x86SSERegType from, int candestroy)
|
||||||
int t0reg;
|
int t0reg;
|
||||||
g_xmmtypes[from] = XMMT_INT;
|
g_xmmtypes[from] = XMMT_INT;
|
||||||
if( candestroy ) {
|
if( candestroy ) {
|
||||||
if( g_xmmtypes[from] == XMMT_FPS || !cpucaps.hasStreamingSIMD2Extensions ) SSE_MOVSS_XMM_to_M32(to, from);
|
if( g_xmmtypes[from] == XMMT_FPS ) SSE_MOVSS_XMM_to_M32(to, from);
|
||||||
else SSE2_MOVD_XMM_to_M32(to, from);
|
else SSE2_MOVD_XMM_to_M32(to, from);
|
||||||
|
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
SSE2_PSRAD_I8_to_XMM(from, 31);
|
||||||
SSE2_PSRAD_I8_to_XMM(from, 31);
|
SSE2_MOVD_XMM_to_M32(to+4, from);
|
||||||
SSE2_MOVD_XMM_to_M32(to+4, from);
|
return 1;
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_MOVSS_XMM_to_M32(to+4, from);
|
|
||||||
SAR32ItoM(to+4, 31);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// can't destroy and type is int
|
// can't destroy and type is int
|
||||||
assert( g_xmmtypes[from] == XMMT_INT );
|
assert( g_xmmtypes[from] == XMMT_INT );
|
||||||
|
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
|
||||||
if( _hasFreeXMMreg() ) {
|
if( _hasFreeXMMreg() ) {
|
||||||
xmmregs[from].needed = 1;
|
xmmregs[from].needed = 1;
|
||||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||||
SSEX_MOVDQA_XMM_to_XMM(t0reg, from);
|
SSEX_MOVDQA_XMM_to_XMM(t0reg, from);
|
||||||
SSE2_PSRAD_I8_to_XMM(from, 31);
|
SSE2_PSRAD_I8_to_XMM(from, 31);
|
||||||
SSE2_MOVD_XMM_to_M32(to, t0reg);
|
SSE2_MOVD_XMM_to_M32(to, t0reg);
|
||||||
SSE2_MOVD_XMM_to_M32(to+4, from);
|
SSE2_MOVD_XMM_to_M32(to+4, from);
|
||||||
|
|
||||||
// swap xmm regs.. don't ask
|
// swap xmm regs.. don't ask
|
||||||
xmmregs[t0reg] = xmmregs[from];
|
xmmregs[t0reg] = xmmregs[from];
|
||||||
xmmregs[from].inuse = 0;
|
xmmregs[from].inuse = 0;
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE2_MOVD_XMM_to_M32(to+4, from);
|
|
||||||
SSE2_MOVD_XMM_to_M32(to, from);
|
|
||||||
SAR32ItoM(to+4, 31);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
SSE_MOVSS_XMM_to_M32(to+4, from);
|
SSE2_MOVD_XMM_to_M32(to+4, from);
|
||||||
SSE_MOVSS_XMM_to_M32(to, from);
|
SSE2_MOVD_XMM_to_M32(to, from);
|
||||||
SAR32ItoM(to+4, 31);
|
SAR32ItoM(to+4, 31);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -113,12 +113,6 @@ void LoadCW( void ) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void recCOP1_S( void ) {
|
void recCOP1_S( void ) {
|
||||||
#ifndef __x86_64__
|
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) { // fixme - Not sure if this is needed anymore... (cottonvibes)
|
|
||||||
_freeMMXreg(6);
|
|
||||||
_freeMMXreg(7);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
recCP1S[ _Funct_ ]( );
|
recCP1S[ _Funct_ ]( );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -332,22 +326,16 @@ void recMTC1(void)
|
||||||
#ifndef __x86_64__
|
#ifndef __x86_64__
|
||||||
else if( (mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ)) >= 0 ) {
|
else if( (mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ)) >= 0 ) {
|
||||||
|
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
|
||||||
int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
|
|
||||||
if( mmreg2 >= 0 ) {
|
if( mmreg2 >= 0 ) {
|
||||||
SetMMXstate();
|
SetMMXstate();
|
||||||
SSE2_MOVQ2DQ_MM_to_XMM(mmreg2, mmreg);
|
SSE2_MOVQ2DQ_MM_to_XMM(mmreg2, mmreg);
|
||||||
}
|
|
||||||
else {
|
|
||||||
SetMMXstate();
|
|
||||||
MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
_deleteFPtoXMMreg(_Fs_, 0);
|
|
||||||
SetMMXstate();
|
SetMMXstate();
|
||||||
MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg);
|
MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
else {
|
else {
|
||||||
|
@ -801,21 +789,7 @@ void recCVT_S_xmm(int info)
|
||||||
SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
|
SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||||
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if( info&PROCESS_EE_MODEWRITES ) {
|
|
||||||
if( xmmregs[EEREC_S].reg == _Fs_ )
|
|
||||||
_deleteFPtoXMMreg(_Fs_, 1);
|
|
||||||
else {
|
|
||||||
// force sync
|
|
||||||
SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[_Fs_], EEREC_S);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
|
|
||||||
xmmregs[EEREC_D].mode |= MODE_WRITE; // in the case that _Fs_ == _Fd_
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -685,8 +685,7 @@ void recPPACW()
|
||||||
{
|
{
|
||||||
if ( ! _Rd_ ) return;
|
if ( ! _Rd_ ) return;
|
||||||
|
|
||||||
CPU_SSE_XMMCACHE_START(((_Rs_!=0||!cpucaps.hasStreamingSIMD2Extensions)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED)
|
CPU_SSE_XMMCACHE_START(((_Rs_!=0)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED)
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
|
||||||
if( _Rs_ == 0 ) {
|
if( _Rs_ == 0 ) {
|
||||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
|
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88);
|
||||||
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 8);
|
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 8);
|
||||||
|
@ -709,17 +708,6 @@ CPU_SSE_XMMCACHE_START(((_Rs_!=0||!cpucaps.hasStreamingSIMD2Extensions)?XMMINFO_
|
||||||
xmmregs[EEREC_D].inuse = 0;
|
xmmregs[EEREC_D].inuse = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else {
|
|
||||||
if( EEREC_D != EEREC_S ) {
|
|
||||||
if( EEREC_D != EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_S, 0x88 );
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_T, 0x88 );
|
|
||||||
SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_D, 0x4e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CPU_SSE_XMMCACHE_END
|
CPU_SSE_XMMCACHE_END
|
||||||
|
|
||||||
_flushCachedRegs();
|
_flushCachedRegs();
|
||||||
|
@ -755,14 +743,10 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED)
|
||||||
SSE2_PSHUFHW_XMM_to_XMM(t0reg, t0reg, 0x88);
|
SSE2_PSHUFHW_XMM_to_XMM(t0reg, t0reg, 0x88);
|
||||||
SSE2_PSHUFHW_XMM_to_XMM(EEREC_D, EEREC_D, 0x88);
|
SSE2_PSHUFHW_XMM_to_XMM(EEREC_D, EEREC_D, 0x88);
|
||||||
|
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
SSE2_PSRLDQ_I8_to_XMM(t0reg, 4);
|
||||||
SSE2_PSRLDQ_I8_to_XMM(t0reg, 4);
|
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 4);
|
||||||
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 4);
|
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, t0reg);
|
||||||
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, t0reg);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_D, t0reg, 0x88);
|
|
||||||
}
|
|
||||||
_freeXMMreg(t0reg);
|
_freeXMMreg(t0reg);
|
||||||
}
|
}
|
||||||
CPU_SSE_XMMCACHE_END
|
CPU_SSE_XMMCACHE_END
|
||||||
|
@ -2791,8 +2775,7 @@ void recPCPYLD( void )
|
||||||
{
|
{
|
||||||
if ( ! _Rd_ ) return;
|
if ( ! _Rd_ ) return;
|
||||||
|
|
||||||
CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|((cpucaps.hasStreamingSIMD2Extensions&&_Rs_==0)?0:XMMINFO_READS)|XMMINFO_READT)
|
CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|(( _Rs_== 0) ? 0:XMMINFO_READS)|XMMINFO_READT)
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
|
||||||
if( _Rs_ == 0 ) {
|
if( _Rs_ == 0 ) {
|
||||||
SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||||
}
|
}
|
||||||
|
@ -2808,18 +2791,6 @@ CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|((cpucaps.hasStreamingSIMD2Extensions&&_Rs
|
||||||
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else {
|
|
||||||
if( EEREC_D == EEREC_T ) SSE_MOVLHPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
||||||
else if( EEREC_D == EEREC_S ) {
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_T, 0x44);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x4e);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_S, 0x44);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CPU_SSE_XMMCACHE_END
|
CPU_SSE_XMMCACHE_END
|
||||||
|
|
||||||
_flushCachedRegs();
|
_flushCachedRegs();
|
||||||
|
@ -3232,9 +3203,8 @@ void recPCPYUD( void )
|
||||||
{
|
{
|
||||||
if ( ! _Rd_ ) return;
|
if ( ! _Rd_ ) return;
|
||||||
|
|
||||||
CPU_SSE_XMMCACHE_START(XMMINFO_READS|((cpucaps.hasStreamingSIMD2Extensions&&_Rs_==0)?0:XMMINFO_READT)|XMMINFO_WRITED)
|
CPU_SSE_XMMCACHE_START(XMMINFO_READS|(( _Rs_ == 0) ? 0:XMMINFO_READT)|XMMINFO_WRITED)
|
||||||
|
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
|
||||||
if( _Rt_ == 0 ) {
|
if( _Rt_ == 0 ) {
|
||||||
if( EEREC_D == EEREC_S ) {
|
if( EEREC_D == EEREC_S ) {
|
||||||
SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||||
|
@ -3262,19 +3232,6 @@ CPU_SSE_XMMCACHE_START(XMMINFO_READS|((cpucaps.hasStreamingSIMD2Extensions&&_Rs_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else {
|
|
||||||
if( EEREC_D == EEREC_S ) {
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_T, 0xee);
|
|
||||||
}
|
|
||||||
else if( EEREC_D == EEREC_T ) {
|
|
||||||
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
||||||
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CPU_SSE_XMMCACHE_END
|
CPU_SSE_XMMCACHE_END
|
||||||
|
|
||||||
_flushCachedRegs();
|
_flushCachedRegs();
|
||||||
|
|
|
@ -224,7 +224,6 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode
|
||||||
int info = eeRecompileCodeXMM(xmminfo); \
|
int info = eeRecompileCodeXMM(xmminfo); \
|
||||||
|
|
||||||
#define CPU_SSE2_XMMCACHE_START(xmminfo) \
|
#define CPU_SSE2_XMMCACHE_START(xmminfo) \
|
||||||
if (cpucaps.hasStreamingSIMD2Extensions) \
|
|
||||||
{ \
|
{ \
|
||||||
int info = eeRecompileCodeXMM(xmminfo); \
|
int info = eeRecompileCodeXMM(xmminfo); \
|
||||||
|
|
||||||
|
|
|
@ -3564,13 +3564,11 @@ void recVUMI_FTOI0(VURegs *VU, int info)
|
||||||
if ( _Ft_ == 0 ) return;
|
if ( _Ft_ == 0 ) return;
|
||||||
|
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
||||||
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
|
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
|
||||||
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3581,16 +3579,14 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
|
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
|
||||||
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||||
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
||||||
|
|
||||||
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (EEREC_T != EEREC_S) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
|
if (EEREC_T != EEREC_S) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
|
||||||
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
|
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
|
||||||
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
|
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
|
||||||
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3603,27 +3599,15 @@ void recVUMI_ITOF0( VURegs *VU, int info )
|
||||||
if ( _Ft_ == 0 ) return;
|
if ( _Ft_ == 0 ) return;
|
||||||
|
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
else {
|
|
||||||
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
|
|
||||||
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_TEMP, VU_VFx_ADDR( _Fs_ ));
|
|
||||||
}
|
|
||||||
|
|
||||||
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
||||||
xmmregs[EEREC_T].mode |= MODE_WRITE;
|
xmmregs[EEREC_T].mode |= MODE_WRITE;
|
||||||
vuFloat2(EEREC_T, EEREC_TEMP, _X_Y_Z_W); // Clamp infinities
|
vuFloat2(EEREC_T, EEREC_TEMP, _X_Y_Z_W); // Clamp infinities
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
|
||||||
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
|
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
|
||||||
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
|
|
||||||
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_T, VU_VFx_ADDR( _Fs_ ));
|
|
||||||
xmmregs[EEREC_T].mode |= MODE_WRITE;
|
|
||||||
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3632,24 +3616,14 @@ void recVUMI_ITOFX(VURegs *VU, int addr, int info)
|
||||||
if ( _Ft_ == 0 ) return;
|
if ( _Ft_ == 0 ) return;
|
||||||
|
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
else {
|
|
||||||
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
|
|
||||||
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_TEMP, VU_VFx_ADDR( _Fs_ ));
|
|
||||||
}
|
|
||||||
|
|
||||||
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
|
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
|
||||||
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
||||||
xmmregs[EEREC_T].mode |= MODE_WRITE;
|
xmmregs[EEREC_T].mode |= MODE_WRITE;
|
||||||
vuFloat2(EEREC_T, EEREC_TEMP, _X_Y_Z_W); // Clamp infinities
|
vuFloat2(EEREC_T, EEREC_TEMP, _X_Y_Z_W); // Clamp infinities
|
||||||
} else {
|
} else {
|
||||||
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
|
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
|
||||||
else {
|
|
||||||
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
|
|
||||||
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_T, VU_VFx_ADDR( _Fs_ ));
|
|
||||||
xmmregs[EEREC_T].mode |= MODE_WRITE;
|
|
||||||
}
|
|
||||||
|
|
||||||
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
|
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
|
||||||
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
|
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities
|
||||||
}
|
}
|
||||||
|
@ -4166,38 +4140,22 @@ void recVUMI_MFIR( VURegs *VU, int info )
|
||||||
|
|
||||||
_deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Fs_, 1);
|
_deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Fs_, 1);
|
||||||
|
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
if( _XYZW_SS ) {
|
||||||
if( _XYZW_SS ) {
|
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
|
||||||
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
|
_vuFlipRegSS(VU, EEREC_T);
|
||||||
_vuFlipRegSS(VU, EEREC_T);
|
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
|
||||||
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
|
SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
|
||||||
SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
|
_vuFlipRegSS(VU, EEREC_T);
|
||||||
_vuFlipRegSS(VU, EEREC_T);
|
|
||||||
}
|
|
||||||
else if (_X_Y_Z_W != 0xf) {
|
|
||||||
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
|
|
||||||
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
|
|
||||||
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
||||||
} else {
|
|
||||||
SSE2_MOVD_M32_to_XMM(EEREC_T, VU_VI_ADDR(_Fs_, 1)-2);
|
|
||||||
SSE2_PSRAD_I8_to_XMM(EEREC_T, 16);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else if (_X_Y_Z_W != 0xf) {
|
||||||
MOVSX32M16toR(EAX, VU_VI_ADDR(_Fs_, 1));
|
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
|
||||||
MOV32RtoM((uptr)&s_temp, EAX);
|
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
|
||||||
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
|
||||||
if( _X_Y_Z_W != 0xf ) {
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
||||||
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&s_temp);
|
} else {
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
|
SSE2_MOVD_M32_to_XMM(EEREC_T, VU_VI_ADDR(_Fs_, 1)-2);
|
||||||
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
SSE2_PSRAD_I8_to_XMM(EEREC_T, 16);
|
||||||
}
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
|
||||||
else {
|
|
||||||
SSE_MOVSS_M32_to_XMM(EEREC_T, (uptr)&s_temp);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -78,7 +78,6 @@ void SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
||||||
{
|
{
|
||||||
u32 i;
|
u32 i;
|
||||||
u32 prev = 0;
|
u32 prev = 0;
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) return;
|
|
||||||
FreezeXMMRegs(1);
|
FreezeXMMRegs(1);
|
||||||
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
|
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
|
||||||
|
|
||||||
|
@ -116,7 +115,6 @@ void SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
||||||
{
|
{
|
||||||
u32 i;
|
u32 i;
|
||||||
u32 prev = 0;
|
u32 prev = 0;
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) return;
|
|
||||||
FreezeXMMRegs(1);
|
FreezeXMMRegs(1);
|
||||||
|
|
||||||
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
|
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
|
||||||
|
|
|
@ -459,7 +459,7 @@ void rpropBSC(EEINST* prev, EEINST* pinst)
|
||||||
case 5: // bne
|
case 5: // bne
|
||||||
rpropSetRead(_Rs_, EEINST_LIVE1);
|
rpropSetRead(_Rs_, EEINST_LIVE1);
|
||||||
rpropSetRead(_Rt_, EEINST_LIVE1);
|
rpropSetRead(_Rt_, EEINST_LIVE1);
|
||||||
pinst->info |= (cpucaps.hasStreamingSIMD2Extensions?(EEINST_REALXMM|EEINST_MMX):0);
|
pinst->info |= EEINST_REALXMM|EEINST_MMX;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 20: // beql
|
case 20: // beql
|
||||||
|
@ -469,7 +469,7 @@ void rpropBSC(EEINST* prev, EEINST* pinst)
|
||||||
prev->info = 0;
|
prev->info = 0;
|
||||||
rpropSetRead(_Rs_, EEINST_LIVE1);
|
rpropSetRead(_Rs_, EEINST_LIVE1);
|
||||||
rpropSetRead(_Rt_, EEINST_LIVE1);
|
rpropSetRead(_Rt_, EEINST_LIVE1);
|
||||||
pinst->info |= (cpucaps.hasStreamingSIMD2Extensions?(EEINST_REALXMM|EEINST_MMX):0);
|
pinst->info |= EEINST_REALXMM|EEINST_MMX;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 6: // blez
|
case 6: // blez
|
||||||
|
@ -488,7 +488,7 @@ void rpropBSC(EEINST* prev, EEINST* pinst)
|
||||||
case 24: // daddi
|
case 24: // daddi
|
||||||
case 25: // daddiu
|
case 25: // daddiu
|
||||||
rpropSetWrite(_Rt_, EEINST_LIVE1);
|
rpropSetWrite(_Rt_, EEINST_LIVE1);
|
||||||
rpropSetRead(_Rs_, EEINST_LIVE1|((_Rs_!=0&&cpucaps.hasStreamingSIMD2Extensions)?EEINST_MMX:0));
|
// Request the MMX flag only when Rs is not register 0. A ternary is required here:
// `_Rs_!=0 && EEINST_MMX` would evaluate to the boolean 0/1, not the EEINST_MMX bit.
rpropSetRead(_Rs_, EEINST_LIVE1 | ((_Rs_ != 0) ? EEINST_MMX : 0));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 8: // addi
|
case 8: // addi
|
||||||
|
@ -682,7 +682,7 @@ void rpropSPECIAL(EEINST* prev, EEINST* pinst)
|
||||||
|
|
||||||
case 24: // mult
|
case 24: // mult
|
||||||
// can do unsigned mult only if HI isn't used
|
// can do unsigned mult only if HI isn't used
|
||||||
//temp = (pinst->regs[XMMGPR_HI]&(EEINST_LIVE0|EEINST_LIVE1))?0:(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0);
|
//temp = (pinst->regs[XMMGPR_HI]&(EEINST_LIVE0|EEINST_LIVE1))?0:EEINST_MMX;
|
||||||
temp = 0;
|
temp = 0;
|
||||||
rpropSetWrite(XMMGPR_LO, EEINST_LIVE1);
|
rpropSetWrite(XMMGPR_LO, EEINST_LIVE1);
|
||||||
rpropSetWrite(XMMGPR_HI, EEINST_LIVE1);
|
rpropSetWrite(XMMGPR_HI, EEINST_LIVE1);
|
||||||
|
@ -697,9 +697,9 @@ void rpropSPECIAL(EEINST* prev, EEINST* pinst)
|
||||||
rpropSetWrite(XMMGPR_LO, EEINST_LIVE1);
|
rpropSetWrite(XMMGPR_LO, EEINST_LIVE1);
|
||||||
rpropSetWrite(XMMGPR_HI, EEINST_LIVE1);
|
rpropSetWrite(XMMGPR_HI, EEINST_LIVE1);
|
||||||
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
||||||
rpropSetRead(_Rs_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
|
rpropSetRead(_Rs_, EEINST_MMX);
|
||||||
rpropSetRead(_Rt_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
|
rpropSetRead(_Rt_, EEINST_MMX);
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) pinst->info |= EEINST_MMX;
|
pinst->info |= EEINST_MMX;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 26: // div
|
case 26: // div
|
||||||
|
@ -763,10 +763,10 @@ void rpropSPECIAL(EEINST* prev, EEINST* pinst)
|
||||||
rpropSetRead(_Rt_, (pinst->regs[_Rd_]&EEINST_LIVE1));
|
rpropSetRead(_Rt_, (pinst->regs[_Rd_]&EEINST_LIVE1));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
rpropSetRead(_Rs_, (pinst->regs[_Rd_]&EEINST_LIVE1)|(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
|
rpropSetRead(_Rs_, (pinst->regs[_Rd_]&EEINST_LIVE1)|EEINST_MMX);
|
||||||
rpropSetRead(_Rt_, (pinst->regs[_Rd_]&EEINST_LIVE1)|(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
|
rpropSetRead(_Rt_, (pinst->regs[_Rd_]&EEINST_LIVE1)|EEINST_MMX);
|
||||||
}
|
}
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) pinst->info |= EEINST_MMX;
|
pinst->info |= EEINST_MMX;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// traps
|
// traps
|
||||||
|
@ -781,13 +781,13 @@ void rpropSPECIAL(EEINST* prev, EEINST* pinst)
|
||||||
case 62: // dsrl32
|
case 62: // dsrl32
|
||||||
case 63: // dsra32
|
case 63: // dsra32
|
||||||
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
||||||
rpropSetRead(_Rt_, EEINST_LIVE1|(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
|
rpropSetRead(_Rt_, EEINST_LIVE1|EEINST_MMX);
|
||||||
pinst->info |= EEINST_MMX;
|
pinst->info |= EEINST_MMX;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 60: // dsll32
|
case 60: // dsll32
|
||||||
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
||||||
rpropSetRead(_Rt_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
|
rpropSetRead(_Rt_, EEINST_MMX);
|
||||||
pinst->info |= EEINST_MMX;
|
pinst->info |= EEINST_MMX;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1060,7 +1060,7 @@ void rpropMMI(EEINST* prev, EEINST* pinst)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 24: // mult1
|
case 24: // mult1
|
||||||
temp = (pinst->regs[XMMGPR_HI]&(EEINST_LIVE2))?0:(cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0);
|
temp = (pinst->regs[XMMGPR_HI]&(EEINST_LIVE2))?0:EEINST_MMX;
|
||||||
rpropSetWrite0(XMMGPR_LO, EEINST_LIVE2, 0);
|
rpropSetWrite0(XMMGPR_LO, EEINST_LIVE2, 0);
|
||||||
rpropSetWrite0(XMMGPR_HI, EEINST_LIVE2, 0);
|
rpropSetWrite0(XMMGPR_HI, EEINST_LIVE2, 0);
|
||||||
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
||||||
|
@ -1072,9 +1072,9 @@ void rpropMMI(EEINST* prev, EEINST* pinst)
|
||||||
rpropSetWrite0(XMMGPR_LO, EEINST_LIVE2, 0);
|
rpropSetWrite0(XMMGPR_LO, EEINST_LIVE2, 0);
|
||||||
rpropSetWrite0(XMMGPR_HI, EEINST_LIVE2, 0);
|
rpropSetWrite0(XMMGPR_HI, EEINST_LIVE2, 0);
|
||||||
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
rpropSetWrite(_Rd_, EEINST_LIVE1);
|
||||||
rpropSetRead(_Rs_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
|
rpropSetRead(_Rs_, EEINST_MMX);
|
||||||
rpropSetRead(_Rt_, (cpucaps.hasStreamingSIMD2Extensions?EEINST_MMX:0));
|
rpropSetRead(_Rt_, EEINST_MMX);
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) pinst->info |= EEINST_MMX;
|
pinst->info |= EEINST_MMX;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 26: // div1
|
case 26: // div1
|
||||||
|
|
|
@ -515,28 +515,15 @@ int _allocMMXreg(int mmxreg, int reg, int mode)
|
||||||
else {
|
else {
|
||||||
int xmmreg;
|
int xmmreg;
|
||||||
if( MMX_ISGPR(reg) && (xmmreg = _checkXMMreg(XMMTYPE_GPRREG, reg-MMX_GPR, 0)) >= 0 ) {
|
if( MMX_ISGPR(reg) && (xmmreg = _checkXMMreg(XMMTYPE_GPRREG, reg-MMX_GPR, 0)) >= 0 ) {
|
||||||
if (cpucaps.hasStreamingSIMD2Extensions) {
|
SSE_MOVHPS_XMM_to_M64((u32)_MMXGetAddr(reg)+8, xmmreg);
|
||||||
SSE_MOVHPS_XMM_to_M64((u32)_MMXGetAddr(reg)+8, xmmreg);
|
if( mode & MODE_READ )
|
||||||
if( mode & MODE_READ )
|
SSE2_MOVDQ2Q_XMM_to_MM(mmxreg, xmmreg);
|
||||||
SSE2_MOVDQ2Q_XMM_to_MM(mmxreg, xmmreg);
|
|
||||||
|
|
||||||
if( xmmregs[xmmreg].mode & MODE_WRITE )
|
if( xmmregs[xmmreg].mode & MODE_WRITE )
|
||||||
mmxregs[mmxreg].mode |= MODE_WRITE;
|
mmxregs[mmxreg].mode |= MODE_WRITE;
|
||||||
|
|
||||||
// don't flush
|
// don't flush
|
||||||
xmmregs[xmmreg].inuse = 0;
|
xmmregs[xmmreg].inuse = 0;
|
||||||
}
|
|
||||||
else {
|
|
||||||
_freeXMMreg(xmmreg);
|
|
||||||
|
|
||||||
if( (mode & MODE_READHALF) || (MMX_IS32BITS(reg)&&(mode&MODE_READ)) ) {
|
|
||||||
MOVDMtoMMX(mmxreg, (u32)_MMXGetAddr(reg));
|
|
||||||
}
|
|
||||||
else if( mode & MODE_READ ) {
|
|
||||||
MOVQMtoR(mmxreg, (u32)_MMXGetAddr(reg));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( MMX_ISGPR(reg) ) {
|
if( MMX_ISGPR(reg) ) {
|
||||||
|
|
|
@ -1575,6 +1575,11 @@ int recInit( void )
|
||||||
SysMessage( _( "Processor doesn't supports MMX, can't run recompiler without that" ) );
|
SysMessage( _( "Processor doesn't supports MMX, can't run recompiler without that" ) );
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
if ( !( cpucaps.hasStreamingSIMD2Extensions ) )
|
||||||
|
{
|
||||||
|
SysMessage( _( "Processor doesn't supports SSE2, can't run recompiler without that" ) );
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
x86FpuState = FPU_STATE;
|
x86FpuState = FPU_STATE;
|
||||||
|
|
||||||
|
|
|
@ -254,7 +254,6 @@ void recDADD_constv(int info, int creg, int vreg)
|
||||||
|
|
||||||
if( info & PROCESS_EE_MMX ) {
|
if( info & PROCESS_EE_MMX ) {
|
||||||
int mmreg = vreg == _Rt_ ? EEREC_T : EEREC_S;
|
int mmreg = vreg == _Rt_ ? EEREC_T : EEREC_S;
|
||||||
assert( cpucaps.hasStreamingSIMD2Extensions );
|
|
||||||
|
|
||||||
if( g_cpuConstRegs[ creg ].UD[0] ) {
|
if( g_cpuConstRegs[ creg ].UD[0] ) {
|
||||||
|
|
||||||
|
@ -267,7 +266,7 @@ void recDADD_constv(int info, int creg, int vreg)
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
||||||
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
|
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
|
||||||
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
||||||
SetMMXstate();
|
SetMMXstate();
|
||||||
|
|
||||||
|
@ -337,7 +336,6 @@ void recDADD_(int info)
|
||||||
assert( !(info&PROCESS_EE_XMM) );
|
assert( !(info&PROCESS_EE_XMM) );
|
||||||
|
|
||||||
if( info & PROCESS_EE_MMX ) {
|
if( info & PROCESS_EE_MMX ) {
|
||||||
assert( cpucaps.hasStreamingSIMD2Extensions );
|
|
||||||
|
|
||||||
if( EEREC_D == EEREC_S ) PADDQRtoR(EEREC_D, EEREC_T);
|
if( EEREC_D == EEREC_S ) PADDQRtoR(EEREC_D, EEREC_T);
|
||||||
else if( EEREC_D == EEREC_T ) PADDQRtoR(EEREC_D, EEREC_S);
|
else if( EEREC_D == EEREC_T ) PADDQRtoR(EEREC_D, EEREC_S);
|
||||||
|
@ -347,7 +345,7 @@ void recDADD_(int info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
|
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
|
||||||
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
||||||
|
|
||||||
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
|
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
|
||||||
|
@ -648,7 +646,6 @@ void recDSUB_consts(int info)
|
||||||
assert( !(info&PROCESS_EE_XMM) );
|
assert( !(info&PROCESS_EE_XMM) );
|
||||||
|
|
||||||
if( info & PROCESS_EE_MMX ) {
|
if( info & PROCESS_EE_MMX ) {
|
||||||
assert( cpucaps.hasStreamingSIMD2Extensions );
|
|
||||||
|
|
||||||
if( g_cpuConstRegs[ _Rs_ ].UD[0] ) {
|
if( g_cpuConstRegs[ _Rs_ ].UD[0] ) {
|
||||||
|
|
||||||
|
@ -685,7 +682,7 @@ void recDSUB_consts(int info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
|
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
|
||||||
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
||||||
SetMMXstate();
|
SetMMXstate();
|
||||||
MOVQMtoR(mmreg, (u32)_eeGetConstReg(_Rs_));
|
MOVQMtoR(mmreg, (u32)_eeGetConstReg(_Rs_));
|
||||||
|
@ -751,7 +748,6 @@ void recDSUB_constt(int info)
|
||||||
assert( !(info&PROCESS_EE_XMM) );
|
assert( !(info&PROCESS_EE_XMM) );
|
||||||
|
|
||||||
if( info & PROCESS_EE_MMX ) {
|
if( info & PROCESS_EE_MMX ) {
|
||||||
assert( cpucaps.hasStreamingSIMD2Extensions );
|
|
||||||
|
|
||||||
if( g_cpuConstRegs[ _Rt_ ].UD[0] ) {
|
if( g_cpuConstRegs[ _Rt_ ].UD[0] ) {
|
||||||
|
|
||||||
|
@ -763,7 +759,7 @@ void recDSUB_constt(int info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
|
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
|
||||||
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
||||||
SetMMXstate();
|
SetMMXstate();
|
||||||
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
|
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
|
||||||
|
@ -807,7 +803,6 @@ void recDSUB_(int info)
|
||||||
assert( !(info&PROCESS_EE_XMM) );
|
assert( !(info&PROCESS_EE_XMM) );
|
||||||
|
|
||||||
if( info & PROCESS_EE_MMX ) {
|
if( info & PROCESS_EE_MMX ) {
|
||||||
assert( cpucaps.hasStreamingSIMD2Extensions );
|
|
||||||
|
|
||||||
if( EEREC_D == EEREC_S ) PSUBQRtoR(EEREC_D, EEREC_T);
|
if( EEREC_D == EEREC_S ) PSUBQRtoR(EEREC_D, EEREC_T);
|
||||||
else if( EEREC_D == EEREC_T ) {
|
else if( EEREC_D == EEREC_T ) {
|
||||||
|
@ -825,7 +820,7 @@ void recDSUB_(int info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
|
if( (g_pCurInstInfo->regs[_Rd_]&EEINST_MMX) ) {
|
||||||
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
int mmreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
|
||||||
SetMMXstate();
|
SetMMXstate();
|
||||||
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[_Rs_].UL[ 0 ]);
|
MOVQMtoR(mmreg, (int)&cpuRegs.GPR.r[_Rs_].UL[ 0 ]);
|
||||||
|
|
|
@ -156,7 +156,6 @@ void recDADDI_(int info)
|
||||||
assert( !(info&PROCESS_EE_XMM) );
|
assert( !(info&PROCESS_EE_XMM) );
|
||||||
|
|
||||||
if( info & PROCESS_EE_MMX ) {
|
if( info & PROCESS_EE_MMX ) {
|
||||||
assert( cpucaps.hasStreamingSIMD2Extensions );
|
|
||||||
|
|
||||||
if( _Imm_ != 0 ) {
|
if( _Imm_ != 0 ) {
|
||||||
|
|
||||||
|
@ -173,7 +172,7 @@ void recDADDI_(int info)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( (g_pCurInstInfo->regs[_Rt_]&EEINST_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
|
if( (g_pCurInstInfo->regs[_Rt_]&EEINST_MMX) ) {
|
||||||
int rtreg;
|
int rtreg;
|
||||||
u32* ptempmem = recAllocStackMem(8, 8);
|
u32* ptempmem = recAllocStackMem(8, 8);
|
||||||
ptempmem[0] = _Imm_;
|
ptempmem[0] = _Imm_;
|
||||||
|
|
|
@ -85,7 +85,7 @@ void _eeOnLoadWrite(int reg)
|
||||||
|
|
||||||
if( regt >= 0 ) {
|
if( regt >= 0 ) {
|
||||||
if( xmmregs[regt].mode & MODE_WRITE ) {
|
if( xmmregs[regt].mode & MODE_WRITE ) {
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && (reg != _Rs_) ) {
|
if( reg != _Rs_ ) {
|
||||||
SSE2_PUNPCKHQDQ_XMM_to_XMM(regt, regt);
|
SSE2_PUNPCKHQDQ_XMM_to_XMM(regt, regt);
|
||||||
SSE2_MOVQ_XMM_to_M64((u32)&cpuRegs.GPR.r[reg].UL[2], regt);
|
SSE2_MOVQ_XMM_to_M64((u32)&cpuRegs.GPR.r[reg].UL[2], regt);
|
||||||
}
|
}
|
||||||
|
|
|
@ -101,12 +101,7 @@ void recMFHILO(int hi)
|
||||||
|
|
||||||
xmmregs[regd].inuse = 0;
|
xmmregs[regd].inuse = 0;
|
||||||
|
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
SSE2_MOVQ_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[0], reghi);
|
||||||
SSE2_MOVQ_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[0], reghi);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_MOVLPS_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[0], reghi);
|
|
||||||
}
|
|
||||||
|
|
||||||
if( xmmregs[regd].mode & MODE_WRITE ) {
|
if( xmmregs[regd].mode & MODE_WRITE ) {
|
||||||
SSE_MOVHPS_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[2], regd);
|
SSE_MOVHPS_XMM_to_M64((u32)&cpuRegs.GPR.r[_Rd_].UL[2], regd);
|
||||||
|
@ -199,20 +194,15 @@ void recMTHILO(int hi)
|
||||||
if( regs >= 0 ) {
|
if( regs >= 0 ) {
|
||||||
assert( reghi != regs );
|
assert( reghi != regs );
|
||||||
|
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
_deleteGPRtoXMMreg(_Rs_, 0);
|
||||||
_deleteGPRtoXMMreg(_Rs_, 0);
|
SSE2_PUNPCKHQDQ_XMM_to_XMM(reghi, reghi);
|
||||||
SSE2_PUNPCKHQDQ_XMM_to_XMM(reghi, reghi);
|
SSE2_PUNPCKLQDQ_XMM_to_XMM(regs, reghi);
|
||||||
SSE2_PUNPCKLQDQ_XMM_to_XMM(regs, reghi);
|
|
||||||
|
|
||||||
// swap regs
|
// swap regs
|
||||||
xmmregs[regs] = xmmregs[reghi];
|
xmmregs[regs] = xmmregs[reghi];
|
||||||
xmmregs[reghi].inuse = 0;
|
xmmregs[reghi].inuse = 0;
|
||||||
xmmregs[regs].mode |= MODE_WRITE;
|
xmmregs[regs].mode |= MODE_WRITE;
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE2EMU_MOVSD_XMM_to_XMM(reghi, regs);
|
|
||||||
xmmregs[reghi].mode |= MODE_WRITE;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
regs = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ);
|
regs = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ);
|
||||||
|
@ -336,13 +326,8 @@ void recMFHILO1(int hi)
|
||||||
else {
|
else {
|
||||||
if( regd >= 0 ) {
|
if( regd >= 0 ) {
|
||||||
if( EEINST_ISLIVE2(_Rd_) ) {
|
if( EEINST_ISLIVE2(_Rd_) ) {
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
SSE2_PUNPCKHQDQ_M128_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 0 ] : (int)&cpuRegs.LO.UD[ 0 ]);
|
||||||
SSE2_PUNPCKHQDQ_M128_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 0 ] : (int)&cpuRegs.LO.UD[ 0 ]);
|
SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0x4e);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0x4e);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_MOVLPS_M64_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 1 ] : (int)&cpuRegs.LO.UD[ 1 ]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
SSE2_MOVQ_M64_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 1 ] : (int)&cpuRegs.LO.UD[ 1 ]);
|
SSE2_MOVQ_M64_to_XMM(regd, hi ? (int)&cpuRegs.HI.UD[ 1 ] : (int)&cpuRegs.LO.UD[ 1 ]);
|
||||||
|
@ -381,14 +366,11 @@ void recMTHILO1(int hi)
|
||||||
|
|
||||||
if( reghi >= 0 ) {
|
if( reghi >= 0 ) {
|
||||||
if( regs >= 0 ) {
|
if( regs >= 0 ) {
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) SSE2_PUNPCKLQDQ_XMM_to_XMM(reghi, regs);
|
SSE2_PUNPCKLQDQ_XMM_to_XMM(reghi, regs);
|
||||||
else SSE_MOVLHPS_XMM_to_XMM(reghi, regs);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
_deleteEEreg(_Rs_, 1);
|
_deleteEEreg(_Rs_, 1);
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions ) SSE2_PUNPCKLQDQ_M128_to_XMM(reghi, (int)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ]);
|
SSE2_PUNPCKLQDQ_M128_to_XMM(reghi, (int)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ]);
|
||||||
else SSE_MOVHPS_M64_to_XMM(reghi, (int)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ]);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -440,7 +440,7 @@ void recMULTUsuper(int info, int upper, int process)
|
||||||
EEINST_SETSIGNEXT(_Rs_);
|
EEINST_SETSIGNEXT(_Rs_);
|
||||||
EEINST_SETSIGNEXT(_Rt_);
|
EEINST_SETSIGNEXT(_Rt_);
|
||||||
|
|
||||||
if( (info & PROCESS_EE_MMX) && cpucaps.hasStreamingSIMD2Extensions ) {
|
if( (info & PROCESS_EE_MMX) ) {
|
||||||
|
|
||||||
if( !_Rd_ ) {
|
if( !_Rd_ ) {
|
||||||
// need some temp reg
|
// need some temp reg
|
||||||
|
|
|
@ -281,53 +281,35 @@ _inline void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SS
|
||||||
|
|
||||||
_inline void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVSD_XMM_to_XMM(to, from);
|
write8(0xf2);
|
||||||
else {
|
SSERtoR( 0x100f);
|
||||||
write8(0xf2);
|
|
||||||
SSERtoR( 0x100f);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from )
|
_inline void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_M64_to_XMM(to, from);
|
write8(0xf3); SSEMtoR( 0x7e0f, 0);
|
||||||
else {
|
|
||||||
write8(0xf3); SSEMtoR( 0x7e0f, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_XMM_to_XMM(to, from);
|
write8(0xf3); SSERtoR( 0x7e0f);
|
||||||
else {
|
|
||||||
write8(0xf3); SSERtoR( 0x7e0f);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from )
|
_inline void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE_MOVLPS_XMM_to_M64(to, from);
|
SSERtoM66(0xd60f);
|
||||||
else {
|
|
||||||
SSERtoM66(0xd60f);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __x86_64__
|
#ifndef __x86_64__
|
||||||
_inline void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
|
_inline void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVDQ2Q_XMM_to_MM(to, from);
|
write8(0xf2);
|
||||||
else {
|
SSERtoR( 0xd60f);
|
||||||
write8(0xf2);
|
|
||||||
SSERtoR( 0xd60f);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_inline void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
|
_inline void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ2DQ_MM_to_XMM(to, from);
|
write8(0xf3);
|
||||||
else {
|
SSERtoR( 0xd60f);
|
||||||
write8(0xf3);
|
|
||||||
SSERtoR( 0xd60f);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -710,13 +692,8 @@ _inline void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, i
|
||||||
//**********************************************************************************
|
//**********************************************************************************
|
||||||
_inline void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
|
_inline void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) {
|
SSERtoR66( 0x700F );
|
||||||
SSE2EMU_PSHUFD_XMM_to_XMM(to, from, imm8);
|
write8( imm8 );
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSERtoR66( 0x700F );
|
|
||||||
write8( imm8 );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_inline void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); }
|
_inline void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); }
|
||||||
|
|
||||||
|
@ -803,22 +780,12 @@ _inline void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSE
|
||||||
_inline void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); }
|
_inline void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); }
|
||||||
_inline void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from )
|
_inline void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) {
|
SSERtoR66( 0x760F );
|
||||||
SSE_CMPEQPS_XMM_to_XMM(to, from);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSERtoR66( 0x760F );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from )
|
_inline void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) {
|
SSEMtoR66( 0x760F );
|
||||||
SSE_CMPEQPS_M128_to_XMM(to, from);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSEMtoR66( 0x760F );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -848,12 +815,7 @@ _inline void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x
|
||||||
_inline void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); }
|
_inline void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); }
|
||||||
_inline void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from )
|
_inline void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) {
|
SSERtoR66(0x6E0F);
|
||||||
SSE2EMU_MOVD_R_to_XMM(to, from);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSERtoR66(0x6E0F);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from )
|
_inline void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from )
|
||||||
|
@ -873,13 +835,9 @@ _inline void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, in
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); }
|
_inline void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); }
|
||||||
_inline void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) {
|
_inline void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from )
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) {
|
{
|
||||||
SSE2EMU_MOVD_XMM_to_R(to, from);
|
_SSERtoR66(0x7E0F);
|
||||||
}
|
|
||||||
else {
|
|
||||||
_SSERtoR66(0x7E0F);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
|
_inline void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
|
||||||
|
@ -892,15 +850,10 @@ _inline void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
|
||||||
|
|
||||||
_inline void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
|
_inline void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
|
||||||
{
|
{
|
||||||
if( !cpucaps.hasStreamingSIMD2Extensions ) {
|
write8(0x66);
|
||||||
SSE2EMU_MOVD_XMM_to_RmOffset(to, from, offset);
|
RexRB(0, from, to);
|
||||||
}
|
write16( 0x7e0f );
|
||||||
else {
|
WriteRmOffsetFrom(from, to, offset);
|
||||||
write8(0x66);
|
|
||||||
RexRB(0, from, to);
|
|
||||||
write16( 0x7e0f );
|
|
||||||
WriteRmOffsetFrom(from, to, offset);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
|
@ -1237,157 +1190,157 @@ _inline void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from)
|
||||||
// SSE-X
|
// SSE-X
|
||||||
_inline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
|
_inline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
|
||||||
else SSE_MOVAPS_M128_to_XMM(to, from);
|
else SSE_MOVAPS_M128_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
|
_inline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
|
||||||
else SSE_MOVAPS_XMM_to_M128(to, from);
|
else SSE_MOVAPS_XMM_to_M128(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
|
||||||
else SSE_MOVAPS_XMM_to_XMM(to, from);
|
else SSE_MOVAPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
|
_inline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
|
||||||
else SSE_MOVAPSRmtoROffset(to, from, offset);
|
else SSE_MOVAPSRmtoROffset(to, from, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
|
_inline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
|
||||||
else SSE_MOVAPSRtoRmOffset(to, from, offset);
|
else SSE_MOVAPSRtoRmOffset(to, from, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
|
_inline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
|
||||||
else SSE_MOVAPS_M128_to_XMM(to, from);
|
else SSE_MOVAPS_M128_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
|
_inline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
|
||||||
else SSE_MOVAPS_XMM_to_M128(to, from);
|
else SSE_MOVAPS_XMM_to_M128(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
|
||||||
else SSE_MOVAPS_XMM_to_XMM(to, from);
|
else SSE_MOVAPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
|
_inline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
|
||||||
else SSE_MOVSS_M32_to_XMM(to, from);
|
else SSE_MOVSS_M32_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from )
|
_inline void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
|
||||||
else SSE_MOVSS_XMM_to_M32(to, from);
|
else SSE_MOVSS_XMM_to_M32(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
|
_inline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
|
||||||
else SSE_MOVSS_XMM_to_Rm(to, from);
|
else SSE_MOVSS_XMM_to_Rm(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
|
_inline void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
|
||||||
else SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
|
else SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
|
_inline void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
|
||||||
else SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
|
else SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from )
|
_inline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
|
||||||
else SSE_ORPS_M128_to_XMM(to, from);
|
else SSE_ORPS_M128_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
|
||||||
else SSE_ORPS_XMM_to_XMM(to, from);
|
else SSE_ORPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from )
|
_inline void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
|
||||||
else SSE_XORPS_M128_to_XMM(to, from);
|
else SSE_XORPS_M128_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
|
||||||
else SSE_XORPS_XMM_to_XMM(to, from);
|
else SSE_XORPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from )
|
_inline void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
|
||||||
else SSE_ANDPS_M128_to_XMM(to, from);
|
else SSE_ANDPS_M128_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
|
||||||
else SSE_ANDPS_XMM_to_XMM(to, from);
|
else SSE_ANDPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from )
|
_inline void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
|
||||||
else SSE_ANDNPS_M128_to_XMM(to, from);
|
else SSE_ANDNPS_M128_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from);
|
||||||
else SSE_ANDNPS_XMM_to_XMM(to, from);
|
else SSE_ANDNPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
|
_inline void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
|
||||||
else SSE_UNPCKLPS_M128_to_XMM(to, from);
|
else SSE_UNPCKLPS_M128_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
_inline void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
|
||||||
else SSE_UNPCKLPS_XMM_to_XMM(to, from);
|
else SSE_UNPCKLPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
|
_inline void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
|
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
|
||||||
else SSE_UNPCKHPS_M128_to_XMM(to, from);
|
else SSE_UNPCKHPS_M128_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
_inline void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
|
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
|
||||||
else SSE_UNPCKHPS_XMM_to_XMM(to, from);
|
else SSE_UNPCKHPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
_inline void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
_inline void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) {
|
if( g_xmmtypes[from] == XMMT_INT ) {
|
||||||
SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
|
SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
|
||||||
if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
|
if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue