Emitter: Implemented SQRT/RSQRT/UCOM and all variations of SSE CVTs (omg those were a nightmare) [also patched old emitter to use new MOVD implementations -- missed those earlier].

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1031 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-04-20 15:22:02 +00:00
parent 18c4765d31
commit cc48702b17
7 changed files with 355 additions and 358 deletions

View File

@ -164,8 +164,8 @@ class G1LogicImpl_PlusSSE : public Group1ImplAll<InstType>
public:
using Group1ImplAll<InstType>::operator();
const SSELogicImpl<0x00,OpcodeSSE> PS;
const SSELogicImpl<0x66,OpcodeSSE> PD;
const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single precision
const SSELogicImpl<0x66,OpcodeSSE> PD; // packed double precision
G1LogicImpl_PlusSSE() {}
};
@ -179,8 +179,8 @@ class G1ArithmeticImpl_PlusSSE : public G1LogicImpl_PlusSSE<InstType, OpcodeSSE
public:
using Group1ImplAll<InstType>::operator();
const SSELogicImpl<0xf3,OpcodeSSE> SS;
const SSELogicImpl<0xf2,OpcodeSSE> SD;
const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single precision
const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double precision
G1ArithmeticImpl_PlusSSE() {}
};
@ -191,18 +191,19 @@ class G1CompareImpl_PlusSSE : Group1ImplAll< G1Type_CMP >
protected:
template< u8 Prefix > struct Woot
{
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
Woot() {}
};
public:
using Group1ImplAll< G1Type_CMP >::operator();
Woot<0x00> PS;
Woot<0x66> PD;
Woot<0xf3> SS;
Woot<0xf2> SD;
const Woot<0x00> PS;
const Woot<0x66> PD;
const Woot<0xf3> SS;
const Woot<0xf2> SD;
G1CompareImpl_PlusSSE() {} //GCWhat?
};

View File

@ -70,7 +70,7 @@ public:
__forceinline void operator()( const T* func ) const
{
if( isJmp )
iJccKnownTarget( Jcc_Unconditional, (void*)func );
iJccKnownTarget( Jcc_Unconditional, (void*)(uptr)func );
else
{
// calls are relative to the instruction after this one, and length is

View File

@ -21,17 +21,7 @@
//////////////////////////////////////////////////////////////////////////////////////////
// MMX / SSE Helper Functions!
template< typename T >
__emitinline void SimdPrefix( u8 opcode, u8 prefix=0 )
{
if( sizeof( T ) == 16 && prefix != 0 )
{
xWrite<u16>( 0x0f00 | prefix );
xWrite<u8>( opcode );
}
else
xWrite<u16>( (opcode<<8) | 0x0f );
}
extern void SimdPrefix( u8 prefix, u8 opcode );
// ------------------------------------------------------------------------
// xmm emitter helpers for xmm instruction with prefixes.
@ -40,23 +30,23 @@ __emitinline void SimdPrefix( u8 opcode, u8 prefix=0 )
// instructions violate this "guideline.")
//
template< typename T, typename T2 >
__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& to, const xRegister<T2>& from )
__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
{
SimdPrefix<T>( opcode, prefix );
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
ModRM_Direct( to.Id, from.Id );
}
template< typename T >
void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, const ModSibBase& sib )
void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
{
SimdPrefix<T>( opcode, prefix );
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
EmitSibMagic( reg.Id, sib );
}
template< typename T >
__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, const void* data )
__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
{
SimdPrefix<T>( opcode, prefix );
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
xWriteDisp( reg.Id, data );
}
@ -68,21 +58,21 @@ __emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, con
template< typename T, typename T2 >
__emitinline void writeXMMop( u8 opcode, const xRegister<T>& to, const xRegister<T2>& from )
{
SimdPrefix<T>( opcode );
SimdPrefix( 0, opcode );
ModRM_Direct( to.Id, from.Id );
}
template< typename T >
void writeXMMop( u8 opcode, const xRegister<T>& reg, const ModSibBase& sib )
{
SimdPrefix<T>( opcode );
SimdPrefix( 0, opcode );
EmitSibMagic( reg.Id, sib );
}
template< typename T >
__emitinline void writeXMMop( u8 opcode, const xRegister<T>& reg, const void* data )
{
SimdPrefix<T>( opcode );
SimdPrefix( 0, opcode );
xWriteDisp( reg.Id, data );
}
@ -170,6 +160,34 @@ public:
SSELogicImpl() {} //GCWho?
};
// ------------------------------------------------------------------------
// For implementing MMX/SSE operations which the destination *must* be a register, but the source
// can be regDirect or ModRM (indirect).
//
// Emits a two-operand SIMD op whose destination must be a register (DestRegType),
// while the source may be a register (SrcRegType), a direct pointer, or a
// strict-sized indirect operand (ModSibStrict<SrcOperandType>).  All forms pass
// forcePrefix=true to writeXMMop so the mandatory Prefix byte is emitted even
// when the register operands are not 128-bit types (needed by the CVT family,
// whose operands mix MMX/GPR/XMM registers).
template< u8 Prefix, u8 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
class SSEImpl_DestRegForm
{
public:
// reg, reg form.
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
// reg, [direct displacement] form.
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
// reg, [modrm indirect] form (strict operand sizing enforces correct m32/m64/m128 usage).
__noinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
SSEImpl_DestRegForm() {} //GCWho?
};
// ------------------------------------------------------------------------
// Groups the four standard forms of an SSE arithmetic op sharing opcode
// OpcodeSSE; the member selects the mandatory prefix (and hence the data type).
template< u8 OpcodeSSE >
class SSEImpl_PSPD_SSSD
{
public:
const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single precision
const SSELogicImpl<0x66,OpcodeSSE> PD; // packed double precision
const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single precision
const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double precision
SSEImpl_PSPD_SSSD() {} //GCWho?
};
// ------------------------------------------------------------------------
//
template< u8 OpcodeSSE >
@ -178,10 +196,62 @@ class SSEAndNotImpl
public:
const SSELogicImpl<0x00,OpcodeSSE> PS;
const SSELogicImpl<0x66,OpcodeSSE> PD;
SSEAndNotImpl() {}
};
// ------------------------------------------------------------------------
// For instructions that have SS/SD form only (UCOMI, etc)
// AltPrefix - prefixed used for doubles (SD form).
// Scalar-only instruction pair: SS form has no mandatory prefix; the SD form
// uses AltPrefix (0x66 for UCOMISD at the xUCOMI instantiation below).
template< u8 AltPrefix, u8 OpcodeSSE >
class SSEImpl_SS_SD
{
public:
const SSELogicImpl<0x00,OpcodeSSE> SS;      // scalar single (no prefix)
const SSELogicImpl<AltPrefix,OpcodeSSE> SD; // scalar double (AltPrefix)
SSEImpl_SS_SD() {}
};
// ------------------------------------------------------------------------
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
// PS/SS-only instruction pair (RCP and RSQRT have no double-precision forms).
template< u8 OpcodeSSE >
class SSE_rSqrtImpl
{
public:
const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single (no prefix)
const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single (0xf3 prefix)
SSE_rSqrtImpl() {}
};
// ------------------------------------------------------------------------
// For instructions that have PS/SS/SD form only (most commonly Sqrt functions)
// Extends the PS/SS pair with a scalar-double form (SQRT supports PS/SS/SD;
// there is no PD member here -- note SQRTPD would need a 0x66-prefixed form).
template< u8 OpcodeSSE >
class SSE_SqrtImpl : public SSE_rSqrtImpl<OpcodeSSE>
{
public:
const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double (0xf2 prefix)
SSE_SqrtImpl() {}
};
// ------------------------------------------------------------------------
// Shuffle-style instructions (SHUFPS/SHUFPD via the xSHUF instantiation): the
// op is followed by an 8-bit immediate selector, written after the operands.
// NOTE: the parameter is named 'cmptype' only because this struct was cloned
// from the compare implementation -- here it is the shuffle immediate (imm8).
template< u8 OpcodeSSE >
class SSEImpl_Shuffle
{
protected:
template< u8 Prefix > struct Woot
{
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
Woot() {}
};
public:
const Woot<0x00> PS; // packed single form (no prefix)
const Woot<0x66> PD; // packed double form (0x66 prefix)
SSEImpl_Shuffle() {} //GCWhat?
};
// ------------------------------------------------------------------------
template< SSE2_ComparisonType CType >
class SSECompareImpl
@ -192,13 +262,13 @@ protected:
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
Woot() {}
};
public:
Woot<0x00> PS;
Woot<0x66> PD;
Woot<0xf3> SS;
Woot<0xf2> SD;
const Woot<0x00> PS;
const Woot<0x66> PD;
const Woot<0xf3> SS;
const Woot<0xf2> SD;
SSECompareImpl() {} //GCWhat?
};

View File

@ -138,8 +138,6 @@ namespace Internal
__forceinline void ModRM( uint mod, uint reg, uint rm )
{
xWrite<u8>( (mod << 6) | (reg << 3) | rm );
//*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
//x86Ptr++;
}
__forceinline void ModRM_Direct( uint reg, uint rm )
@ -150,8 +148,6 @@ namespace Internal
__forceinline void SibSB( u32 ss, u32 index, u32 base )
{
xWrite<u8>( (ss << 6) | (index << 3) | base );
//*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
//x86Ptr++;
}
__forceinline void xWriteDisp( int regfield, s32 displacement )
@ -645,6 +641,17 @@ __emitinline void xBSWAP( const xRegister32& to )
// MMX / XMM Instructions
// (these will get put in their own file later)
// Writes the mandatory-prefix/escape preamble for an MMX/SSE instruction.
// Byte stream produced: [prefix] 0x0f opcode (the prefix byte is omitted when
// zero).  The u16 writes emit the low byte first (little-endian x86 stream).
__emitinline void Internal::SimdPrefix( u8 prefix, u8 opcode )
{
	if( prefix == 0 )
	{
		// No mandatory prefix: 0x0f escape and opcode in one 16-bit write.
		xWrite<u16>( (opcode<<8) | 0x0f );
	}
	else
	{
		// Prefix byte, then the 0x0f escape, then the opcode.
		xWrite<u16>( 0x0f00 | prefix );
		xWrite<u8>( opcode );
	}
}
const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS;
const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD;
@ -670,11 +677,50 @@ const PLogicImplAll<0xef> xPXOR;
const SSEAndNotImpl<0x55> xANDN;
// Compute Reciprocal Packed Single-Precision Floating-Point Values
const SSELogicImpl<0,0x53> xRCPPS;
const SSEImpl_SS_SD<0x66,0x2e> xUCOMI;
const SSE_rSqrtImpl<0x53> xRCP;
const SSE_rSqrtImpl<0x52> xRSQRT;
const SSE_SqrtImpl<0x51> xSQRT;
// Compute Reciprocal of Scalar Single-Precision Floating-Point Value
const SSELogicImpl<0xf3,0x53> xRCPSS;
const SSEImpl_PSPD_SSSD<0x5f> xMAX;
const SSEImpl_PSPD_SSSD<0x5d> xMIN;
const SSEImpl_Shuffle<0xc6> xSHUF;
// ------------------------------------------------------------------------
// SSE Conversion Operations, as looney as they are.
//
// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
//
// SSE/SSE2 conversion instruction frontends.  Template arguments are:
//   < mandatory prefix, opcode, destination register type, source register type,
//     strict memory-operand type for the indirect form >
const SSEImpl_DestRegForm<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD;
const SSEImpl_DestRegForm<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS;
const SSEImpl_DestRegForm<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ;
const SSEImpl_DestRegForm<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI;
const SSEImpl_DestRegForm<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS;
const SSEImpl_DestRegForm<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD;
const SSEImpl_DestRegForm<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS;
const SSEImpl_DestRegForm<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ;
const SSEImpl_DestRegForm<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD;
const SSEImpl_DestRegForm<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI;
const SSEImpl_DestRegForm<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI;
const SSEImpl_DestRegForm<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS;
// NOTE(review): per the Intel SDM, CVTSI2SD (F2 0F 2A /r) writes an *XMM*
// destination -- xRegisterMMX below looks like a copy/paste slip for
// xRegisterSSE.  Fix together with the matching extern declaration in the
// header so the two stay in sync.
const SSEImpl_DestRegForm<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD;
const SSEImpl_DestRegForm<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS;
const SSEImpl_DestRegForm<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD;
const SSEImpl_DestRegForm<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI;
const SSEImpl_DestRegForm<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ;
const SSEImpl_DestRegForm<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI;
const SSEImpl_DestRegForm<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ;
const SSEImpl_DestRegForm<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI;
const SSEImpl_DestRegForm<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI;
const SSEImpl_DestRegForm<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI;
// ------------------------------------------------------------------------
@ -724,7 +770,7 @@ __forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from )
// Manual implementation of this form of MOVQ, since its parameters are unique in a way
// that breaks the template inference of writeXMMop();
SimdPrefix<u128>( 0xd6, 0xf2 );
SimdPrefix( 0xf2, 0xd6 );
ModRM_Direct( to.Id, from.Id );
}

View File

@ -347,14 +347,15 @@ namespace x86Emitter
Internal::writeXMMop( 0x66, 0x7e, from, dest );
}
// ------------------------------------------------------------------------
// xMASKMOV:
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.
// The default memory location is specified by DS:EDI. The most significant bit in each byte
// of the mask operand determines whether the corresponding byte in the source operand is
// written to the corresponding byte location in memory.
template< typename T >
static __forceinline void xMASKMOV( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); }
@ -368,7 +369,7 @@ namespace x86Emitter
//
template< typename T >
static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD<T>& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); }
// ------------------------------------------------------------------------
extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from );
@ -409,10 +410,9 @@ namespace x86Emitter
extern void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from );
extern void xMOVNTQ( void* to, const xRegisterMMX& from );
extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from );
//////////////////////////////////////////////////////////////////////////////////////////
//
// ------------------------------------------------------------------------
extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS;
extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS;
@ -433,6 +433,8 @@ namespace x86Emitter
extern const Internal::MovhlImplAll<0x16> xMOVH;
extern const Internal::MovhlImplAll<0x12> xMOVL;
// ------------------------------------------------------------------------
extern const Internal::PLogicImplAll<0xdb> xPAND;
extern const Internal::PLogicImplAll<0xdf> xPANDN;
extern const Internal::PLogicImplAll<0xeb> xPOR;
@ -440,9 +442,15 @@ namespace x86Emitter
extern const Internal::SSEAndNotImpl<0x55> xANDN;
extern const Internal::SSELogicImpl<0,0x53> xRCPPS;
extern const Internal::SSELogicImpl<0xf3,0x53> xRCPSS;
extern const Internal::SSEImpl_SS_SD<0x66,0x2e> xUCOMI;
extern const Internal::SSE_rSqrtImpl<0x53> xRCP;
extern const Internal::SSE_rSqrtImpl<0x52> xRSQRT;
extern const Internal::SSE_SqrtImpl<0x51> xSQRT;
extern const Internal::SSEImpl_PSPD_SSSD<0x5f> xMAX;
extern const Internal::SSEImpl_PSPD_SSSD<0x5d> xMIN;
extern const Internal::SSEImpl_Shuffle<0xc6> xSHUF;
// ------------------------------------------------------------------------
extern const Internal::SSECompareImpl<SSE2_Equal> xCMPEQ;
@ -453,5 +461,39 @@ namespace x86Emitter
extern const Internal::SSECompareImpl<SSE2_NotLess> xCMPNLT;
extern const Internal::SSECompareImpl<SSE2_NotLessOrEqual> xCMPNLE;
extern const Internal::SSECompareImpl<SSE2_Ordered> xCMPORD;
// ------------------------------------------------------------------------
// OMG Evil. I went cross-eyed an hour ago doing this.
//
// Extern declarations for the SSE/SSE2 conversion frontends; template args:
// < mandatory prefix, opcode, dest reg type, source reg type, strict mem type >.
extern const Internal::SSEImpl_DestRegForm<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD;
extern const Internal::SSEImpl_DestRegForm<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS;
extern const Internal::SSEImpl_DestRegForm<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ;
extern const Internal::SSEImpl_DestRegForm<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI;
extern const Internal::SSEImpl_DestRegForm<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS;
extern const Internal::SSEImpl_DestRegForm<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD;
extern const Internal::SSEImpl_DestRegForm<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS;
extern const Internal::SSEImpl_DestRegForm<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ;
extern const Internal::SSEImpl_DestRegForm<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD;
extern const Internal::SSEImpl_DestRegForm<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI;
extern const Internal::SSEImpl_DestRegForm<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI;
extern const Internal::SSEImpl_DestRegForm<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS;
// NOTE(review): per the Intel SDM, CVTSI2SD (F2 0F 2A /r) writes an *XMM*
// destination -- xRegisterMMX below looks like a copy/paste slip for
// xRegisterSSE.  Fix together with the matching definition so the extern
// declaration and the instance stay in sync.
extern const Internal::SSEImpl_DestRegForm<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD;
extern const Internal::SSEImpl_DestRegForm<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS;
extern const Internal::SSEImpl_DestRegForm<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD;
extern const Internal::SSEImpl_DestRegForm<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI;
extern const Internal::SSEImpl_DestRegForm<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ;
extern const Internal::SSEImpl_DestRegForm<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI;
extern const Internal::SSEImpl_DestRegForm<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ;
extern const Internal::SSEImpl_DestRegForm<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI;
extern const Internal::SSEImpl_DestRegForm<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI;
extern const Internal::SSEImpl_DestRegForm<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI;
}

View File

@ -97,14 +97,6 @@ using namespace x86Emitter;
ModRM( 0, to, DISP32 ), \
write32( MEMADDR(from, 4 + overb) ) \
#define SSE_SD_RtoM( code, overb ) \
assert( from < iREGCNT_XMM) , \
write8( 0xf2 ), \
RexR(0, from), \
write16( code ), \
ModRM( 0, from, DISP32 ), \
write32( MEMADDR(to, 4 + overb) ) \
#define SSE_SD_RtoR( code ) \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM) , \
write8( 0xf2 ), \
@ -112,30 +104,6 @@ using namespace x86Emitter;
write16( code ), \
ModRM( 3, to, from )
#define CMPPSMtoR( op ) \
SSEMtoR( 0xc20f, 1 ), \
write8( op )
#define CMPPSRtoR( op ) \
SSERtoR( 0xc20f ), \
write8( op )
#define CMPSSMtoR( op ) \
SSE_SS_MtoR( 0xc20f, 1 ), \
write8( op )
#define CMPSSRtoR( op ) \
SSE_SS_RtoR( 0xc20f ), \
write8( op )
#define CMPSDMtoR( op ) \
SSE_SD_MtoR( 0xc20f, 1 ), \
write8( op )
#define CMPSDRtoR( op ) \
SSE_SD_RtoR( 0xc20f ), \
write8( op )
#define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \
emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \
emitterT void sse##_MOV##mod##_XMM_to_M128( uptr to, x86SSERegType from ) { xMOV##mod( (void*)to, xRegisterSSE(from) ); } \
@ -146,17 +114,92 @@ using namespace x86Emitter;
emitterT void sse##_MOV##mod##RtoRmS( x86IntRegType to, x86SSERegType from, x86IntRegType from2, int scale ) \
{ xMOV##mod( ptr[xAddressReg(to)+xAddressReg(from2)], xRegisterSSE(from) ); }
#define DEFINE_LEGACY_PSD_OPCODE( mod ) \
emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PD( xRegisterSSE(to), xRegisterSSE(from) ); }
#define DEFINE_LEGACY_SSSD_OPCODE( mod ) \
emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
#define DEFINE_LEGACY_CMP_OPCODE( comp ) \
emitterT void SSE_CMP##comp##PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_CMP##comp##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_CMP##comp##PD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_CMP##comp##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PD( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE_CMP##comp##SS_M32_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_CMP##comp##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_CMP##comp##SD_M64_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_CMP##comp##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
#define DEFINE_LEGACY_RSQRT_OPCODE(mod) \
emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); }
#define DEFINE_LEGACY_SQRT_OPCODE(mod) \
DEFINE_LEGACY_RSQRT_OPCODE(mod) \
emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \
DEFINE_LEGACY_PSD_OPCODE( mod ) \
DEFINE_LEGACY_SSSD_OPCODE( mod )
DEFINE_LEGACY_MOV_OPCODE( UPS, SSE )
DEFINE_LEGACY_MOV_OPCODE( APS, SSE )
DEFINE_LEGACY_MOV_OPCODE( DQA, SSE2 )
DEFINE_LEGACY_MOV_OPCODE( DQU, SSE2 )
DEFINE_LEGACY_PSD_OPCODE( AND )
DEFINE_LEGACY_PSD_OPCODE( ANDN )
DEFINE_LEGACY_PSD_OPCODE( OR )
DEFINE_LEGACY_PSD_OPCODE( XOR )
//**********************************************************************************/
//MOVAPS: Move aligned Packed Single Precision FP values *
//**********************************************************************************
DEFINE_LEGACY_PSSD_OPCODE( SUB )
DEFINE_LEGACY_PSSD_OPCODE( ADD )
DEFINE_LEGACY_PSSD_OPCODE( MUL )
DEFINE_LEGACY_PSSD_OPCODE( DIV )
emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); }
DEFINE_LEGACY_PSSD_OPCODE( MIN )
DEFINE_LEGACY_PSSD_OPCODE( MAX )
DEFINE_LEGACY_CMP_OPCODE( EQ )
DEFINE_LEGACY_CMP_OPCODE( LT )
DEFINE_LEGACY_CMP_OPCODE( LE )
DEFINE_LEGACY_CMP_OPCODE( UNORD )
DEFINE_LEGACY_CMP_OPCODE( NE )
DEFINE_LEGACY_CMP_OPCODE( NLT )
DEFINE_LEGACY_CMP_OPCODE( NLE )
DEFINE_LEGACY_CMP_OPCODE( ORD )
DEFINE_LEGACY_SSSD_OPCODE( UCOMI )
DEFINE_LEGACY_RSQRT_OPCODE( RCP )
DEFINE_LEGACY_RSQRT_OPCODE( RSQRT )
DEFINE_LEGACY_SQRT_OPCODE( SQRT )
emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { xMOVDZX( xRegisterSSE(to), (void*)from ); }
emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { xMOVDZX( xRegisterSSE(to), xRegister32(from) ); }
// Loads 32 bits from [from+offset] into XMM register 'to', zero-extending the
// upper 96 bits (MOVD m32 -> xmm, via the new-emitter xMOVDZX frontend).
emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
xMOVDZX( xRegisterSSE(to), ptr[xAddressReg(from)+offset] );
}
emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { xMOVD( (void*)to, xRegisterSSE(from) ); }
emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { xMOVD( xRegister32(to), xRegisterSSE(from) ); }
// Stores the low 32 bits of XMM register 'from' to memory at [to+offset]
// (MOVD xmm -> m32).
// Bugfix: the memory operand must be addressed through 'to' -- the integer
// base register -- as in SSE2_MOVSD_XMM_to_Rm; the original passed 'from'
// (the XMM register index) as the address base, so the store was emitted
// through the wrong general-purpose register.
emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset )
{
xMOVD( ptr[xAddressReg(to)+offset], xRegisterSSE(from) );
}
emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVQZX( xRegisterSSE(to), (void*)from ); }
emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVQZX( xRegisterSSE(to), xRegisterSSE(from) ); }
@ -165,9 +208,6 @@ emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) { xM
emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) { xMOVQ( xRegisterSSE(to), xRegisterMMX(from) ); }
//**********************************************************************************/
//MOVSS: Move Scalar Single-Precision FP value *
//**********************************************************************************
emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { xMOVSSZX( xRegisterSSE(to), (void*)from ); }
emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { xMOVSS( (void*)to, xRegisterSSE(from) ); }
emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVSS( xRegisterSSE(to), xRegisterSSE(from) ); }
@ -181,189 +221,69 @@ emitterT void SSE2_MOVSD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int of
emitterT void SSE2_MOVSD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVSD( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); }
emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); }
//**********************************************************************************/
//MOVLPS: Move low Packed Single-Precision FP *
//**********************************************************************************
emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVL.PS( xRegisterSSE(to), (void*)from ); }
emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVL.PS( (void*)to, xRegisterSSE(from) ); }
emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVL.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); }
emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVL.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); }
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVHPS: Move High Packed Single-Precision FP *
//**********************************************************************************
emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVH.PS( xRegisterSSE(to), (void*)from ); }
emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.PS( (void*)to, xRegisterSSE(from) ); }
emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); }
emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); }
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVLHPS: Moved packed Single-Precision FP low to high *
//**********************************************************************************
emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVHLPS: Moved packed Single-Precision FP High to Low *
//**********************************************************************************
emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); }
#define DEFINE_LEGACY_PSD_OPCODE( mod ) \
emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PD( xRegisterSSE(to), xRegisterSSE(from) ); }
#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \
DEFINE_LEGACY_PSD_OPCODE( mod ) \
emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_##mod##SD_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
DEFINE_LEGACY_PSD_OPCODE( AND )
DEFINE_LEGACY_PSD_OPCODE( ANDN )
DEFINE_LEGACY_PSD_OPCODE( OR )
DEFINE_LEGACY_PSD_OPCODE( XOR )
DEFINE_LEGACY_PSSD_OPCODE( SUB )
DEFINE_LEGACY_PSSD_OPCODE( ADD )
DEFINE_LEGACY_PSSD_OPCODE( MUL )
DEFINE_LEGACY_PSSD_OPCODE( DIV )
emitterT void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xRCPPS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { xRCPPS( xRegisterSSE(to), (void*)from ); }
emitterT void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xRCPSS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { xRCPSS( xRegisterSSE(to), (void*)from ); }
////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//Packed Single-Precision FP compare (CMPccPS) *
//**********************************************************************************
#define DEFINE_LEGACY_CMP_OPCODE( comp ) \
emitterT void SSE_CMP##comp##PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_CMP##comp##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_CMP##comp##PD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_CMP##comp##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PD( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE_CMP##comp##SS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SS( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE_CMP##comp##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_CMP##comp##SD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_CMP##comp##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
DEFINE_LEGACY_CMP_OPCODE( EQ )
DEFINE_LEGACY_CMP_OPCODE( LT )
DEFINE_LEGACY_CMP_OPCODE( LE )
DEFINE_LEGACY_CMP_OPCODE( UNORD )
DEFINE_LEGACY_CMP_OPCODE( NE )
DEFINE_LEGACY_CMP_OPCODE( NLT )
DEFINE_LEGACY_CMP_OPCODE( NLE )
DEFINE_LEGACY_CMP_OPCODE( ORD )
emitterT void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from )
emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); }
emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 )
{
RexR(0, to);
write16( 0x2e0f );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
xSHUF.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset], imm8 );
}
emitterT void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
RexRB(0, to, from);
write16( 0x2e0f );
ModRM( 3, to, from );
}
emitterT void SSE_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), (void*)from, imm8 ); }
emitterT void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from )
{
write8(0x66);
RexR(0, to);
write16( 0x2e0f );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
write8(0x66);
RexRB(0, to, from);
write16( 0x2e0f );
ModRM( 3, to, from );
}
emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { xCVTPI2PS( xRegisterSSE(to), (u64*)from ); }
emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { xCVTPI2PS( xRegisterSSE(to), xRegisterMMX(from) ); }
emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { xCVTPS2PI( xRegisterMMX(to), (u64*)from ); }
emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { xCVTPS2PI( xRegisterMMX(to), xRegisterSSE(from) ); }
emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { xCVTTSS2SI( xRegister32(to), (u32*)from ); }
emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xCVTTSS2SI( xRegister32(to), xRegisterSSE(from) ); }
emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { xCVTSI2SS( xRegisterSSE(to), (u32*)from ); }
emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { xCVTSI2SS( xRegisterSSE(to), xRegister32(from) ); }
emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { xCVTSS2SD( xRegisterSSE(to), (u32*)from ); }
emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSS2SD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { xCVTSD2SS( xRegisterSSE(to), (u64*)from ); }
emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSD2SS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTDQ2PS( xRegisterSSE(to), (u128*)from ); }
emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTDQ2PS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTPS2DQ( xRegisterSSE(to), (u128*)from ); }
emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
//////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal *
//**********************************************************************************
emitterT void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); }
emitterT void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x520f ); }
//////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal *
//**********************************************************************************
emitterT void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); }
emitterT void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x520f ); }
////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SQRTPS : Packed Single-Precision FP Square Root *
//**********************************************************************************
emitterT void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); }
emitterT void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x510f ); }
//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SQRTSS : Scalar Single-Precision FP Square Root *
//**********************************************************************************
emitterT void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); }
emitterT void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); }
emitterT void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); }
emitterT void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); }
////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MAXPS: Return Packed Single-Precision FP Maximum *
//**********************************************************************************
emitterT void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); }
emitterT void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); }
emitterT void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5f0f ); }
emitterT void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5f0f ); }
/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MAXSS: Return Scalar Single-Precision FP Maximum *
//**********************************************************************************
emitterT void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); }
emitterT void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); }
emitterT void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 0 ); }
emitterT void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); }
/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion *
//**********************************************************************************
emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); }
emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); }
///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion *
//**********************************************************************************
emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); }
emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); }
//emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); }
// emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); }
//
// emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); }
// emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); }
/*
emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); }
emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
@ -372,8 +292,9 @@ emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from)
write16(0x2c0f);
ModRM(3, to, from);
}
*/
emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); }
/*emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); }
emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
write8(0xf3);
@ -384,50 +305,26 @@ emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from)
emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); }
emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); }
*/
emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); }
/*emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); }
emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); }
///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion *
//**********************************************************************************
emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); }
emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); }
//**********************************************************************************/
//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion *
//**********************************************************************************
emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); }
emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); }
emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); }
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MINPS: Return Packed Single-Precision FP Minimum *
//**********************************************************************************
emitterT void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); }
emitterT void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); }
emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); }*/
emitterT void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); }
emitterT void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5d0f ); }
//////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MINSS: Return Scalar Single-Precision FP Minimum *
//**********************************************************************************
emitterT void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); }
emitterT void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); }
emitterT void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 0 ); }
emitterT void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); }
///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMAXSW: Packed Signed Integer Word Maximum *
//**********************************************************************************
//missing
// SSE_PMAXSW_M64_to_MM
// SSE_PMAXSW_M64_to_MM
// SSE2_PMAXSW_M128_to_XMM
// SSE2_PMAXSW_XMM_to_XMM
emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); }
@ -437,33 +334,11 @@ emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERt
//PMINSW: Packed Signed Integer Word Minimum *
//**********************************************************************************
//missing
// SSE_PMINSW_M64_to_MM
// SSE_PMINSW_M64_to_MM
// SSE2_PMINSW_M128_to_XMM
// SSE2_PMINSW_XMM_to_XMM
emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); }
//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SHUFPS: Shuffle Packed Single-Precision FP Values *
//**********************************************************************************
emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); }
emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); }
emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 )
{
RexRB(0, to, from);
write16(0xc60f);
WriteRmOffsetFrom(to, from, offset);
write8(imm8);
}
//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SHUFPD: Shuffle Packed Double-Precision FP Values *
//**********************************************************************************
emitterT void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); }
emitterT void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); }
////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PSHUFD: Shuffle Packed DoubleWords *
@ -565,43 +440,6 @@ emitterT void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSER
emitterT void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); }
///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVD: Move Dword(32bit) to /from XMM reg *
//**********************************************************************************
emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); }
emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { SSERtoR66(0x6E0F); }
emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from )
{
write8(0x66);
RexRB(0, to, from);
write16( 0x6e0f );
ModRM( 0, to, from);
}
emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
write8(0x66);
RexRB(0, to, from);
write16( 0x6e0f );
WriteRmOffsetFrom(to, from, offset);
}
emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); }
emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); }
emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset )
{
write8(0x66);
RexRB(0, from, to);
write16( 0x7e0f );
WriteRmOffsetFrom(from, to, offset);
}
///////////////////////////////////////////////////////////////////////////////////////
emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); }
// shift right logical

View File

@ -124,6 +124,22 @@ static __forceinline void xWrite( T val )
namespace x86Emitter
{
//////////////////////////////////////////////////////////////////////////////////////////
// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const]
//
// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions
// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache
// and some marginal speed gains as a result. (it's possible someday in the future the per-
// formance of the two instructions could change, so this constant is provided to restore MOVDQA
// use easily at a later time, if needed).
#define ALWAYS_USE_MOVAPS
#ifdef ALWAYS_USE_MOVAPS
static const bool AlwaysUseMovaps = true;
#else
static const bool AlwaysUseMovaps = false;
#endif
/////////////////////////////////////////////////////////////////////////////////////////////
// __emitline - preprocessors definition
//
@ -688,22 +704,6 @@ namespace x86Emitter
#include "implement/test.h"
#include "implement/jmpcall.h"
}
//////////////////////////////////////////////////////////////////////////////////////////
// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const]
//
// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions
// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache
// and some marginal speed gains as a result. (it's possible someday in the future the per-
// formance of the two instructions could change, so this constant is provided to restore MOVDQA
// use easily at a later time, if needed).
#define ALWAYS_USE_MOVAPS
#ifdef ALWAYS_USE_MOVAPS
static const bool AlwaysUseMovaps = true;
#else
static const bool AlwaysUseMovaps = false;
#endif
}
#include "ix86_inlines.inl"