mirror of https://github.com/PCSX2/pcsx2.git
Added PMIN/PMAX/PMUL/PCMP/PSHUF/PUNPCK/UNPCK/PACK instructions to the new emitter [this basically finishes all MMX instructions -- just some SSE2/SSE3 and SSE4 mess left!]
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1035 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
cb57386d85
commit
5c312c36c7
|
@ -134,7 +134,7 @@ public:
|
|||
// This class combines x86 with SSE/SSE2 logic operations (AND, OR, and XOR).
|
||||
// Note: ANDN [AndNot] is handled below separately.
|
||||
//
|
||||
template< G1Type InstType, u8 OpcodeSSE >
|
||||
template< G1Type InstType, u16 OpcodeSSE >
|
||||
class xImpl_G1Logic : public xImpl_Group1<InstType>
|
||||
{
|
||||
public:
|
||||
|
@ -149,7 +149,7 @@ public:
|
|||
// ------------------------------------------------------------------------
|
||||
// This class combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB).
|
||||
//
|
||||
template< G1Type InstType, u8 OpcodeSSE >
|
||||
template< G1Type InstType, u16 OpcodeSSE >
|
||||
class xImpl_G1Arith : public xImpl_G1Logic<InstType, OpcodeSSE >
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -59,7 +59,7 @@ public:
|
|||
// ------------------------------------------------------------------------
|
||||
// This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV.
|
||||
//
|
||||
template< G3Type InstType, u8 OpcodeSSE >
|
||||
template< G3Type InstType, u16 OpcodeSSE >
|
||||
class xImpl_Group3 : public Group3ImplAll<InstType>
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MMX / SSE Helper Functions!
|
||||
|
||||
extern void SimdPrefix( u8 prefix, u8 opcode );
|
||||
extern void SimdPrefix( u8 prefix, u16 opcode );
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// xmm emitter helpers for xmm instruction with prefixes.
|
||||
|
@ -30,21 +30,21 @@ extern void SimdPrefix( u8 prefix, u8 opcode );
|
|||
// instructions violate this "guideline.")
|
||||
//
|
||||
template< typename T, typename T2 >
|
||||
__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
|
||||
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
|
||||
{
|
||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||
ModRM_Direct( to.Id, from.Id );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
|
||||
__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
|
||||
{
|
||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||
EmitSibMagic( reg.Id, sib );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
|
||||
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
|
||||
{
|
||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||
xWriteDisp( reg.Id, data );
|
||||
|
@ -56,21 +56,21 @@ __emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, con
|
|||
// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
|
||||
//
|
||||
template< typename T, typename T2 >
|
||||
__emitinline void writeXMMop( u8 opcode, const xRegister<T>& to, const xRegister<T2>& from )
|
||||
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
|
||||
{
|
||||
SimdPrefix( 0, opcode );
|
||||
ModRM_Direct( to.Id, from.Id );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
void writeXMMop( u8 opcode, const xRegister<T>& reg, const ModSibBase& sib )
|
||||
__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
|
||||
{
|
||||
SimdPrefix( 0, opcode );
|
||||
EmitSibMagic( reg.Id, sib );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__emitinline void writeXMMop( u8 opcode, const xRegister<T>& reg, const void* data )
|
||||
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
|
||||
{
|
||||
SimdPrefix( 0, opcode );
|
||||
xWriteDisp( reg.Id, data );
|
||||
|
@ -80,7 +80,7 @@ __emitinline void writeXMMop( u8 opcode, const xRegister<T>& reg, const void* da
|
|||
// Moves to/from high/low portions of an xmm register.
|
||||
// These instructions cannot be used in reg/reg form.
|
||||
//
|
||||
template< u8 Opcode >
|
||||
template< u16 Opcode >
|
||||
class MovhlImplAll
|
||||
{
|
||||
protected:
|
||||
|
@ -104,7 +104,7 @@ public:
|
|||
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
|
||||
// do something kinda different! Fun!
|
||||
//
|
||||
template< u8 Opcode >
|
||||
template< u16 Opcode >
|
||||
class MovhlImpl_RtoR
|
||||
{
|
||||
public:
|
||||
|
@ -115,7 +115,7 @@ public:
|
|||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template< u8 Prefix, u8 Opcode, u8 OpcodeAlt >
|
||||
template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
|
||||
class MovapsImplAll
|
||||
{
|
||||
public:
|
||||
|
@ -132,49 +132,75 @@ public:
|
|||
// SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for
|
||||
// a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms).
|
||||
//
|
||||
template< u8 Opcode >
|
||||
template< u16 Opcode >
|
||||
class SimdImpl_PackedLogic
|
||||
{
|
||||
public:
|
||||
template< typename T >
|
||||
__forceinline void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
||||
template< typename T >
|
||||
__forceinline void operator()( const xRegisterSIMD<T>& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
||||
template< typename T >
|
||||
__noinline void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
||||
template< typename T > __forceinline
|
||||
void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
||||
template< typename T > __forceinline
|
||||
void operator()( const xRegisterSIMD<T>& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
||||
template< typename T > __forceinline
|
||||
void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
||||
|
||||
SimdImpl_PackedLogic() {} //GCWho?
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE-only logic operations that have reg,reg/rm forms only,
|
||||
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
|
||||
// like ANDPS/ANDPD
|
||||
//
|
||||
template< u8 Prefix, u8 Opcode >
|
||||
template< u8 Prefix, u16 Opcode >
|
||||
class SimdImpl_DestRegSSE
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
|
||||
SimdImpl_DestRegSSE() {} //GCWho?
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
|
||||
// (PSHUFD / PSHUFHW / etc).
|
||||
//
|
||||
template< u8 Prefix, u16 Opcode >
|
||||
class SimdImpl_DestRegImmSSE
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
|
||||
SimdImpl_DestRegImmSSE() {} //GCWho?
|
||||
};
|
||||
|
||||
template< u8 Prefix, u16 Opcode >
|
||||
class SimdImpl_DestRegImmMMX
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
|
||||
SimdImpl_DestRegImmMMX() {} //GCWho?
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing MMX/SSE operations that have reg,reg/rm forms only,
|
||||
// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops).
|
||||
//
|
||||
template< u8 Prefix, u8 Opcode >
|
||||
template< u8 Prefix, u16 Opcode >
|
||||
class SimdImpl_DestRegEither
|
||||
{
|
||||
public:
|
||||
template< typename DestOperandType >
|
||||
__forceinline void operator()( const xRegisterSIMD<DestOperandType>& to, const xRegisterSIMD<DestOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
template< typename DestOperandType >
|
||||
__forceinline void operator()( const xRegisterSIMD<DestOperandType>& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
template< typename DestOperandType >
|
||||
__noinline void operator()( const xRegisterSIMD<DestOperandType>& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
template< typename DestOperandType > __forceinline
|
||||
void operator()( const xRegisterSIMD<DestOperandType>& to, const xRegisterSIMD<DestOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
template< typename DestOperandType > __forceinline
|
||||
void operator()( const xRegisterSIMD<DestOperandType>& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
template< typename DestOperandType > __forceinline
|
||||
void operator()( const xRegisterSIMD<DestOperandType>& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
|
||||
SimdImpl_DestRegEither() {} //GCWho?
|
||||
};
|
||||
|
@ -183,19 +209,19 @@ public:
|
|||
// For implementing MMX/SSE operations which the destination *must* be a register, but the source
|
||||
// can be regDirect or ModRM (indirect).
|
||||
//
|
||||
template< u8 Prefix, u8 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
|
||||
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
|
||||
class SimdImpl_DestRegStrict
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||
__noinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||
|
||||
SimdImpl_DestRegStrict() {} //GCWho?
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template< u8 OpcodeSSE >
|
||||
template< u16 OpcodeSSE >
|
||||
class SimdImpl_PSPD_SSSD
|
||||
{
|
||||
public:
|
||||
|
@ -209,7 +235,7 @@ public:
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
//
|
||||
template< u8 OpcodeSSE >
|
||||
template< u16 OpcodeSSE >
|
||||
class SimdImpl_AndNot
|
||||
{
|
||||
public:
|
||||
|
@ -221,7 +247,7 @@ public:
|
|||
// ------------------------------------------------------------------------
|
||||
// For instructions that have SS/SD form only (UCOMI, etc)
|
||||
// AltPrefix - prefix used for doubles (SD form).
|
||||
template< u8 AltPrefix, u8 OpcodeSSE >
|
||||
template< u8 AltPrefix, u16 OpcodeSSE >
|
||||
class SimdImpl_SS_SD
|
||||
{
|
||||
public:
|
||||
|
@ -232,7 +258,7 @@ public:
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
|
||||
template< u8 OpcodeSSE >
|
||||
template< u16 OpcodeSSE >
|
||||
class SimdImpl_rSqrt
|
||||
{
|
||||
public:
|
||||
|
@ -243,7 +269,7 @@ public:
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
// For instructions that have PS/SS/SD form only (most commonly Sqrt functions)
|
||||
template< u8 OpcodeSSE >
|
||||
template< u16 OpcodeSSE >
|
||||
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
|
||||
{
|
||||
public:
|
||||
|
@ -252,7 +278,7 @@ public:
|
|||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template< u8 OpcodeSSE >
|
||||
template< u16 OpcodeSSE >
|
||||
class SimdImpl_Shuffle
|
||||
{
|
||||
protected:
|
||||
|
@ -296,7 +322,7 @@ public:
|
|||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
//
|
||||
template< u8 Opcode1, u8 OpcodeImm, u8 Modcode >
|
||||
template< u16 Opcode1, u16 OpcodeImm, u8 Modcode >
|
||||
class SimdImpl_Shift
|
||||
{
|
||||
public:
|
||||
|
@ -330,17 +356,27 @@ public:
|
|||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template< u8 OpcodeBase1, u8 OpcodeBaseImm, u8 Modcode >
|
||||
class SimdImpl_ShiftAll
|
||||
// Used for PSRA
|
||||
template< u16 OpcodeBase1, u8 Modcode >
|
||||
class SimdImpl_ShiftWithoutQ
|
||||
{
|
||||
public:
|
||||
const SimdImpl_Shift<OpcodeBase1+1,OpcodeBaseImm+1,Modcode> W;
|
||||
const SimdImpl_Shift<OpcodeBase1+2,OpcodeBaseImm+2,Modcode> D;
|
||||
const SimdImpl_Shift<OpcodeBase1+3,OpcodeBaseImm+3,Modcode> Q;
|
||||
const SimdImpl_Shift<OpcodeBase1+1,0x71,Modcode> W;
|
||||
const SimdImpl_Shift<OpcodeBase1+2,0x72,Modcode> D;
|
||||
|
||||
SimdImpl_ShiftWithoutQ() {}
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template< u16 OpcodeBase1, u8 Modcode >
|
||||
class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
|
||||
{
|
||||
public:
|
||||
const SimdImpl_Shift<OpcodeBase1+3,0x73,Modcode> Q;
|
||||
|
||||
void DQ( const xRegisterSSE& to, u8 imm ) const
|
||||
{
|
||||
SimdPrefix( 0x66, OpcodeBaseImm+3 );
|
||||
SimdPrefix( 0x66, 0x73 );
|
||||
ModRM( 3, (int)Modcode+1, to.Id );
|
||||
xWrite<u8>( imm );
|
||||
}
|
||||
|
@ -350,26 +386,261 @@ public:
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
template< u8 OpcodeB, u8 OpcodeS, u8 OpcodeUS, u8 OpcodeQ >
|
||||
template< u16 OpcodeB, u16 OpcodeQ >
|
||||
class SimdImpl_AddSub
|
||||
{
|
||||
public:
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB> B;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+1> W;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+2> D;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;
|
||||
|
||||
// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeS> SB;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;
|
||||
|
||||
// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeS+1> SW;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;
|
||||
|
||||
// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeUS> USB;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB> USB;
|
||||
|
||||
// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeUS+1> USW;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
|
||||
|
||||
SimdImpl_AddSub() {}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
class SimdImpl_PMul
|
||||
{
|
||||
public:
|
||||
const SimdImpl_DestRegEither<0x66,0xd5> LW;
|
||||
const SimdImpl_DestRegEither<0x66,0xe5> HW;
|
||||
const SimdImpl_DestRegEither<0x66,0xe4> HUW;
|
||||
const SimdImpl_DestRegEither<0x66,0xf4> UDQ;
|
||||
|
||||
// [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
|
||||
// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
|
||||
// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
|
||||
// bits. Rounding is always performed by adding 1 to the least significant bit of the
|
||||
// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
|
||||
// immediately to the right of the most significant bit of each 18-bit intermediate
|
||||
// result and packed to the destination operand.
|
||||
//
|
||||
// Both operands can be MMX or XMM registers. Source can be register or memory.
|
||||
//
|
||||
const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;
|
||||
|
||||
// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
|
||||
// the low 32 bits of each product in xmm1.
|
||||
const SimdImpl_DestRegSSE<0x66,0x4038> LD;
|
||||
|
||||
// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
|
||||
const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
|
||||
|
||||
SimdImpl_PMul() {}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
class SimdImpl_PCompare
|
||||
{
|
||||
public:
|
||||
SimdImpl_PCompare() {}
|
||||
|
||||
// Compare packed bytes for equality.
|
||||
// If a data element in dest is equal to the corresponding data element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const SimdImpl_DestRegEither<0x66,0x74> EQB;
|
||||
|
||||
// Compare packed words for equality.
|
||||
// If a data element in dest is equal to the corresponding data element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const SimdImpl_DestRegEither<0x66,0x75> EQW;
|
||||
|
||||
// Compare packed doublewords [32-bits] for equality.
|
||||
// If a data element in dest is equal to the corresponding data element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const SimdImpl_DestRegEither<0x66,0x76> EQD;
|
||||
|
||||
// Compare packed signed bytes for greater than.
|
||||
// If a data element in dest is greater than the corresponding data element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const SimdImpl_DestRegEither<0x66,0x64> GTB;
|
||||
|
||||
// Compare packed signed words for greater than.
|
||||
// If a data element in dest is greater than the corresponding data element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const SimdImpl_DestRegEither<0x66,0x65> GTW;
|
||||
|
||||
// Compare packed signed doublewords [32-bits] for greater than.
|
||||
// If a data element in dest is greater than the corresponding data element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const SimdImpl_DestRegEither<0x66,0x66> GTD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
template< u8 Opcode1, u16 Opcode2 >
|
||||
class SimdImpl_PMinMax
|
||||
{
|
||||
public:
|
||||
SimdImpl_PMinMax() {}
|
||||
|
||||
// Compare packed unsigned byte integers in dest to src and store packed min/max
|
||||
// values in dest.
|
||||
// Operation can be performed on either MMX or SSE operands.
|
||||
const SimdImpl_DestRegEither<0x66,Opcode1> UB;
|
||||
|
||||
// Compare packed signed word integers in dest to src and store packed min/max
|
||||
// values in dest.
|
||||
// Operation can be performed on either MMX or SSE operands.
|
||||
const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW;
|
||||
|
||||
// [SSE-4.1] Compare packed signed byte integers in dest to src and store
|
||||
// packed min/max values in dest. (SSE operands only)
|
||||
const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB;
|
||||
|
||||
// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
|
||||
// packed min/max values in dest. (SSE operands only)
|
||||
const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD;
|
||||
|
||||
// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
|
||||
// packed min/max values in dest. (SSE operands only)
|
||||
const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW;
|
||||
|
||||
// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
|
||||
// packed min/max values in dest. (SSE operands only)
|
||||
const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
class SimdImpl_PShuffle
|
||||
{
|
||||
public:
|
||||
SimdImpl_PShuffle() {}
|
||||
|
||||
// Copies words from src and inserts them into dest at word locations selected with
|
||||
// the order operand (8 bit immediate).
|
||||
const SimdImpl_DestRegImmMMX<0x00,0x70> W;
|
||||
|
||||
// Copies doublewords from src and inserts them into dest at dword locations selected
|
||||
// with the order operand (8 bit immediate).
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x70> D;
|
||||
|
||||
// Copies words from the low quadword of src and inserts them into the low quadword
|
||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||
// The high quadword of src is copied to the high quadword of dest.
|
||||
const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;
|
||||
|
||||
// Copies words from the high quadword of src and inserts them into the high quadword
|
||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||
// The low quadword of src is copied to the low quadword of dest.
|
||||
const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;
|
||||
|
||||
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
||||
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
||||
// shuffle control mask is set, then constant zero is written in the result byte.
|
||||
// Each byte in the shuffle control mask forms an index to permute the corresponding
|
||||
// byte in dest. The value of each index is the least significant 4 bits (128-bit
|
||||
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
||||
//
|
||||
// Operands can be MMX or XMM registers.
|
||||
const SimdImpl_DestRegEither<0x66,0x0038> B;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
class SimdImpl_PUnpack
|
||||
{
|
||||
public:
|
||||
SimdImpl_PUnpack() {}
|
||||
|
||||
// Unpack and interleave low-order bytes from src and dest into dest.
|
||||
const SimdImpl_DestRegEither<0x66,0x60> LBW;
|
||||
// Unpack and interleave low-order words from src and dest into dest.
|
||||
const SimdImpl_DestRegEither<0x66,0x61> LWD;
|
||||
// Unpack and interleave low-order doublewords from src and dest into dest.
|
||||
const SimdImpl_DestRegEither<0x66,0x62> LDQ;
|
||||
// Unpack and interleave low-order quadwords from src and dest into dest.
|
||||
const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;
|
||||
|
||||
// Unpack and interleave high-order bytes from src and dest into dest.
|
||||
const SimdImpl_DestRegEither<0x66,0x68> HBW;
|
||||
// Unpack and interleave high-order words from src and dest into dest.
|
||||
const SimdImpl_DestRegEither<0x66,0x69> HWD;
|
||||
// Unpack and interleave high-order doublewords from src and dest into dest.
|
||||
const SimdImpl_DestRegEither<0x66,0x6a> HDQ;
|
||||
// Unpack and interleave high-order quadwords from src and dest into dest.
|
||||
const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Pack with Signed or Unsigned Saturation
|
||||
//
|
||||
class SimdImpl_Pack
|
||||
{
|
||||
public:
|
||||
SimdImpl_Pack() {}
|
||||
|
||||
// Converts packed signed word integers from src and dest into packed signed
|
||||
// byte integers in dest, using signed saturation.
|
||||
const SimdImpl_DestRegEither<0x66,0x63> SSWB;
|
||||
|
||||
// Converts packed signed dword integers from src and dest into packed signed
|
||||
// word integers in dest, using signed saturation.
|
||||
const SimdImpl_DestRegEither<0x66,0x6b> SSDW;
|
||||
|
||||
// Converts packed signed word integers from src and dest into packed unsigned
|
||||
// byte integers in dest, using unsigned saturation.
|
||||
const SimdImpl_DestRegEither<0x66,0x67> USWB;
|
||||
|
||||
// [SSE-4.1] Converts packed signed dword integers from src and dest into packed
|
||||
// unsigned word integers in dest, using unsigned saturation.
|
||||
const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
class SimdImpl_Unpack
|
||||
{
|
||||
public:
|
||||
SimdImpl_Unpack() {}
|
||||
|
||||
// Unpacks the high doubleword [single-precision] values from src and dest into
|
||||
// dest, such that the result of dest looks like this:
|
||||
// dest[0] <- dest[2]
|
||||
// dest[1] <- src[2]
|
||||
// dest[2] <- dest[3]
|
||||
// dest[3] <- src[3]
|
||||
//
|
||||
const SimdImpl_DestRegSSE<0x00,0x15> HPS;
|
||||
|
||||
// Unpacks the high quadword [double-precision] values from src and dest into
|
||||
// dest, such that the result of dest looks like this:
|
||||
// dest.lo <- dest.hi
|
||||
// dest.hi <- src.hi
|
||||
//
|
||||
const SimdImpl_DestRegSSE<0x66,0x15> HPD;
|
||||
|
||||
// Unpacks the low doubleword [single-precision] values from src and dest into
|
||||
// dest, such that the result of dest looks like this:
|
||||
// dest[3] <- src[1]
|
||||
// dest[2] <- dest[1]
|
||||
// dest[1] <- src[0]
|
||||
// dest[0] <- dest[0]
|
||||
//
|
||||
const SimdImpl_DestRegSSE<0x00,0x14> LPS;
|
||||
|
||||
// Unpacks the low quadword [double-precision] values from src and dest into
|
||||
// dest, effectively moving the low portion of src into the upper portion of dest.
|
||||
// The result of dest is loaded as such:
|
||||
// dest.hi <- src.lo
|
||||
// dest.lo <- dest.lo [remains unchanged!]
|
||||
//
|
||||
const SimdImpl_DestRegSSE<0x66,0x14> LPD;
|
||||
};
|
||||
|
||||
|
|
|
@ -641,16 +641,35 @@ __emitinline void xBSWAP( const xRegister32& to )
|
|||
// MMX / XMM Instructions
|
||||
// (these will get put in their own file later)
|
||||
|
||||
__emitinline void Internal::SimdPrefix( u8 prefix, u8 opcode )
|
||||
// If the upper 8 bits of opcode are zero, the opcode is treated as a u8.
|
||||
// The upper bits are non-zero, the opcode is assumed 16 bit (and the upper bits are checked aginst
|
||||
// 0x38, which is the only valid high word for 16 bit opcodes as such)
|
||||
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
|
||||
{
|
||||
if( prefix != 0 )
|
||||
{
|
||||
if( (opcode & 0xff00) != 0 )
|
||||
{
|
||||
jASSUME( (opcode & 0xff00) == 0x3800 );
|
||||
xWrite<u32>( (opcode<<16) | (0x0f00 | prefix) );
|
||||
}
|
||||
else
|
||||
{
|
||||
xWrite<u16>( 0x0f00 | prefix );
|
||||
xWrite<u8>( opcode );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( (opcode & 0xff00) != 0 )
|
||||
{
|
||||
jASSUME( (opcode & 0xff00) == 0x3800 );
|
||||
xWrite<u16>( opcode );
|
||||
}
|
||||
else
|
||||
xWrite<u16>( (opcode<<8) | 0x0f );
|
||||
}
|
||||
}
|
||||
|
||||
const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS;
|
||||
const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
|
||||
|
@ -735,11 +754,21 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
const SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL;
|
||||
const SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL;
|
||||
const SimdImpl_ShiftAll<0xd0, 2> xPSRL;
|
||||
const SimdImpl_ShiftAll<0xf0, 6> xPSLL;
|
||||
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||
|
||||
const SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD;
|
||||
const SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB;
|
||||
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||
const SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
|
||||
const SimdImpl_PMinMax<0xde,0x3c> xPMAX;
|
||||
const SimdImpl_PMinMax<0xda,0x38> xPMIN;
|
||||
|
||||
const SimdImpl_PMul xPMUL;
|
||||
const SimdImpl_PCompare xPCMP;
|
||||
const SimdImpl_PShuffle xPSHUF;
|
||||
const SimdImpl_PUnpack xPUNPCK;
|
||||
const SimdImpl_Unpack xUNPCK;
|
||||
const SimdImpl_Pack xPACK;
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -497,11 +497,20 @@ namespace x86Emitter
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const Internal::SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL;
|
||||
extern const Internal::SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL;
|
||||
extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL;
|
||||
extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL;
|
||||
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||
|
||||
extern const Internal::SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD;
|
||||
extern const Internal::SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB;
|
||||
extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||
extern const Internal::SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
|
||||
extern const Internal::SimdImpl_PMinMax<0xde,0x3c> xPMAX;
|
||||
extern const Internal::SimdImpl_PMinMax<0xda,0x38> xPMIN;
|
||||
|
||||
extern const Internal::SimdImpl_PMul xPMUL;
|
||||
extern const Internal::SimdImpl_PCompare xPCMP;
|
||||
extern const Internal::SimdImpl_PShuffle xPSHUF;
|
||||
extern const Internal::SimdImpl_PUnpack xPUNPCK;
|
||||
extern const Internal::SimdImpl_Unpack xUNPCK;
|
||||
extern const Internal::SimdImpl_Pack xPACK;
|
||||
}
|
||||
|
||||
|
|
|
@ -938,12 +938,6 @@ extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from);
|
|||
// emms
|
||||
extern void EMMS( void );
|
||||
|
||||
//**********************************************************************************/
|
||||
//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits
|
||||
//**********************************************************************************
|
||||
extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from);
|
||||
extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from);
|
||||
|
||||
extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from);
|
||||
|
||||
//*********************
|
||||
|
|
|
@ -41,6 +41,7 @@ emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( x
|
|||
emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); }
|
||||
|
||||
emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); }
|
||||
emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); }
|
||||
|
||||
#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \
|
||||
emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \
|
||||
|
@ -75,6 +76,8 @@ DEFINE_LEGACY_LOGIC_OPCODE( XOR )
|
|||
|
||||
DEFINE_LEGACY_SHIFT_OPCODE( SLL )
|
||||
DEFINE_LEGACY_SHIFT_OPCODE( SRL )
|
||||
DEFINE_LEGACY_SHIFT_STUFF( SRA, D )
|
||||
DEFINE_LEGACY_SHIFT_STUFF( SRA, W )
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, B )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, W )
|
||||
|
@ -94,136 +97,37 @@ DEFINE_LEGACY_ARITHMETIC( SUB, SW )
|
|||
DEFINE_LEGACY_ARITHMETIC( SUB, USB )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, USW )
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQB );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQW );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQD );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTB );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTW );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTD );
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW );
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD );
|
||||
|
||||
|
||||
// Legacy entry point for PMULUDQ with an MMX destination and a memory source.
// Forwards to the new emitter; `from` is passed through as a raw pointer operand.
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from )
{
	xPMUL.UDQ( xRegisterMMX( to ), (void*)from );
}
|
||||
// Legacy entry point for PMULUDQ between two MMX registers.
// Forwards to the new emitter's xPMUL.UDQ form.
emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
	xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) );
}
|
||||
|
||||
// Legacy entry point for PSHUFW between two MMX registers.
// imm8 is handed to xPSHUF.W unchanged as the shuffle-control byte.
emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
{
	xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 );
}
|
||||
// Legacy entry point for PSHUFW with an MMX destination and a memory source.
// `from` is passed through as a raw pointer operand; imm8 is forwarded unchanged.
emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
{
	xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 );
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* emms */
|
||||
emitterT void EMMS()
|
||||
{
|
||||
write16( 0x770F );
|
||||
}
|
||||
|
||||
// pmuludq m64 to r64 (sse2 only?)
|
||||
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from )
|
||||
{
|
||||
write16( 0xF40F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
// pmuludq r64 to r64 (sse2 only?)
|
||||
emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0xF40F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0x740F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0x750F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0x760F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PCMPEQDMtoR( x86MMXRegType to, uptr from )
|
||||
{
|
||||
write16( 0x760F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
emitterT void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0x640F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0x650F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0x660F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from )
|
||||
{
|
||||
write16( 0x660F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
emitterT void PSRAWItoR( x86MMXRegType to, u8 from )
|
||||
{
|
||||
write16( 0x710F );
|
||||
ModRM( 3, 4 , to );
|
||||
write8( from );
|
||||
}
|
||||
|
||||
emitterT void PSRADItoR( x86MMXRegType to, u8 from )
|
||||
{
|
||||
write16( 0x720F );
|
||||
ModRM( 3, 4 , to );
|
||||
write8( from );
|
||||
}
|
||||
|
||||
emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0xE20F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0x6A0F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PUNPCKHDQMtoR( x86MMXRegType to, uptr from )
|
||||
{
|
||||
write16( 0x6A0F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
emitterT void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from )
|
||||
{
|
||||
write16( 0x620F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from )
|
||||
{
|
||||
write16( 0x620F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
// untested
|
||||
emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
|
||||
{
|
||||
write16( 0x630F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
|
||||
{
|
||||
write16( 0x6B0F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 )
|
||||
{
|
||||
if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3);
|
||||
|
@ -231,20 +135,3 @@ emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 )
|
|||
ModRM( 3, to, from );
|
||||
write8( imm8 );
|
||||
}
|
||||
|
||||
emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
|
||||
{
|
||||
write16(0x700f);
|
||||
ModRM( 3, to, from );
|
||||
write8(imm8);
|
||||
}
|
||||
|
||||
emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
|
||||
{
|
||||
write16( 0x700f );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
write8(imm8);
|
||||
}
|
||||
|
||||
emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); }
|
||||
|
|
|
@ -52,28 +52,6 @@ using namespace x86Emitter;
|
|||
write8( 0x66 ), \
|
||||
SSERtoR( code )
|
||||
|
||||
#define _SSERtoR66( code ) \
|
||||
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
|
||||
write8( 0x66 ), \
|
||||
RexRB(0, from, to), \
|
||||
write16( code ), \
|
||||
ModRM( 3, from, to )
|
||||
|
||||
#define SSE_SS_RtoR( code ) \
|
||||
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
|
||||
write8( 0xf3 ), \
|
||||
RexRB(0, to, from), \
|
||||
write16( code ), \
|
||||
ModRM( 3, to, from )
|
||||
|
||||
#define SSE_SD_MtoR( code, overb ) \
|
||||
assert( to < iREGCNT_XMM ) , \
|
||||
write8( 0xf2 ), \
|
||||
RexR(0, to), \
|
||||
write16( code ), \
|
||||
ModRM( 0, to, DISP32 ), \
|
||||
write32( MEMADDR(from, 4 + overb) )
|
||||
|
||||
#define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \
|
||||
emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \
|
||||
emitterT void sse##_MOV##mod##_XMM_to_M128( uptr to, x86SSERegType from ) { xMOV##mod( (void*)to, xRegisterSSE(from) ); } \
|
||||
|
@ -117,6 +95,11 @@ using namespace x86Emitter;
|
|||
emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \
|
||||
emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
||||
#define DEFINE_LEGACY_OP128( mod, sub ) \
|
||||
emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
|
||||
emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); }
|
||||
|
||||
|
||||
#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \
|
||||
DEFINE_LEGACY_PSD_OPCODE( mod ) \
|
||||
DEFINE_LEGACY_SSSD_OPCODE( mod )
|
||||
|
@ -153,6 +136,24 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP )
|
|||
DEFINE_LEGACY_RSQRT_OPCODE( RSQRT )
|
||||
DEFINE_LEGACY_SQRT_OPCODE( SQRT )
|
||||
|
||||
DEFINE_LEGACY_OP128( PMUL, LW )
|
||||
DEFINE_LEGACY_OP128( PMUL, HW )
|
||||
DEFINE_LEGACY_OP128( PMUL, UDQ )
|
||||
|
||||
DEFINE_LEGACY_OP128( PMAX, SW )
|
||||
DEFINE_LEGACY_OP128( PMAX, UB )
|
||||
DEFINE_LEGACY_OP128( PMIN, SW )
|
||||
DEFINE_LEGACY_OP128( PMIN, UB )
|
||||
|
||||
DEFINE_LEGACY_OP128( UNPCK, LPS )
|
||||
DEFINE_LEGACY_OP128( UNPCK, HPS )
|
||||
DEFINE_LEGACY_OP128( PUNPCK, LQDQ )
|
||||
DEFINE_LEGACY_OP128( PUNPCK, HQDQ )
|
||||
|
||||
DEFINE_LEGACY_OP128( PACK, SSWB )
|
||||
DEFINE_LEGACY_OP128( PACK, SSDW )
|
||||
DEFINE_LEGACY_OP128( PACK, USWB )
|
||||
|
||||
|
||||
emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
@ -216,87 +217,56 @@ emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int of
|
|||
emitterT void SSE_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
||||
emitterT void SSE_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), (void*)from, imm8 ); }
|
||||
|
||||
|
||||
emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { xCVTPI2PS( xRegisterSSE(to), (u64*)from ); }
|
||||
emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { xCVTPI2PS( xRegisterSSE(to), xRegisterMMX(from) ); }
|
||||
|
||||
emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { xCVTPS2PI( xRegisterMMX(to), (u64*)from ); }
|
||||
emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { xCVTPS2PI( xRegisterMMX(to), xRegisterSSE(from) ); }
|
||||
|
||||
emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { xCVTTSS2SI( xRegister32(to), (u32*)from ); }
|
||||
emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xCVTTSS2SI( xRegister32(to), xRegisterSSE(from) ); }
|
||||
|
||||
emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { xCVTSI2SS( xRegisterSSE(to), (u32*)from ); }
|
||||
emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { xCVTSI2SS( xRegisterSSE(to), xRegister32(from) ); }
|
||||
|
||||
emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { xCVTSS2SD( xRegisterSSE(to), (u32*)from ); }
|
||||
emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSS2SD( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
||||
emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { xCVTSD2SS( xRegisterSSE(to), (u64*)from ); }
|
||||
emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSD2SS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
||||
emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTDQ2PS( xRegisterSSE(to), (u128*)from ); }
|
||||
emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTDQ2PS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
||||
emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTPS2DQ( xRegisterSSE(to), (u128*)from ); }
|
||||
emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
||||
emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
||||
// Legacy entry point for PMAXSW on MMX registers; forwards to xPMAX.SW.
emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from )
{
	xPMAX.SW( xRegisterMMX(to), xRegisterMMX(from) );
}
|
||||
// Legacy entry point for PMINSW (packed signed word minimum) on MMX registers.
// This must dispatch to xPMIN, not xPMAX: the hand-encoded version this wrapper
// replaces emitted opcode 0F EA (PMINSW), whereas the PMAXSW wrapper's original
// emitted 0F EE. Calling xPMAX.SW here would silently generate a maximum instead
// of a minimum.
emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ) { xPMIN.SW( xRegisterMMX(to), xRegisterMMX(from) ); }
|
||||
|
||||
emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.D( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
||||
emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.D( xRegisterSSE(to), (void*)from, imm8 ); }
|
||||
emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.LW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
||||
emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.LW( xRegisterSSE(to), (void*)from, imm8 ); }
|
||||
emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
||||
emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); }
|
||||
|
||||
emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.SD( xRegisterSSE(to), (void*)from ); }
|
||||
emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.SD( xRegisterSSE(to), (void*)from ); }
|
||||
|
||||
emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.UD( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.UD( xRegisterSSE(to), (void*)from ); }
|
||||
emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.UD( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.UD( xRegisterSSE(to), (void*)from ); }
|
||||
|
||||
// Legacy entry point for PMULDQ between two XMM registers; forwards to xPMUL.DQ.
emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) );
}
|
||||
|
||||
emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.LPS( xRegisterSSE(to), (void*)from ); }
|
||||
emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.LPS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.HPS( xRegisterSSE(to), (void*)from ); }
|
||||
emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.HPS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//PMAXSW: Packed Signed Integer Word Maximum *
|
||||
//**********************************************************************************
|
||||
//missing
|
||||
// SSE_PMAXSW_M64_to_MM
|
||||
// SSE2_PMAXSW_M128_to_XMM
|
||||
// SSE2_PMAXSW_XMM_to_XMM
|
||||
emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); }
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//PMINSW: Packed Signed Integer Word Minimum *
|
||||
//**********************************************************************************
|
||||
//missing
|
||||
// SSE_PMINSW_M64_to_MM
|
||||
// SSE2_PMINSW_M128_to_XMM
|
||||
// SSE2_PMINSW_XMM_to_XMM
|
||||
emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//PSHUFD: Shuffle Packed DoubleWords *
|
||||
//**********************************************************************************
|
||||
emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
|
||||
{
|
||||
SSERtoR66( 0x700F );
|
||||
write8( imm8 );
|
||||
}
|
||||
emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); }
|
||||
|
||||
emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); }
|
||||
emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); }
|
||||
emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); }
|
||||
emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); }
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data *
|
||||
//**********************************************************************************
|
||||
emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); }
|
||||
emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data *
|
||||
//**********************************************************************************
|
||||
emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); }
|
||||
emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); }
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//STMXCSR : Store Streaming SIMD Extension Control/Status *
|
||||
|
@ -317,23 +287,6 @@ emitterT void SSE_LDMXCSR( uptr from ) {
|
|||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//PCMPxx: Compare Packed Integers *
|
||||
//**********************************************************************************
|
||||
emitterT void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); }
|
||||
emitterT void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); }
|
||||
emitterT void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); }
|
||||
emitterT void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); }
|
||||
emitterT void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); }
|
||||
emitterT void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); }
|
||||
emitterT void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); }
|
||||
emitterT void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); }
|
||||
emitterT void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); }
|
||||
emitterT void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); }
|
||||
emitterT void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); }
|
||||
emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//**********************************************************************************/
|
||||
//PEXTRW,PINSRW: Packed Extract/Insert Word *
|
||||
|
@ -341,88 +294,6 @@ emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66(
|
|||
emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); }
|
||||
emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); }
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// shift right arithmetic
|
||||
|
||||
emitterT void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); }
|
||||
emitterT void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); }
|
||||
emitterT void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)
|
||||
{
|
||||
write8( 0x66 );
|
||||
RexB(0, to);
|
||||
write16( 0x710F );
|
||||
ModRM( 3, 4 , to );
|
||||
write8( imm8 );
|
||||
}
|
||||
|
||||
emitterT void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); }
|
||||
emitterT void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); }
|
||||
emitterT void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)
|
||||
{
|
||||
write8( 0x66 );
|
||||
RexB(0, to);
|
||||
write16( 0x720F );
|
||||
ModRM( 3, 4 , to );
|
||||
write8( imm8 );
|
||||
}
|
||||
|
||||
emitterT void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); }
|
||||
emitterT void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); }
|
||||
|
||||
emitterT void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); }
|
||||
emitterT void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); }
|
||||
|
||||
emitterT void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); }
|
||||
emitterT void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEA0F ); }
|
||||
|
||||
emitterT void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); }
|
||||
emitterT void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); }
|
||||
|
||||
//**********************************************************************************/
|
||||
//PACKSSWB,PACKSSDW: Pack Saturate Signed Word
|
||||
//**********************************************************************************
|
||||
emitterT void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); }
|
||||
emitterT void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); }
|
||||
emitterT void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); }
|
||||
emitterT void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); }
|
||||
|
||||
emitterT void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); }
|
||||
emitterT void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); }
|
||||
|
||||
//**********************************************************************************/
|
||||
//PUNPCKHWD: Unpack 16bit high
|
||||
//**********************************************************************************
|
||||
emitterT void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); }
|
||||
emitterT void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); }
|
||||
|
||||
emitterT void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); }
|
||||
emitterT void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); }
|
||||
|
||||
emitterT void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); }
|
||||
emitterT void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); }
|
||||
emitterT void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); }
|
||||
emitterT void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); }
|
||||
|
||||
emitterT void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); }
|
||||
emitterT void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); }
|
||||
emitterT void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); }
|
||||
emitterT void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); }
|
||||
|
||||
emitterT void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); }
|
||||
emitterT void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); }
|
||||
|
||||
emitterT void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); }
|
||||
emitterT void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); }
|
||||
|
||||
emitterT void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); }
|
||||
emitterT void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); }
|
||||
emitterT void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); }
|
||||
emitterT void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); }
|
||||
|
||||
emitterT void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); }
|
||||
emitterT void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); }
|
||||
|
||||
emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); }
|
||||
emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); }
|
||||
|
||||
|
@ -589,82 +460,6 @@ emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 im
|
|||
write8(imm8);
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x3D380F);
|
||||
ModRM(3, to, from);
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x39380F);
|
||||
ModRM(3, to, from);
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x3F380F);
|
||||
ModRM(3, to, from);
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x3B380F);
|
||||
ModRM(3, to, from);
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexR(0, to);
|
||||
write24(0x3D380F);
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32(MEMADDR(from, 4));
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexR(0, to);
|
||||
write24(0x39380F);
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32(MEMADDR(from, 4));
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexR(0, to);
|
||||
write24(0x3F380F);
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32(MEMADDR(from, 4));
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexR(0, to);
|
||||
write24(0x3B380F);
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32(MEMADDR(from, 4));
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x28380F);
|
||||
ModRM(3, to, from);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions)
|
||||
//
|
||||
|
|
Loading…
Reference in New Issue