Added PMIN/PMAX/PMUL/PCMP/PSHUF/PUNPCK/UNPCK/PACK instructions to the new emitter [this basically finishes all MMX instructions -- just some SSE2/SSE3 and SSE4 mess left!]

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1035 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-04-21 01:40:45 +00:00
parent cb57386d85
commit 5c312c36c7
8 changed files with 478 additions and 493 deletions

View File

@ -134,7 +134,7 @@ public:
// This class combines x86 with SSE/SSE2 logic operations (ADD, OR, and NOT).
// Note: ANDN [AndNot] is handled below separately.
//
template< G1Type InstType, u8 OpcodeSSE >
template< G1Type InstType, u16 OpcodeSSE >
class xImpl_G1Logic : public xImpl_Group1<InstType>
{
public:
@ -149,7 +149,7 @@ public:
// ------------------------------------------------------------------------
// This class combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB).
//
template< G1Type InstType, u8 OpcodeSSE >
template< G1Type InstType, u16 OpcodeSSE >
class xImpl_G1Arith : public xImpl_G1Logic<InstType, OpcodeSSE >
{
public:

View File

@ -59,7 +59,7 @@ public:
// ------------------------------------------------------------------------
// This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV.
//
template< G3Type InstType, u8 OpcodeSSE >
template< G3Type InstType, u16 OpcodeSSE >
class xImpl_Group3 : public Group3ImplAll<InstType>
{
public:

View File

@ -21,7 +21,7 @@
//////////////////////////////////////////////////////////////////////////////////////////
// MMX / SSE Helper Functions!
extern void SimdPrefix( u8 prefix, u8 opcode );
extern void SimdPrefix( u8 prefix, u16 opcode );
// ------------------------------------------------------------------------
// xmm emitter helpers for xmm instruction with prefixes.
@ -30,21 +30,21 @@ extern void SimdPrefix( u8 prefix, u8 opcode );
// instructions violate this "guideline.")
//
template< typename T, typename T2 >
__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
{
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
ModRM_Direct( to.Id, from.Id );
}
template< typename T >
void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
{
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
EmitSibMagic( reg.Id, sib );
}
template< typename T >
__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
{
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
xWriteDisp( reg.Id, data );
@ -56,21 +56,21 @@ __emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister<T>& reg, con
// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
//
template< typename T, typename T2 >
__emitinline void writeXMMop( u8 opcode, const xRegister<T>& to, const xRegister<T2>& from )
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
{
SimdPrefix( 0, opcode );
ModRM_Direct( to.Id, from.Id );
}
template< typename T >
void writeXMMop( u8 opcode, const xRegister<T>& reg, const ModSibBase& sib )
__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
{
SimdPrefix( 0, opcode );
EmitSibMagic( reg.Id, sib );
}
template< typename T >
__emitinline void writeXMMop( u8 opcode, const xRegister<T>& reg, const void* data )
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
{
SimdPrefix( 0, opcode );
xWriteDisp( reg.Id, data );
@ -80,7 +80,7 @@ __emitinline void writeXMMop( u8 opcode, const xRegister<T>& reg, const void* da
// Moves to/from high/low portions of an xmm register.
// These instructions cannot be used in reg/reg form.
//
template< u8 Opcode >
template< u16 Opcode >
class MovhlImplAll
{
protected:
@ -104,7 +104,7 @@ public:
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
// do something kinda different! Fun!
//
template< u8 Opcode >
template< u16 Opcode >
class MovhlImpl_RtoR
{
public:
@ -115,7 +115,7 @@ public:
};
// ------------------------------------------------------------------------
template< u8 Prefix, u8 Opcode, u8 OpcodeAlt >
template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
class MovapsImplAll
{
public:
@ -132,49 +132,75 @@ public:
// SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for
// a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms).
//
template< u8 Opcode >
template< u16 Opcode >
class SimdImpl_PackedLogic
{
public:
template< typename T >
__forceinline void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
template< typename T >
__forceinline void operator()( const xRegisterSIMD<T>& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); }
template< typename T >
__noinline void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
template< typename T > __forceinline
void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
template< typename T > __forceinline
void operator()( const xRegisterSIMD<T>& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); }
template< typename T > __forceinline
void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
SimdImpl_PackedLogic() {} //GCWho?
};
// ------------------------------------------------------------------------
// For implementing SSE-only logic operations that have reg,reg/rm forms only,
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
// like ANDPS/ANDPD
//
template< u8 Prefix, u8 Opcode >
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegSSE
{
public:
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
SimdImpl_DestRegSSE() {} //GCWho?
};
// ------------------------------------------------------------------------
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
// (PSHUFD / PSHUFHW / etc).
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmSSE
{
public:
	// Explicit empty constructor so that const instances of this class can be
	// default-initialized (GCC requires a user-provided constructor for that).
	SimdImpl_DestRegImmSSE() {}

	// xmm, xmm, imm8 form.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );		// the immediate always trails the ModRM/displacement bytes
	}

	// xmm, [direct address], imm8 form.
	__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// xmm, [ModRM/SIB address], imm8 form.
	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}
};
// MMX counterpart of the reg,reg/rm,imm8 instruction form (used for PSHUFW): every
// overload emits the instruction and then appends the 8-bit immediate.
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmMMX
{
public:
	// Explicit empty constructor so that const instances of this class can be
	// default-initialized (GCC requires a user-provided constructor for that).
	SimdImpl_DestRegImmMMX() {}

	// mm, mm, imm8 form.
	__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );		// the immediate always trails the ModRM/displacement bytes
	}

	// mm, [direct address], imm8 form.
	__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// mm, [ModRM/SIB address], imm8 form.
	__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}
};
// ------------------------------------------------------------------------
// For implementing MMX/SSE operations that have reg,reg/rm forms only,
// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops).
//
template< u8 Prefix, u8 Opcode >
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegEither
{
public:
template< typename DestOperandType >
__forceinline void operator()( const xRegisterSIMD<DestOperandType>& to, const xRegisterSIMD<DestOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
template< typename DestOperandType >
__forceinline void operator()( const xRegisterSIMD<DestOperandType>& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
template< typename DestOperandType >
__noinline void operator()( const xRegisterSIMD<DestOperandType>& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
template< typename DestOperandType > __forceinline
void operator()( const xRegisterSIMD<DestOperandType>& to, const xRegisterSIMD<DestOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
template< typename DestOperandType > __forceinline
void operator()( const xRegisterSIMD<DestOperandType>& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
template< typename DestOperandType > __forceinline
void operator()( const xRegisterSIMD<DestOperandType>& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
SimdImpl_DestRegEither() {} //GCWho?
};
@ -183,19 +209,19 @@ public:
// For implementing MMX/SSE operations which the destination *must* be a register, but the source
// can be regDirect or ModRM (indirect).
//
template< u8 Prefix, u8 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
class SimdImpl_DestRegStrict
{
public:
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
__noinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
SimdImpl_DestRegStrict() {} //GCWho?
};
// ------------------------------------------------------------------------
template< u8 OpcodeSSE >
template< u16 OpcodeSSE >
class SimdImpl_PSPD_SSSD
{
public:
@ -209,7 +235,7 @@ public:
// ------------------------------------------------------------------------
//
template< u8 OpcodeSSE >
template< u16 OpcodeSSE >
class SimdImpl_AndNot
{
public:
@ -221,7 +247,7 @@ public:
// ------------------------------------------------------------------------
// For instructions that have SS/SD form only (UCOMI, etc)
// AltPrefix - prefix used for doubles (SD form).
template< u8 AltPrefix, u8 OpcodeSSE >
template< u8 AltPrefix, u16 OpcodeSSE >
class SimdImpl_SS_SD
{
public:
@ -232,7 +258,7 @@ public:
// ------------------------------------------------------------------------
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
template< u8 OpcodeSSE >
template< u16 OpcodeSSE >
class SimdImpl_rSqrt
{
public:
@ -243,7 +269,7 @@ public:
// ------------------------------------------------------------------------
// For instructions that have PS/SS/SD form only (most commonly Sqrt functions)
template< u8 OpcodeSSE >
template< u16 OpcodeSSE >
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
{
public:
@ -252,7 +278,7 @@ public:
};
// ------------------------------------------------------------------------
template< u8 OpcodeSSE >
template< u16 OpcodeSSE >
class SimdImpl_Shuffle
{
protected:
@ -296,7 +322,7 @@ public:
//////////////////////////////////////////////////////////////////////////////////////////
//
//
template< u8 Opcode1, u8 OpcodeImm, u8 Modcode >
template< u16 Opcode1, u16 OpcodeImm, u8 Modcode >
class SimdImpl_Shift
{
public:
@ -330,17 +356,27 @@ public:
};
// ------------------------------------------------------------------------
template< u8 OpcodeBase1, u8 OpcodeBaseImm, u8 Modcode >
class SimdImpl_ShiftAll
// Used for PSRA
template< u16 OpcodeBase1, u8 Modcode >
class SimdImpl_ShiftWithoutQ
{
public:
const SimdImpl_Shift<OpcodeBase1+1,OpcodeBaseImm+1,Modcode> W;
const SimdImpl_Shift<OpcodeBase1+2,OpcodeBaseImm+2,Modcode> D;
const SimdImpl_Shift<OpcodeBase1+3,OpcodeBaseImm+3,Modcode> Q;
const SimdImpl_Shift<OpcodeBase1+1,0x71,Modcode> W;
const SimdImpl_Shift<OpcodeBase1+2,0x72,Modcode> D;
SimdImpl_ShiftWithoutQ() {}
};
// ------------------------------------------------------------------------
template< u16 OpcodeBase1, u8 Modcode >
class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
{
public:
const SimdImpl_Shift<OpcodeBase1+3,0x73,Modcode> Q;
void DQ( const xRegisterSSE& to, u8 imm ) const
{
SimdPrefix( 0x66, OpcodeBaseImm+3 );
SimdPrefix( 0x66, 0x73 );
ModRM( 3, (int)Modcode+1, to.Id );
xWrite<u8>( imm );
}
@ -350,26 +386,261 @@ public:
//////////////////////////////////////////////////////////////////////////////////////////
//
template< u8 OpcodeB, u8 OpcodeS, u8 OpcodeUS, u8 OpcodeQ >
template< u16 OpcodeB, u16 OpcodeQ >
class SimdImpl_AddSub
{
public:
const SimdImpl_DestRegEither<0x66,OpcodeB> B;
const SimdImpl_DestRegEither<0x66,OpcodeB+1> W;
const SimdImpl_DestRegEither<0x66,OpcodeB+2> D;
const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;
const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;
const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;
const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;
// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
const SimdImpl_DestRegEither<0x66,OpcodeS> SB;
const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;
// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
const SimdImpl_DestRegEither<0x66,OpcodeS+1> SW;
const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;
// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
const SimdImpl_DestRegEither<0x66,OpcodeUS> USB;
const SimdImpl_DestRegEither<0x66,OpcodeB> USB;
// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
const SimdImpl_DestRegEither<0x66,OpcodeUS+1> USW;
const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
SimdImpl_AddSub() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer multiply family (exposed as xPMUL).  Each member is a functor that
// emits one instruction of the family for the reg,reg / reg,mem operand forms.
//
// 16-bit opcode values such as 0x0b38 describe the three-byte 0F 38 xx encodings: the
// low byte holds the 0x38 escape and the high byte holds the final opcode byte.
//
class SimdImpl_PMul
{
public:
// PMULLW (0F D5): multiply packed signed words and keep the low 16 bits of each product.
const SimdImpl_DestRegEither<0x66,0xd5> LW;
// PMULHW (0F E5): multiply packed signed words and keep the high 16 bits of each product.
const SimdImpl_DestRegEither<0x66,0xe5> HW;
// PMULHUW (0F E4): multiply packed unsigned words and keep the high 16 bits of each product.
const SimdImpl_DestRegEither<0x66,0xe4> HUW;
// PMULUDQ (0F F4): multiply packed unsigned doublewords, producing quadword results.
const SimdImpl_DestRegEither<0x66,0xf4> UDQ;
// [SSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
// bits. Rounding is always performed by adding 1 to the least significant bit of the
// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
// immediately to the right of the most significant bit of each 18-bit intermediate
// result and packed to the destination operand.
//
// Both operands can be MMX or XMM registers. Source can be register or memory.
//
const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;
// [SSE-4.1] PMULLD (0F 38 40): multiply the packed dword signed integers in dest with
// src, and store the low 32 bits of each product in dest.  (SSE operands only)
const SimdImpl_DestRegSSE<0x66,0x4038> LD;
// [SSE-4.1] PMULDQ (0F 38 28): multiply the packed signed dword integers in dest with
// src, producing signed quadword results.  (SSE operands only)
const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
// Explicit empty constructor so that const instances can be default-initialized.
SimdImpl_PMul() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer compare family (exposed as xPCMP): PCMPEQB/W/D and PCMPGTB/W/D.
// Every form accepts either MMX or SSE operands.
class SimdImpl_PCompare
{
public:
// Explicit empty constructor so that const instances can be default-initialized.
SimdImpl_PCompare() {}
// Compare packed bytes for equality.
// If a data element in dest is equal to the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x74> EQB;
// Compare packed words for equality.
// If a data element in dest is equal to the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x75> EQW;
// Compare packed doublewords [32-bits] for equality.
// If a data element in dest is equal to the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x76> EQD;
// Compare packed signed bytes for greater than.
// If a data element in dest is greater than the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x64> GTB;
// Compare packed signed words for greater than.
// If a data element in dest is greater than the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x65> GTW;
// Compare packed signed doublewords [32-bits] for greater than.
// If a data element in dest is greater than the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x66> GTD;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer min/max family (instantiated as xPMIN and xPMAX).
//
// Opcode1 - one-byte (0F xx) opcode of the unsigned-byte form [UB]; the signed-word
//           form [SW] is encoded as Opcode1+0x10.
// Opcode2 - final opcode byte of the [SSE-4.1] signed-byte form [SB] in the 0F 38 xx
//           map; SD, UW and UD use the next three consecutive values.  The value is
//           shifted into the high byte and combined with the 0x38 escape in the low byte.
//
template< u8 Opcode1, u16 Opcode2 >
class SimdImpl_PMinMax
{
public:
// Explicit empty constructor so that const instances can be default-initialized.
SimdImpl_PMinMax() {}
// Compare packed unsigned byte integers in dest to src and store packed min/max
// values in dest.
// Operation can be performed on either MMX or SSE operands.
const SimdImpl_DestRegEither<0x66,Opcode1> UB;
// Compare packed signed word integers in dest to src and store packed min/max
// values in dest.
// Operation can be performed on either MMX or SSE operands.
const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW;
// [SSE-4.1] Compare packed signed byte integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB;
// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD;
// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW;
// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed shuffle family (exposed as xPSHUF): PSHUFW / PSHUFD / PSHUFLW / PSHUFHW take an
// 8-bit immediate order operand; PSHUFB takes its control mask from the source operand.
class SimdImpl_PShuffle
{
public:
// Explicit empty constructor so that const instances can be default-initialized.
SimdImpl_PShuffle() {}
// [PSHUFW - MMX operands only, no prefix]
// Copies words from src and inserts them into dest at word locations selected with
// the order operand (8 bit immediate).
const SimdImpl_DestRegImmMMX<0x00,0x70> W;
// [PSHUFD - SSE operands only]
// Copies doublewords from src and inserts them into dest at dword locations selected
// with the order operand (8 bit immediate).
const SimdImpl_DestRegImmSSE<0x66,0x70> D;
// [PSHUFLW - SSE operands only]
// Copies words from the low quadword of src and inserts them into the low quadword
// of dest at word locations selected with the order operand (8 bit immediate).
// The high quadword of src is copied to the high quadword of dest.
const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;
// [PSHUFHW - SSE operands only]
// Copies words from the high quadword of src and inserts them into the high quadword
// of dest at word locations selected with the order operand (8 bit immediate).
// The low quadword of src is copied to the low quadword of dest.
const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;
// [SSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
// control mask in src. If the most significant bit (bit[7]) of each byte of the
// shuffle control mask is set, then constant zero is written in the result byte.
// Each byte in the shuffle control mask forms an index to permute the corresponding
// byte in dest. The value of each index is the least significant 4 bits (128-bit
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
//
// Operands can be MMX or XMM registers.
//
// Encoded as 0F 38 00: the final opcode byte is zero, so this two-byte form can only
// be told apart from a one-byte opcode by the 0x38 escape held in the low byte.
const SimdImpl_DestRegEither<0x66,0x0038> B;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer unpack/interleave family (exposed as xPUNPCK).  The byte, word and
// doubleword forms accept MMX or SSE operands; the quadword forms are SSE only.
class SimdImpl_PUnpack
{
public:
// Explicit empty constructor so that const instances can be default-initialized.
SimdImpl_PUnpack() {}
// Unpack and interleave low-order bytes from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x60> LBW;
// Unpack and interleave low-order words from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x61> LWD;
// Unpack and interleave low-order doublewords from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x62> LDQ;
// Unpack and interleave low-order quadwords from src and dest into dest.
// (SSE operands only)
const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;
// Unpack and interleave high-order bytes from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x68> HBW;
// Unpack and interleave high-order words from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x69> HWD;
// Unpack and interleave high-order doublewords from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x6a> HDQ;
// Unpack and interleave high-order quadwords from src and dest into dest.
// (SSE operands only)
const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
};
//////////////////////////////////////////////////////////////////////////////////////////
// Pack with Signed or Unsigned Saturation
//
// Exposed as xPACK: PACKSSWB / PACKSSDW / PACKUSWB / PACKUSDW.  The source elements
// are always interpreted as signed; the SS/US part of the name selects whether the
// narrowed result is saturated to the signed or the unsigned range.
class SimdImpl_Pack
{
public:
// Explicit empty constructor so that const instances can be default-initialized.
SimdImpl_Pack() {}
// Converts packed signed word integers from src and dest into packed signed
// byte integers in dest, using signed saturation.
const SimdImpl_DestRegEither<0x66,0x63> SSWB;
// Converts packed signed dword integers from src and dest into packed signed
// word integers in dest, using signed saturation.
const SimdImpl_DestRegEither<0x66,0x6b> SSDW;
// Converts packed signed word integers from src and dest into packed unsigned
// byte integers in dest, using unsigned saturation.
const SimdImpl_DestRegEither<0x66,0x67> USWB;
// [SSE-4.1] Converts packed signed dword integers from src and dest into packed
// unsigned word integers in dest, using unsigned saturation.  (SSE operands only)
const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Floating point unpack/interleave family (exposed as xUNPCK): UNPCKHPS / UNPCKHPD /
// UNPCKLPS / UNPCKLPD.  All forms operate on SSE registers only; the PS forms carry no
// prefix and the PD forms carry the 0x66 prefix.
class SimdImpl_Unpack
{
public:
// Explicit empty constructor so that const instances can be default-initialized.
SimdImpl_Unpack() {}
// Unpacks the high doubleword [single-precision] values from src and dest into
// dest, such that the result of dest looks like this:
// dest[0] <- dest[2]
// dest[1] <- src[2]
// dest[2] <- dest[3]
// dest[3] <- src[3]
//
const SimdImpl_DestRegSSE<0x00,0x15> HPS;
// Unpacks the high quadword [double-precision] values from src and dest into
// dest, such that the result of dest looks like this:
// dest.lo <- dest.hi
// dest.hi <- src.hi
//
const SimdImpl_DestRegSSE<0x66,0x15> HPD;
// Unpacks the low doubleword [single-precision] values from src and dest into
// dest, such that the result of dest looks like this:
// dest[3] <- src[1]
// dest[2] <- dest[1]
// dest[1] <- src[0]
// dest[0] <- dest[0]
//
const SimdImpl_DestRegSSE<0x00,0x14> LPS;
// Unpacks the low quadword [double-precision] values from src and dest into
// dest, effectively moving the low portion of src into the upper portion of dest.
// The result of dest is loaded as such:
// dest.hi <- src.lo
// dest.lo <- dest.lo [remains unchanged!]
//
const SimdImpl_DestRegSSE<0x66,0x14> LPD;
};

View File

@ -641,16 +641,35 @@ __emitinline void xBSWAP( const xRegister32& to )
// MMX / XMM Instructions
// (these will get put in their own file later)
__emitinline void Internal::SimdPrefix( u8 prefix, u8 opcode )
// Emits the opcode prelude of an MMX/SSE instruction: the optional mandatory prefix
// byte (pass 0 for "no prefix"), the 0x0f escape byte, and the opcode byte(s).
//
// Opcode forms:
//   * One-byte opcodes (0F xx) are passed as a plain 8-bit value, e.g. 0x70.
//   * Opcodes from the three-byte maps (0F 38 xx / 0F 3A xx) are passed with the
//     escape byte (0x38 or 0x3a) in the LOW 8 bits and the final opcode byte in the
//     HIGH 8 bits, e.g. PMULLD (0F 38 40) is 0x4038 and PSHUFB (0F 38 00) is 0x0038.
//
// The form is recognized by the escape value in the low byte.  Testing the high byte
// for non-zero is not sufficient, because the final opcode byte can legitimately be
// zero (PSHUFB).  0F 38 and 0F 3A are never complete opcodes by themselves, so the
// low-byte test cannot misclassify a genuine one-byte opcode.
//
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
{
	const u8 escape = (u8)(opcode & 0xff);
	const bool hasEscape = (escape == 0x38) || (escape == 0x3a);

	if( prefix != 0 )
	{
		if( hasEscape )
		{
			// Single little-endian store yielding: prefix, 0x0f, escape, opcode.
			// The cast keeps the shift in unsigned arithmetic.
			xWrite<u32>( ((u32)opcode << 16) | 0x0f00 | prefix );
		}
		else
		{
			xWrite<u16>( 0x0f00 | prefix );
			xWrite<u8>( opcode );
		}
	}
	else
	{
		if( hasEscape )
		{
			// The 0x0f escape is still mandatory when there is no prefix (e.g. the
			// MMX-register forms of SSSE3 instructions): 0x0f, escape, opcode.
			xWrite<u8>( 0x0f );
			xWrite<u16>( opcode );
		}
		else
			xWrite<u16>( (opcode<<8) | 0x0f );
	}
}
const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS;
const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
@ -735,11 +754,21 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S
// ------------------------------------------------------------------------
const SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL;
const SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL;
const SimdImpl_ShiftAll<0xd0, 2> xPSRL;
const SimdImpl_ShiftAll<0xf0, 6> xPSLL;
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
const SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD;
const SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB;
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
const SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
const SimdImpl_PMinMax<0xde,0x3c> xPMAX;
const SimdImpl_PMinMax<0xda,0x38> xPMIN;
const SimdImpl_PMul xPMUL;
const SimdImpl_PCompare xPCMP;
const SimdImpl_PShuffle xPSHUF;
const SimdImpl_PUnpack xPUNPCK;
const SimdImpl_Unpack xUNPCK;
const SimdImpl_Pack xPACK;
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -497,11 +497,20 @@ namespace x86Emitter
// ------------------------------------------------------------------------
extern const Internal::SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL;
extern const Internal::SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL;
extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL;
extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL;
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
extern const Internal::SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD;
extern const Internal::SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB;
extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
extern const Internal::SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
extern const Internal::SimdImpl_PMinMax<0xde,0x3c> xPMAX;
extern const Internal::SimdImpl_PMinMax<0xda,0x38> xPMIN;
extern const Internal::SimdImpl_PMul xPMUL;
extern const Internal::SimdImpl_PCompare xPCMP;
extern const Internal::SimdImpl_PShuffle xPSHUF;
extern const Internal::SimdImpl_PUnpack xPUNPCK;
extern const Internal::SimdImpl_Unpack xUNPCK;
extern const Internal::SimdImpl_Pack xPACK;
}

View File

@ -938,12 +938,6 @@ extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from);
// emms
extern void EMMS( void );
//**********************************************************************************/
//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits
//**********************************************************************************
extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from);
extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from);
extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from);
//*********************

View File

@ -41,6 +41,7 @@ emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( x
emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); }
emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); }
emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); }
#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \
emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \
@ -75,6 +76,8 @@ DEFINE_LEGACY_LOGIC_OPCODE( XOR )
DEFINE_LEGACY_SHIFT_OPCODE( SLL )
DEFINE_LEGACY_SHIFT_OPCODE( SRL )
DEFINE_LEGACY_SHIFT_STUFF( SRA, D )
DEFINE_LEGACY_SHIFT_STUFF( SRA, W )
DEFINE_LEGACY_ARITHMETIC( ADD, B )
DEFINE_LEGACY_ARITHMETIC( ADD, W )
@ -94,136 +97,37 @@ DEFINE_LEGACY_ARITHMETIC( SUB, SW )
DEFINE_LEGACY_ARITHMETIC( SUB, USB )
DEFINE_LEGACY_ARITHMETIC( SUB, USW )
DEFINE_LEGACY_ARITHMETIC( CMP, EQB );
DEFINE_LEGACY_ARITHMETIC( CMP, EQW );
DEFINE_LEGACY_ARITHMETIC( CMP, EQD );
DEFINE_LEGACY_ARITHMETIC( CMP, GTB );
DEFINE_LEGACY_ARITHMETIC( CMP, GTW );
DEFINE_LEGACY_ARITHMETIC( CMP, GTD );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD );
// Legacy MMX entry points for PMULUDQ and PSHUFW.  Each one simply wraps its raw
// register index / address arguments in the new emitter's operand types and forwards
// to the matching xPMUL / xPSHUF member.
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from )
{
	xPMUL.UDQ( xRegisterMMX( to ), (void*)from );
}

emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
	xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) );
}

emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
{
	xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 );
}

emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
{
	xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 );
}
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
/* emms */
emitterT void EMMS()
{
// Emits the two opcode bytes of EMMS as a single 16-bit value.  Assuming write16
// stores little-endian (as on x86 hosts), 0x770F becomes the byte sequence 0F 77.
write16( 0x770F );
}
// pmuludq m64 to r64 (sse2 only?)
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from )
{
write16( 0xF40F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
// pmuludq r64 to r64 (sse2 only?)
emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xF40F );
ModRM( 3, to, from );
}
emitterT void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x740F );
ModRM( 3, to, from );
}
emitterT void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x750F );
ModRM( 3, to, from );
}
emitterT void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x760F );
ModRM( 3, to, from );
}
emitterT void PCMPEQDMtoR( x86MMXRegType to, uptr from )
{
write16( 0x760F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x640F );
ModRM( 3, to, from );
}
emitterT void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x650F );
ModRM( 3, to, from );
}
emitterT void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x660F );
ModRM( 3, to, from );
}
emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from )
{
write16( 0x660F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PSRAWItoR( x86MMXRegType to, u8 from )
{
write16( 0x710F );
ModRM( 3, 4 , to );
write8( from );
}
emitterT void PSRADItoR( x86MMXRegType to, u8 from )
{
write16( 0x720F );
ModRM( 3, 4 , to );
write8( from );
}
emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xE20F );
ModRM( 3, to, from );
}
emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x6A0F );
ModRM( 3, to, from );
}
emitterT void PUNPCKHDQMtoR( x86MMXRegType to, uptr from )
{
write16( 0x6A0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x620F );
ModRM( 3, to, from );
}
emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from )
{
write16( 0x620F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
// Emits PACKSSWB (pack words to bytes with signed saturation) between two MMX
// registers: opcode word 0x630F followed by a ModRM built from (3, to, from).
// Marked "untested" by the original author.
emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
{
write16( 0x630F );
ModRM( 3, to, from );
}
// Emits PACKSSDW (pack dwords to words with signed saturation) between two MMX
// registers: opcode word 0x6B0F followed by a ModRM built from (3, to, from).
emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
{
write16( 0x6B0F );
ModRM( 3, to, from );
}
emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 )
{
if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3);
@ -231,20 +135,3 @@ emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 )
ModRM( 3, to, from );
write8( imm8 );
}
// Emits PSHUFW (shuffle packed words) between two MMX registers: opcode word
// 0x700f, a ModRM built from (3, to, from), then the imm8 shuffle selector.
emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
{
write16(0x700f);
ModRM( 3, to, from );
write8(imm8);
}
// Emits PSHUFW with an MMX destination and a memory source: opcode word 0x700f,
// a ModRM built from (0, to, DISP32), the 32-bit address from MEMADDR(from, 4),
// and finally the imm8 shuffle selector.
emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
{
write16( 0x700f );
ModRM( 0, to, DISP32 );
// NOTE(review): an imm8 follows the displacement here, yet MEMADDR still gets 4.
// Other imm8-carrying memory forms in this file pass an extra byte, e.g.
// SSEMtoR(0x700F, 1). Confirm whether this should account for the trailing byte.
write32( MEMADDR(from, 4) );
write8(imm8);
}
// Legacy MMX wrapper for MASKMOVQ: wraps both register indices in xRegisterMMX
// and hands them to the new emitter's xMASKMOV.
emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from)
{
	const xRegisterMMX dest( to );
	const xRegisterMMX src( from );
	xMASKMOV( dest, src );
}

View File

@ -52,28 +52,6 @@ using namespace x86Emitter;
write8( 0x66 ), \
SSERtoR( code )
// Emits a 0x66-prefixed register-to-register SIMD instruction with the operand
// slots swapped: 'from' is passed ahead of 'to' to both RexRB and ModRM.
// Expands to a single comma expression and relies on variables named 'to' and
// 'from' being in scope at the expansion site; both are asserted to be below
// iREGCNT_XMM. 'code' is written as a 16-bit opcode word.
#define _SSERtoR66( code ) \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
write8( 0x66 ), \
RexRB(0, from, to), \
write16( code ), \
ModRM( 3, from, to )
// Emits a 0xf3-prefixed register-to-register SIMD instruction (the prefix used by
// the scalar single-precision "SS" forms): prefix byte, RexRB(0, to, from), the
// 16-bit opcode word 'code', then a ModRM built from (3, to, from).
// Expands to a single comma expression and relies on variables named 'to' and
// 'from' being in scope; both are asserted to be below iREGCNT_XMM.
#define SSE_SS_RtoR( code ) \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
write8( 0xf3 ), \
RexRB(0, to, from), \
write16( code ), \
ModRM( 3, to, from )
// Emits a 0xf2-prefixed memory-to-register SIMD instruction (the prefix used by
// the scalar double-precision "SD" forms): prefix byte, RexR(0, to), the 16-bit
// opcode word 'code', a ModRM built from (0, to, DISP32), then the 32-bit address
// from MEMADDR(from, 4 + overb).
// 'overb' is added to MEMADDR's second argument; presumably it counts extra bytes
// (e.g. an imm8) that follow the displacement -- confirm against MEMADDR.
// Relies on variables named 'to' and 'from' being in scope; 'to' is asserted to
// be below iREGCNT_XMM.
#define SSE_SD_MtoR( code, overb ) \
assert( to < iREGCNT_XMM ) , \
write8( 0xf2 ), \
RexR(0, to), \
write16( code ), \
ModRM( 0, to, DISP32 ), \
write32( MEMADDR(from, 4 + overb) )
#define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \
emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \
emitterT void sse##_MOV##mod##_XMM_to_M128( uptr to, x86SSERegType from ) { xMOV##mod( (void*)to, xRegisterSSE(from) ); } \
@ -117,6 +95,11 @@ using namespace x86Emitter;
emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
// Generates the two legacy 128-bit wrappers for one instruction of the new emitter:
//   SSE2_<mod><sub>_XMM_to_XMM( to, from )  -> x<mod>.<sub>( xRegisterSSE(to), xRegisterSSE(from) )
//   SSE2_<mod><sub>_M128_to_XMM( to, from ) -> x<mod>.<sub>( xRegisterSSE(to), (void*)from )
// The generated names always carry the SSE2_ prefix, whatever instruction set
// the underlying opcode belongs to.
#define DEFINE_LEGACY_OP128( mod, sub ) \
emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); }
// Convenience macro: instantiates both DEFINE_LEGACY_PSD_OPCODE and
// DEFINE_LEGACY_SSSD_OPCODE for 'mod'. Presumably this yields the packed (PS/PD)
// and scalar (SS/SD) legacy wrappers -- see those macros for the generated names.
#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \
DEFINE_LEGACY_PSD_OPCODE( mod ) \
DEFINE_LEGACY_SSSD_OPCODE( mod )
@ -153,6 +136,24 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP )
// Legacy wrapper instantiations. Each DEFINE_LEGACY_OP128( mod, sub ) line below
// generates SSE2_<mod><sub>_XMM_to_XMM and SSE2_<mod><sub>_M128_to_XMM, which
// forward to x<mod>.<sub> in the new emitter.
DEFINE_LEGACY_RSQRT_OPCODE( RSQRT )
DEFINE_LEGACY_SQRT_OPCODE( SQRT )
// Packed integer multiply: PMULLW, PMULHW, PMULUDQ.
DEFINE_LEGACY_OP128( PMUL, LW )
DEFINE_LEGACY_OP128( PMUL, HW )
DEFINE_LEGACY_OP128( PMUL, UDQ )
// Packed integer max/min: signed word and unsigned byte.
DEFINE_LEGACY_OP128( PMAX, SW )
DEFINE_LEGACY_OP128( PMAX, UB )
DEFINE_LEGACY_OP128( PMIN, SW )
DEFINE_LEGACY_OP128( PMIN, UB )
// NOTE(review): these generate SSE2_UNPCKLPS_* / SSE2_UNPCKHPS_* names, while the
// SSE_UNPCKLPS_* / SSE_UNPCKHPS_* wrappers are defined separately further down.
DEFINE_LEGACY_OP128( UNPCK, LPS )
DEFINE_LEGACY_OP128( UNPCK, HPS )
// Quadword interleave: PUNPCKLQDQ, PUNPCKHQDQ.
DEFINE_LEGACY_OP128( PUNPCK, LQDQ )
DEFINE_LEGACY_OP128( PUNPCK, HQDQ )
// Saturating packs: PACKSSWB, PACKSSDW, PACKUSWB.
DEFINE_LEGACY_OP128( PACK, SSWB )
DEFINE_LEGACY_OP128( PACK, SSDW )
DEFINE_LEGACY_OP128( PACK, USWB )
// Legacy wrapper for a register-to-register MOVAPS: wraps both indices in
// xRegisterSSE and forwards them to xMOVAPS.
emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xMOVAPS( dest, src );
}
// Legacy wrapper for a register-to-register MOVDQA: wraps both indices in
// xRegisterSSE and forwards them to xMOVDQA.
emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xMOVDQA( dest, src );
}
@ -216,87 +217,56 @@ emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int of
// Legacy wrappers for SHUFPD (register source and 128-bit memory source). Both
// forward to xSHUF.PD and pass imm8 through as the shuffle selector.
// NOTE(review): these carry the SSE_ prefix although SHUFPD is an SSE2
// instruction; the names are kept as-is for existing callers.
emitterT void SSE_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), (void*)from, imm8 ); }
// Legacy wrappers for the conversion instructions. Each one wraps its register
// indices in the matching xRegister* type, or casts the raw address to a typed
// pointer, and forwards to the new emitter. The pointer cast (u32*/u64*/u128*)
// presumably selects the memory-operand overload of the callee.

// CVTPI2PS: 64-bit memory or MMX source -> XMM destination.
emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { xCVTPI2PS( xRegisterSSE(to), (u64*)from ); }
emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { xCVTPI2PS( xRegisterSSE(to), xRegisterMMX(from) ); }
// CVTPS2PI: 64-bit memory or XMM source -> MMX destination.
emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { xCVTPS2PI( xRegisterMMX(to), (u64*)from ); }
emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { xCVTPS2PI( xRegisterMMX(to), xRegisterSSE(from) ); }
// CVTTSS2SI: 32-bit memory or XMM source -> 32-bit general-purpose destination.
emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { xCVTTSS2SI( xRegister32(to), (u32*)from ); }
emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xCVTTSS2SI( xRegister32(to), xRegisterSSE(from) ); }
// CVTSI2SS: 32-bit memory or general-purpose source -> XMM destination.
emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { xCVTSI2SS( xRegisterSSE(to), (u32*)from ); }
emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { xCVTSI2SS( xRegisterSSE(to), xRegister32(from) ); }
// CVTSS2SD / CVTSD2SS: scalar single <-> double conversions.
emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { xCVTSS2SD( xRegisterSSE(to), (u32*)from ); }
emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSS2SD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { xCVTSD2SS( xRegisterSSE(to), (u64*)from ); }
emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSD2SS( xRegisterSSE(to), xRegisterSSE(from) ); }
// CVTDQ2PS / CVTPS2DQ: packed dword <-> packed single conversions.
emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTDQ2PS( xRegisterSSE(to), (u128*)from ); }
emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTDQ2PS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTPS2DQ( xRegisterSSE(to), (u128*)from ); }
emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
// CVTTPS2DQ: only the register-source wrapper is provided here.
emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
// Legacy wrapper for PMAXSW (packed signed-word maximum) on MMX registers:
// wraps both indices in xRegisterMMX and forwards them to xPMAX.SW.
emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from )
{
	const xRegisterMMX dest( to );
	const xRegisterMMX src( from );
	xPMAX.SW( dest, src );
}
// Legacy wrapper for PMINSW (packed signed-word minimum) on MMX registers.
// Must dispatch to xPMIN, not xPMAX: the hand-written encoder this replaces used
// opcode 0xEA0F (PMINSW), whereas PMAXSW is 0xEE0F. Forwarding to xPMAX.SW here
// silently emitted a maximum instead of a minimum.
emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ) { xPMIN.SW( xRegisterMMX(to), xRegisterMMX(from) ); }
// Legacy wrappers for the SSE2 shuffles PSHUFD / PSHUFLW / PSHUFHW. Each has a
// register-source and a 128-bit memory-source form, forwards to xPSHUF.D / .LW /
// .HW, and passes imm8 through as the shuffle selector.
emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.D( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.D( xRegisterSSE(to), (void*)from, imm8 ); }
emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.LW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.LW( xRegisterSSE(to), (void*)from, imm8 ); }
emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); }
// Legacy wrappers for the SSE4 dword min/max instructions (signed SD, unsigned UD).
// Each has a register-source and a 128-bit memory-source form and forwards to the
// matching xPMAX / xPMIN member.
emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.SD( xRegisterSSE(to), (void*)from ); }
emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.SD( xRegisterSSE(to), (void*)from ); }
emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.UD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.UD( xRegisterSSE(to), (void*)from ); }
emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.UD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.UD( xRegisterSSE(to), (void*)from ); }
// PMULDQ: only the register-source wrapper is provided here.
emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
// Legacy wrappers for UNPCKLPS / UNPCKHPS (interleave low / high packed singles).
// Each has a 128-bit memory-source and a register-source form and forwards to
// xUNPCK.LPS / xUNPCK.HPS.
emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.LPS( xRegisterSSE(to), (void*)from ); }
emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.LPS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.HPS( xRegisterSSE(to), (void*)from ); }
emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.HPS( xRegisterSSE(to), xRegisterSSE(from) ); }
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMAXSW: Packed Signed Integer Word Maximum *
//**********************************************************************************
// Still missing:
// SSE_PMAXSW_M64_to_MM
// (SSE2_PMAXSW_M128_to_XMM and SSE2_PMAXSW_XMM_to_XMM are defined further down
// in this file, so they are no longer missing.)
// MMX register form, encoded through the SSERtoR helper macro with opcode word 0xEE0F.
emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); }
///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMINSW: Packed Signed Integer Word Minimum *
//**********************************************************************************
// Still missing:
// SSE_PMINSW_M64_to_MM
// (SSE2_PMINSW_M128_to_XMM and SSE2_PMINSW_XMM_to_XMM are defined further down
// in this file, so they are no longer missing.)
// MMX register form, encoded through the SSERtoR helper macro with opcode word 0xEA0F.
emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); }
////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PSHUFD: Shuffle Packed DoubleWords *
//**********************************************************************************
// Hand-encoded PSHUFD, register source: SSERtoR66 with opcode word 0x700F
// (the macro writes the 0x66 prefix before delegating to SSERtoR), followed by
// the imm8 shuffle selector.
emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
{
SSERtoR66( 0x700F );
write8( imm8 );
}
// Hand-encoded PSHUFD (memory source) and PSHUFLW / PSHUFHW (both forms). All share
// opcode word 0x700F and end with the imm8 selector; they differ only in prefix:
// PSHUFD goes through the *66 macro, PSHUFLW writes 0xF2, PSHUFHW writes 0xF3.
// NOTE(review): the PSHUFLW/PSHUFHW memory forms pass 1 as SSEMtoR's second
// argument (presumably the trailing imm8 byte), while the PSHUFD memory form uses
// SSEMtoR66, which takes no such argument -- confirm this asymmetry is intended.
emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); }
emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); }
emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); }
emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); }
emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); }
///////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data *
//**********************************************************************************
// Hand-encoded UNPCKLPS with opcode word 0x140F: memory source via SSEMtoR (second
// argument 0), register source via SSERtoR.
emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); }
emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); }
////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data *
//**********************************************************************************
// Hand-encoded UNPCKHPS with opcode word 0x150F: memory source via SSEMtoR (second
// argument 0), register source via SSERtoR.
emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); }
emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); }
/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//STMXCSR : Store Streaming SIMD Extension Control/Status *
@ -317,23 +287,6 @@ emitterT void SSE_LDMXCSR( uptr from ) {
write32( MEMADDR(from, 4) );
}
///////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PCMPxx: Compare Packed Integers *
//**********************************************************************************
// Hand-encoded 128-bit packed integer compares. Register forms use SSERtoR66 and
// memory forms use SSEMtoR66, both with the same opcode word per instruction.
// Signed greater-than: PCMPGTB 0x640F, PCMPGTW 0x650F, PCMPGTD 0x660F.
emitterT void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); }
emitterT void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); }
emitterT void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); }
emitterT void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); }
emitterT void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); }
emitterT void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); }
// Equality: PCMPEQB 0x740F, PCMPEQW 0x750F, PCMPEQD 0x760F.
emitterT void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); }
emitterT void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); }
emitterT void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); }
emitterT void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); }
emitterT void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); }
emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); }
////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PEXTRW,PINSRW: Packed Extract/Insert Word *
@ -341,88 +294,6 @@ emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66(
// PEXTRW (XMM word -> 32-bit general-purpose register) and PINSRW (general-purpose
// register -> XMM word). Both use SSERtoR66 with opcode words 0xC50F / 0xC40F and
// append imm8 as the word index.
// NOTE(review): both are named SSE_ but go through the 0x66-prefixed macro, i.e.
// they encode the XMM (SSE2) forms -- naming kept for existing callers.
emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); }
emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); }
///////////////////////////////////////////////////////////////////////////////////////
// shift right arithmetic
// PSRAW (arithmetic right shift of packed words) with the shift count taken from
// an XMM register or 128-bit memory operand; opcode word 0xE10F.
emitterT void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); }
emitterT void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); }
// PSRAW on an XMM register by an immediate count. Emits, in order: the 0x66
// prefix, RexB(0, to), opcode word 0x710F, a ModRM built from (3, 4, to), and the
// imm8 shift count.
emitterT void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
write8( 0x66 );
RexB(0, to);
write16( 0x710F );
// The constant 4 is presumably the /4 opcode extension selecting PSRAW -- confirm.
ModRM( 3, 4 , to );
write8( imm8 );
}
// PSRAD (arithmetic right shift of packed dwords) with the shift count taken from
// an XMM register or 128-bit memory operand; opcode word 0xE20F.
emitterT void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); }
emitterT void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); }
// PSRAD on an XMM register by an immediate count. Emits, in order: the 0x66
// prefix, RexB(0, to), opcode word 0x720F, a ModRM built from (3, 4, to), and the
// imm8 shift count.
emitterT void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
write8( 0x66 );
RexB(0, to);
write16( 0x720F );
// The constant 4 is presumably the /4 opcode extension selecting PSRAD -- confirm.
ModRM( 3, 4 , to );
write8( imm8 );
}
// Hand-encoded 128-bit packed integer max/min (register form via SSERtoR66,
// memory form via SSEMtoR66). Opcode words: PMAXSW 0xEE0F, PMAXUB 0xDE0F,
// PMINSW 0xEA0F, PMINUB 0xDA0F.
emitterT void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); }
emitterT void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); }
emitterT void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); }
emitterT void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); }
emitterT void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); }
emitterT void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEA0F ); }
emitterT void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); }
emitterT void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); }
//**********************************************************************************/
//PACKSSWB,PACKSSDW,PACKUSWB: Pack with Signed/Unsigned Saturation
//**********************************************************************************
// Hand-encoded 128-bit pack instructions (register form via SSERtoR66, memory form
// via SSEMtoR66). Opcode words: PACKSSWB 0x630F, PACKSSDW 0x6B0F, PACKUSWB 0x670F.
emitterT void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); }
emitterT void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); }
emitterT void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); }
emitterT void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); }
emitterT void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); }
emitterT void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); }
//**********************************************************************************/
//PUNPCKLxx,PUNPCKHxx: Unpack and Interleave Low/High Packed Integers
//**********************************************************************************
// Hand-encoded 128-bit unpack/interleave instructions (register form via
// SSERtoR66, memory form via SSEMtoR66).
// Byte -> word: PUNPCKLBW 0x600F, PUNPCKHBW 0x680F.
emitterT void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); }
emitterT void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); }
emitterT void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); }
emitterT void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); }
// Word -> dword: PUNPCKLWD 0x610F, PUNPCKHWD 0x690F.
emitterT void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); }
emitterT void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); }
emitterT void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); }
emitterT void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); }
// Dword -> qword: PUNPCKLDQ 0x620F, PUNPCKHDQ 0x6A0F.
emitterT void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); }
emitterT void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); }
emitterT void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); }
emitterT void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); }
// Qword -> double qword: PUNPCKLQDQ 0x6C0F, PUNPCKHQDQ 0x6D0F.
emitterT void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); }
emitterT void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); }
emitterT void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); }
emitterT void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); }
// Hand-encoded 128-bit packed integer multiplies (register form via SSERtoR66,
// memory form via SSEMtoR66). Opcode words: PMULLW 0xD50F, PMULHW 0xE50F,
// PMULUDQ 0xF40F.
emitterT void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); }
emitterT void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); }
emitterT void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); }
emitterT void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); }
emitterT void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); }
emitterT void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); }
// MOVMSKPS / MOVMSKPD: XMM source -> 32-bit general-purpose destination. Both use
// opcode word 0x500F; the PD variant goes through the 0x66-prefixed SSERtoR66.
emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); }
emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); }
@ -589,82 +460,6 @@ emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 im
write8(imm8);
}
// Hand-encoded PMAXSD (signed dword maximum), register source. Emits the 0x66
// prefix, RexRB(0, to, from), the three opcode bytes as the 24-bit value 0x3D380F,
// then a ModRM built from (3, to, from).
emitterT void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x3D380F);
ModRM(3, to, from);
}
// Hand-encoded PMINSD (signed dword minimum), register source. Emits the 0x66
// prefix, RexRB(0, to, from), the three opcode bytes as the 24-bit value 0x39380F,
// then a ModRM built from (3, to, from).
emitterT void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x39380F);
ModRM(3, to, from);
}
// Hand-encoded PMAXUD (unsigned dword maximum), register source. Emits the 0x66
// prefix, RexRB(0, to, from), the three opcode bytes as the 24-bit value 0x3F380F,
// then a ModRM built from (3, to, from).
emitterT void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x3F380F);
ModRM(3, to, from);
}
// Hand-encoded PMINUD (unsigned dword minimum), register source. Emits the 0x66
// prefix, RexRB(0, to, from), the three opcode bytes as the 24-bit value 0x3B380F,
// then a ModRM built from (3, to, from).
emitterT void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x3B380F);
ModRM(3, to, from);
}
// Hand-encoded PMAXSD (signed dword maximum), 128-bit memory source. Emits the
// 0x66 prefix, RexR(0, to), the 24-bit opcode value 0x3D380F, a ModRM built from
// (0, to, DISP32), then the 32-bit address from MEMADDR(from, 4).
emitterT void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from)
{
write8(0x66);
RexR(0, to);
write24(0x3D380F);
ModRM( 0, to, DISP32 );
write32(MEMADDR(from, 4));
}
// Hand-encoded PMINSD (signed dword minimum), 128-bit memory source. Emits the
// 0x66 prefix, RexR(0, to), the 24-bit opcode value 0x39380F, a ModRM built from
// (0, to, DISP32), then the 32-bit address from MEMADDR(from, 4).
emitterT void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from)
{
write8(0x66);
RexR(0, to);
write24(0x39380F);
ModRM( 0, to, DISP32 );
write32(MEMADDR(from, 4));
}
// Hand-encoded PMAXUD (unsigned dword maximum), 128-bit memory source. Emits the
// 0x66 prefix, RexR(0, to), the 24-bit opcode value 0x3F380F, a ModRM built from
// (0, to, DISP32), then the 32-bit address from MEMADDR(from, 4).
emitterT void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from)
{
write8(0x66);
RexR(0, to);
write24(0x3F380F);
ModRM( 0, to, DISP32 );
write32(MEMADDR(from, 4));
}
// Hand-encoded PMINUD (unsigned dword minimum), 128-bit memory source. Emits the
// 0x66 prefix, RexR(0, to), the 24-bit opcode value 0x3B380F, a ModRM built from
// (0, to, DISP32), then the 32-bit address from MEMADDR(from, 4).
emitterT void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from)
{
write8(0x66);
RexR(0, to);
write24(0x3B380F);
ModRM( 0, to, DISP32 );
write32(MEMADDR(from, 4));
}
// Hand-encoded PMULDQ (signed dword multiply producing qwords), register source.
// Emits the 0x66 prefix, RexRB(0, to, from), the three opcode bytes as the 24-bit
// value 0x28380F, then a ModRM built from (3, to, from).
emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x28380F);
ModRM(3, to, from);
}
//////////////////////////////////////////////////////////////////////////////////////////
// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions)
//