diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index bbc6735b51..8b38f35b04 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -134,7 +134,7 @@ public: // This class combines x86 with SSE/SSE2 logic operations (ADD, OR, and NOT). // Note: ANDN [AndNot] is handled below separately. // -template< G1Type InstType, u8 OpcodeSSE > +template< G1Type InstType, u16 OpcodeSSE > class xImpl_G1Logic : public xImpl_Group1 { public: @@ -149,7 +149,7 @@ public: // ------------------------------------------------------------------------ // This class combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB). // -template< G1Type InstType, u8 OpcodeSSE > +template< G1Type InstType, u16 OpcodeSSE > class xImpl_G1Arith : public xImpl_G1Logic { public: diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index a88049789f..f5d72b3611 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -59,7 +59,7 @@ public: // ------------------------------------------------------------------------ // This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV. // -template< G3Type InstType, u8 OpcodeSSE > +template< G3Type InstType, u16 OpcodeSSE > class xImpl_Group3 : public Group3ImplAll { public: diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index d69107bdfe..3d47f0b6e9 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -21,7 +21,7 @@ ////////////////////////////////////////////////////////////////////////////////////////// // MMX / SSE Helper Functions! -extern void SimdPrefix( u8 prefix, u8 opcode ); +extern void SimdPrefix( u8 prefix, u16 opcode ); // ------------------------------------------------------------------------ // xmm emitter helpers for xmm instruction with prefixes. 
@@ -30,21 +30,21 @@ extern void SimdPrefix( u8 prefix, u8 opcode ); // instructions violate this "guideline.") // template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) +__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) { SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > -void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) +__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) { SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) +__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) { SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); xWriteDisp( reg.Id, data ); @@ -56,21 +56,21 @@ __emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, con // some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. 
// template< typename T, typename T2 > -__emitinline void writeXMMop( u8 opcode, const xRegister& to, const xRegister& from ) +__emitinline void writeXMMop( u16 opcode, const xRegister& to, const xRegister& from ) { SimdPrefix( 0, opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > -void writeXMMop( u8 opcode, const xRegister& reg, const ModSibBase& sib ) +__noinline void writeXMMop( u16 opcode, const xRegister& reg, const ModSibBase& sib ) { SimdPrefix( 0, opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > -__emitinline void writeXMMop( u8 opcode, const xRegister& reg, const void* data ) +__emitinline void writeXMMop( u16 opcode, const xRegister& reg, const void* data ) { SimdPrefix( 0, opcode ); xWriteDisp( reg.Id, data ); @@ -80,7 +80,7 @@ __emitinline void writeXMMop( u8 opcode, const xRegister& reg, const void* da // Moves to/from high/low portions of an xmm register. // These instructions cannot be used in reg/reg form. // -template< u8 Opcode > +template< u16 Opcode > class MovhlImplAll { protected: @@ -104,7 +104,7 @@ public: // RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but // do something kinda different! Fun! // -template< u8 Opcode > +template< u16 Opcode > class MovhlImpl_RtoR { public: @@ -115,7 +115,7 @@ public: }; // ------------------------------------------------------------------------ -template< u8 Prefix, u8 Opcode, u8 OpcodeAlt > +template< u8 Prefix, u16 Opcode, u16 OpcodeAlt > class MovapsImplAll { public: @@ -132,49 +132,75 @@ public: // SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for // a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms). 
// -template< u8 Opcode > +template< u16 Opcode > class SimdImpl_PackedLogic { public: - template< typename T > - __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - template< typename T > - __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } - template< typename T > - __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } SimdImpl_PackedLogic() {} //GCWho? }; // ------------------------------------------------------------------------ -// For implementing SSE-only logic operations that have reg,reg/rm forms only, +// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only, // like ANDPS/ANDPD // -template< u8 Prefix, u8 Opcode > +template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegSSE { public: __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } SimdImpl_DestRegSSE() {} //GCWho? 
}; +// ------------------------------------------------------------------------ +// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only +// (PSHUFD / PSHUFHW / etc). +// +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegImmSSE +{ +public: + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + + SimdImpl_DestRegImmSSE() {} //GCWho? +}; + +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegImmMMX +{ +public: + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + + SimdImpl_DestRegImmMMX() {} //GCWho? +}; + // ------------------------------------------------------------------------ // For implementing MMX/SSE operations that have reg,reg/rm forms only, // but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops). 
// -template< u8 Prefix, u8 Opcode > +template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegEither { public: - template< typename DestOperandType > - __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename DestOperandType > - __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename DestOperandType > - __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename DestOperandType > __forceinline + void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename DestOperandType > __forceinline + void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename DestOperandType > __forceinline + void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } SimdImpl_DestRegEither() {} //GCWho? }; @@ -183,19 +209,19 @@ public: // For implementing MMX/SSE operations which the destination *must* be a register, but the source // can be regDirect or ModRM (indirect). 
// -template< u8 Prefix, u8 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > +template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > class SimdImpl_DestRegStrict { public: - __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __noinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } SimdImpl_DestRegStrict() {} //GCWho? }; // ------------------------------------------------------------------------ -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_PSPD_SSSD { public: @@ -209,7 +235,7 @@ public: // ------------------------------------------------------------------------ // -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_AndNot { public: @@ -221,7 +247,7 @@ public: // ------------------------------------------------------------------------ // For instructions that have SS/SD form only (UCOMI, etc) // AltPrefix - prefixed used for doubles (SD form). 
-template< u8 AltPrefix, u8 OpcodeSSE > +template< u8 AltPrefix, u16 OpcodeSSE > class SimdImpl_SS_SD { public: @@ -232,7 +258,7 @@ public: // ------------------------------------------------------------------------ // For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_rSqrt { public: @@ -243,7 +269,7 @@ public: // ------------------------------------------------------------------------ // For instructions that have PS/SS/SD form only (most commonly Sqrt functions) -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_Sqrt : public SimdImpl_rSqrt { public: @@ -252,7 +278,7 @@ public: }; // ------------------------------------------------------------------------ -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_Shuffle { protected: @@ -296,7 +322,7 @@ public: ////////////////////////////////////////////////////////////////////////////////////////// // // -template< u8 Opcode1, u8 OpcodeImm, u8 Modcode > +template< u16 Opcode1, u16 OpcodeImm, u8 Modcode > class SimdImpl_Shift { public: @@ -330,17 +356,27 @@ public: }; // ------------------------------------------------------------------------ -template< u8 OpcodeBase1, u8 OpcodeBaseImm, u8 Modcode > -class SimdImpl_ShiftAll +// Used for PSRA +template< u16 OpcodeBase1, u8 Modcode > +class SimdImpl_ShiftWithoutQ { public: - const SimdImpl_Shift W; - const SimdImpl_Shift D; - const SimdImpl_Shift Q; + const SimdImpl_Shift W; + const SimdImpl_Shift D; + + SimdImpl_ShiftWithoutQ() {} +}; + +// ------------------------------------------------------------------------ +template< u16 OpcodeBase1, u8 Modcode > +class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ +{ +public: + const SimdImpl_Shift Q; void DQ( const xRegisterSSE& to, u8 imm ) const { - SimdPrefix( 0x66, OpcodeBaseImm+3 ); + SimdPrefix( 0x66, 0x73 ); ModRM( 3, (int)Modcode+1, to.Id ); xWrite( imm ); } @@ -350,26 +386,261 @@ 
public: ////////////////////////////////////////////////////////////////////////////////////////// // -template< u8 OpcodeB, u8 OpcodeS, u8 OpcodeUS, u8 OpcodeQ > +template< u16 OpcodeB, u16 OpcodeQ > class SimdImpl_AddSub { public: - const SimdImpl_DestRegEither<0x66,OpcodeB> B; - const SimdImpl_DestRegEither<0x66,OpcodeB+1> W; - const SimdImpl_DestRegEither<0x66,OpcodeB+2> D; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D; const SimdImpl_DestRegEither<0x66,OpcodeQ> Q; // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeS> SB; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB; // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeS+1> SW; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW; // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeUS> USB; + const SimdImpl_DestRegEither<0x66,OpcodeB> USB; // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeUS+1> USW; + const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW; SimdImpl_AddSub() {} -}; \ No newline at end of file +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PMul +{ +public: + const SimdImpl_DestRegEither<0x66,0xd5> LW; + const SimdImpl_DestRegEither<0x66,0xe5> HW; + const SimdImpl_DestRegEither<0x66,0xe4> HUW; + const SimdImpl_DestRegEither<0x66,0xf4> UDQ; + + // [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the + // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit + // integers. 
Each intermediate 32-bit integer is truncated to the 18 most significant + // bits. Rounding is always performed by adding 1 to the least significant bit of the + // 18-bit intermediate result. The final result is obtained by selecting the 16 bits + // immediately to the right of the most significant bit of each 18-bit intermediate + // result and packed to the destination operand. + // + // Both operands can be MMX or XMM registers. Source can be register or memory. + // + const SimdImpl_DestRegEither<0x66,0x0b38> HRSW; + + // [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store + // the low 32 bits of each product in dest. + const SimdImpl_DestRegSSE<0x66,0x4038> LD; + + // [SSE-4.1] Multiply the packed signed dword integers in dest with src. + const SimdImpl_DestRegSSE<0x66,0x2838> DQ; + + SimdImpl_PMul() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PCompare +{ +public: + SimdImpl_PCompare() {} + + // Compare packed bytes for equality. + // If a data element in dest is equal to the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x74> EQB; + + // Compare packed words for equality. + // If a data element in dest is equal to the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x75> EQW; + + // Compare packed doublewords [32-bits] for equality. + // If a data element in dest is equal to the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x76> EQD; + + // Compare packed signed bytes for greater than. 
+ // If a data element in dest is greater than the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x64> GTB; + + // Compare packed signed words for greater than. + // If a data element in dest is greater than the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x65> GTW; + + // Compare packed signed doublewords [32-bits] for greater than. + // If a data element in dest is greater than the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x66> GTD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u8 Opcode1, u16 Opcode2 > +class SimdImpl_PMinMax +{ +public: + SimdImpl_PMinMax() {} + + // Compare packed unsigned byte integers in dest to src and store packed min/max + // values in dest. + // Operation can be performed on either MMX or SSE operands. + const SimdImpl_DestRegEither<0x66,Opcode1> UB; + + // Compare packed signed word integers in dest to src and store packed min/max + // values in dest. + // Operation can be performed on either MMX or SSE operands. + const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW; + + // [SSE-4.1] Compare packed signed byte integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB; + + // [SSE-4.1] Compare packed signed doubleword integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD; + + // [SSE-4.1] Compare packed unsigned word integers in dest to src and store + // packed min/max values in dest. 
(SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW; + + // [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PShuffle +{ +public: + SimdImpl_PShuffle() {} + + // Copies words from src and inserts them into dest at word locations selected with + // the order operand (8 bit immediate). + const SimdImpl_DestRegImmMMX<0x00,0x70> W; + + // Copies doublewords from src and inserts them into dest at dword locations selected + // with the order operand (8 bit immediate). + const SimdImpl_DestRegImmSSE<0x66,0x70> D; + + // Copies words from the low quadword of src and inserts them into the low quadword + // of dest at word locations selected with the order operand (8 bit immediate). + // The high quadword of src is copied to the high quadword of dest. + const SimdImpl_DestRegImmSSE<0xf2,0x70> LW; + + // Copies words from the high quadword of src and inserts them into the high quadword + // of dest at word locations selected with the order operand (8 bit immediate). + // The low quadword of src is copied to the low quadword of dest. + const SimdImpl_DestRegImmSSE<0xf3,0x70> HW; + + // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle + // control mask in src. If the most significant bit (bit[7]) of each byte of the + // shuffle control mask is set, then constant zero is written in the result byte. + // Each byte in the shuffle control mask forms an index to permute the corresponding + // byte in dest. The value of each index is the least significant 4 bits (128-bit + // operation) or 3 bits (64-bit operation) of the shuffle control byte. + // + // Operands can be MMX or XMM registers. 
+ const SimdImpl_DestRegEither<0x66,0x0038> B; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PUnpack +{ +public: + SimdImpl_PUnpack() {} + + // Unpack and interleave low-order bytes from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x60> LBW; + // Unpack and interleave low-order words from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x61> LWD; + // Unpack and interleave low-order doublewords from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x62> LDQ; + // Unpack and interleave low-order quadwords from src and dest into dest. + const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ; + + // Unpack and interleave high-order bytes from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x68> HBW; + // Unpack and interleave high-order words from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x69> HWD; + // Unpack and interleave high-order doublewords from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x6a> HDQ; + // Unpack and interleave high-order quadwords from src and dest into dest. + const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Pack with Signed or Unsigned Saturation +// +class SimdImpl_Pack +{ +public: + SimdImpl_Pack() {} + + // Converts packed signed word integers from src and dest into packed signed + // byte integers in dest, using signed saturation. + const SimdImpl_DestRegEither<0x66,0x63> SSWB; + + // Converts packed signed dword integers from src and dest into packed signed + // word integers in dest, using signed saturation. + const SimdImpl_DestRegEither<0x66,0x6b> SSDW; + + // Converts packed signed word integers from src and dest into packed unsigned + // byte integers in dest, using unsigned saturation. 
+ const SimdImpl_DestRegEither<0x66,0x67> USWB; + + // [SSE-4.1] Converts packed signed dword integers from src and dest into packed + // unsigned word integers in dest, using unsigned saturation. + const SimdImpl_DestRegSSE<0x66,0x2b38> USDW; +}; + + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_Unpack +{ +public: + SimdImpl_Unpack() {} + + // Unpacks the high doubleword [single-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest[0] <- dest[2] + // dest[1] <- src[2] + // dest[2] <- dest[3] + // dest[3] <- src[3] + // + const SimdImpl_DestRegSSE<0x00,0x15> HPS; + + // Unpacks the high quadword [double-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest.lo <- dest.hi + // dest.hi <- src.hi + // + const SimdImpl_DestRegSSE<0x66,0x15> HPD; + + // Unpacks the low doubleword [single-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest[3] <- src[1] + // dest[2] <- dest[1] + // dest[1] <- src[0] + // dest[0] <- dest[0] + // + const SimdImpl_DestRegSSE<0x00,0x14> LPS; + + // Unpacks the low quadword [double-precision] values from src and dest into + // dest, effectively moving the low portion of src into the upper portion of dest. + // The result of dest is loaded as such: + // dest.hi <- src.lo + // dest.lo <- dest.lo [remains unchanged!] + // + const SimdImpl_DestRegSSE<0x66,0x14> LPD; +}; + diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index e987f180c0..9125feaa3e 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -641,15 +641,34 @@ __emitinline void xBSWAP( const xRegister32& to ) // MMX / XMM Instructions // (these will get put in their own file later) -__emitinline void Internal::SimdPrefix( u8 prefix, u8 opcode ) +// 16 bit opcodes are those whose lower 8 bits are the 0x38 escape byte; their upper 8 bits hold the
+// instruction's own opcode byte, so PSHUFB (0F 38 00) is 0x0038.  They are emitted as 0F 38 xx.  Every +// other opcode must have its upper 8 bits clear, and is emitted as a single byte after the 0F escape. +__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) { + // 16 bit opcodes are identified by the 0x38 escape in their low byte; anything else must fit in 8 bits. + const bool is16BitOpcode = ( (opcode & 0x00ff) == 0x38 ); + jASSUME( is16BitOpcode || ((opcode & 0xff00) == 0) ); if( prefix != 0 ) { - xWrite( 0x0f00 | prefix ); - xWrite( opcode ); + if( is16BitOpcode ) + xWrite<u32>( ((u32)opcode<<16) | (0x0f00 | prefix) ); + else + { + xWrite<u16>( 0x0f00 | prefix ); + xWrite<u8>( opcode ); + } } else - xWrite( (opcode<<8) | 0x0f ); + { + if( is16BitOpcode ) + { + xWrite<u8>( 0x0f ); + xWrite<u16>( opcode ); + } + else + xWrite<u16>( (opcode<<8) | 0x0f ); + } } const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; @@ -735,11 +754,21 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S // ------------------------------------------------------------------------ -const SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL; -const SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL; +const SimdImpl_ShiftAll<0xd0, 2> xPSRL; +const SimdImpl_ShiftAll<0xf0, 6> xPSLL; +const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; -const SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD; -const SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB; +const SimdImpl_AddSub<0xdc, 0xd4> xPADD; +const SimdImpl_AddSub<0xd8, 0xfb> xPSUB; +const SimdImpl_PMinMax<0xde,0x3c> xPMAX; +const SimdImpl_PMinMax<0xda,0x38> xPMIN; + +const SimdImpl_PMul xPMUL; +const SimdImpl_PCompare xPCMP; +const SimdImpl_PShuffle xPSHUF; +const SimdImpl_PUnpack xPUNPCK; +const SimdImpl_Unpack xUNPCK; +const SimdImpl_Pack xPACK; ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index dd1b1d5299..b0c4bfca9d 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -465,43 +465,52 @@ namespace x86Emitter //
------------------------------------------------------------------------ // OMG Evil. I went cross-eyed an hour ago doing this. // - extern const Internal::SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; extern const Internal::SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; extern const Internal::SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; extern const Internal::SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; extern const Internal::SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; - extern const Internal::SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; - extern const Internal::SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + extern const Internal::SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; extern const Internal::SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; - extern const Internal::SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; - extern const Internal::SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; - extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; - extern const Internal::SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; - extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; - extern const 
Internal::SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; - extern const Internal::SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; - extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; extern const Internal::SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; extern const Internal::SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; extern const Internal::SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; - extern const Internal::SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; - extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; - extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL; - extern const Internal::SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL; - 
- extern const Internal::SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD; - extern const Internal::SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB; - + extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL; + extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL; + extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; + + extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD; + extern const Internal::SimdImpl_AddSub<0xd8, 0xfb> xPSUB; + extern const Internal::SimdImpl_PMinMax<0xde,0x3c> xPMAX; + extern const Internal::SimdImpl_PMinMax<0xda,0x38> xPMIN; + + extern const Internal::SimdImpl_PMul xPMUL; + extern const Internal::SimdImpl_PCompare xPCMP; + extern const Internal::SimdImpl_PShuffle xPSHUF; + extern const Internal::SimdImpl_PUnpack xPUNPCK; + extern const Internal::SimdImpl_Unpack xUNPCK; + extern const Internal::SimdImpl_Pack xPACK; } diff --git a/pcsx2/x86/ix86/ix86_legacy_instructions.h b/pcsx2/x86/ix86/ix86_legacy_instructions.h index f36522852f..f903e120cb 100644 --- a/pcsx2/x86/ix86/ix86_legacy_instructions.h +++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h @@ -938,12 +938,6 @@ extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); // emms extern void EMMS( void ); -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits -//********************************************************************************** -extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); -extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); - extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); //********************* diff --git a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp index 7bbca83e33..f0571df61a 100644 --- a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp @@ -27,11 +27,11 @@ using namespace x86Emitter; -emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( 
xRegisterMMX(to), (void*)from ); } -emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); } -emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); } -emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } -emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } +emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); } +emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); } +emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); } +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { xMOVDZX( xRegisterMMX(to), (void*)from ); } emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { xMOVD( (void*)to, xRegisterMMX(from) ); } @@ -40,7 +40,8 @@ emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( xRegister32(to), xRegisterMMX(from) ); } emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } -emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); } +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); } +emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( 
xRegisterMMX(to), xRegisterMMX(from) ); } #define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \ emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \ @@ -66,7 +67,7 @@ emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \ DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \ DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \ - emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } + emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } DEFINE_LEGACY_LOGIC_OPCODE( AND ) DEFINE_LEGACY_LOGIC_OPCODE( ANDN ) @@ -75,6 +76,8 @@ DEFINE_LEGACY_LOGIC_OPCODE( XOR ) DEFINE_LEGACY_SHIFT_OPCODE( SLL ) DEFINE_LEGACY_SHIFT_OPCODE( SRL ) +DEFINE_LEGACY_SHIFT_STUFF( SRA, D ) +DEFINE_LEGACY_SHIFT_STUFF( SRA, W ) DEFINE_LEGACY_ARITHMETIC( ADD, B ) DEFINE_LEGACY_ARITHMETIC( ADD, W ) @@ -94,136 +97,37 @@ DEFINE_LEGACY_ARITHMETIC( SUB, SW ) DEFINE_LEGACY_ARITHMETIC( SUB, USB ) DEFINE_LEGACY_ARITHMETIC( SUB, USW ) +DEFINE_LEGACY_ARITHMETIC( CMP, EQB ); +DEFINE_LEGACY_ARITHMETIC( CMP, EQW ); +DEFINE_LEGACY_ARITHMETIC( CMP, EQD ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTB ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTW ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTD ); + +DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW ); + +DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD ); + + +emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { xPMUL.UDQ( xRegisterMMX( to ), (void*)from ); } +emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) ); } + +emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 ); } +emitterT void 
PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); } + +////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// + /* emms */ emitterT void EMMS() { write16( 0x770F ); } -// pmuludq m64 to r64 (sse2 only?) -emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xF40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// pmuludq r64 to r64 (sse2 only?) -emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF40F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x740F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x750F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x760F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPEQDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x760F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x640F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x650F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x660F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x660F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void PSRAWItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( from ); -} - -emitterT void PSRADItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( from ); -} - -emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ) 
-{ - write16( 0xE20F ); - ModRM( 3, to, from ); -} - -emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x6A0F ); - ModRM( 3, to, from ); -} - -emitterT void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x6A0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x620F ); - ModRM( 3, to, from ); -} - -emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x620F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// untested -emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) -{ - write16( 0x630F ); - ModRM( 3, to, from ); -} - -emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) -{ - write16( 0x6B0F ); - ModRM( 3, to, from ); -} - emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3); @@ -231,20 +135,3 @@ emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) ModRM( 3, to, from ); write8( imm8 ); } - -emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) -{ - write16(0x700f); - ModRM( 3, to, from ); - write8(imm8); -} - -emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) -{ - write16( 0x700f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); - write8(imm8); -} - -emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index e239af26be..9a97441b87 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -52,28 +52,6 @@ using namespace x86Emitter; write8( 0x66 ), \ SSERtoR( code ) -#define _SSERtoR66( code ) \ - assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \ - write8( 0x66 ), \ - RexRB(0, from, to), \ - write16( code ), \ - ModRM( 3, from, to 
) - -#define SSE_SS_RtoR( code ) \ - assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \ - write8( 0xf3 ), \ - RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) - -#define SSE_SD_MtoR( code, overb ) \ - assert( to < iREGCNT_XMM ) , \ - write8( 0xf2 ), \ - RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) - #define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \ emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \ emitterT void sse##_MOV##mod##_XMM_to_M128( uptr to, x86SSERegType from ) { xMOV##mod( (void*)to, xRegisterSSE(from) ); } \ @@ -117,6 +95,11 @@ using namespace x86Emitter; emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } +#define DEFINE_LEGACY_OP128( mod, sub ) \ + emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); } + + #define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ DEFINE_LEGACY_PSD_OPCODE( mod ) \ DEFINE_LEGACY_SSSD_OPCODE( mod ) @@ -153,6 +136,24 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP ) DEFINE_LEGACY_RSQRT_OPCODE( RSQRT ) DEFINE_LEGACY_SQRT_OPCODE( SQRT ) +DEFINE_LEGACY_OP128( PMUL, LW ) +DEFINE_LEGACY_OP128( PMUL, HW ) +DEFINE_LEGACY_OP128( PMUL, UDQ ) + +DEFINE_LEGACY_OP128( PMAX, SW ) +DEFINE_LEGACY_OP128( PMAX, UB ) +DEFINE_LEGACY_OP128( PMIN, SW ) +DEFINE_LEGACY_OP128( PMIN, UB ) + +DEFINE_LEGACY_OP128( UNPCK, LPS ) +DEFINE_LEGACY_OP128( UNPCK, HPS ) +DEFINE_LEGACY_OP128( PUNPCK, LQDQ ) +DEFINE_LEGACY_OP128( PUNPCK, HQDQ ) + +DEFINE_LEGACY_OP128( PACK, SSWB ) +DEFINE_LEGACY_OP128( PACK, SSDW ) +DEFINE_LEGACY_OP128( 
PACK, USWB ) + emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); } @@ -216,87 +217,56 @@ emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int of emitterT void SSE_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), (void*)from, imm8 ); } - emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { xCVTPI2PS( xRegisterSSE(to), (u64*)from ); } emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { xCVTPI2PS( xRegisterSSE(to), xRegisterMMX(from) ); } - emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { xCVTPS2PI( xRegisterMMX(to), (u64*)from ); } emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { xCVTPS2PI( xRegisterMMX(to), xRegisterSSE(from) ); } - emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { xCVTTSS2SI( xRegister32(to), (u32*)from ); } emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xCVTTSS2SI( xRegister32(to), xRegisterSSE(from) ); } - emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { xCVTSI2SS( xRegisterSSE(to), (u32*)from ); } emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { xCVTSI2SS( xRegisterSSE(to), xRegister32(from) ); } emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { xCVTSS2SD( xRegisterSSE(to), (u32*)from ); } emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSS2SD( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { xCVTSD2SS( xRegisterSSE(to), 
(u64*)from ); } emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSD2SS( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTDQ2PS( xRegisterSSE(to), (u128*)from ); } emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTDQ2PS( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTPS2DQ( xRegisterSSE(to), (u128*)from ); } emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ) { xPMAX.SW( xRegisterMMX(to), xRegisterMMX(from) ); } +emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ) { xPMIN.SW( xRegisterMMX(to), xRegisterMMX(from) ); } + +emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.D( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.D( xRegisterSSE(to), (void*)from, imm8 ); } +emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.LW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.LW( xRegisterSSE(to), (void*)from, imm8 ); } +emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); } + +emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.SD( 
xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.SD( xRegisterSSE(to), (void*)from ); } +emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.SD( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.SD( xRegisterSSE(to), (void*)from ); } + +emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.UD( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.UD( xRegisterSSE(to), (void*)from ); } +emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.UD( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.UD( xRegisterSSE(to), (void*)from ); } + +emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.LPS( xRegisterSSE(to), (void*)from ); } +emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.LPS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.HPS( xRegisterSSE(to), (void*)from ); } +emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.HPS( xRegisterSSE(to), xRegisterSSE(from) ); } + ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PMAXSW: Packed Signed Integer Word Maximum * 
-//********************************************************************************** -//missing -// SSE_PMAXSW_M64_to_MM -// SSE2_PMAXSW_M128_to_XMM -// SSE2_PMAXSW_XMM_to_XMM -emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PMINSW: Packed Signed Integer Word Minimum * -//********************************************************************************** -//missing -// SSE_PMINSW_M64_to_MM -// SSE2_PMINSW_M128_to_XMM -// SSE2_PMINSW_XMM_to_XMM -emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } - -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PSHUFD: Shuffle Packed DoubleWords * -//********************************************************************************** -emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) -{ - SSERtoR66( 0x700F ); - write8( imm8 ); -} -emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); } - -emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } -emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); } -emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); } -emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); } - -/////////////////////////////////////////////////////////////////////////////////// 
-//**********************************************************************************/ -//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data * -//********************************************************************************** -emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } -emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } - -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data * -//********************************************************************************** -emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } -emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } - ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //STMXCSR : Store Streaming SIMD Extension Control/Status * @@ -317,23 +287,6 @@ emitterT void SSE_LDMXCSR( uptr from ) { write32( MEMADDR(from, 4) ); } -/////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PCMPxx: Compare Packed Integers * -//********************************************************************************** -emitterT void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); } -emitterT void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } -emitterT void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } -emitterT void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } 
-emitterT void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } -emitterT void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } -emitterT void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } -emitterT void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } -emitterT void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); } -emitterT void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } -emitterT void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); } -emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); } - //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PEXTRW,PINSRW: Packed Extract/Insert Word * @@ -341,88 +294,6 @@ emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } -/////////////////////////////////////////////////////////////////////////////////////// - -// shift right arithmetic - -emitterT void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); } -emitterT void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); } -emitterT void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); } -emitterT void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { 
SSEMtoR66(0xE20F); } -emitterT void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); } -emitterT void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); } - -emitterT void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); } -emitterT void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); } - -emitterT void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); } -emitterT void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEA0F ); } - -emitterT void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); } -emitterT void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); } - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word -//********************************************************************************** -emitterT void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); } -emitterT void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); } -emitterT void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); } -emitterT void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); } - -emitterT void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); } -emitterT void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); } - -//**********************************************************************************/ -//PUNPCKHWD: Unpack 16bit high 
-//********************************************************************************** -emitterT void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } -emitterT void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } - -emitterT void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } -emitterT void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } - -emitterT void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } -emitterT void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } -emitterT void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } -emitterT void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } - -emitterT void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } -emitterT void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } -emitterT void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } -emitterT void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); } - -emitterT void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); } -emitterT void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } - -emitterT void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } -emitterT void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } - -emitterT void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } -emitterT void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); } -emitterT void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } 
-emitterT void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } - -emitterT void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } -emitterT void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } - emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } @@ -589,82 +460,6 @@ emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 im write8(imm8); } -emitterT void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3D380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x39380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3F380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3B380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3D380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x39380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3F380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3B380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - 
-emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x28380F); - ModRM(3, to, from); -} - ////////////////////////////////////////////////////////////////////////////////////////// // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) //