From cc48702b17db423fd12b119282f6992a7ba73c0a Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Mon, 20 Apr 2009 15:22:02 +0000 Subject: [PATCH] Emitter: Implemented SQRT/RSQRT/UCOM and all variations of SSE CVTs (omg those were a nightmare) [also patched old emitter to use new MOVD implementations -- missed those earlier]. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1031 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/group1.h | 21 +- pcsx2/x86/ix86/implement/jmpcall.h | 2 +- pcsx2/x86/ix86/implement/xmm/movqss.h | 122 ++++++-- pcsx2/x86/ix86/ix86.cpp | 64 +++- pcsx2/x86/ix86/ix86_instructions.h | 60 +++- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 412 ++++++++------------------ pcsx2/x86/ix86/ix86_types.h | 32 +- 7 files changed, 355 insertions(+), 358 deletions(-) diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 3979e2cff4..0fb4640e5b 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -164,8 +164,8 @@ class G1LogicImpl_PlusSSE : public Group1ImplAll public: using Group1ImplAll::operator(); - const SSELogicImpl<0x00,OpcodeSSE> PS; - const SSELogicImpl<0x66,OpcodeSSE> PD; + const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single precision + const SSELogicImpl<0x66,OpcodeSSE> PD; // packed double precision G1LogicImpl_PlusSSE() {} }; @@ -179,8 +179,8 @@ class G1ArithmeticImpl_PlusSSE : public G1LogicImpl_PlusSSE::operator(); - const SSELogicImpl<0xf3,OpcodeSSE> SS; - const SSELogicImpl<0xf2,OpcodeSSE> SD; + const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single precision + const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double precision G1ArithmeticImpl_PlusSSE() {} }; @@ -191,18 +191,19 @@ class G1CompareImpl_PlusSSE : Group1ImplAll< G1Type_CMP > protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); 
} - __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + Woot() {} }; public: using Group1ImplAll< G1Type_CMP >::operator(); - Woot<0x00> PS; - Woot<0x66> PD; - Woot<0xf3> SS; - Woot<0xf2> SD; + const Woot<0x00> PS; + const Woot<0x66> PD; + const Woot<0xf3> SS; + const Woot<0xf2> SD; G1CompareImpl_PlusSSE() {} //GCWhat? }; diff --git a/pcsx2/x86/ix86/implement/jmpcall.h b/pcsx2/x86/ix86/implement/jmpcall.h index 453f2d4e14..7858b07a92 100644 --- a/pcsx2/x86/ix86/implement/jmpcall.h +++ b/pcsx2/x86/ix86/implement/jmpcall.h @@ -70,7 +70,7 @@ public: __forceinline void operator()( const T* func ) const { if( isJmp ) - iJccKnownTarget( Jcc_Unconditional, (void*)func ); + iJccKnownTarget( Jcc_Unconditional, (void*)(uptr)func ); else { // calls are relative to the instruction after this one, and length is diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 99c3cbb912..412630cd20 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -21,17 +21,7 @@ ////////////////////////////////////////////////////////////////////////////////////////// // MMX / SSE Helper Functions! 
-template< typename T > -__emitinline void SimdPrefix( u8 opcode, u8 prefix=0 ) -{ - if( sizeof( T ) == 16 && prefix != 0 ) - { - xWrite( 0x0f00 | prefix ); - xWrite( opcode ); - } - else - xWrite( (opcode<<8) | 0x0f ); -} +extern void SimdPrefix( u8 prefix, u8 opcode ); // ------------------------------------------------------------------------ // xmm emitter helpers for xmm instruction with prefixes. @@ -40,23 +30,23 @@ __emitinline void SimdPrefix( u8 opcode, u8 prefix=0 ) // instructions violate this "guideline.") // template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& to, const xRegister& from ) +__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > -void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const ModSibBase& sib ) +void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const void* data ) +__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? 
prefix : 0, opcode ); xWriteDisp( reg.Id, data ); } @@ -68,21 +58,21 @@ __emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, con template< typename T, typename T2 > __emitinline void writeXMMop( u8 opcode, const xRegister& to, const xRegister& from ) { - SimdPrefix( opcode ); + SimdPrefix( 0, opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > void writeXMMop( u8 opcode, const xRegister& reg, const ModSibBase& sib ) { - SimdPrefix( opcode ); + SimdPrefix( 0, opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > __emitinline void writeXMMop( u8 opcode, const xRegister& reg, const void* data ) { - SimdPrefix( opcode ); + SimdPrefix( 0, opcode ); xWriteDisp( reg.Id, data ); } @@ -170,6 +160,34 @@ public: SSELogicImpl() {} //GCWho? }; +// ------------------------------------------------------------------------ +// For implementing MMX/SSE operations which the destination *must* be a register, but the source +// can be regDirect or ModRM (indirect). +// +template< u8 Prefix, u8 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > +class SSEImpl_DestRegForm +{ +public: + __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __noinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + + SSEImpl_DestRegForm() {} //GCWho? 
+}; + +// ------------------------------------------------------------------------ +template< u8 OpcodeSSE > +class SSEImpl_PSPD_SSSD +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single precision + const SSELogicImpl<0x66,OpcodeSSE> PD; // packed double precision + const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single precision + const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double precision + + SSEImpl_PSPD_SSSD() {} //GChow? +}; + // ------------------------------------------------------------------------ // template< u8 OpcodeSSE > @@ -178,10 +196,62 @@ class SSEAndNotImpl public: const SSELogicImpl<0x00,OpcodeSSE> PS; const SSELogicImpl<0x66,OpcodeSSE> PD; - SSEAndNotImpl() {} }; +// ------------------------------------------------------------------------ +// For instructions that have SS/SD form only (UCOMI, etc) +// AltPrefix - prefixed used for doubles (SD form). +template< u8 AltPrefix, u8 OpcodeSSE > +class SSEImpl_SS_SD +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> SS; + const SSELogicImpl SD; + SSEImpl_SS_SD() {} +}; + +// ------------------------------------------------------------------------ +// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) +template< u8 OpcodeSSE > +class SSE_rSqrtImpl +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0xf3,OpcodeSSE> SS; + SSE_rSqrtImpl() {} +}; + +// ------------------------------------------------------------------------ +// For instructions that have PS/SS/SD form only (most commonly Sqrt functions) +template< u8 OpcodeSSE > +class SSE_SqrtImpl : public SSE_rSqrtImpl +{ +public: + const SSELogicImpl<0xf2,OpcodeSSE> SD; + SSE_SqrtImpl() {} +}; + +// ------------------------------------------------------------------------ +template< u8 OpcodeSSE > +class SSEImpl_Shuffle +{ +protected: + template< u8 Prefix > struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { 
writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + Woot() {} + }; + +public: + const Woot<0x00> PS; + const Woot<0x66> PD; + + SSEImpl_Shuffle() {} //GCWhat? +}; + // ------------------------------------------------------------------------ template< SSE2_ComparisonType CType > class SSECompareImpl @@ -192,13 +262,13 @@ protected: __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + Woot() {} }; public: - Woot<0x00> PS; - Woot<0x66> PD; - Woot<0xf3> SS; - Woot<0xf2> SD; - + const Woot<0x00> PS; + const Woot<0x66> PD; + const Woot<0xf3> SS; + const Woot<0xf2> SD; SSECompareImpl() {} //GCWhat? 
}; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 2affc9c74e..2a2eb665a7 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -138,8 +138,6 @@ namespace Internal __forceinline void ModRM( uint mod, uint reg, uint rm ) { xWrite( (mod << 6) | (reg << 3) | rm ); - //*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm; - //x86Ptr++; } __forceinline void ModRM_Direct( uint reg, uint rm ) @@ -150,8 +148,6 @@ namespace Internal __forceinline void SibSB( u32 ss, u32 index, u32 base ) { xWrite( (ss << 6) | (index << 3) | base ); - //*(u32*)x86Ptr = (ss << 6) | (index << 3) | base; - //x86Ptr++; } __forceinline void xWriteDisp( int regfield, s32 displacement ) @@ -645,6 +641,17 @@ __emitinline void xBSWAP( const xRegister32& to ) // MMX / XMM Instructions // (these will get put in their own file later) +__emitinline void Internal::SimdPrefix( u8 prefix, u8 opcode ) +{ + if( prefix != 0 ) + { + xWrite( 0x0f00 | prefix ); + xWrite( opcode ); + } + else + xWrite( (opcode<<8) | 0x0f ); +} + const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS; const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD; @@ -670,11 +677,50 @@ const PLogicImplAll<0xef> xPXOR; const SSEAndNotImpl<0x55> xANDN; -// Compute Reciprocal Packed Single-Precision Floating-Point Values -const SSELogicImpl<0,0x53> xRCPPS; +const SSEImpl_SS_SD<0x66,0x2e> xUCOMI; +const SSE_rSqrtImpl<0x53> xRCP; +const SSE_rSqrtImpl<0x52> xRSQRT; +const SSE_SqrtImpl<0x51> xSQRT; -// Compute Reciprocal of Scalar Single-Precision Floating-Point Value -const SSELogicImpl<0xf3,0x53> xRCPSS; +const SSEImpl_PSPD_SSSD<0x5f> xMAX; +const SSEImpl_PSPD_SSSD<0x5d> xMIN; +const SSEImpl_Shuffle<0xc6> xSHUF; + +// ------------------------------------------------------------------------ +// SSE Conversion Operations, as looney as they are. +// +// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing +// nature of the functions. 
(so if a function expects an m32, you must use (u32*) or ptr32[]). +// +const SSEImpl_DestRegForm<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; +const SSEImpl_DestRegForm<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; + +const SSEImpl_DestRegForm<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; +const SSEImpl_DestRegForm<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; +const SSEImpl_DestRegForm<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; + +const SSEImpl_DestRegForm<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; +const SSEImpl_DestRegForm<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + +const SSEImpl_DestRegForm<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; +const SSEImpl_DestRegForm<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; +const SSEImpl_DestRegForm<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + +const SSEImpl_DestRegForm<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; +const SSEImpl_DestRegForm<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; +const SSEImpl_DestRegForm<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; +const SSEImpl_DestRegForm<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + +const SSEImpl_DestRegForm<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; +const SSEImpl_DestRegForm<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + +const SSEImpl_DestRegForm<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; +const SSEImpl_DestRegForm<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; +const SSEImpl_DestRegForm<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; +const SSEImpl_DestRegForm<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; + +const SSEImpl_DestRegForm<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; +const SSEImpl_DestRegForm<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; // ------------------------------------------------------------------------ @@ -724,7 +770,7 @@ __forceinline void xMOVQ( const xRegisterMMX& to, const 
xRegisterSSE& from ) // Manual implementation of this form of MOVQ, since its parameters are unique in a way // that breaks the template inference of writeXMMop(); - SimdPrefix( 0xd6, 0xf2 ); + SimdPrefix( 0xf2, 0xd6 ); ModRM_Direct( to.Id, from.Id ); } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 1421fba6dd..dbbfb9874c 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -347,14 +347,15 @@ namespace x86Emitter Internal::writeXMMop( 0x66, 0x7e, from, dest ); } + // ------------------------------------------------------------------------ - + // xMASKMOV: // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. // The default memory location is specified by DS:EDI. The most significant bit in each byte // of the mask operand determines whether the corresponding byte in the source operand is // written to the corresponding byte location in memory. - + template< typename T > static __forceinline void xMASKMOV( const xRegisterSIMD& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); } @@ -368,7 +369,7 @@ namespace x86Emitter // template< typename T > static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } - + // ------------------------------------------------------------------------ extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ); @@ -409,10 +410,9 @@ namespace x86Emitter extern void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ); extern void xMOVNTQ( void* to, const xRegisterMMX& from ); extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ); - - ////////////////////////////////////////////////////////////////////////////////////////// - // - + + // ------------------------------------------------------------------------ + extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS; extern 
const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS; @@ -433,6 +433,8 @@ namespace x86Emitter extern const Internal::MovhlImplAll<0x16> xMOVH; extern const Internal::MovhlImplAll<0x12> xMOVL; + // ------------------------------------------------------------------------ + extern const Internal::PLogicImplAll<0xdb> xPAND; extern const Internal::PLogicImplAll<0xdf> xPANDN; extern const Internal::PLogicImplAll<0xeb> xPOR; @@ -440,9 +442,15 @@ namespace x86Emitter extern const Internal::SSEAndNotImpl<0x55> xANDN; - extern const Internal::SSELogicImpl<0,0x53> xRCPPS; - extern const Internal::SSELogicImpl<0xf3,0x53> xRCPSS; + extern const Internal::SSEImpl_SS_SD<0x66,0x2e> xUCOMI; + extern const Internal::SSE_rSqrtImpl<0x53> xRCP; + extern const Internal::SSE_rSqrtImpl<0x52> xRSQRT; + extern const Internal::SSE_SqrtImpl<0x51> xSQRT; + extern const Internal::SSEImpl_PSPD_SSSD<0x5f> xMAX; + extern const Internal::SSEImpl_PSPD_SSSD<0x5d> xMIN; + extern const Internal::SSEImpl_Shuffle<0xc6> xSHUF; + // ------------------------------------------------------------------------ extern const Internal::SSECompareImpl xCMPEQ; @@ -453,5 +461,39 @@ namespace x86Emitter extern const Internal::SSECompareImpl xCMPNLT; extern const Internal::SSECompareImpl xCMPNLE; extern const Internal::SSECompareImpl xCMPORD; + + // ------------------------------------------------------------------------ + // OMG Evil. I went cross-eyed an hour ago doing this. 
+ // + extern const Internal::SSEImpl_DestRegForm<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; + extern const Internal::SSEImpl_DestRegForm<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; + + extern const Internal::SSEImpl_DestRegForm<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; + extern const Internal::SSEImpl_DestRegForm<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; + extern const Internal::SSEImpl_DestRegForm<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; + + extern const Internal::SSEImpl_DestRegForm<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; + extern const Internal::SSEImpl_DestRegForm<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + + extern const Internal::SSEImpl_DestRegForm<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; + extern const Internal::SSEImpl_DestRegForm<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; + extern const Internal::SSEImpl_DestRegForm<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + + extern const Internal::SSEImpl_DestRegForm<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; + extern const Internal::SSEImpl_DestRegForm<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; + extern const Internal::SSEImpl_DestRegForm<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; + extern const Internal::SSEImpl_DestRegForm<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + + extern const Internal::SSEImpl_DestRegForm<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; + extern const Internal::SSEImpl_DestRegForm<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + + extern const Internal::SSEImpl_DestRegForm<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; + extern const Internal::SSEImpl_DestRegForm<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; + extern const Internal::SSEImpl_DestRegForm<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; + extern const Internal::SSEImpl_DestRegForm<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; + + extern const 
Internal::SSEImpl_DestRegForm<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; + extern const Internal::SSEImpl_DestRegForm<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; + } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 163e24614d..8c259b5385 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -97,14 +97,6 @@ using namespace x86Emitter; ModRM( 0, to, DISP32 ), \ write32( MEMADDR(from, 4 + overb) ) \ -#define SSE_SD_RtoM( code, overb ) \ - assert( from < iREGCNT_XMM) , \ - write8( 0xf2 ), \ - RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) \ - #define SSE_SD_RtoR( code ) \ assert( to < iREGCNT_XMM && from < iREGCNT_XMM) , \ write8( 0xf2 ), \ @@ -112,30 +104,6 @@ using namespace x86Emitter; write16( code ), \ ModRM( 3, to, from ) -#define CMPPSMtoR( op ) \ - SSEMtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPPSRtoR( op ) \ - SSERtoR( 0xc20f ), \ - write8( op ) - -#define CMPSSMtoR( op ) \ - SSE_SS_MtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPSSRtoR( op ) \ - SSE_SS_RtoR( 0xc20f ), \ - write8( op ) - -#define CMPSDMtoR( op ) \ - SSE_SD_MtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPSDRtoR( op ) \ - SSE_SD_RtoR( 0xc20f ), \ - write8( op ) - #define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \ emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \ emitterT void sse##_MOV##mod##_XMM_to_M128( uptr to, x86SSERegType from ) { xMOV##mod( (void*)to, xRegisterSSE(from) ); } \ @@ -146,17 +114,92 @@ using namespace x86Emitter; emitterT void sse##_MOV##mod##RtoRmS( x86IntRegType to, x86SSERegType from, x86IntRegType from2, int scale ) \ { xMOV##mod( ptr[xAddressReg(to)+xAddressReg(from2)], xRegisterSSE(from) ); } +#define DEFINE_LEGACY_PSD_OPCODE( mod ) \ + emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), 
(void*)from ); } \ + emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PD( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_SSSD_OPCODE( mod ) \ + emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_CMP_OPCODE( comp ) \ + emitterT void SSE_CMP##comp##PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_CMP##comp##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_CMP##comp##PD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_CMP##comp##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PD( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE_CMP##comp##SS_M32_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_CMP##comp##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_CMP##comp##SD_M64_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SD( xRegisterSSE(to), (void*)from ); } \ + 
emitterT void SSE2_CMP##comp##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SD( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_RSQRT_OPCODE(mod) \ + emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_SQRT_OPCODE(mod) \ + DEFINE_LEGACY_RSQRT_OPCODE(mod) \ + emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ + DEFINE_LEGACY_PSD_OPCODE( mod ) \ + DEFINE_LEGACY_SSSD_OPCODE( mod ) + DEFINE_LEGACY_MOV_OPCODE( UPS, SSE ) DEFINE_LEGACY_MOV_OPCODE( APS, SSE ) DEFINE_LEGACY_MOV_OPCODE( DQA, SSE2 ) DEFINE_LEGACY_MOV_OPCODE( DQU, SSE2 ) +DEFINE_LEGACY_PSD_OPCODE( AND ) +DEFINE_LEGACY_PSD_OPCODE( ANDN ) +DEFINE_LEGACY_PSD_OPCODE( OR ) +DEFINE_LEGACY_PSD_OPCODE( XOR ) -//**********************************************************************************/ -//MOVAPS: Move aligned Packed Single Precision FP values * -//********************************************************************************** +DEFINE_LEGACY_PSSD_OPCODE( SUB ) +DEFINE_LEGACY_PSSD_OPCODE( ADD ) +DEFINE_LEGACY_PSSD_OPCODE( MUL ) +DEFINE_LEGACY_PSSD_OPCODE( DIV ) -emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); } +DEFINE_LEGACY_PSSD_OPCODE( MIN ) 
+DEFINE_LEGACY_PSSD_OPCODE( MAX ) + +DEFINE_LEGACY_CMP_OPCODE( EQ ) +DEFINE_LEGACY_CMP_OPCODE( LT ) +DEFINE_LEGACY_CMP_OPCODE( LE ) +DEFINE_LEGACY_CMP_OPCODE( UNORD ) +DEFINE_LEGACY_CMP_OPCODE( NE ) +DEFINE_LEGACY_CMP_OPCODE( NLT ) +DEFINE_LEGACY_CMP_OPCODE( NLE ) +DEFINE_LEGACY_CMP_OPCODE( ORD ) + +DEFINE_LEGACY_SSSD_OPCODE( UCOMI ) +DEFINE_LEGACY_RSQRT_OPCODE( RCP ) +DEFINE_LEGACY_RSQRT_OPCODE( RSQRT ) +DEFINE_LEGACY_SQRT_OPCODE( SQRT ) + + +emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { xMOVDZX( xRegisterSSE(to), (void*)from ); } +emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { xMOVDZX( xRegisterSSE(to), xRegister32(from) ); } +emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + xMOVDZX( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); +} + +emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { xMOVD( (void*)to, xRegisterSSE(from) ); } +emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { xMOVD( xRegister32(to), xRegisterSSE(from) ); } +emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) +{ + xMOVD( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); // store goes to [to+offset]; 'from' is the XMM source, not an address register +} emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVQZX( xRegisterSSE(to), (void*)from ); } emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVQZX( xRegisterSSE(to), xRegisterSSE(from) ); } @@ -165,9 +208,6 @@ emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) { xM emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) { xMOVQ( xRegisterSSE(to), xRegisterMMX(from) ); } 
-//**********************************************************************************/ -//MOVSS: Move Scalar Single-Precision FP value * -//********************************************************************************** emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { xMOVSSZX( xRegisterSSE(to), (void*)from ); } emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { xMOVSS( (void*)to, xRegisterSSE(from) ); } emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVSS( xRegisterSSE(to), xRegisterSSE(from) ); } @@ -181,189 +221,69 @@ emitterT void SSE2_MOVSD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int of emitterT void SSE2_MOVSD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVSD( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); } -//**********************************************************************************/ -//MOVLPS: Move low Packed Single-Precision FP * -//********************************************************************************** + emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVL.PS( xRegisterSSE(to), (void*)from ); } emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVL.PS( (void*)to, xRegisterSSE(from) ); } emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVL.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVL.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVHPS: Move High Packed Single-Precision FP * 
-//********************************************************************************** emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVH.PS( xRegisterSSE(to), (void*)from ); } emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.PS( (void*)to, xRegisterSSE(from) ); } emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVLHPS: Moved packed Single-Precision FP low to high * -//********************************************************************************** emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVHLPS: Moved packed Single-Precision FP High to Low * -//********************************************************************************** emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); } - -#define DEFINE_LEGACY_PSD_OPCODE( mod ) \ - emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void 
SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PD( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PD( xRegisterSSE(to), xRegisterSSE(from) ); } - -#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ - DEFINE_LEGACY_PSD_OPCODE( mod ) \ - emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##SD_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } - -DEFINE_LEGACY_PSD_OPCODE( AND ) -DEFINE_LEGACY_PSD_OPCODE( ANDN ) -DEFINE_LEGACY_PSD_OPCODE( OR ) -DEFINE_LEGACY_PSD_OPCODE( XOR ) - -DEFINE_LEGACY_PSSD_OPCODE( SUB ) -DEFINE_LEGACY_PSSD_OPCODE( ADD ) -DEFINE_LEGACY_PSSD_OPCODE( MUL ) -DEFINE_LEGACY_PSSD_OPCODE( DIV ) - -emitterT void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xRCPPS( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { xRCPPS( xRegisterSSE(to), (void*)from ); } - -emitterT void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xRCPSS( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { xRCPSS( xRegisterSSE(to), (void*)from ); } - -//////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//Packed Single-Precision FP compare (CMPccPS) * -//********************************************************************************** - -#define DEFINE_LEGACY_CMP_OPCODE( comp ) \ - emitterT void 
SSE_CMP##comp##PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PS( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_CMP##comp##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_CMP##comp##PD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PD( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_CMP##comp##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PD( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE_CMP##comp##SS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SS( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_CMP##comp##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_CMP##comp##SD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SD( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_CMP##comp##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SD( xRegisterSSE(to), xRegisterSSE(from) ); } - -DEFINE_LEGACY_CMP_OPCODE( EQ ) -DEFINE_LEGACY_CMP_OPCODE( LT ) -DEFINE_LEGACY_CMP_OPCODE( LE ) -DEFINE_LEGACY_CMP_OPCODE( UNORD ) -DEFINE_LEGACY_CMP_OPCODE( NE ) -DEFINE_LEGACY_CMP_OPCODE( NLT ) -DEFINE_LEGACY_CMP_OPCODE( NLE ) -DEFINE_LEGACY_CMP_OPCODE( ORD ) - -emitterT void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) +emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); } +emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) { - RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + xSHUF.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset], imm8 ); } -emitterT void 
SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); -} +emitterT void SSE_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), (void*)from, imm8 ); } -emitterT void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ) -{ - write8(0x66); - RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} -emitterT void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); -} +emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { xCVTPI2PS( xRegisterSSE(to), (u64*)from ); } +emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { xCVTPI2PS( xRegisterSSE(to), xRegisterMMX(from) ); } + +emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { xCVTPS2PI( xRegisterMMX(to), (u64*)from ); } +emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { xCVTPS2PI( xRegisterMMX(to), xRegisterSSE(from) ); } + +emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { xCVTTSS2SI( xRegister32(to), (u32*)from ); } +emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xCVTTSS2SI( xRegister32(to), xRegisterSSE(from) ); } + +emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { xCVTSI2SS( xRegisterSSE(to), (u32*)from ); } +emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { xCVTSI2SS( xRegisterSSE(to), xRegister32(from) ); } + +emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { xCVTSS2SD( xRegisterSSE(to), (u32*)from ); } +emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSS2SD( xRegisterSSE(to), 
xRegisterSSE(from) ); } + +emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { xCVTSD2SS( xRegisterSSE(to), (u64*)from ); } +emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSD2SS( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTDQ2PS( xRegisterSSE(to), (u128*)from ); } +emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTDQ2PS( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTPS2DQ( xRegisterSSE(to), (u128*)from ); } +emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); } ////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal * -//********************************************************************************** -emitterT void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); } -emitterT void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x520f ); } +////////////////////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal * -//********************************************************************************** -emitterT void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); } -emitterT void 
SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x520f ); } - -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SQRTPS : Packed Single-Precision FP Square Root * -//********************************************************************************** -emitterT void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); } -emitterT void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x510f ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SQRTSS : Scalar Single-Precision FP Square Root * -//********************************************************************************** -emitterT void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } -emitterT void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); } - -emitterT void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); } -emitterT void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); } - -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MAXPS: Return Packed Single-Precision FP Maximum * -//********************************************************************************** -emitterT void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); } -emitterT void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); } - -emitterT void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5f0f ); } -emitterT void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType 
from ) { SSERtoR66( 0x5f0f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MAXSS: Return Scalar Single-Precision FP Maximum * -//********************************************************************************** -emitterT void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } -emitterT void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } - -emitterT void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 0 ); } -emitterT void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion * -//********************************************************************************** -emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } -emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } - -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion * -//********************************************************************************** -emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } -emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } +//emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } +// emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { 
SSERtoR( 0x2a0f ); } +// +// emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } +// emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } +/* emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); } emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { @@ -372,8 +292,9 @@ emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) write16(0x2c0f); ModRM(3, to, from); } +*/ -emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } +/*emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { write8(0xf3); @@ -384,50 +305,26 @@ emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); } emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } +*/ -emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } +/*emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion * -//********************************************************************************** emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); 
} -//**********************************************************************************/ -//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion * -//********************************************************************************** emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } -emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); } -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MINPS: Return Packed Single-Precision FP Minimum * -//********************************************************************************** -emitterT void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); } -emitterT void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); } +emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); }*/ -emitterT void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); } -emitterT void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5d0f ); } - -////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MINSS: Return Scalar Single-Precision FP Minimum * -//********************************************************************************** -emitterT void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } -emitterT void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } - -emitterT void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 
0 ); } -emitterT void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); } /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PMAXSW: Packed Signed Integer Word Maximum * //********************************************************************************** //missing - // SSE_PMAXSW_M64_to_MM +// SSE_PMAXSW_M64_to_MM // SSE2_PMAXSW_M128_to_XMM // SSE2_PMAXSW_XMM_to_XMM emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } @@ -437,33 +334,11 @@ emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERt //PMINSW: Packed Signed Integer Word Minimum * //********************************************************************************** //missing - // SSE_PMINSW_M64_to_MM +// SSE_PMINSW_M64_to_MM // SSE2_PMINSW_M128_to_XMM // SSE2_PMINSW_XMM_to_XMM emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SHUFPS: Shuffle Packed Single-Precision FP Values * -//********************************************************************************** -emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } -emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } - -emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) -{ - RexRB(0, to, from); - write16(0xc60f); - WriteRmOffsetFrom(to, from, offset); - write8(imm8); -} - -////////////////////////////////////////////////////////////////////////////////////// 
-//**********************************************************************************/ -//SHUFPD: Shuffle Packed Double-Precision FP Values * -//********************************************************************************** -emitterT void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); } -emitterT void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); } - //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PSHUFD: Shuffle Packed DoubleWords * @@ -565,43 +440,6 @@ emitterT void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSER emitterT void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } /////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVD: Move Dword(32bit) to /from XMM reg * -//********************************************************************************** -emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } -emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { SSERtoR66(0x6E0F); } - -emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x6e0f ); - ModRM( 0, to, from); -} - -emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x6e0f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } -emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } - -emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, 
int offset ) -{ - write8(0x66); - RexRB(0, from, to); - write16( 0x7e0f ); - WriteRmOffsetFrom(from, to, offset); -} - -/////////////////////////////////////////////////////////////////////////////////////// - -emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); } - // shift right logical diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 368c4a36b9..a9604653b6 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -124,6 +124,22 @@ static __forceinline void xWrite( T val ) namespace x86Emitter { +////////////////////////////////////////////////////////////////////////////////////////// +// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] +// +// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions +// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache +// and some marginal speed gains as a result. (it's possible someday in the future the per- +// formance of the two instructions could change, so this constant is provided to restore MOVDQA +// use easily at a later time, if needed). +#define ALWAYS_USE_MOVAPS + +#ifdef ALWAYS_USE_MOVAPS + static const bool AlwaysUseMovaps = true; +#else + static const bool AlwaysUseMovaps = false; +#endif + ///////////////////////////////////////////////////////////////////////////////////////////// // __emitline - preprocessors definition // @@ -688,22 +704,6 @@ namespace x86Emitter #include "implement/test.h" #include "implement/jmpcall.h" } - - ////////////////////////////////////////////////////////////////////////////////////////// - // ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] - // - // This tells the recompiler's emitter to always use movaps instead of movdqa. 
Both instructions - // do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache - // and some marginal speed gains as a result. (it's possible someday in the future the per- - // formance of the two instructions could change, so this constant is provided to restore MOVDQA - // use easily at a later time, if needed). - #define ALWAYS_USE_MOVAPS - - #ifdef ALWAYS_USE_MOVAPS - static const bool AlwaysUseMovaps = true; - #else - static const bool AlwaysUseMovaps = false; - #endif } #include "ix86_inlines.inl"