From 28c81a2ff1e3e0693eec02ccd422dbfe40200b7a Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Mon, 20 Apr 2009 00:06:51 +0000 Subject: [PATCH] Bugfix for the new emitter: ([xmm/m128],xmm) form of MOVQ was generating the wrong opcode (introduced in r1011). [other changes are a work in progress for more instruction additions] git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1026 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/group1.h | 61 +++++++--- pcsx2/x86/ix86/implement/group2.h | 2 +- pcsx2/x86/ix86/implement/group3.h | 117 ++++++++++++++++++- pcsx2/x86/ix86/implement/xmm/movqss.h | 63 +++++++--- pcsx2/x86/ix86/ix86.cpp | 158 ++++---------------------- pcsx2/x86/ix86/ix86_instructions.h | 79 ++++++++++--- pcsx2/x86/ix86/ix86_legacy.cpp | 10 +- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 20 ++-- pcsx2/x86/ix86/ix86_types.h | 61 +--------- 9 files changed, 308 insertions(+), 263 deletions(-) diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 5d338c4368..8089593cc3 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -34,7 +34,7 @@ enum G1Type }; // ------------------------------------------------------------------- -template< G1Type InstType, typename ImmType > +template< typename ImmType > class Group1Impl { protected: @@ -46,42 +46,42 @@ protected: public: Group1Impl() {} // because GCC doesn't like static classes - static __emitinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, const iRegister& to, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); ModRM_Direct( from.Id, to.Id ); } - static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, const ModSibBase& sibdest, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); EmitSibMagic( from.Id, sibdest ); } - static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) + static __emitinline void Emit( G1Type InstType, const iRegister& to, const ModSibBase& sibsrc ) { prefix16(); iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); EmitSibMagic( to.Id, sibsrc ); } - static __emitinline void Emit( void* dest, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, void* dest, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); iWriteDisp( from.Id, dest ); } - static __emitinline void Emit( const iRegister& to, const void* src ) + static __emitinline void Emit( G1Type InstType, const iRegister& to, const void* src ) { prefix16(); iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); iWriteDisp( to.Id, src ); } - static __emitinline void Emit( const iRegister& to, int imm ) + static __emitinline void Emit( G1Type InstType, const iRegister& to, int imm ) { prefix16(); if( !Is8BitOperand() && is_s8( imm ) ) @@ -103,7 +103,7 @@ public: } } - static __emitinline void Emit( const ModSibStrict& sibdest, int imm ) + static __emitinline void Emit( G1Type InstType, const ModSibStrict& sibdest, int imm ) { if( Is8BitOperand() ) { @@ -132,26 +132,55 @@ class Group1ImplAll { public: template< typename T > - __forceinline void operator()( const iRegister& to, const iRegister& from ) const { Group1Impl::Emit( to, from ); } + __forceinline void operator()( const iRegister& to, const iRegister& from ) const { Group1Impl::Emit( InstType, to, from ); } template< typename T > - __forceinline void operator()( const iRegister& to, const void* src ) const { Group1Impl::Emit( to, src ); } + __forceinline void operator()( const iRegister& to, const void* src ) const { Group1Impl::Emit( InstType, to, src ); } template< typename T > - __forceinline void operator()( void* dest, const iRegister& from ) const { Group1Impl::Emit( dest, from ); } + __forceinline void operator()( void* dest, const iRegister& from ) const { Group1Impl::Emit( InstType, dest, from ); } template< typename T > - __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { Group1Impl::Emit( sibdest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { Group1Impl::Emit( InstType, sibdest, from ); } template< typename T > - __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { Group1Impl::Emit( to, sibsrc ); } + __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { Group1Impl::Emit( InstType, to, sibsrc ); } // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler // perspective. (using uint tends to make the compiler try and fail to match signed immediates with // one of the other overloads). template< typename T > - __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { Group1Impl::Emit( sibdest, imm ); } + __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { Group1Impl::Emit( InstType, sibdest, imm ); } template< typename T > - void operator()( const iRegister& to, int imm ) const { Group1Impl::Emit( to, imm ); } - + __forceinline void operator()( const iRegister& to, int imm ) const { Group1Impl::Emit( InstType, to, imm ); } Group1ImplAll() {} // Why does GCC need these? }; +template< G1Type InstType, u8 OpcodeSSE > +class G1LogicImpl : public Group1ImplAll +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0x66,OpcodeSSE> PD; + + G1LogicImpl() {} +}; + +template< G1Type InstType, u8 OpcodeSSE > +class G1ArithmeticImpl : public G1LogicImpl +{ +public: + const SSELogicImpl<0xf3,OpcodeSSE> SS; + const SSELogicImpl<0xf2,OpcodeSSE> SD; + + G1ArithmeticImpl() {} +}; + + +template< u8 OpcodeSSE > +class SSEAndNotImpl +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0x66,OpcodeSSE> PD; + + SSEAndNotImpl() {} +}; \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index f5767c282e..a694263008 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -122,7 +122,7 @@ public: template< typename T > __noinline void operator()( const ModSibStrict& sibdest, u8 imm ) const { Group2Impl::Emit( sibdest, imm ); } - template< typename T > void operator()( const iRegister& to, u8 imm ) const + template< typename T > __forceinline void operator()( const iRegister& to, u8 imm ) const { Group2Impl::Emit( to, imm ); } Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index 05543cdd05..a614e4b09d 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -71,4 +71,119 @@ public: __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( InstType, from ); } Group3ImplAll() {} -}; \ No newline at end of file +}; + + +template< G3Type InstType, u8 OpcodeSSE > +class G3Impl_PlusSSE : public Group3ImplAll +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0x66,OpcodeSSE> PD; + const SSELogicImpl<0xf3,OpcodeSSE> SS; + const SSELogicImpl<0xf2,OpcodeSSE> SD; + + G3Impl_PlusSSE() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// The following iMul-specific forms are valid for 16 and 32 bit register operands only! + +template< typename ImmType > +class iMulImpl +{ +protected: + static const uint OperandSize = sizeof(ImmType); + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const iRegister& from ) + { + prefix16(); + write16( 0xaf0f ); + ModRM_Direct( to.Id, from.Id ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const void* src ) + { + prefix16(); + write16( 0xaf0f ); + iWriteDisp( to.Id, src ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const ModSibBase& src ) + { + prefix16(); + write16( 0xaf0f ); + EmitSibMagic( to.Id, src ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const iRegister& from, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 0x6b : 0x69 ); + ModRM_Direct( to.Id, from.Id ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const void* src, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 0x6b : 0x69 ); + iWriteDisp( to.Id, src ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const ModSibBase& src, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 0x6b : 0x69 ); + EmitSibMagic( to.Id, src ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } +}; + + +class iMul_PlusSSE : public G3Impl_PlusSSE +{ +protected: + typedef iMulImpl iMUL32; + typedef iMulImpl iMUL16; + +public: + + template< typename T > + __forceinline void operator()( const iRegister& from ) const { Group3Impl::Emit( G3Type_iMUL, from ); } + + template< typename T > + __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( G3Type_iMUL, from ); } + + __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { iMUL32::Emit( to, from ); } + __forceinline void operator()( const iRegister32& to, const void* src ) const { iMUL32::Emit( to, src ); } + __forceinline void operator()( const iRegister32& to, const iRegister32& from, s32 imm ) const{ iMUL32::Emit( to, from, imm ); } + __noinline void operator()( const iRegister32& to, const ModSibBase& src ) const { iMUL32::Emit( to, src ); } + __noinline void operator()( const iRegister32& to, const ModSibBase& from, s32 imm ) const { iMUL32::Emit( to, from, imm ); } + + __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { iMUL16::Emit( to, from ); } + __forceinline void operator()( const iRegister16& to, const void* src ) const { iMUL16::Emit( to, src ); } + __forceinline void operator()( const iRegister16& to, const iRegister16& from, s16 imm ) const{ iMUL16::Emit( to, from, imm ); } + __noinline void operator()( const iRegister16& to, const ModSibBase& src ) const { iMUL16::Emit( to, src ); } + __noinline void operator()( const iRegister16& to, const ModSibBase& from, s16 imm ) const { iMUL16::Emit( to, from, imm ); } + + iMul_PlusSSE() {} +}; diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 4f0fdd1614..41f2b3071a 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -124,15 +124,9 @@ class PLogicImplAll { public: template< typename T > - __forceinline void operator()( const iRegisterSIMD& to, const iRegisterSIMD& from ) const - { - writeXMMop( 0x66, Opcode, to, from ); - } + __forceinline void operator()( const iRegisterSIMD& to, const iRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } template< typename T > - __forceinline void operator()( const iRegisterSIMD& to, const void* from ) const - { - writeXMMop( 0x66, Opcode, to, from ); - } + __forceinline void operator()( const iRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } template< typename T > __noinline void operator()( const iRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } @@ -141,19 +135,52 @@ public: // ------------------------------------------------------------------------ // For implementing SSE-only logic operations, like ANDPS/ANDPD +// template< u8 Prefix, u8 Opcode > -class PLogicImplSSE +class SSELogicImpl { public: - __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const - { - writeXMMop( Prefix, Opcode, to, from ); - } - __forceinline void operator()( const iRegisterSSE& to, const void* from ) const - { - writeXMMop( Prefix, Opcode, to, from ); - } + __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - PLogicImplSSE() {} //GCWho? + SSELogicImpl() {} //GCWho? +}; + + +// ------------------------------------------------------------------------ +// For implementing SSE-only comparison operations, like CMPEQPS. +// +enum SSE2_ComparisonType +{ + SSE2_Equal = 0, + SSE2_Less, + SSE2_LessOrEqual, + SSE2_Unordered, + SSE2_NotEqual, + SSE2_NotLess, + SSE2_NotLessOrEqual, + SSE2_Ordered +}; + +template< u8 Prefix > +class SSECompareImplGeneric +{ +public: + __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } + __forceinline void operator()( const iRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } + __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } + + SSECompareImplGeneric() {} //GCWhat? +}; + +template< u8 Prefix, u8 Opcode, SSE2_ComparisonType CType > +class SSECompareImpl +{ +public: + __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } + __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } + __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } + + SSECompareImpl() {} //GCWhat? }; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 86ad0f234d..362d446037 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -257,13 +257,15 @@ using namespace Internal; const MovImplAll iMOV; const TestImplAll iTEST; -const Group1ImplAll iADD; -const Group1ImplAll iOR; +const G1LogicImpl iAND; +const G1LogicImpl iOR; +const G1LogicImpl iXOR; + +const G1ArithmeticImpl iADD; +const G1ArithmeticImpl iSUB; + const Group1ImplAll iADC; const Group1ImplAll iSBB; -const Group1ImplAll iAND; -const Group1ImplAll iSUB; -const Group1ImplAll iXOR; const Group1ImplAll iCMP; const Group2ImplAll iROL; @@ -278,7 +280,8 @@ const Group3ImplAll iNOT; const Group3ImplAll iNEG; const Group3ImplAll iUMUL; const Group3ImplAll iUDIV; -const Group3ImplAll iSDIV; +const G3Impl_PlusSSE iDIV; +const iMul_PlusSSE iMUL; const IncDecImplAll iINC; const IncDecImplAll iDEC; @@ -610,99 +613,6 @@ __emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_fla EmitLeaMagic( to, src, preserve_flags ); } -////////////////////////////////////////////////////////////////////////////////////////// -// The following iMul-specific forms are valid for 16 and 32 bit register operands only! - -template< typename ImmType > -class iMulImpl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } - -public: - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from ) - { - prefix16(); - write16( 0xaf0f ); - ModRM_Direct( to.Id, from.Id ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const void* src ) - { - prefix16(); - write16( 0xaf0f ); - iWriteDisp( to.Id, src ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const ModSibBase& src ) - { - prefix16(); - write16( 0xaf0f ); - EmitSibMagic( to.Id, src ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 0x6b : 0x69 ); - ModRM_Direct( to.Id, from.Id ); - if( is_s8( imm ) ) - write8( imm ); - else - iWrite( imm ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const void* src, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 0x6b : 0x69 ); - iWriteDisp( to.Id, src ); - if( is_s8( imm ) ) - write8( imm ); - else - iWrite( imm ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const ModSibBase& src, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 0x6b : 0x69 ); - EmitSibMagic( to.Id, src ); - if( is_s8( imm ) ) - write8( imm ); - else - iWrite( imm ); - } -}; - -// ------------------------------------------------------------------------ -// iMUL's special forms (unique to iMUL alone), and valid for 32/16 bit operands only, -// thus noi templates are used. - -namespace Internal -{ - typedef iMulImpl iMUL32; - typedef iMulImpl iMUL16; -} - -__forceinline void iSMUL( const iRegister32& to, const iRegister32& from ) { iMUL32::Emit( to, from ); } -__forceinline void iSMUL( const iRegister32& to, const void* src ) { iMUL32::Emit( to, src ); } -__forceinline void iSMUL( const iRegister32& to, const iRegister32& from, s32 imm ) { iMUL32::Emit( to, from, imm ); } -__noinline void iSMUL( const iRegister32& to, const ModSibBase& src ) { iMUL32::Emit( to, src ); } -__noinline void iSMUL( const iRegister32& to, const ModSibBase& from, s32 imm ) { iMUL32::Emit( to, from, imm ); } - -__forceinline void iSMUL( const iRegister16& to, const iRegister16& from ) { iMUL16::Emit( to, from ); } -__forceinline void iSMUL( const iRegister16& to, const void* src ) { iMUL16::Emit( to, src ); } -__forceinline void iSMUL( const iRegister16& to, const iRegister16& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } -__noinline void iSMUL( const iRegister16& to, const ModSibBase& src ) { iMUL16::Emit( to, src ); } -__noinline void iSMUL( const iRegister16& to, const ModSibBase& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } ////////////////////////////////////////////////////////////////////////////////////////// // Push / Pop Emitters @@ -758,40 +668,13 @@ const PLogicImplAll<0xdf> iPANDN; const PLogicImplAll<0xeb> iPOR; const PLogicImplAll<0xef> iPXOR; -const PLogicImplSSE<0x00,0x54> iANDPS; -const PLogicImplSSE<0x66,0x54> iANDPD; -const PLogicImplSSE<0x00,0x55> iANDNPS; -const PLogicImplSSE<0x66,0x55> iANDNPD; -const PLogicImplSSE<0x00,0x56> iORPS; -const PLogicImplSSE<0x66,0x56> iORPD; -const PLogicImplSSE<0x00,0x57> iXORPS; -const PLogicImplSSE<0x66,0x57> iXORPD; - -const PLogicImplSSE<0x00,0x5c> iSUBPS; -const PLogicImplSSE<0x66,0x5c> iSUBPD; -const PLogicImplSSE<0xf3,0x5c> iSUBSS; -const PLogicImplSSE<0xf2,0x5c> iSUBSD; - -const PLogicImplSSE<0x00,0x58> iADDPS; -const PLogicImplSSE<0x66,0x58> iADDPD; -const PLogicImplSSE<0xf3,0x58> iADDSS; -const PLogicImplSSE<0xf2,0x58> iADDSD; - -const PLogicImplSSE<0x00,0x59> iMULPS; -const PLogicImplSSE<0x66,0x59> iMULPD; -const PLogicImplSSE<0xf3,0x59> iMULSS; -const PLogicImplSSE<0xf2,0x59> iMULSD; - -const PLogicImplSSE<0x00,0x5e> iDIVPS; -const PLogicImplSSE<0x66,0x5e> iDIVPD; -const PLogicImplSSE<0xf3,0x5e> iDIVSS; -const PLogicImplSSE<0xf2,0x5e> iDIVSD; +const SSEAndNotImpl<0x55> iANDN; // Compute Reciprocal Packed Single-Precision Floating-Point Values -const PLogicImplSSE<0,0x53> iRCPPS; +const SSELogicImpl<0,0x53> iRCPPS; // Compute Reciprocal of Scalar Single-Precision Floating-Point Value -const PLogicImplSSE<0xf3,0x53> iRCPSS; +const SSELogicImpl<0xf3,0x53> iRCPSS; // Moves from XMM to XMM, with the *upper 64 bits* of the destination register @@ -800,20 +683,27 @@ __forceinline void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ) { // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__noinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } +__forceinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. __forceinline void iMOVQZX( const iRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); } -__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } -__noinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } +// Moves lower quad of XMM to ptr64 (no bits are cleared) +__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } +// Moves lower quad of XMM to ptr64 (no bits are cleared) +__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } + +__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } +__forceinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } __forceinline void iMOVQ( const iRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); } __forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } __forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } -__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, from, dest ); } -__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, from, dest ); } + +// This form of iMOVQ is Intel's adeptly named 'MOVQ2DQ' __forceinline void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); } + +// This form of iMOVQ is Intel's adeptly named 'MOVDQ2Q' __forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ) { // Manual implementation of this form of MOVQ, since its parameters are unique in a way diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 5b776567ad..2b68790756 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -38,13 +38,15 @@ namespace x86Emitter // ------------------------------------------------------------------------ // Group 1 Instruction Class - extern const Internal::Group1ImplAll iADD; - extern const Internal::Group1ImplAll iOR; + extern const Internal::G1LogicImpl iAND; + extern const Internal::G1LogicImpl iOR; + extern const Internal::G1LogicImpl iXOR; + extern const Internal::SSEAndNotImpl<0x55> iANDN; + + extern const Internal::G1ArithmeticImpl iADD; + extern const Internal::G1ArithmeticImpl iSUB; extern const Internal::Group1ImplAll iADC; extern const Internal::Group1ImplAll iSBB; - extern const Internal::Group1ImplAll iAND; - extern const Internal::Group1ImplAll iSUB; - extern const Internal::Group1ImplAll iXOR; extern const Internal::Group1ImplAll iCMP; // ------------------------------------------------------------------------ @@ -72,7 +74,8 @@ namespace x86Emitter extern const Internal::Group3ImplAll iNEG; extern const Internal::Group3ImplAll iUMUL; extern const Internal::Group3ImplAll iUDIV; - extern const Internal::Group3ImplAll iSDIV; + extern const Internal::G3Impl_PlusSSE iDIV; + extern const Internal::iMul_PlusSSE iMUL; extern const Internal::IncDecImplAll iINC; extern const Internal::IncDecImplAll iDEC; @@ -205,22 +208,22 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // MUL / DIV instructions - extern void iSMUL( const iRegister32& to, const iRegister32& from ); - extern void iSMUL( const iRegister32& to, const void* src ); - extern void iSMUL( const iRegister32& to, const iRegister32& from, s32 imm ); - extern void iSMUL( const iRegister32& to, const ModSibBase& src ); - extern void iSMUL( const iRegister32& to, const ModSibBase& src, s32 imm ); + /*extern void iMUL( const iRegister32& to, const iRegister32& from ); + extern void iMUL( const iRegister32& to, const void* src ); + extern void iMUL( const iRegister32& to, const iRegister32& from, s32 imm ); + extern void iMUL( const iRegister32& to, const ModSibBase& src ); + extern void iMUL( const iRegister32& to, const ModSibBase& src, s32 imm ); - extern void iSMUL( const iRegister16& to, const iRegister16& from ); - extern void iSMUL( const iRegister16& to, const void* src ); - extern void iSMUL( const iRegister16& to, const iRegister16& from, s16 imm ); - extern void iSMUL( const iRegister16& to, const ModSibBase& src ); - extern void iSMUL( const iRegister16& to, const ModSibBase& src, s16 imm ); + extern void iMUL( const iRegister16& to, const iRegister16& from ); + extern void iMUL( const iRegister16& to, const void* src ); + extern void iMUL( const iRegister16& to, const iRegister16& from, s16 imm ); + extern void iMUL( const iRegister16& to, const ModSibBase& src ); + extern void iMUL( const iRegister16& to, const ModSibBase& src, s16 imm ); template< typename T > - __forceinline void iSMUL( const iRegister& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } + __forceinline void iMUL( const iRegister& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } template< typename T > - __noinline void iSMUL( const ModSibStrict& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } + __noinline void iMUL( const ModSibStrict& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); }*/ ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! @@ -431,6 +434,46 @@ namespace x86Emitter extern void iMOVHLPS( const iRegisterSSE& to, const iRegisterSSE& from ); extern void iMOVLHPD( const iRegisterSSE& to, const iRegisterSSE& from ); extern void iMOVHLPD( const iRegisterSSE& to, const iRegisterSSE& from ); + + ////////////////////////////////////////////////////////////////////////////////////////// + // + + extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVAPS; + extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVUPS; + + extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> iMOVAPD; + extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> iMOVUPD; + +#ifdef ALWAYS_USE_MOVAPS + extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> iMOVDQA; + extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> iMOVDQU; +#else + extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVDQA; + extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVDQU; +#endif + + extern const Internal::MovhlImplAll<0, 0x16> iMOVHPS; + extern const Internal::MovhlImplAll<0, 0x12> iMOVLPS; + extern const Internal::MovhlImplAll<0x66, 0x16> iMOVHPD; + extern const Internal::MovhlImplAll<0x66, 0x12> iMOVLPD; + + extern const Internal::PLogicImplAll<0xdb> iPAND; + extern const Internal::PLogicImplAll<0xdf> iPANDN; + extern const Internal::PLogicImplAll<0xeb> iPOR; + extern const Internal::PLogicImplAll<0xef> iPXOR; + + extern const Internal::SSELogicImpl<0,0x53> iRCPPS; + extern const Internal::SSELogicImpl<0xf3,0x53> iRCPSS; + + extern const Internal::SSECompareImplGeneric<0x00> iCMPPS; + extern const Internal::SSECompareImplGeneric<0x66> iCMPPD; + extern const Internal::SSECompareImplGeneric<0xf3> iCMPSS; + extern const Internal::SSECompareImplGeneric<0xf2> iCMPSD; + + extern const Internal::SSECompareImplGeneric<0x00> iCMPPS; + extern const Internal::SSECompareImplGeneric<0x66> iCMPPD; + extern const Internal::SSECompareImplGeneric<0xf3> iCMPSS; + extern const Internal::SSECompareImplGeneric<0xf2> iCMPSD; } diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index e3daec9b59..a484579f88 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -207,26 +207,26 @@ emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) /* mul eax by r32 to edx:eax */ emitterT void MUL32R( x86IntRegType from ) { iUMUL( iRegister32(from) ); } /* imul eax by r32 to edx:eax */ -emitterT void IMUL32R( x86IntRegType from ) { iSMUL( iRegister32(from) ); } +emitterT void IMUL32R( x86IntRegType from ) { iMUL( iRegister32(from) ); } /* mul eax by m32 to edx:eax */ emitterT void MUL32M( u32 from ) { iUMUL( ptr32[from] ); } /* imul eax by m32 to edx:eax */ -emitterT void IMUL32M( u32 from ) { iSMUL( ptr32[from] ); } +emitterT void IMUL32M( u32 from ) { iMUL( ptr32[from] ); } /* imul r32 by r32 to r32 */ emitterT void IMUL32RtoR( x86IntRegType to, x86IntRegType from ) { - iSMUL( iRegister32(to), iRegister32(from) ); + iMUL( iRegister32(to), iRegister32(from) ); } /* div eax by r32 to edx:eax */ emitterT void DIV32R( x86IntRegType from ) { iUDIV( iRegister32(from) ); } /* idiv eax by r32 to edx:eax */ -emitterT void IDIV32R( x86IntRegType from ) { iSDIV( iRegister32(from) ); } +emitterT void IDIV32R( x86IntRegType from ) { iDIV( iRegister32(from) ); } /* div eax by m32 to edx:eax */ emitterT void DIV32M( u32 from ) { iUDIV( ptr32[from] ); } /* idiv eax by m32 to edx:eax */ -emitterT void IDIV32M( u32 from ) { iSDIV( ptr32[from] ); } +emitterT void IDIV32M( u32 from ) { iDIV( ptr32[from] ); } emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 50818e75f4..72190a2f67 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -216,17 +216,17 @@ emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { i #define DEFINE_LEGACY_PSD_OPCODE( mod ) \ - emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod##PS( iRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##PS( iRegisterSSE(to), iRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod##PD( iRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##PD( iRegisterSSE(to), iRegisterSSE(from) ); } + emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod.PS( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod.PS( iRegisterSSE(to), iRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod.PD( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod.PD( iRegisterSSE(to), iRegisterSSE(from) ); } #define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ DEFINE_LEGACY_PSD_OPCODE( mod ) \ - emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod##SS( iRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##SS( iRegisterSSE(to), iRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##SD_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod##SD( iRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##SD( iRegisterSSE(to), iRegisterSSE(from) ); } + emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod.SS( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod.SS( iRegisterSSE(to), iRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##SD_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod.SD( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod.SD( iRegisterSSE(to), iRegisterSSE(from) ); } DEFINE_LEGACY_PSD_OPCODE( AND ) DEFINE_LEGACY_PSD_OPCODE( ANDN ) @@ -246,7 +246,7 @@ emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { iRCPSS( iR //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ -//Packed Single-Precission FP compare (CMPccPS) * +//Packed Single-Precision FP compare (CMPccPS) * //********************************************************************************** //missing SSE_CMPPS_I8_to_XMM // SSE_CMPPS_M32_to_XMM @@ -270,7 +270,7 @@ emitterT void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ -//Scalar Single-Precission FP compare (CMPccSS) * +//Scalar Single-Precision FP compare (CMPccSS) * //********************************************************************************** //missing SSE_CMPSS_I8_to_XMM // SSE_CMPSS_M32_to_XMM diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index d70ff1b1ea..095fc1a6fc 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -662,6 +662,7 @@ namespace x86Emitter extern void EmitSibMagic( uint regfield, const ModSibBase& info ); // ------------------------------------------------------------------------ + #include "implement/xmm/movqss.h" #include "implement/group1.h" #include "implement/group2.h" #include "implement/group3.h" @@ -671,7 +672,6 @@ namespace x86Emitter #include "implement/bittest.h" #include "implement/test.h" #include "implement/jmpcall.h" - #include "implement/xmm/movqss.h" } ////////////////////////////////////////////////////////////////////////////////////////// @@ -689,65 +689,6 @@ namespace x86Emitter #else static const bool AlwaysUseMovaps = false; #endif - - extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVAPS; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVUPS; - - extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> iMOVAPD; - extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> iMOVUPD; - - #ifdef ALWAYS_USE_MOVAPS - extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> iMOVDQA; - extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> iMOVDQU; - #else - extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVDQA; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVDQU; - #endif - - extern const Internal::MovhlImplAll<0, 0x16> iMOVHPS; - extern const Internal::MovhlImplAll<0, 0x12> iMOVLPS; - extern const Internal::MovhlImplAll<0x66, 0x16> iMOVHPD; - extern const Internal::MovhlImplAll<0x66, 0x12> iMOVLPD; - - extern const Internal::PLogicImplAll<0xdb> iPAND; - extern const Internal::PLogicImplAll<0xdf> iPANDN; - extern const Internal::PLogicImplAll<0xeb> iPOR; - extern const Internal::PLogicImplAll<0xef> iPXOR; - - extern const Internal::PLogicImplSSE<0x00,0x54> iANDPS; - extern const Internal::PLogicImplSSE<0x66,0x54> iANDPD; - extern const Internal::PLogicImplSSE<0x00,0x55> iANDNPS; - extern const Internal::PLogicImplSSE<0x66,0x55> iANDNPD; - extern const Internal::PLogicImplSSE<0x00,0x56> iORPS; - extern const Internal::PLogicImplSSE<0x66,0x56> iORPD; - extern const Internal::PLogicImplSSE<0x00,0x57> iXORPS; - extern const Internal::PLogicImplSSE<0x66,0x57> iXORPD; - - extern const Internal::PLogicImplSSE<0x00,0x5c> iSUBPS; - extern const Internal::PLogicImplSSE<0x66,0x5c> iSUBPD; - extern const Internal::PLogicImplSSE<0xf3,0x5c> iSUBSS; - extern const Internal::PLogicImplSSE<0xf2,0x5c> iSUBSD; - - extern const Internal::PLogicImplSSE<0x00,0x58> iADDPS; - extern const Internal::PLogicImplSSE<0x66,0x58> iADDPD; - extern const Internal::PLogicImplSSE<0xf3,0x58> iADDSS; - extern const Internal::PLogicImplSSE<0xf2,0x58> iADDSD; - - extern const Internal::PLogicImplSSE<0x00,0x59> iMULPS; - extern const Internal::PLogicImplSSE<0x66,0x59> iMULPD; - extern const Internal::PLogicImplSSE<0xf3,0x59> iMULSS; - extern const Internal::PLogicImplSSE<0xf2,0x59> iMULSD; - - extern const Internal::PLogicImplSSE<0x00,0x5e> iDIVPS; - extern const Internal::PLogicImplSSE<0x66,0x5e> iDIVPD; - extern const Internal::PLogicImplSSE<0xf3,0x5e> iDIVSS; - extern const Internal::PLogicImplSSE<0xf2,0x5e> iDIVSD; - - - - extern const Internal::PLogicImplSSE<0,0x53> iRCPPS; - extern const Internal::PLogicImplSSE<0xf3,0x53> iRCPSS; - } #include "ix86_inlines.inl"