diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 3731bff67c..5b03403ef2 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -905,14 +905,6 @@ - - - - @@ -921,6 +913,14 @@ RelativePath="..\..\Dump.h" > + + + + @@ -2965,10 +2965,6 @@ RelativePath="..\..\x86\ix86\ix86_legacy_internal.h" > - - @@ -2977,6 +2973,10 @@ RelativePath="..\..\x86\ix86\ix86_legacy_types.h" > + + @@ -3028,6 +3028,10 @@ RelativePath="..\..\x86\ix86\implement\test.h" > + + diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 846ab01a3e..2faa34ba69 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -935,6 +935,8 @@ void psxRecompileNextInstruction(int delayslot) #ifdef _DEBUG static void printfn() { + extern void iDumpPsxRegisters(u32 startpc, u32 temp); + static int lastrec = 0; static int curcount = 0; const int skip = 0; @@ -962,6 +964,8 @@ void iopRecRecompile(u32 startpc) u32 willbranch3 = 0; #ifdef _DEBUG + extern void iDumpPsxRegisters(u32 startpc, u32 temp); + if( psxdump & 4 ) iDumpPsxRegisters(startpc, 0); #endif diff --git a/pcsx2/x86/ix86-32/iR5900Move.cpp b/pcsx2/x86/ix86-32/iR5900Move.cpp index d61ade09ab..0d1878e26b 100644 --- a/pcsx2/x86/ix86-32/iR5900Move.cpp +++ b/pcsx2/x86/ix86-32/iR5900Move.cpp @@ -316,7 +316,7 @@ void recMFHILO1(int hi) if( reghi >= 0 ) { if( regd >= 0 ) { - SSEX_MOVHLPS_XMM_to_XMM(regd, reghi); + SSE_MOVHLPS_XMM_to_XMM(regd, reghi); xmmregs[regd].mode |= MODE_WRITE; } else { diff --git a/pcsx2/x86/ix86/implement/bittest.h b/pcsx2/x86/ix86/implement/bittest.h index 7fcdfb5027..a3829a66af 100644 --- a/pcsx2/x86/ix86/implement/bittest.h +++ b/pcsx2/x86/ix86/implement/bittest.h @@ -32,152 +32,39 @@ enum G8Type G8Type_BTC, }; -////////////////////////////////////////////////////////////////////////////////////////// -// Notes: Bit Test instructions are valid on 16/32 bit operands only. 
-// -template< G8Type InstType, typename ImmType > -class Group8Impl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - -public: - Group8Impl() {} // For the love of GCC. - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& bitbase, const xRegister& bitoffset ) - { - prefix16(); - xWrite( 0x0f ); - xWrite( 0xa3 | (InstType << 2) ); - ModRM_Direct( bitoffset.Id, bitbase.Id ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( void* bitbase, const xRegister& bitoffset ) - { - prefix16(); - xWrite( 0x0f ); - xWrite( 0xa3 | (InstType << 2) ); - xWriteDisp( bitoffset.Id, bitbase ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& bitbase, const xRegister& bitoffset ) - { - prefix16(); - xWrite( 0x0f ); - xWrite( 0xa3 | (InstType << 2) ); - EmitSibMagic( bitoffset.Id, bitbase ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& bitbase, u8 immoffset ) - { - prefix16(); - xWrite( 0xba0f ); - ModRM_Direct( InstType, bitbase.Id ); - xWrite( immoffset ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibStrict& bitbase, u8 immoffset ) - { - prefix16(); - xWrite( 0xba0f ); - EmitSibMagic( InstType, bitbase ); - xWrite( immoffset ); - } -}; - -// ------------------------------------------------------------------- -// -template< G8Type InstType > -class Group8ImplAll -{ -protected: - typedef Group8Impl m_32; - typedef Group8Impl m_16; - -public: - __forceinline void operator()( const xRegister32& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __forceinline void 
operator()( const xRegister16& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - __forceinline void operator()( void* bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __forceinline void operator()( void* bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - __noinline void operator()( const ModSibBase& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __noinline void operator()( const ModSibBase& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - - __noinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __noinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - void operator()( const xRegister& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - void operator()( const xRegister& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - - Group8ImplAll() {} -}; - - ////////////////////////////////////////////////////////////////////////////////////////// // BSF / BSR -- 16/32 operands supported only. // -template< bool isReverse, typename ImmType > +// 0xbc [fwd] / 0xbd [rev] +// +template< u16 Opcode > class BitScanImpl { -protected: - static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - static void emitbase() - { - prefix16(); - xWrite( 0x0f ); - xWrite( isReverse ? 0xbd : 0xbc ); - } - -public: - BitScanImpl() {} // For the love of GCC. 
- - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const xRegister& from ) - { - emitbase(); - ModRM_Direct( to.Id, from.Id ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const void* src ) - { - emitbase(); - xWriteDisp( to.Id, src ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const ModSibBase& sibsrc ) - { - emitbase(); - EmitSibMagic( to.Id, sibsrc ); - } -}; - - -// ------------------------------------------------------------------- -// BSF/BSR -- 16 and 32 bit operand forms only! -// -template< bool isReverse > -class BitScanImplAll -{ -protected: - typedef BitScanImpl m_32; - typedef BitScanImpl m_16; - public: - __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { m_32::Emit( to, from ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { m_16::Emit( to, from ); } - __forceinline void operator()( const xRegister32& to, const void* src ) const { m_32::Emit( to, src ); } - __forceinline void operator()( const xRegister16& to, const void* src ) const { m_16::Emit( to, src ); } - __noinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } - __noinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } + BitScanImpl() {} - BitScanImplAll() {} + __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } + __forceinline void operator()( const xRegister32& to, const void* src ) const { xOpWrite0F( Opcode, to, src ); } + __forceinline 
void operator()( const xRegister16& to, const void* src ) const { xOpWrite0F( 0x66, Opcode, to, src ); } + __forceinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { xOpWrite0F( Opcode, to, sibsrc ); } + __forceinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { xOpWrite0F( 0x66, Opcode, to, sibsrc ); } +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Bit Test Instructions - Valid on 16/32 bit instructions only. +// +template< G8Type InstType > +class Group8Impl : public BitScanImpl<0xa3 | (InstType << 2)> +{ +public: + using BitScanImpl<0xa3 | (InstType << 2)>::operator(); + + __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite( bitoffset ); } + __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite( bitoffset ); } + void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite( bitoffset ); } + void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite( bitoffset ); } + + Group8Impl() {} }; diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 8b38f35b04..424ec212dc 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -167,9 +167,9 @@ class xImpl_G1Compare : xImpl_Group1< G1Type_CMP > protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } - __noinline void operator()( const 
xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } Woot() {} }; diff --git a/pcsx2/x86/ix86/implement/xchg.h b/pcsx2/x86/ix86/implement/xchg.h new file mode 100644 index 0000000000..2128728458 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xchg.h @@ -0,0 +1,22 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// This header file is intended to be the future home of xchg, cmpxchg, xadd, and +// other threading-related exchange instructions. 
diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h index dae4a0c0c3..0b25bd827d 100644 --- a/pcsx2/x86/ix86/implement/xmm/arithmetic.h +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -28,19 +28,25 @@ class _SimdShiftHelper public: _SimdShiftHelper() {} - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode1, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( 0x66, Opcode1, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode1, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); } - __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { writeXMMop( Opcode1, to, from ); } - __forceinline void operator()( const xRegisterMMX& to, const void* from ) const { writeXMMop( Opcode1, to, from ); } - __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { writeXMMop( Opcode1, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode1, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from ) const { xOpWrite0F( Opcode1, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode1, to, from ); } - template< typename OperandType > - __emitinline void operator()( const xRegisterSIMD& to, u8 imm8 ) const + __emitinline void operator()( const xRegisterSSE& to, u8 imm8 ) 
const { - SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm ); + SimdPrefix( 0x66, OpcodeImm ); + ModRM( 3, (int)Modcode, to.Id ); + xWrite( imm8 ); + } + + __emitinline void operator()( const xRegisterMMX& to, u8 imm8 ) const + { + SimdPrefix( 0x00, OpcodeImm ); ModRM( 3, (int)Modcode, to.Id ); xWrite( imm8 ); } @@ -68,11 +74,11 @@ class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ public: const _SimdShiftHelper Q; - void DQ( const xRegisterSSE& to, u8 imm ) const + void DQ( const xRegisterSSE& to, u8 imm8 ) const { SimdPrefix( 0x66, 0x73 ); ModRM( 3, (int)Modcode+1, to.Id ); - xWrite( imm ); + xWrite( imm8 ); } SimdImpl_Shift() {} @@ -156,8 +162,8 @@ template< u16 OpcodeSSE > class SimdImpl_Sqrt : public SimdImpl_rSqrt { public: - const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; SimdImpl_Sqrt() {} + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -165,9 +171,9 @@ public: class SimdImpl_AndNot { public: + SimdImpl_AndNot() {} const SimdImpl_DestRegSSE<0x00,0x55> PS; const SimdImpl_DestRegSSE<0x66,0x55> PD; - SimdImpl_AndNot() {} }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -281,4 +287,88 @@ public: // * Adds the double-precision floating-point values in the high and low quadwords of // *src* stores the result in the high quadword of dest. const SimdImpl_DestRegSSE<0x66, 0x7c> PD; -}; \ No newline at end of file +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// DotProduct calculation (SSE4.1 only!) +// +class SimdImpl_DotProduct +{ +public: + SimdImpl_DotProduct() {} + + // [SSE-4.1] Conditionally multiplies the packed single precision floating-point + // values in dest with the packed single-precision floats in src depending on a + // mask extracted from the high 4 bits of the immediate byte. 
If a condition mask + // bit in Imm8[7:4] is zero, the corresponding multiplication is replaced by a value + // of 0.0. The four resulting single-precision values are summed into an inter- + // mediate result. + // + // The intermediate result is conditionally broadcasted to the destination using a + // broadcast mask specified by bits [3:0] of the immediate byte. If a broadcast + // mask bit is 1, the intermediate result is copied to the corresponding dword + // element in dest. If a broadcast mask bit is zero, the corresponding element in + // the destination is set to zero. + // + SimdImpl_DestRegImmSSE<0x66,0x403a> PS; + + // [SSE-4.1] + SimdImpl_DestRegImmSSE<0x66,0x413a> PD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Rounds floating point values (packed or single scalar) by an arbitrary rounding mode. +// (SSE4.1 only!) +class SimdImpl_Round +{ +public: + SimdImpl_Round() {} + + // [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest. + // + // Imm8 specifies control fields for the rounding operation: + // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) + // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. + // Bits 1:0 - Specifies a rounding mode for this instruction only. + // + // Rounding Mode Reference: + // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. + // + const SimdImpl_DestRegImmSSE<0x66,0x083a> PS; + + // [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest. + // + // Imm8 specifies control fields for the rounding operation: + // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) + // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. + // Bits 1:0 - Specifies a rounding mode for this instruction only. 
+ // + // Rounding Mode Reference: + // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. + // + const SimdImpl_DestRegImmSSE<0x66,0x093a> PD; + + // [SSE-4.1] Rounds the single-precision src value and stores in dest. + // + // Imm8 specifies control fields for the rounding operation: + // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) + // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. + // Bits 1:0 - Specifies a rounding mode for this instruction only. + // + // Rounding Mode Reference: + // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. + // + const SimdImpl_DestRegImmSSE<0x66,0x0a3a> SS; + + // [SSE-4.1] Rounds the double-precision src value and stores in dest. + // + // Imm8 specifies control fields for the rounding operation: + // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) + // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. + // Bits 1:0 - Specifies a rounding mode for this instruction only. + // + // Rounding Mode Reference: + // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. + // + const SimdImpl_DestRegImmSSE<0x66,0x0b3a> SD; +}; diff --git a/pcsx2/x86/ix86/implement/xmm/basehelpers.h b/pcsx2/x86/ix86/implement/xmm/basehelpers.h index 7094322b3d..0cce6b2369 100644 --- a/pcsx2/x86/ix86/implement/xmm/basehelpers.h +++ b/pcsx2/x86/ix86/implement/xmm/basehelpers.h @@ -23,57 +23,106 @@ extern void SimdPrefix( u8 prefix, u16 opcode ); -// ------------------------------------------------------------------------ -// xmm emitter helpers for xmm instruction with prefixes. -// These functions also support deducing the use of the prefix from the template parameters, -// since most xmm instructions use a prefix and most mmx instructions do not. 
(some mov -// instructions violate this "guideline.") -// -template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) +extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib ); +extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data ); +extern void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib ); +extern void xOpWrite0F( u16 opcode, int instId, const void* data ); + +template< typename T2 > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, int instId, const xRegister& from ) { - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - ModRM_Direct( to.Id, from.Id ); + SimdPrefix( prefix, opcode ); + ModRM_Direct( instId, from.Id ); } -template< typename T > -__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) +template< typename T2 > __emitinline +void xOpWrite0F( u16 opcode, int instId, const xRegister& from ) { - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - EmitSibMagic( reg.Id, sib ); + xOpWrite0F( 0, opcode, instId, from ); } -template< typename T > -__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) +template< typename T, typename T2 > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) { - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? 
prefix : 0, opcode ); - xWriteDisp( reg.Id, data ); + xOpWrite0F( prefix, opcode, to.Id, from ); +} + +template< typename T > __noinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) +{ + xOpWrite0F( prefix, opcode, reg.Id, sib ); +} + +template< typename T > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) +{ + xOpWrite0F( prefix, opcode, reg.Id, data ); } // ------------------------------------------------------------------------ -// xmm emitter helpers for xmm instructions *without* prefixes. -// These are normally used for special instructions that have MMX forms only (non-SSE), however -// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. // -template< typename T, typename T2 > -__emitinline void writeXMMop( u16 opcode, const xRegister& to, const xRegister& from ) +template< typename T, typename T2 > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, u8 imm8 ) { - SimdPrefix( 0, opcode ); - ModRM_Direct( to.Id, from.Id ); + xOpWrite0F( prefix, opcode, to, from ); + xWrite( imm8 ); } -template< typename T > -__noinline void writeXMMop( u16 opcode, const xRegister& reg, const ModSibBase& sib ) +template< typename T > __noinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, u8 imm8 ) { - SimdPrefix( 0, opcode ); - EmitSibMagic( reg.Id, sib ); + xOpWrite0F( prefix, opcode, reg, sib ); + xWrite( imm8 ); } -template< typename T > -__emitinline void writeXMMop( u16 opcode, const xRegister& reg, const void* data ) +template< typename T > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const void* data, u8 imm8 ) { - SimdPrefix( 0, opcode ); - xWriteDisp( reg.Id, data ); + xOpWrite0F( prefix, opcode, reg, data ); + xWrite( imm8 ); +} + +// 
------------------------------------------------------------------------ + +template< typename T, typename T2 > __emitinline +void xOpWrite0F( u16 opcode, const xRegister& to, const xRegister& from ) +{ + xOpWrite0F( 0, opcode, to, from ); +} + +template< typename T > __noinline +void xOpWrite0F( u16 opcode, const xRegister& reg, const ModSibBase& sib ) +{ + xOpWrite0F( 0, opcode, reg, sib ); +} + +template< typename T > __emitinline +void xOpWrite0F( u16 opcode, const xRegister& reg, const void* data ) +{ + xOpWrite0F( 0, opcode, reg, data ); +} + +// ------------------------------------------------------------------------ + +template< typename T, typename T2 > __emitinline +void xOpWrite0F( u16 opcode, const xRegister& to, const xRegister& from, u8 imm8 ) +{ + xOpWrite0F( opcode, to, from ); + xWrite( imm8 ); +} + +template< typename T > __noinline +void xOpWrite0F( u16 opcode, const xRegister& reg, const ModSibBase& sib, u8 imm8 ) +{ + xOpWrite0F( opcode, reg, sib ); + xWrite( imm8 ); +} + +template< typename T > __emitinline +void xOpWrite0F( u16 opcode, const xRegister& reg, const void* data, u8 imm8 ) +{ + xOpWrite0F( opcode, reg, data ); + xWrite( imm8 ); } // ------------------------------------------------------------------------ @@ -84,9 +133,9 @@ template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegSSE { public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } 
+ __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } SimdImpl_DestRegSSE() {} //GCWho? }; @@ -99,9 +148,9 @@ template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegImmSSE { public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); } SimdImpl_DestRegImmSSE() {} //GCWho? 
}; @@ -110,9 +159,9 @@ template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegImmMMX { public: - __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); } SimdImpl_DestRegImmMMX() {} //GCWho? }; @@ -125,27 +174,33 @@ template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegEither { public: - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + + __forceinline void operator()( 
const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode, to, from ); } SimdImpl_DestRegEither() {} //GCWho? }; // ------------------------------------------------------------------------ -// For implementing MMX/SSE operations which the destination *must* be a register, but the source -// can be regDirect or ModRM (indirect). +// For implementing MMX/SSE operations where the destination *must* be a register, but the +// source can be Direct or Indirect (ModRM/SibSB). The SrcOperandType template parameter +// is used to enforce type strictness of the (void*) parameter and ModSib<> parameter, so +// that the programmer must be explicit in specifying desired operand size. +// +// IMPORTANT: This helper assumes the prefix opcode is written *always* -- regardless of +// MMX or XMM register status. 
// template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > class SimdImpl_DestRegStrict { public: - __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } SimdImpl_DestRegStrict() {} //GCWho? }; diff --git a/pcsx2/x86/ix86/implement/xmm/comparisons.h b/pcsx2/x86/ix86/implement/xmm/comparisons.h index c71b53d3d5..a7e3197038 100644 --- a/pcsx2/x86/ix86/implement/xmm/comparisons.h +++ b/pcsx2/x86/ix86/implement/xmm/comparisons.h @@ -41,9 +41,9 @@ class SimdImpl_Compare protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { 
xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } Woot() {} }; @@ -128,4 +128,3 @@ public: // packed min/max values in dest. (SSE operands only) const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD; }; - diff --git a/pcsx2/x86/ix86/implement/xmm/moremovs.h b/pcsx2/x86/ix86/implement/xmm/moremovs.h index 7fcd7a1d8d..93fc620799 100644 --- a/pcsx2/x86/ix86/implement/xmm/moremovs.h +++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h @@ -30,10 +30,10 @@ protected: struct Woot { Woot() {} - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); } }; public: @@ -51,26 +51,104 @@ template< u16 Opcode > class MovhlImpl_RtoR { public: - __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); } - __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, 
from ); } + __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } MovhlImpl_RtoR() {} //GCC. }; -// ------------------------------------------------------------------------ -template< u8 Prefix, u16 Opcode, u16 OpcodeAlt > -class MovapsImplAll +////////////////////////////////////////////////////////////////////////////////////////// +// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD +// +// All implementations of Unaligned Movs will, when possible, use aligned movs instead. +// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement +// which can be checked for alignment at runtime. +// +template< u8 Prefix, bool isAligned > +class SimdImpl_MoveSSE { + static const u16 OpcodeA = 0x28; // Aligned [aps] form + static const u16 OpcodeU = 0x10; // unaligned [ups] form + public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - - MovapsImplAll() {} //GCC. + SimdImpl_MoveSSE() {} //GCC. 
+ + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const + { + if( to != from ) xOpWrite0F( Prefix, OpcodeA, to, from ); + } + + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const + { + xOpWrite0F( Prefix, (isAligned || ((uptr)from & 0x0f) == 0) ? OpcodeA : OpcodeU, to, from ); + } + + __forceinline void operator()( void* to, const xRegisterSSE& from ) const + { + xOpWrite0F( Prefix, (isAligned || ((uptr)to & 0x0f) == 0) ? OpcodeA+1 : OpcodeU+1, from, to ); + } + + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const + { + // ModSib form is aligned if it's displacement-only and the displacement is aligned: + bool isReallyAligned = isAligned || ( ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty() ); + xOpWrite0F( Prefix, isReallyAligned ? OpcodeA : OpcodeU, to, from ); + } + + __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const + { + // ModSib form is aligned if it's displacement-only and the displacement is aligned: + bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() ); + xOpWrite0F( Prefix, isReallyAligned ? OpcodeA+1 : OpcodeU+1, from, to ); + } }; +////////////////////////////////////////////////////////////////////////////////////////// +// Implementations for MOVDQA / MOVDQU +// +template< u8 Prefix, bool isAligned > +class SimdImpl_MoveDQ +{ + static const u8 PrefixA = 0x66; // Aligned [aps] form + static const u8 PrefixU = 0xf3; // unaligned [ups] form + + static const u16 Opcode = 0x6f; + static const u16 Opcode_Alt = 0x7f; // alternate ModRM encoding (reverse src/dst) + +public: + SimdImpl_MoveDQ() {} //GCC. 
+ + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const + { + if( to != from ) xOpWrite0F( PrefixA, Opcode, to, from ); + } + + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const + { + xOpWrite0F( (isAligned || (from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode, to, from ); + } + + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const + { + xOpWrite0F( (isAligned || (from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode_Alt, to, from ); + } + + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const + { + // ModSib form is aligned if it's displacement-only and the displacement is aligned: + bool isReallyAligned = isAligned || ( (from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty() ); + xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode, to, from ); + } + + __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const + { + // ModSib form is aligned if it's displacement-only and the displacement is aligned: + bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() ); + xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode_Alt, to, from ); + } +}; + + ////////////////////////////////////////////////////////////////////////////////////////// // template< u8 AltPrefix, u16 OpcodeSSE > @@ -83,12 +161,79 @@ public: }; ////////////////////////////////////////////////////////////////////////////////////////// +// Blend - Conditional copying of values in src into dest. // class SimdImpl_Blend { +public: + // [SSE-4.1] Conditionally copies dword values from src to dest, depending on the + // mask bits in the immediate operand (bits [3:0]). Each mask bit corresponds to a + // dword element in a 128-bit operand. 
+ // + // If a mask bit is 1, then the corresponding dword in the source operand is copied + // to dest, else the dword element in dest is left unchanged. + // SimdImpl_DestRegImmSSE<0x66,0x0c3a> PS; + + // [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the + // mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a + // quadword element in a 128-bit operand. + // + // If a mask bit is 1, then the corresponding dword in the source operand is copied + // to dest, else the dword element in dest is left unchanged. + // SimdImpl_DestRegImmSSE<0x66,0x0d3a> PD; - SimdImpl_DestRegImmSSE<0x66,0x1438> VPS; - SimdImpl_DestRegImmSSE<0x66,0x1538> VPD; -}; \ No newline at end of file + // [SSE-4.1] Conditionally copies dword values from src to dest, depending on the + // mask (bits [3:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds + // to a dword element in the 128-bit operand. + // + // If a mask bit is 1, then the corresponding dword in the source operand is copied + // to dest, else the dword element in dest is left unchanged. + // + SimdImpl_DestRegSSE<0x66,0x1438> VPS; + + // [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the + // mask (bits [1:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds + // to a quadword element in the 128-bit operand. + // + // If a mask bit is 1, then the corresponding dword in the source operand is copied + // to dest, else the dword element in dest is left unchanged. + // + SimdImpl_DestRegSSE<0x66,0x1538> VPD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed Move with Sign or Zero extension. +// +template< bool SignExtend > +class SimdImpl_PMove +{ + static const u16 OpcodeBase = SignExtend ? 0x2038 : 0x3038; + +public: + // [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers + // and store them in dest. 
+ SimdImpl_DestRegStrict<0x66,OpcodeBase,xRegisterSSE,xRegisterSSE,u64> BW; + + // [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers + // and store them in dest. + SimdImpl_DestRegStrict<0x66,OpcodeBase+0x100,xRegisterSSE,xRegisterSSE,u32> BD; + + // [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers + // and store them in dest. + SimdImpl_DestRegStrict<0x66,OpcodeBase+0x200,xRegisterSSE,xRegisterSSE,u16> BQ; + + // [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers + // and store them in dest. + SimdImpl_DestRegStrict<0x66,OpcodeBase+0x300,xRegisterSSE,xRegisterSSE,u64> WD; + + // [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers + // and store them in dest. + SimdImpl_DestRegStrict<0x66,OpcodeBase+0x400,xRegisterSSE,xRegisterSSE,u32> WQ; + + // [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers + // and store them in dest. + SimdImpl_DestRegStrict<0x66,OpcodeBase+0x500,xRegisterSSE,xRegisterSSE,u64> DQ; +}; + diff --git a/pcsx2/x86/ix86/implement/xmm/shufflepack.h b/pcsx2/x86/ix86/implement/xmm/shufflepack.h index f4056c9369..ee306e51d4 100644 --- a/pcsx2/x86/ix86/implement/xmm/shufflepack.h +++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h @@ -26,9 +26,9 @@ class SimdImpl_Shuffle protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, 
from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } Woot() {} }; @@ -182,20 +182,17 @@ protected: __forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 ); } __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 ); } __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 ); } }; @@ -203,28 +200,13 @@ public: SimdImpl_PInsert() {} // Operation can be performed on either MMX or SSE src operands. - template< typename T > - __forceinline void W( const xRegisterSIMD& to, const xRegister32& from, u8 imm8 ) const - { - writeXMMop( 0x66, 0xc4, to, from ); - xWrite( imm8 ); - } + __forceinline void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); } + __forceinline void W( const xRegisterSSE& to, const void* from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); } + __forceinline void W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); } - // Operation can be performed on either MMX or SSE src operands. 
- template< typename T > - __forceinline void W( const xRegisterSIMD& to, const void* from, u8 imm8 ) const - { - writeXMMop( 0x66, 0xc4, to, from ); - xWrite( imm8 ); - } - - // Operation can be performed on either MMX or SSE src operands. - template< typename T > - __forceinline void W( const xRegisterSIMD& to, const ModSibBase& from, u8 imm8 ) const - { - writeXMMop( 0x66, 0xc4, to, from ); - xWrite( imm8 ); - } + __forceinline void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); } + __forceinline void W( const xRegisterMMX& to, const void* from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); } + __forceinline void W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); } // [SSE-4.1] const ByteDwordForms<0x20> B; @@ -250,20 +232,17 @@ protected: __forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 ); } __forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 ); } __forceinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 ); } }; @@ -276,24 +255,11 @@ public: // // [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension! 
// - template< typename T > - __forceinline void W( const xRegister32& to, const xRegisterSIMD& from, u8 imm8 ) const - { - writeXMMop( 0x66, 0xc5, to, from, true ); - xWrite( imm8 ); - } + __forceinline void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc5, to, from, imm8 ); } + __forceinline void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const { xOpWrite0F( 0xc5, to, from, imm8 ); } - __forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const - { - writeXMMop( 0x66, 0x153a, from, dest ); - xWrite( imm8 ); - } - - __forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const - { - writeXMMop( 0x66, 0x153a, from, dest ); - xWrite( imm8 ); - } + __forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); } + __forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); } // [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits // of dest are zero-extended (cleared). This can be used to extract any single packed diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 0453ffc8e1..4d272fd154 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -160,8 +160,41 @@ namespace Internal { xWriteDisp( regfield, (s32)address ); } + + ////////////////////////////////////////////////////////////////////////////////////////// + // emitter helpers for xmm instruction with prefixes, most of which are using + // the basic opcode format (items inside braces denote optional or conditional + // emission): + // + // [Prefix] / 0x0f / [OpcodePrefix] / Opcode / ModRM+[SibSB] + // + // Prefixes are typically 0x66, 0xf2, or 0xf3. OpcodePrefixes are either 0x38 or + // 0x3a [any other value will result in an assertion failure].
+ // + __emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib ) + { + SimdPrefix( prefix, opcode ); + EmitSibMagic( instId, sib ); + } - // ------------------------------------------------------------------------ + __emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data ) + { + SimdPrefix( prefix, opcode ); + xWriteDisp( instId, data ); + } + + __emitinline void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib ) + { + xOpWrite0F( 0, opcode, instId, sib ); + } + + __emitinline void xOpWrite0F( u16 opcode, int instId, const void* data ) + { + xOpWrite0F( 0, opcode, instId, data ); + } + + + ////////////////////////////////////////////////////////////////////////////////////////// // returns TRUE if this instruction requires SIB to be encoded, or FALSE if the // instruction ca be encoded as ModRm alone. static __forceinline bool NeedsSibMagic( const ModSibBase& info ) @@ -288,13 +321,13 @@ const MovExtendImplAll xMOVSX; const DwordShiftImplAll xSHLD; const DwordShiftImplAll xSHRD; -const Group8ImplAll xBT; -const Group8ImplAll xBTR; -const Group8ImplAll xBTS; -const Group8ImplAll xBTC; +const Group8Impl xBT; +const Group8Impl xBTR; +const Group8Impl xBTS; +const Group8Impl xBTC; -const BitScanImplAll xBSF; -const BitScanImplAll xBSR; +const BitScanImpl<0xbc> xBSF; +const BitScanImpl<0xbd> xBSR; // ------------------------------------------------------------------------ const CMovImplGeneric xCMOV; @@ -635,320 +668,4 @@ __emitinline void xBSWAP( const xRegister32& to ) write8( 0xC8 | to.Id ); } - -////////////////////////////////////////////////////////////////////////////////////////// -// MMX / XMM Instructions -// (these will get put in their own file later) - -// ------------------------------------------------------------------------ -// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is -// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended 
SSE3/4 -// instructions). Any other lower value assumes the upper value is 0 and ignored. -// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will -// generate an assertion. -// -__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) -{ - const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); - - // If the lower byte is not a valid previx and the upper byte is non-zero it - // means we made a mistake! - if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 ); - - if( prefix != 0 ) - { - if( is16BitOpcode ) - xWrite( (opcode<<16) | 0x0f00 | prefix ); - else - { - xWrite( 0x0f00 | prefix ); - xWrite( opcode ); - } - } - else - { - if( is16BitOpcode ) - { - xWrite( 0x0f ); - xWrite( opcode ); - } - else - xWrite( (opcode<<8) | 0x0f ); - } -} - -// [SSE-3] -const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; -// [SSE-3] -const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; - -const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; -const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS; -const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD; -const MovapsImplAll< 0x66, 0x10, 0x11 > xMOVUPD; - -#ifdef ALWAYS_USE_MOVAPS -const MovapsImplAll< 0x66, 0x6f, 0x7f > xMOVDQA; -const MovapsImplAll< 0xf3, 0x6f, 0x7f > xMOVDQU; -#else -const MovapsImplAll< 0, 0x28, 0x29 > xMOVDQA; -const MovapsImplAll< 0, 0x10, 0x11 > xMOVDQU; -#endif - -const MovhlImplAll<0x16> xMOVH; -const MovhlImplAll<0x12> xMOVL; -const MovhlImpl_RtoR<0x16> xMOVLH; -const MovhlImpl_RtoR<0x12> xMOVHL; - -const SimdImpl_DestRegEither<0x66,0xdb> xPAND; -const SimdImpl_DestRegEither<0x66,0xdf> xPANDN; -const SimdImpl_DestRegEither<0x66,0xeb> xPOR; -const SimdImpl_DestRegEither<0x66,0xef> xPXOR; - -const SimdImpl_AndNot xANDN; - -const SimdImpl_UcomI<0x66,0x2e> xUCOMI; -const SimdImpl_rSqrt<0x53> xRCP; -const SimdImpl_rSqrt<0x52> xRSQRT; -const SimdImpl_Sqrt<0x51> xSQRT; - -const SimdImpl_MinMax<0x5f> xMAX; -const SimdImpl_MinMax<0x5d> xMIN; -const SimdImpl_Shuffle<0xc6> xSHUF; - -// 
------------------------------------------------------------------------ - -const SimdImpl_Compare xCMPEQ; -const SimdImpl_Compare xCMPLT; -const SimdImpl_Compare xCMPLE; -const SimdImpl_Compare xCMPUNORD; -const SimdImpl_Compare xCMPNE; -const SimdImpl_Compare xCMPNLT; -const SimdImpl_Compare xCMPNLE; -const SimdImpl_Compare xCMPORD; - -// ------------------------------------------------------------------------ -// SSE Conversion Operations, as looney as they are. -// -// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing -// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). -// -const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; -const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; - -const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; -const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; -const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; - -const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; -const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; - -const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; -const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; -const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; - -const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; -const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; -const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; -const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; - -const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; -const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; 
- -const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; -const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; -const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; -const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; - -const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; -const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; - -// ------------------------------------------------------------------------ - -const SimdImpl_Shift<0xd0, 2> xPSRL; -const SimdImpl_Shift<0xf0, 6> xPSLL; -const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; - -const SimdImpl_AddSub<0xdc, 0xd4> xPADD; -const SimdImpl_AddSub<0xd8, 0xfb> xPSUB; -const SimdImpl_PMinMax<0xde,0x3c> xPMAX; -const SimdImpl_PMinMax<0xda,0x38> xPMIN; - -const SimdImpl_PMul xPMUL; -const SimdImpl_PCompare xPCMP; -const SimdImpl_PShuffle xPSHUF; -const SimdImpl_PUnpack xPUNPCK; -const SimdImpl_Unpack xUNPCK; -const SimdImpl_Pack xPACK; - -const SimdImpl_PAbsolute xPABS; -const SimdImpl_PSign xPSIGN; -const SimdImpl_PInsert xPINSR; -const SimdImpl_PExtract xPEXTR; -const SimdImpl_PMultAdd xPMADD; -const SimdImpl_HorizAdd xHADD; - - -////////////////////////////////////////////////////////////////////////////////////////// -// - -__emitinline void xEMMS() -{ - xWrite( 0x770F ); -} - -// Store Streaming SIMD Extension Control/Status to Mem32. -__emitinline void xSTMXCSR( u32* dest ) -{ - SimdPrefix( 0, 0xae ); - xWriteDisp( 3, dest ); -} - -// Load Streaming SIMD Extension Control/Status from Mem32. -__emitinline void xLDMXCSR( const u32* src ) -{ - SimdPrefix( 0, 0xae ); - xWriteDisp( 2, src ); -} - - -// Moves from XMM to XMM, with the *upper 64 bits* of the destination register -// being cleared to zero. 
-__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, to, from ); } - -// Moves from XMM to XMM, with the *upper 64 bits* of the destination register -// being cleared to zero. -__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } - -// Moves from XMM to XMM, with the *upper 64 bits* of the destination register -// being cleared to zero. -__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); } - -// Moves lower quad of XMM to ptr64 (no bits are cleared) -__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } -// Moves lower quad of XMM to ptr64 (no bits are cleared) -__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } - -__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } -__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } -__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); } -__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } -__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } - -// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ' -__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); } - -// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q' -__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) -{ - // Manual implementation of this form of MOVQ, since its parameters are unique in a way - // that breaks the template inference of writeXMMop(); - - SimdPrefix( 0xf2, 0xd6 ); - ModRM_Direct( to.Id, from.Id ); 
-} - -////////////////////////////////////////////////////////////////////////////////////////// -// - -#define IMPLEMENT_xMOVS( ssd, prefix ) \ - __forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } \ - __forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } - -IMPLEMENT_xMOVS( SS, 0xf3 ) -IMPLEMENT_xMOVS( SD, 0xf2 ) - -////////////////////////////////////////////////////////////////////////////////////////// -// Non-temporal movs only support a register as a target (ie, load form only, no stores) -// - -__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from ) -{ - xWrite( 0x2A380f66 ); - xWriteDisp( to.Id, from ); -} - -__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ) -{ - xWrite( 0x2A380f66 ); - EmitSibMagic( to.Id, from ); -} - -__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } -__forceinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } - -__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } -__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } -__forceinline void xMOVNTPS( void* to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } -__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } - -__forceinline 
void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } -__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } - -__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x50, to, from ); } -__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); } - -////////////////////////////////////////////////////////////////////////////////////////// -// INSERTPS / EXTRACTPS [SSE4.1 only!] -// -// [TODO] these might be served better as classes, especially if other instructions use -// the M32,sse,imm form (I forget offhand if any do). - - -// [SSE-4.1] Insert a single-precision floating-point value from src into a specified -// location in dest, and selectively zero out the data elements in dest according to -// the mask field in the immediate byte. The source operand can be a memory location -// (32 bits) or an XMM register (lower 32 bits used). -// -// Imm8 provides three fields: -// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if -// the source is a memory operand. -// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest. -// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written -// with 0.0 if set to 1. -// -__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x213a, to, from ); - xWrite( imm8 ); -} - -__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x213a, to, from ); - xWrite( imm8 ); -} - -__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x213a, to, from ); - xWrite( imm8 ); -} - -// [SSE-4.1] Extract a single-precision floating-point value from src at an offset -// determined by imm8[1-0]*32. 
The extracted single precision floating-point value -// is stored into the low 32-bits of dest (or at a 32-bit memory pointer). -// -__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x173a, to, from, true ); - xWrite( imm8 ); -} - -__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x173a, from, dest, true ); - xWrite( imm8 ); -} - -__emitinline void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x173a, from, dest, true ); - xWrite( imm8 ); -} - - } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 65ce383144..32d210edbc 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -86,16 +86,16 @@ namespace x86Emitter extern const Internal::DwordShiftImplAll xSHLD; extern const Internal::DwordShiftImplAll xSHRD; - extern const Internal::Group8ImplAll xBT; - extern const Internal::Group8ImplAll xBTR; - extern const Internal::Group8ImplAll xBTS; - extern const Internal::Group8ImplAll xBTC; + extern const Internal::Group8Impl xBT; + extern const Internal::Group8Impl xBTR; + extern const Internal::Group8Impl xBTS; + extern const Internal::Group8Impl xBTC; extern const Internal::JmpCallImplAll xJMP; extern const Internal::JmpCallImplAll xCALL; - extern const Internal::BitScanImplAll xBSF; - extern const Internal::BitScanImplAll xBSR; + extern const Internal::BitScanImpl<0xbc> xBSF; + extern const Internal::BitScanImpl<0xbd> xBSR; // ------------------------------------------------------------------------ extern const Internal::CMovImplGeneric xCMOV; @@ -299,95 +299,28 @@ namespace x86Emitter typedef xForwardJPO xForwardJPO8; typedef xForwardJPO xForwardJPO32; - ////////////////////////////////////////////////////////////////////////////////////////// - // MMX Mov Instructions (MOVD, MOVQ, MOVSS). 
- // - // Notes: - // * Some of the functions have been renamed to more clearly reflect what they actually - // do. Namely we've affixed "ZX" to several MOVs that take a register as a destination - // since that's what they do (MOVD clears upper 32/96 bits, etc). - // - - // ------------------------------------------------------------------------ - // MOVD has valid forms for MMX and XMM registers. - // - template< typename T > - __emitinline void xMOVDZX( const xRegisterSIMD& to, const xRegister32& from ) - { - Internal::writeXMMop( 0x66, 0x6e, to, from ); - } - - template< typename T > - __emitinline void xMOVDZX( const xRegisterSIMD& to, const void* src ) - { - Internal::writeXMMop( 0x66, 0x6e, to, src ); - } - - template< typename T > - void xMOVDZX( const xRegisterSIMD& to, const ModSibBase& src ) - { - Internal::writeXMMop( 0x66, 0x6e, to, src ); - } - - template< typename T > - __emitinline void xMOVD( const xRegister32& to, const xRegisterSIMD& from ) - { - Internal::writeXMMop( 0x66, 0x7e, from, to ); - } - - template< typename T > - __emitinline void xMOVD( void* dest, const xRegisterSIMD& from ) - { - Internal::writeXMMop( 0x66, 0x7e, from, dest ); - } - - template< typename T > - void xMOVD( const ModSibBase& dest, const xRegisterSIMD& from ) - { - Internal::writeXMMop( 0x66, 0x7e, from, dest ); - } - - - // ------------------------------------------------------------------------ - - // xMASKMOV: - // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. - // The default memory location is specified by DS:EDI. The most significant bit in each byte - // of the mask operand determines whether the corresponding byte in the source operand is - // written to the corresponding byte location in memory. 
- - template< typename T > - static __forceinline void xMASKMOV( const xRegisterSIMD& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); } - - // xPMOVMSKB: - // Creates a mask made up of the most significant bit of each byte of the source - // operand and stores the result in the low byte or word of the destination operand. - // Upper bits of the destination are cleared to zero. - // - // When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on - // 128-bit (SSE) source, the byte mask is 16-bits. - // - template< typename T > - static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } - - // [sSSE-3] Concatenates dest and source operands into an intermediate composite, - // shifts the composite at byte granularity to the right by a constant immediate, - // and extracts the right-aligned result into the destination. - // - template< typename T > - static __forceinline void xPALIGNR( const xRegisterSIMD& to, const xRegisterSIMD& from, u8 imm8 ) - { - Internal::writeXMMop( 0x66, 0x0f3a, to, from ); - xWrite( imm8 ); - } - - // ------------------------------------------------------------------------ extern void xEMMS(); extern void xSTMXCSR( u32* dest ); extern void xLDMXCSR( const u32* src ); + extern void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ); + extern void xMOVDZX( const xRegisterSSE& to, const void* src ); + extern void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src ); + + extern void xMOVDZX( const xRegisterMMX& to, const xRegister32& from ); + extern void xMOVDZX( const xRegisterMMX& to, const void* src ); + extern void xMOVDZX( const xRegisterMMX& to, const ModSibBase& src ); + + extern void xMOVD( const xRegister32& to, const xRegisterSSE& from ); + extern void xMOVD( void* dest, const xRegisterSSE& from ); + extern void xMOVD( const ModSibBase& dest, const xRegisterSSE& from ); + + extern void 
xMOVD( const xRegister32& to, const xRegisterMMX& from ); + extern void xMOVD( void* dest, const xRegisterMMX& from ); + extern void xMOVD( const ModSibBase& dest, const xRegisterMMX& from ); + extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ); extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ); extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ); @@ -430,6 +363,39 @@ namespace x86Emitter extern void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from ); extern void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from ); + extern void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from ); + extern void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ); + extern void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from ); + extern void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from ); + extern void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ); + extern void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ); + + // ------------------------------------------------------------------------ + + extern const Internal::SimdImpl_MoveSSE<0x00,true> xMOVAPS; + extern const Internal::SimdImpl_MoveSSE<0x00,false> xMOVUPS; + +#ifdef ALWAYS_USE_MOVAPS + extern const Internal::SimdImpl_MoveSSE<0,true> xMOVDQA; + extern const Internal::SimdImpl_MoveSSE<0,false> xMOVDQU; + extern const Internal::SimdImpl_MoveSSE<0,true> xMOVAPD; + extern const Internal::SimdImpl_MoveSSE<0,false> xMOVUPD; +#else + extern const Internal::SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA; + extern const Internal::SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU; + extern const Internal::SimdImpl_MoveSSE<0x66,true> xMOVAPD; + extern const Internal::SimdImpl_MoveSSE<0x66,false> xMOVUPD; +#endif + + extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH; + extern const Internal::MovhlImpl_RtoR<0x12> xMOVHL; + + extern const Internal::MovhlImplAll<0x16> xMOVH; + extern const 
Internal::MovhlImplAll<0x12> xMOVL; + + extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; + extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; + extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ); extern void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ); extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ); @@ -438,38 +404,13 @@ namespace x86Emitter extern void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ); extern void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ); - // ------------------------------------------------------------------------ - - extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; - extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; - - extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS; - - extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> xMOVAPD; - extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> xMOVUPD; - -#ifdef ALWAYS_USE_MOVAPS - extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> xMOVDQA; - extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> xMOVDQU; -#else - extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVDQA; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVDQU; -#endif - - extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH; - extern const Internal::MovhlImpl_RtoR<0x12> xMOVHL; - - extern const Internal::MovhlImplAll<0x16> xMOVH; - extern const Internal::MovhlImplAll<0x12> xMOVL; - // ------------------------------------------------------------------------ extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND; extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN; extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR; extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR; - + extern const Internal::SimdImpl_AndNot xANDN; 
extern const Internal::SimdImpl_UcomI<0x66,0x2e> xUCOMI; @@ -482,6 +423,8 @@ namespace x86Emitter extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF; // ------------------------------------------------------------------------ + + extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; extern const Internal::SimdImpl_Compare xCMPEQ; extern const Internal::SimdImpl_Compare xCMPLT; @@ -527,8 +470,8 @@ namespace x86Emitter // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL; extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL; + extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL; extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD; @@ -550,5 +493,12 @@ namespace x86Emitter extern const Internal::SimdImpl_PMultAdd xPMADD; extern const Internal::SimdImpl_HorizAdd xHADD; + extern const Internal::SimdImpl_Blend xBLEND; + extern const Internal::SimdImpl_DotProduct xDP; + extern const Internal::SimdImpl_Round xROUND; + + extern const Internal::SimdImpl_PMove xPMOVSX; + extern const Internal::SimdImpl_PMove xPMOVZX; + } diff --git a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp deleted file mode 100644 index d8bdb0b8a2..0000000000 --- a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "PrecompiledHeader.h" -#include "ix86_legacy_internal.h" - -//------------------------------------------------------------------ -// MMX instructions -// -// note: r64 = mm -//------------------------------------------------------------------ - -using namespace x86Emitter; - -emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); } -emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); } -emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); } -emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } -emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } - -emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { xMOVDZX( xRegisterMMX(to), (void*)from ); } -emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { xMOVD( (void*)to, xRegisterMMX(from) ); } -emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { xMOVDZX( xRegisterMMX(to), xRegister32(from) ); } -emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVDZX( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } -emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( xRegister32(to), xRegisterMMX(from) ); } -emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } - -emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); } -emitterT void 
MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); } - -#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \ - emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \ - emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) { xP##mod( xRegisterMMX(to), (void*)from ); } \ - emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod( xRegisterSSE(to), (void*)from ); } - -#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \ - emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ - emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ - emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } - -#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \ - emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ - emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ - emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) { xP##mod.sub( xRegisterMMX(to), imm ); } \ - emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.sub( xRegisterSSE(to), 
imm ); } - -#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \ - DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \ - DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \ - DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \ - emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } - -DEFINE_LEGACY_LOGIC_OPCODE( AND ) -DEFINE_LEGACY_LOGIC_OPCODE( ANDN ) -DEFINE_LEGACY_LOGIC_OPCODE( OR ) -DEFINE_LEGACY_LOGIC_OPCODE( XOR ) - -DEFINE_LEGACY_SHIFT_OPCODE( SLL ) -DEFINE_LEGACY_SHIFT_OPCODE( SRL ) -DEFINE_LEGACY_SHIFT_STUFF( SRA, D ) -DEFINE_LEGACY_SHIFT_STUFF( SRA, W ) - -DEFINE_LEGACY_ARITHMETIC( ADD, B ) -DEFINE_LEGACY_ARITHMETIC( ADD, W ) -DEFINE_LEGACY_ARITHMETIC( ADD, D ) -DEFINE_LEGACY_ARITHMETIC( ADD, Q ) -DEFINE_LEGACY_ARITHMETIC( ADD, SB ) -DEFINE_LEGACY_ARITHMETIC( ADD, SW ) -DEFINE_LEGACY_ARITHMETIC( ADD, USB ) -DEFINE_LEGACY_ARITHMETIC( ADD, USW ) - -DEFINE_LEGACY_ARITHMETIC( SUB, B ) -DEFINE_LEGACY_ARITHMETIC( SUB, W ) -DEFINE_LEGACY_ARITHMETIC( SUB, D ) -DEFINE_LEGACY_ARITHMETIC( SUB, Q ) -DEFINE_LEGACY_ARITHMETIC( SUB, SB ) -DEFINE_LEGACY_ARITHMETIC( SUB, SW ) -DEFINE_LEGACY_ARITHMETIC( SUB, USB ) -DEFINE_LEGACY_ARITHMETIC( SUB, USW ) - -DEFINE_LEGACY_ARITHMETIC( CMP, EQB ); -DEFINE_LEGACY_ARITHMETIC( CMP, EQW ); -DEFINE_LEGACY_ARITHMETIC( CMP, EQD ); -DEFINE_LEGACY_ARITHMETIC( CMP, GTB ); -DEFINE_LEGACY_ARITHMETIC( CMP, GTW ); -DEFINE_LEGACY_ARITHMETIC( CMP, GTD ); - -DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ ); -DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ ); -DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW ); -DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW ); - -DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD ); -DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD ); - - -emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { xPMUL.UDQ( xRegisterMMX( to ), (void*)from ); } -emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) ); } - -emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), 
xRegisterMMX(from), imm8 ); } -emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); } - -emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 ); } - -emitterT void EMMS() { xEMMS(); } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index d2845f2a09..169f6ac100 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -22,35 +22,109 @@ using namespace x86Emitter; +// ------------------------------------------------------------------------ +// MMX / SSE Mixed Bag +// ------------------------------------------------------------------------ -//------------------------------------------------------------------ -// SSE instructions -//------------------------------------------------------------------ +emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); } +emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); } +emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); } +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } -#define SSEMtoR( code, overb ) \ - assert( to < iREGCNT_XMM ), \ - RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) +emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { xMOVDZX( xRegisterMMX(to), (void*)from ); } +emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { xMOVD( (void*)to, xRegisterMMX(from) ); } +emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { xMOVDZX( xRegisterMMX(to), xRegister32(from) ); } 
+emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVDZX( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } +emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( xRegister32(to), xRegisterMMX(from) ); } +emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } -#define SSERtoR( code ) \ - assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \ - RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); } +emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); } -#define SSEMtoR66( code ) \ - write8( 0x66 ), \ - SSEMtoR( code, 0 ) +#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \ + emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \ + emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) { xP##mod( xRegisterMMX(to), (void*)from ); } \ + emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod( xRegisterSSE(to), (void*)from ); } -#define SSERtoM66( code ) \ - write8( 0x66 ), \ - SSERtoM( code, 0 ) +#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \ + emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ + emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ + emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { 
xP##mod.sub( xRegisterSSE(to), (void*)from ); } -#define SSERtoR66( code ) \ - write8( 0x66 ), \ - SSERtoR( code ) +#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \ + emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ + emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ + emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) { xP##mod.sub( xRegisterMMX(to), imm ); } \ + emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.sub( xRegisterSSE(to), imm ); } + +#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \ + emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } + +DEFINE_LEGACY_LOGIC_OPCODE( AND ) +DEFINE_LEGACY_LOGIC_OPCODE( ANDN ) +DEFINE_LEGACY_LOGIC_OPCODE( OR ) +DEFINE_LEGACY_LOGIC_OPCODE( XOR ) + +DEFINE_LEGACY_SHIFT_OPCODE( SLL ) +DEFINE_LEGACY_SHIFT_OPCODE( SRL ) +DEFINE_LEGACY_SHIFT_STUFF( SRA, D ) +DEFINE_LEGACY_SHIFT_STUFF( SRA, W ) + +DEFINE_LEGACY_ARITHMETIC( ADD, B ) +DEFINE_LEGACY_ARITHMETIC( ADD, W ) +DEFINE_LEGACY_ARITHMETIC( ADD, D ) +DEFINE_LEGACY_ARITHMETIC( ADD, Q ) +DEFINE_LEGACY_ARITHMETIC( ADD, SB ) +DEFINE_LEGACY_ARITHMETIC( ADD, SW ) +DEFINE_LEGACY_ARITHMETIC( ADD, USB ) +DEFINE_LEGACY_ARITHMETIC( ADD, USW ) + +DEFINE_LEGACY_ARITHMETIC( SUB, B ) +DEFINE_LEGACY_ARITHMETIC( SUB, W ) +DEFINE_LEGACY_ARITHMETIC( SUB, D ) +DEFINE_LEGACY_ARITHMETIC( SUB, Q ) +DEFINE_LEGACY_ARITHMETIC( SUB, SB ) +DEFINE_LEGACY_ARITHMETIC( SUB, SW ) 
+DEFINE_LEGACY_ARITHMETIC( SUB, USB ) +DEFINE_LEGACY_ARITHMETIC( SUB, USW ) + +DEFINE_LEGACY_ARITHMETIC( CMP, EQB ); +DEFINE_LEGACY_ARITHMETIC( CMP, EQW ); +DEFINE_LEGACY_ARITHMETIC( CMP, EQD ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTB ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTW ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTD ); + +DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW ); + +DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD ); + + +emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { xPMUL.UDQ( xRegisterMMX( to ), (void*)from ); } +emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) ); } + +emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 ); } +emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); } + +emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 ); } + +emitterT void EMMS() { xEMMS(); } + +// ------------------------------------------------------------------------ +// Begin SSE-Only Part! 
+// ------------------------------------------------------------------------ #define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \ emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \ @@ -290,73 +364,17 @@ emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 im emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xINSERTPS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { xEXTRACTPS( xRegister32(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xDP.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) { xDP.PS( xRegisterSSE(to), (void*)from, imm8 ); } + +emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) { xBLEND.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xBLEND.VPS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) { xBLEND.VPS( xRegisterSSE(to), (void*)from ); } + +emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMOVSX.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } -////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////// - - -// SSE4.1 - -emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - write24(0x403A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) -{ - write8(0x66); - write24(0x403A0F); 
- ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); - write8(imm8); -} - -emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0C3A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x14380F); - ModRM(3, to, from); -} - -emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x14380F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x25380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x35380F); - ModRM(3, to, from); -} ////////////////////////////////////////////////////////////////////////////////////////// // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) diff --git a/pcsx2/x86/ix86/ix86_simd.cpp b/pcsx2/x86/ix86/ix86_simd.cpp new file mode 100644 index 0000000000..42754cce18 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_simd.cpp @@ -0,0 +1,388 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "PrecompiledHeader.h" + +#include "System.h" +#include "ix86_internal.h" + +namespace x86Emitter { + +using namespace Internal; + +// ------------------------------------------------------------------------ +// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is +// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4 +// instructions). Any other lower value assumes the upper value is 0 and ignored. +// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will +// generate an assertion. +// +__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) +{ + const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); + + // If the lower byte is not a valid previx and the upper byte is non-zero it + // means we made a mistake! + if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 ); + + if( prefix != 0 ) + { + if( is16BitOpcode ) + xWrite( (opcode<<16) | 0x0f00 | prefix ); + else + { + xWrite( 0x0f00 | prefix ); + xWrite( opcode ); + } + } + else + { + if( is16BitOpcode ) + { + xWrite( 0x0f ); + xWrite( opcode ); + } + else + xWrite( (opcode<<8) | 0x0f ); + } +} + +// [SSE-3] +const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; +// [SSE-3] +const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; + +const SimdImpl_MoveSSE<0x00,true> xMOVAPS; + +// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead. +// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement +// which can be checked for alignment at runtime. 
+const SimdImpl_MoveSSE<0x00,false> xMOVUPS; + +#ifdef ALWAYS_USE_MOVAPS +const SimdImpl_MoveSSE<0,true> xMOVDQA; +const SimdImpl_MoveSSE<0,true> xMOVAPD; + +// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead. +// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement +// which can be checked for alignment at runtime. +const SimdImpl_MoveSSE<0,false> xMOVDQU; +const SimdImpl_MoveSSE<0,false> xMOVUPD; +#else +const SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA; +const SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU; +const SimdImpl_MoveSSE<0x66,true> xMOVAPD; +const SimdImpl_MoveSSE<0x66,false> xMOVUPD; +#endif + +const MovhlImplAll<0x16> xMOVH; +const MovhlImplAll<0x12> xMOVL; +const MovhlImpl_RtoR<0x16> xMOVLH; +const MovhlImpl_RtoR<0x12> xMOVHL; + +const SimdImpl_DestRegEither<0x66,0xdb> xPAND; +const SimdImpl_DestRegEither<0x66,0xdf> xPANDN; +const SimdImpl_DestRegEither<0x66,0xeb> xPOR; +const SimdImpl_DestRegEither<0x66,0xef> xPXOR; + +const SimdImpl_AndNot xANDN; + +const SimdImpl_UcomI<0x66,0x2e> xUCOMI; +const SimdImpl_rSqrt<0x53> xRCP; +const SimdImpl_rSqrt<0x52> xRSQRT; +const SimdImpl_Sqrt<0x51> xSQRT; + +const SimdImpl_MinMax<0x5f> xMAX; +const SimdImpl_MinMax<0x5d> xMIN; +const SimdImpl_Shuffle<0xc6> xSHUF; + +// ------------------------------------------------------------------------ + +// [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag +// only if all bits in the result are 0. 
PTEST also sets the CF flag according +// to the following condition: (xmm2/m128 AND NOT xmm1) == 0; +extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; + +const SimdImpl_Compare xCMPEQ; +const SimdImpl_Compare xCMPLT; +const SimdImpl_Compare xCMPLE; +const SimdImpl_Compare xCMPUNORD; +const SimdImpl_Compare xCMPNE; +const SimdImpl_Compare xCMPNLT; +const SimdImpl_Compare xCMPNLE; +const SimdImpl_Compare xCMPORD; + +// ------------------------------------------------------------------------ +// SSE Conversion Operations, as looney as they are. +// +// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing +// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). +// +const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; +const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; + +const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; +const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; +const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; + +const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; +const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + +const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; +const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; +const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + +const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; +const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; +const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; +const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + +const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> 
xCVTSS2SD; +const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + +const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; +const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; +const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; +const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; + +const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; +const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; + +// ------------------------------------------------------------------------ + +const SimdImpl_Shift<0xd0, 2> xPSRL; +const SimdImpl_Shift<0xf0, 6> xPSLL; +const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; + +const SimdImpl_AddSub<0xdc, 0xd4> xPADD; +const SimdImpl_AddSub<0xd8, 0xfb> xPSUB; +const SimdImpl_PMinMax<0xde,0x3c> xPMAX; +const SimdImpl_PMinMax<0xda,0x38> xPMIN; + +const SimdImpl_PMul xPMUL; +const SimdImpl_PCompare xPCMP; +const SimdImpl_PShuffle xPSHUF; +const SimdImpl_PUnpack xPUNPCK; +const SimdImpl_Unpack xUNPCK; +const SimdImpl_Pack xPACK; + +const SimdImpl_PAbsolute xPABS; +const SimdImpl_PSign xPSIGN; +const SimdImpl_PInsert xPINSR; +const SimdImpl_PExtract xPEXTR; +const SimdImpl_PMultAdd xPMADD; +const SimdImpl_HorizAdd xHADD; + +const SimdImpl_Blend xBLEND; +const SimdImpl_DotProduct xDP; +const SimdImpl_Round xROUND; + +const SimdImpl_PMove xPMOVSX; +const SimdImpl_PMove xPMOVZX; + + +////////////////////////////////////////////////////////////////////////////////////////// +// + +__emitinline void xEMMS() +{ + xWrite( 0x770F ); +} + +// Store Streaming SIMD Extension Control/Status to Mem32. +__emitinline void xSTMXCSR( u32* dest ) +{ + SimdPrefix( 0, 0xae ); + xWriteDisp( 3, dest ); +} + +// Load Streaming SIMD Extension Control/Status from Mem32. 
+__emitinline void xLDMXCSR( const u32* src ) +{ + SimdPrefix( 0, 0xae ); + xWriteDisp( 2, src ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// MMX Mov Instructions (MOVD, MOVQ, MOVSS). +// +// Notes: +// * Some of the functions have been renamed to more clearly reflect what they actually +// do. Namely we've affixed "ZX" to several MOVs that take a register as a destination +// since that's what they do (MOVD clears upper 32/96 bits, etc). +// +// * MOVD has valid forms for MMX and XMM registers. +// + +__forceinline void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ) { xOpWrite0F( 0x66, 0x6e, to, from ); } +__forceinline void xMOVDZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0x66, 0x6e, to, src ); } +__forceinline void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src ) { xOpWrite0F( 0x66, 0x6e, to, src ); } + +__forceinline void xMOVDZX( const xRegisterMMX& to, const xRegister32& from ) { xOpWrite0F( 0x6e, to, from ); } +__forceinline void xMOVDZX( const xRegisterMMX& to, const void* src ) { xOpWrite0F( 0x6e, to, src ); } +__forceinline void xMOVDZX( const xRegisterMMX& to, const ModSibBase& src ) { xOpWrite0F( 0x6e, to, src ); } + +__forceinline void xMOVD( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, to ); } +__forceinline void xMOVD( void* dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); } +__forceinline void xMOVD( const ModSibBase& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); } + +__forceinline void xMOVD( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, to ); } +__forceinline void xMOVD( void* dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); } +__forceinline void xMOVD( const ModSibBase& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); } + + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register 
+// being cleared to zero. +__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0xf3, 0x7e, to, from ); } + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register +// being cleared to zero. +__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); } + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register +// being cleared to zero. +__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); } + +// Moves lower quad of XMM to ptr64 (no bits are cleared) +__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); } +// Moves lower quad of XMM to ptr64 (no bits are cleared) +__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); } + +__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) xOpWrite0F( 0x6f, to, from ); } +__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { xOpWrite0F( 0x6f, to, src ); } +__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { xOpWrite0F( 0x6f, to, src ); } +__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); } +__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); } + +// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ' +__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf3, 0xd6, to, from ); } + +// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q' +__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) +{ + // Manual implementation of this form of MOVQ, since its parameters are unique in a way + // that breaks the template inference of writeXMMop(); + + SimdPrefix( 0xf2, 0xd6 ); + 
ModRM_Direct( to.Id, from.Id ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// + +#define IMPLEMENT_xMOVS( ssd, prefix ) \ + __forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) xOpWrite0F( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { xOpWrite0F( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { xOpWrite0F( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); } \ + __forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); } + +IMPLEMENT_xMOVS( SS, 0xf3 ) +IMPLEMENT_xMOVS( SD, 0xf2 ) + +////////////////////////////////////////////////////////////////////////////////////////// +// Non-temporal movs only support a register as a target (ie, load form only, no stores) +// + +__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from ) +{ + xWrite( 0x2A380f66 ); + xWriteDisp( to.Id, from ); +} + +__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ) +{ + xWrite( 0x2A380f66 ); + EmitSibMagic( to.Id, from ); +} + +__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); } +__forceinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); } + +__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); } +__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); } +__forceinline void xMOVNTPS( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); } +__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, 
from, to ); } + +__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); } +__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); } + +// ------------------------------------------------------------------------ + +__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x50, to, from ); } +__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x66, 0x50, to, from, true ); } + +// xMASKMOV: +// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. +// The default memory location is specified by DS:EDI. The most significant bit in each byte +// of the mask operand determines whether the corresponding byte in the source operand is +// written to the corresponding byte location in memory. +__forceinline void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xf7, to, from ); } +__forceinline void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf7, to, from ); } + +// xPMOVMSKB: +// Creates a mask made up of the most significant bit of each byte of the source +// operand and stores the result in the low byte or word of the destination operand. +// Upper bits of the destination are cleared to zero. +// +// When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on +// 128-bit (SSE) source, the byte mask is 16-bits. +// +__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd7, to, from ); } +__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0xd7, to, from ); } + +// [sSSE-3] Concatenates dest and source operands into an intermediate composite, +// shifts the composite at byte granularity to the right by a constant immediate, +// and extracts the right-aligned result into the destination. 
+// +__forceinline void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x0f3a, to, from, imm8 ); } +__forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); } + + +////////////////////////////////////////////////////////////////////////////////////////// +// INSERTPS / EXTRACTPS [SSE4.1 only!] +// +// [TODO] these might be served better as classes, especially if other instructions use +// the M32,sse,imm form (I forget offhand if any do). + + +// [SSE-4.1] Insert a single-precision floating-point value from src into a specified +// location in dest, and selectively zero out the data elements in dest according to +// the mask field in the immediate byte. The source operand can be a memory location +// (32 bits) or an XMM register (lower 32 bits used). +// +// Imm8 provides three fields: +// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if +// the source is a memory operand. +// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest. +// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written +// with 0.0 if set to 1. +// +__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } +__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } +__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } + +// [SSE-4.1] Extract a single-precision floating-point value from src at an offset +// determined by imm8[1-0]*32. The extracted single precision floating-point value +// is stored into the low 32-bits of dest (or at a 32-bit memory pointer). 
+// +__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); } +__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); } +__emitinline void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); } + +} \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_sse_helpers.h b/pcsx2/x86/ix86/ix86_sse_helpers.h index b198c336b5..f568282d9e 100644 --- a/pcsx2/x86/ix86/ix86_sse_helpers.h +++ b/pcsx2/x86/ix86/ix86_sse_helpers.h @@ -48,4 +48,3 @@ extern void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from ); extern void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); extern void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from ); extern void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 7cca341cd3..6ecd400e98 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -252,28 +252,6 @@ namespace x86Emitter } }; - ////////////////////////////////////////////////////////////////////////////////////////// - // - template< typename OperandType > - class xRegisterSIMD : public xRegister - { - public: - static const xRegisterSIMD Empty; // defined as an empty/unused value (-1) - - public: - xRegisterSIMD(): xRegister() {} - xRegisterSIMD( const xRegisterSIMD& src ) : xRegister( src.Id ) {} - xRegisterSIMD( const xRegister& src ) : xRegister( src ) {} - explicit xRegisterSIMD( int regId ) : xRegister( regId ) {} - - xRegisterSIMD& operator=( const xRegisterSIMD& src ) - { - xRegister::Id = src.Id; - return *this; - } - }; - - // ------------------------------------------------------------------------ // Note: GCC 
parses templates ahead of time apparently as a 'favor' to the programmer, which // means it finds undeclared variables when MSVC does not (Since MSVC compiles templates @@ -282,8 +260,8 @@ namespace x86Emitter // all about the the templated code in haphazard fashion. Yay.. >_< // - typedef xRegisterSIMD xRegisterSSE; - typedef xRegisterSIMD xRegisterMMX; + typedef xRegister xRegisterSSE; + typedef xRegister xRegisterMMX; typedef xRegister xRegister32; typedef xRegister xRegister16; typedef xRegister xRegister8;