Finished the emitter, complete with code cleanups! :) (Added the last few SSE instructions, and inserted placeholders for some future additions to the x86 portion, regarding xchg/xadd/etc.)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1047 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-04-23 12:39:59 +00:00
parent ac0768e9a3
commit ef565303a5
18 changed files with 1072 additions and 974 deletions

View File

@@ -905,14 +905,6 @@
<Filter
Name="Misc"
>
<File
RelativePath="..\..\HashMap.h"
>
</File>
<File
RelativePath="..\..\HashTools.cpp"
>
</File>
<File
RelativePath="..\..\Dump.cpp"
>
@@ -921,6 +913,14 @@
RelativePath="..\..\Dump.h"
>
</File>
<File
RelativePath="..\..\HashMap.h"
>
</File>
<File
RelativePath="..\..\HashTools.cpp"
>
</File>
<File
RelativePath="..\..\Misc.cpp"
>
@@ -2965,10 +2965,6 @@
RelativePath="..\..\x86\ix86\ix86_legacy_internal.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy_mmx.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy_sse.cpp"
>
@@ -2977,6 +2973,10 @@
RelativePath="..\..\x86\ix86\ix86_legacy_types.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_simd.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_sse_helpers.h"
>
@@ -3028,6 +3028,10 @@
RelativePath="..\..\x86\ix86\implement\test.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\implement\xchg.h"
>
</File>
<Filter
Name="xmm"
>

View File

@@ -935,6 +935,8 @@ void psxRecompileNextInstruction(int delayslot)
#ifdef _DEBUG
static void printfn()
{
extern void iDumpPsxRegisters(u32 startpc, u32 temp);
static int lastrec = 0;
static int curcount = 0;
const int skip = 0;
@@ -962,6 +964,8 @@ void iopRecRecompile(u32 startpc)
u32 willbranch3 = 0;
#ifdef _DEBUG
extern void iDumpPsxRegisters(u32 startpc, u32 temp);
if( psxdump & 4 )
iDumpPsxRegisters(startpc, 0);
#endif

View File

@@ -316,7 +316,7 @@ void recMFHILO1(int hi)
if( reghi >= 0 ) {
if( regd >= 0 ) {
SSEX_MOVHLPS_XMM_to_XMM(regd, reghi);
SSE_MOVHLPS_XMM_to_XMM(regd, reghi);
xmmregs[regd].mode |= MODE_WRITE;
}
else {

View File

@@ -32,152 +32,39 @@ enum G8Type
G8Type_BTC,
};
//////////////////////////////////////////////////////////////////////////////////////////
// Notes: Bit Test instructions are valid on 16/32 bit operands only.
//
template< G8Type InstType, typename ImmType >
class Group8Impl
{
protected:
static const uint OperandSize = sizeof(ImmType);
static void prefix16() { if( OperandSize == 2 ) xWrite<u8>( 0x66 ); }
public:
Group8Impl() {} // For the love of GCC.
// ------------------------------------------------------------------------
static __emitinline void Emit( const xRegister<ImmType>& bitbase, const xRegister<ImmType>& bitoffset )
{
prefix16();
xWrite<u8>( 0x0f );
xWrite<u8>( 0xa3 | (InstType << 2) );
ModRM_Direct( bitoffset.Id, bitbase.Id );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( void* bitbase, const xRegister<ImmType>& bitoffset )
{
prefix16();
xWrite<u8>( 0x0f );
xWrite<u8>( 0xa3 | (InstType << 2) );
xWriteDisp( bitoffset.Id, bitbase );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const ModSibBase& bitbase, const xRegister<ImmType>& bitoffset )
{
prefix16();
xWrite<u8>( 0x0f );
xWrite<u8>( 0xa3 | (InstType << 2) );
EmitSibMagic( bitoffset.Id, bitbase );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const xRegister<ImmType>& bitbase, u8 immoffset )
{
prefix16();
xWrite<u16>( 0xba0f );
ModRM_Direct( InstType, bitbase.Id );
xWrite<u8>( immoffset );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const ModSibStrict<ImmType>& bitbase, u8 immoffset )
{
prefix16();
xWrite<u16>( 0xba0f );
EmitSibMagic( InstType, bitbase );
xWrite<u8>( immoffset );
}
};
// -------------------------------------------------------------------
//
template< G8Type InstType >
class Group8ImplAll
{
protected:
typedef Group8Impl<InstType,u32> m_32;
typedef Group8Impl<InstType,u32> m_16;
public:
__forceinline void operator()( const xRegister32& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
__forceinline void operator()( const xRegister16& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
__forceinline void operator()( void* bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
__forceinline void operator()( void* bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
__noinline void operator()( const ModSibBase& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
__noinline void operator()( const ModSibBase& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
__noinline void operator()( const ModSibStrict<u32>& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
__noinline void operator()( const ModSibStrict<u16>& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
void operator()( const xRegister<u32>& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
void operator()( const xRegister<u16>& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
Group8ImplAll() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// BSF / BSR -- 16/32 operands supported only.
//
template< bool isReverse, typename ImmType >
// 0xbc [fwd] / 0xbd [rev]
//
template< u16 Opcode >
class BitScanImpl
{
protected:
static const uint OperandSize = sizeof(ImmType);
static void prefix16() { if( OperandSize == 2 ) xWrite<u8>( 0x66 ); }
static void emitbase()
{
prefix16();
xWrite<u8>( 0x0f );
xWrite<u8>( isReverse ? 0xbd : 0xbc );
}
public:
BitScanImpl() {} // For the love of GCC.
BitScanImpl() {}
// ------------------------------------------------------------------------
static __emitinline void Emit( const xRegister<ImmType>& to, const xRegister<ImmType>& from )
{
emitbase();
ModRM_Direct( to.Id, from.Id );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const xRegister<ImmType>& to, const void* src )
{
emitbase();
xWriteDisp( to.Id, src );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const xRegister<ImmType>& to, const ModSibBase& sibsrc )
{
emitbase();
EmitSibMagic( to.Id, sibsrc );
}
__forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( Opcode, to, from ); }
__forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); }
__forceinline void operator()( const xRegister32& to, const void* src ) const { xOpWrite0F( Opcode, to, src ); }
__forceinline void operator()( const xRegister16& to, const void* src ) const { xOpWrite0F( 0x66, Opcode, to, src ); }
__forceinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { xOpWrite0F( Opcode, to, sibsrc ); }
__forceinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { xOpWrite0F( 0x66, Opcode, to, sibsrc ); }
};
// -------------------------------------------------------------------
// BSF/BSR -- 16 and 32 bit operand forms only!
//////////////////////////////////////////////////////////////////////////////////////////
// Bit Test Instructions - Valid on 16/32 bit instructions only.
//
template< bool isReverse >
class BitScanImplAll
template< G8Type InstType >
class Group8Impl : public BitScanImpl<0xa3 | (InstType << 2)>
{
protected:
typedef BitScanImpl<isReverse,u32> m_32;
typedef BitScanImpl<isReverse,u32> m_16;
public:
__forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { m_32::Emit( to, from ); }
__forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { m_16::Emit( to, from ); }
__forceinline void operator()( const xRegister32& to, const void* src ) const { m_32::Emit( to, src ); }
__forceinline void operator()( const xRegister16& to, const void* src ) const { m_16::Emit( to, src ); }
__noinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); }
__noinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); }
using BitScanImpl<0xa3 | (InstType << 2)>::operator();
BitScanImplAll() {}
__forceinline void operator()( const ModSibStrict<u32>& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite<u8>( bitoffset ); }
__forceinline void operator()( const ModSibStrict<u16>& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite<u8>( bitoffset ); }
void operator()( const xRegister<u32>& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite<u8>( bitoffset ); }
void operator()( const xRegister<u16>& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite<u8>( bitoffset ); }
Group8Impl() {}
};
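A quick byte-level sanity check of the consolidated forms above (a hypothetical trace, not part of the diff; register names and Ids are assumptions, opcodes per the Intel reference):

// xBSF( eax, ecx ):                     // BitScanImpl<0xbc>
//   SimdPrefix( 0, 0xbc )   -> 0F BC
//   ModRM_Direct( 0, 1 )    -> C1       (mod=11, reg=eax, rm=ecx)
// net: 0F BC C1  ==  bsf eax, ecx
//
// xBSR( ax, cx ):                       // the 16-bit form adds the 0x66 prefix
// net: 66 0F BD C1  ==  bsr ax, cx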

View File

@@ -167,9 +167,9 @@ class xImpl_G1Compare : xImpl_Group1< G1Type_CMP >
protected:
template< u8 Prefix > struct Woot
{
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
Woot() {}
};

View File

@@ -0,0 +1,22 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// This header file is intended to be the future home of xchg, cmpxchg, xadd, and
// other threading-related exchange instructions.

View File

@@ -28,19 +28,25 @@ class _SimdShiftHelper
public:
_SimdShiftHelper() {}
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode1, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( 0x66, Opcode1, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode1, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { writeXMMop( Opcode1, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const void* from ) const { writeXMMop( Opcode1, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { writeXMMop( Opcode1, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode1, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const void* from ) const { xOpWrite0F( Opcode1, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode1, to, from ); }
template< typename OperandType >
__emitinline void operator()( const xRegisterSIMD<OperandType>& to, u8 imm8 ) const
__emitinline void operator()( const xRegisterSSE& to, u8 imm8 ) const
{
SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm );
SimdPrefix( 0x66, OpcodeImm );
ModRM( 3, (int)Modcode, to.Id );
xWrite<u8>( imm8 );
}
__emitinline void operator()( const xRegisterMMX& to, u8 imm8 ) const
{
SimdPrefix( 0x00, OpcodeImm );
ModRM( 3, (int)Modcode, to.Id );
xWrite<u8>( imm8 );
}
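As a worked example of the imm8 overloads just split above (a hypothetical trace; the D member's immediate opcode is assumed to be 0x72, following the pattern of the Q member's 0x73 below):

// xPSLL.D( xmm2, 5 ):                   // SimdImpl_Shift<0xf0,6>, so Modcode = 6
//   SimdPrefix( 0x66, 0x72 )  -> 66 0F 72
//   ModRM( 3, 6, 2 )          -> F2     (11 110 010)
//   xWrite<u8>( 5 )           -> 05
// net: 66 0F 72 F2 05  ==  pslld xmm2, 5
//
// xPSLL.D( mm2, 5 ):                    // the MMX overload drops the 0x66 prefix
// net: 0F 72 F2 05     ==  pslld mm2, 5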
@@ -68,11 +74,11 @@ class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
public:
const _SimdShiftHelper<OpcodeBase1+3,0x73,Modcode> Q;
void DQ( const xRegisterSSE& to, u8 imm ) const
void DQ( const xRegisterSSE& to, u8 imm8 ) const
{
SimdPrefix( 0x66, 0x73 );
ModRM( 3, (int)Modcode+1, to.Id );
xWrite<u8>( imm );
xWrite<u8>( imm8 );
}
SimdImpl_Shift() {}
@@ -156,8 +162,8 @@ template< u16 OpcodeSSE >
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
{
public:
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
SimdImpl_Sqrt() {}
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
};
//////////////////////////////////////////////////////////////////////////////////////////
@@ -165,9 +171,9 @@ public:
class SimdImpl_AndNot
{
public:
SimdImpl_AndNot() {}
const SimdImpl_DestRegSSE<0x00,0x55> PS;
const SimdImpl_DestRegSSE<0x66,0x55> PD;
SimdImpl_AndNot() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
@@ -282,3 +288,87 @@ public:
// *src* stores the result in the high quadword of dest.
const SimdImpl_DestRegSSE<0x66, 0x7c> PD;
};
//////////////////////////////////////////////////////////////////////////////////////////
// DotProduct calculation (SSE4.1 only!)
//
class SimdImpl_DotProduct
{
public:
SimdImpl_DotProduct() {}
// [SSE-4.1] Conditionally multiplies the packed single precision floating-point
// values in dest with the packed single-precision floats in src depending on a
// mask extracted from the high 4 bits of the immediate byte. If a condition mask
// bit in Imm8[7:4] is zero, the corresponding multiplication is replaced by a value
// of 0.0. The four resulting single-precision values are summed into an inter-
// mediate result.
//
// The intermediate result is conditionally broadcasted to the destination using a
// broadcast mask specified by bits [3:0] of the immediate byte. If a broadcast
// mask bit is 1, the intermediate result is copied to the corresponding dword
// element in dest. If a broadcast mask bit is zero, the corresponding element in
// the destination is set to zero.
//
SimdImpl_DestRegImmSSE<0x66,0x403a> PS;
// [SSE-4.1]
SimdImpl_DestRegImmSSE<0x66,0x413a> PD;
};
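A worked imm8 example for the PS form (hypothetical usage; the instance name xDP and the register Ids are assumptions): a full four-component dot product lands in the low dword of dest with imm8 = 0xF1.

// dpps xmm0, xmm1, 0xF1:
//   Imm8[7:4] = 1111 -> multiply and sum all four dword pairs
//   Imm8[3:0] = 0001 -> broadcast the sum to dest dword 0; dwords 1..3 become 0.0
// Byte trace: SimdPrefix( 0x66, 0x403a ) -> 66 0F 3A 40; ModRM -> C1; imm -> F1
xDP.PS( xmm0, xmm1, 0xF1 );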
//////////////////////////////////////////////////////////////////////////////////////////
// Rounds floating point values (packed or single scalar) by an arbitrary rounding mode.
// (SSE4.1 only!)
class SimdImpl_Round
{
public:
SimdImpl_Round() {}
// [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest.
//
// Imm8 specifies control fields for the rounding operation:
// Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact)
// Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
// Bits 1:0 - Specifies a rounding mode for this instruction only.
//
// Rounding Mode Reference:
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
//
const SimdImpl_DestRegImmSSE<0x66,0x083a> PS;
// [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest.
//
// Imm8 specifies control fields for the rounding operation:
// Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact)
// Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
// Bits 1:0 - Specifies a rounding mode for this instruction only.
//
// Rounding Mode Reference:
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
//
const SimdImpl_DestRegImmSSE<0x66,0x093a> PD;
// [SSE-4.1] Rounds the single-precision src value and stores in dest.
//
// Imm8 specifies control fields for the rounding operation:
// Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact)
// Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
// Bits 1:0 - Specifies a rounding mode for this instruction only.
//
// Rounding Mode Reference:
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
//
const SimdImpl_DestRegImmSSE<0x66,0x0a3a> SS;
// [SSE-4.1] Rounds the double-precision src value and stores in dest.
//
// Imm8 specifies control fields for the rounding operation:
// Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact)
// Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
// Bits 1:0 - Specifies a rounding mode for this instruction only.
//
// Rounding Mode Reference:
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
//
const SimdImpl_DestRegImmSSE<0x66,0x0b3a> SD;
};
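A worked imm8 example for the scalar-single form (hypothetical usage; the instance name xROUND is an assumption): truncation for this instruction only is imm8 = 3, since bit 2 clear selects the per-instruction RC field.

// roundss xmm0, xmm1, 3:
//   Bit 3    = 0 -> normal precision-exception behavior
//   Bit 2    = 0 -> use Imm8[1:0] instead of MXCSR.RC
//   Bits 1:0 = 3 -> truncate
// Byte trace: SimdPrefix( 0x66, 0x0a3a ) -> 66 0F 3A 0A; ModRM -> C1; imm -> 03
xROUND.SS( xmm0, xmm1, 3 );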

View File

@@ -23,57 +23,106 @@
extern void SimdPrefix( u8 prefix, u16 opcode );
// ------------------------------------------------------------------------
// xmm emitter helpers for xmm instruction with prefixes.
// These functions also support deducing the use of the prefix from the template parameters,
// since most xmm instructions use a prefix and most mmx instructions do not. (some mov
// instructions violate this "guideline.")
//
template< typename T, typename T2 >
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib );
extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data );
extern void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib );
extern void xOpWrite0F( u16 opcode, int instId, const void* data );
template< typename T2 > __emitinline
void xOpWrite0F( u8 prefix, u16 opcode, int instId, const xRegister<T2>& from )
{
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
ModRM_Direct( to.Id, from.Id );
SimdPrefix( prefix, opcode );
ModRM_Direct( instId, from.Id );
}
template< typename T >
__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
template< typename T2 > __emitinline
void xOpWrite0F( u16 opcode, int instId, const xRegister<T2>& from )
{
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
EmitSibMagic( reg.Id, sib );
xOpWrite0F( 0, opcode, instId, from );
}
template< typename T >
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
template< typename T, typename T2 > __emitinline
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
{
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
xWriteDisp( reg.Id, data );
xOpWrite0F( prefix, opcode, to.Id, from );
}
template< typename T > __noinline
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
{
xOpWrite0F( prefix, opcode, reg.Id, sib );
}
template< typename T > __emitinline
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
{
xOpWrite0F( prefix, opcode, reg.Id, data );
}
// ------------------------------------------------------------------------
// xmm emitter helpers for xmm instructions *without* prefixes.
// These are normally used for special instructions that have MMX forms only (non-SSE), however
// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
//
template< typename T, typename T2 >
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
template< typename T, typename T2 > __emitinline
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, u8 imm8 )
{
SimdPrefix( 0, opcode );
ModRM_Direct( to.Id, from.Id );
xOpWrite0F( prefix, opcode, to, from );
xWrite<u8>( imm8 );
}
template< typename T >
__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
template< typename T > __noinline
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, u8 imm8 )
{
SimdPrefix( 0, opcode );
EmitSibMagic( reg.Id, sib );
xOpWrite0F( prefix, opcode, reg, sib );
xWrite<u8>( imm8 );
}
template< typename T >
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
template< typename T > __emitinline
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, u8 imm8 )
{
SimdPrefix( 0, opcode );
xWriteDisp( reg.Id, data );
xOpWrite0F( prefix, opcode, reg, data );
xWrite<u8>( imm8 );
}
// ------------------------------------------------------------------------
template< typename T, typename T2 > __emitinline
void xOpWrite0F( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
{
xOpWrite0F( 0, opcode, to, from );
}
template< typename T > __noinline
void xOpWrite0F( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
{
xOpWrite0F( 0, opcode, reg, sib );
}
template< typename T > __emitinline
void xOpWrite0F( u16 opcode, const xRegister<T>& reg, const void* data )
{
xOpWrite0F( 0, opcode, reg, data );
}
// ------------------------------------------------------------------------
template< typename T, typename T2 > __emitinline
void xOpWrite0F( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, u8 imm8 )
{
xOpWrite0F( opcode, to, from );
xWrite<u8>( imm8 );
}
template< typename T > __noinline
void xOpWrite0F( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, u8 imm8 )
{
xOpWrite0F( opcode, reg, sib );
xWrite<u8>( imm8 );
}
template< typename T > __emitinline
void xOpWrite0F( u16 opcode, const xRegister<T>& reg, const void* data, u8 imm8 )
{
xOpWrite0F( opcode, reg, data );
xWrite<u8>( imm8 );
}
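For orientation, a hypothetical trace through the register-direct overload above (byte values per the Intel opcode map; register Ids assumed):

// xOpWrite0F( 0x66, 0xdb, xmm1, xmm2 ):
//   forwards to xOpWrite0F( 0x66, 0xdb, /*instId*/ 1, xmm2 )
//   SimdPrefix( 0x66, 0xdb )  -> 66 0F DB
//   ModRM_Direct( 1, 2 )      -> CA      (mod=11, reg=1, rm=2)
// net: 66 0F DB CA  ==  pand xmm1, xmm2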
// ------------------------------------------------------------------------
@@ -84,9 +133,9 @@ template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegSSE
{
public:
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
SimdImpl_DestRegSSE() {} //GCWho?
};
@@ -99,9 +148,9 @@ template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmSSE
{
public:
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
SimdImpl_DestRegImmSSE() {} //GCWho?
};
@@ -110,9 +159,9 @@ template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmMMX
{
public:
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
SimdImpl_DestRegImmMMX() {} //GCWho?
};
@@ -125,27 +174,33 @@ template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegEither
{
public:
template< typename T > __forceinline
void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
template< typename T > __forceinline
void operator()( const xRegisterSIMD<T>& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
template< typename T > __forceinline
void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const void* from ) const { xOpWrite0F( Opcode, to, from ); }
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode, to, from ); }
SimdImpl_DestRegEither() {} //GCWho?
};
// ------------------------------------------------------------------------
// For implementing MMX/SSE operations which the destination *must* be a register, but the source
// can be regDirect or ModRM (indirect).
// For implementing MMX/SSE operations where the destination *must* be a register, but the
// source can be Direct or Indirect (ModRM/SibSB). The SrcOperandType template parameter
// is used to enforce type strictness of the (void*) parameter and ModSib<> parameter, so
// that the programmer must be explicit in specifying desired operand size.
//
// IMPORTANT: This helper assumes the prefix opcode is written *always* -- regardless of
// MMX or XMM register status.
//
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
class SimdImpl_DestRegStrict
{
public:
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
SimdImpl_DestRegStrict() {} //GCWho?
};

View File

@@ -41,9 +41,9 @@ class SimdImpl_Compare
protected:
template< u8 Prefix > struct Woot
{
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
Woot() {}
};
@@ -128,4 +128,3 @@ public:
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
};

View File

@@ -30,10 +30,10 @@ protected:
struct Woot
{
Woot() {}
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); }
};
public:
@@ -51,26 +51,104 @@ template< u16 Opcode >
class MovhlImpl_RtoR
{
public:
__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); }
__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Opcode, to, from ); }
__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); }
MovhlImpl_RtoR() {} //GCC.
};
// ------------------------------------------------------------------------
template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
class MovapsImplAll
//////////////////////////////////////////////////////////////////////////////////////////
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
//
// All implementations of unaligned movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is a simple
// displacement that can be checked for alignment at emit time.
//
template< u8 Prefix, bool isAligned >
class SimdImpl_MoveSSE
{
public:
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); }
static const u16 OpcodeA = 0x28; // Aligned [aps] form
static const u16 OpcodeU = 0x10; // unaligned [ups] form
MovapsImplAll() {} //GCC.
public:
SimdImpl_MoveSSE() {} //GCC.
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
{
if( to != from ) xOpWrite0F( Prefix, OpcodeA, to, from );
}
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
{
xOpWrite0F( Prefix, (isAligned || ((uptr)from & 0x0f) == 0) ? OpcodeA : OpcodeU, to, from );
}
__forceinline void operator()( void* to, const xRegisterSSE& from ) const
{
xOpWrite0F( Prefix, (isAligned || ((uptr)to & 0x0f) == 0) ? OpcodeA+1 : OpcodeU+1, from, to );
}
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
{
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
bool isReallyAligned = isAligned || ( ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty() );
xOpWrite0F( Prefix, isReallyAligned ? OpcodeA : OpcodeU, to, from );
}
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
{
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
xOpWrite0F( Prefix, isReallyAligned ? OpcodeA+1 : OpcodeU+1, from, to );
}
};
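A hypothetical illustration of the promotion rule (it applies equally to the MOVDQ variant below); the buffer name and the PCSX2_ALIGNED16 usage are assumptions for the sketch:

PCSX2_ALIGNED16( static u128 s_vec );   // 16-byte-aligned storage

// The 'unaligned' instantiation (SimdImpl_MoveSSE<0x00,false>, ie xMOVUPS)
// still emits the aligned opcode here: ((uptr)&s_vec & 0x0f) == 0, so
// operator() selects OpcodeA (0x28) and movaps is written instead of movups.
xMOVUPS( xmm0, &s_vec );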
//////////////////////////////////////////////////////////////////////////////////////////
// Implementations for MOVDQA / MOVDQU
//
template< u8 Prefix, bool isAligned >
class SimdImpl_MoveDQ
{
static const u8 PrefixA = 0x66; // Aligned [dqa] form
static const u8 PrefixU = 0xf3; // Unaligned [dqu] form
static const u16 Opcode = 0x6f;
static const u16 Opcode_Alt = 0x7f; // alternate ModRM encoding (reverse src/dst)
public:
SimdImpl_MoveDQ() {} //GCC.
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
{
if( to != from ) xOpWrite0F( PrefixA, Opcode, to, from );
}
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
{
xOpWrite0F( (isAligned || ((uptr)from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode, to, from );
}
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const
{
xOpWrite0F( (isAligned || ((uptr)to & 0x0f) == 0) ? PrefixA : PrefixU, Opcode_Alt, from, to );
}
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
{
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
bool isReallyAligned = isAligned || ( (from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty() );
xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode, to, from );
}
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
{
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode_Alt, to, from );
}
};
//////////////////////////////////////////////////////////////////////////////////////////
//
template< u8 AltPrefix, u16 OpcodeSSE >
@@ -83,12 +161,79 @@ public:
};
//////////////////////////////////////////////////////////////////////////////////////////
// Blend - Conditional copying of values in src into dest.
//
class SimdImpl_Blend
{
public:
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
// mask bits in the immediate operand (bits [3:0]). Each mask bit corresponds to a
// dword element in a 128-bit operand.
//
// If a mask bit is 1, then the corresponding dword in the source operand is copied
// to dest, else the dword element in dest is left unchanged.
//
SimdImpl_DestRegImmSSE<0x66,0x0c3a> PS;
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
// quadword element in a 128-bit operand.
//
// If a mask bit is 1, then the corresponding qword in the source operand is copied
// to dest, else the qword element in dest is left unchanged.
//
SimdImpl_DestRegImmSSE<0x66,0x0d3a> PD;
SimdImpl_DestRegImmSSE<0x66,0x1438> VPS;
SimdImpl_DestRegImmSSE<0x66,0x1538> VPD;
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on a
// mask formed from the sign bit of each dword in XMM0 (yes, the fixed register).
// Each mask bit corresponds to a dword element in the 128-bit operand.
//
// If a mask bit is 1, then the corresponding dword in the source operand is copied
// to dest, else the dword element in dest is left unchanged.
//
SimdImpl_DestRegSSE<0x66,0x1438> VPS;
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on a
// mask formed from the sign bit of each qword in XMM0 (yes, the fixed register).
// Each mask bit corresponds to a quadword element in the 128-bit operand.
//
// If a mask bit is 1, then the corresponding qword in the source operand is copied
// to dest, else the qword element in dest is left unchanged.
//
SimdImpl_DestRegSSE<0x66,0x1538> VPD;
};
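Hypothetical usage of the fixed-register form (the instance name xBLEND and the register Ids are assumptions):

// blendvps xmm1, xmm2 -- copies each dword of xmm2 into xmm1 wherever the
// sign bit of the matching dword in XMM0 (the implicit mask) is set.
// Byte trace: SimdPrefix( 0x66, 0x1438 ) -> 66 0F 38 14; ModRM -> CA
xBLEND.VPS( xmm1, xmm2 );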
//////////////////////////////////////////////////////////////////////////////////////////
// Packed Move with Sign or Zero extension.
//
template< bool SignExtend >
class SimdImpl_PMove
{
static const u16 OpcodeBase = SignExtend ? 0x2038 : 0x3038;
public:
// [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers
// and store them in dest.
SimdImpl_DestRegStrict<0x66,OpcodeBase,xRegisterSSE,xRegisterSSE,u64> BW;
// [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers
// and store them in dest.
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x100,xRegisterSSE,xRegisterSSE,u32> BD;
// [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers
// and store them in dest.
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x200,xRegisterSSE,xRegisterSSE,u16> BQ;
// [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers
// and store them in dest.
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x300,xRegisterSSE,xRegisterSSE,u64> WD;
// [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers
// and store them in dest.
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x400,xRegisterSSE,xRegisterSSE,u32> WQ;
// [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers
// and store them in dest.
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x500,xRegisterSSE,xRegisterSSE,u64> DQ;
};
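Hypothetical usage (the instance names xPMOVSX/xPMOVZX and the variable names are assumptions):

// pmovsxbw xmm0, xmm1 -- sign-extends the low 8 bytes of xmm1 into 8 words.
// Byte trace: SimdPrefix( 0x66, 0x2038 ) -> 66 0F 38 20; ModRM -> C1
xPMOVSX.BW( xmm0, xmm1 );

// The memory forms are type-strict: BW reads a u64, so the pointer must say so.
// (s_qword is a placeholder name.)
xPMOVSX.BW( xmm0, (u64*)&s_qword );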

View File

@@ -26,9 +26,9 @@ class SimdImpl_Shuffle
protected:
template< u8 Prefix > struct Woot
{
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
Woot() {}
};
@@ -182,20 +182,17 @@ protected:
__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
{
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
xWrite<u8>( imm8 );
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
}
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const
{
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
xWrite<u8>( imm8 );
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
}
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
{
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
xWrite<u8>( imm8 );
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
}
};
@@ -203,28 +200,13 @@ public:
SimdImpl_PInsert() {}
// Operation can be performed on either MMX or SSE src operands.
template< typename T >
__forceinline void W( const xRegisterSIMD<T>& to, const xRegister32& from, u8 imm8 ) const
{
writeXMMop( 0x66, 0xc4, to, from );
xWrite<u8>( imm8 );
}
__forceinline void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
__forceinline void W( const xRegisterSSE& to, const void* from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
__forceinline void W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
// Operation can be performed on either MMX or SSE src operands.
template< typename T >
__forceinline void W( const xRegisterSIMD<T>& to, const void* from, u8 imm8 ) const
{
writeXMMop( 0x66, 0xc4, to, from );
xWrite<u8>( imm8 );
}
// Operation can be performed on either MMX or SSE src operands.
template< typename T >
__forceinline void W( const xRegisterSIMD<T>& to, const ModSibBase& from, u8 imm8 ) const
{
writeXMMop( 0x66, 0xc4, to, from );
xWrite<u8>( imm8 );
}
__forceinline void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
__forceinline void W( const xRegisterMMX& to, const void* from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
__forceinline void W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
// [SSE-4.1]
const ByteDwordForms<0x20> B;
@@ -250,20 +232,17 @@ protected:
__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
{
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
xWrite<u8>( imm8 );
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
}
__forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const
{
writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
xWrite<u8>( imm8 );
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 );
}
__forceinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
{
writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
xWrite<u8>( imm8 );
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 );
}
};
@@ -276,24 +255,11 @@ public:
//
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
//
template< typename T >
__forceinline void W( const xRegister32& to, const xRegisterSIMD<T>& from, u8 imm8 ) const
{
writeXMMop( 0x66, 0xc5, to, from, true );
xWrite<u8>( imm8 );
}
__forceinline void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc5, to, from, imm8 ); }
__forceinline void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const { xOpWrite0F( 0xc5, to, from, imm8 ); }
__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const
{
writeXMMop( 0x66, 0x153a, from, dest );
xWrite<u8>( imm8 );
}
__forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
{
writeXMMop( 0x66, 0x153a, from, dest );
xWrite<u8>( imm8 );
}
__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
__forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed

View File

@@ -161,7 +161,40 @@ namespace Internal
xWriteDisp( regfield, (s32)address );
}
// ------------------------------------------------------------------------
//////////////////////////////////////////////////////////////////////////////////////////
// emitter helpers for xmm instructions with prefixes, most of which use
// the basic opcode format (items inside braces denote optional or conditional
// emission):
//
// [Prefix] / 0x0f / [OpcodePrefix] / Opcode / ModRM+[SibSB]
//
// Prefixes are typically 0x66, 0xf2, or 0xf3. OpcodePrefixes are either 0x38 or
// 0x3a [any other value will result in an assertion failure].
//
__emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib )
{
SimdPrefix( prefix, opcode );
EmitSibMagic( instId, sib );
}
__emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data )
{
SimdPrefix( prefix, opcode );
xWriteDisp( instId, data );
}
__emitinline void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib )
{
xOpWrite0F( 0, opcode, instId, sib );
}
__emitinline void xOpWrite0F( u16 opcode, int instId, const void* data )
{
xOpWrite0F( 0, opcode, instId, data );
}
//////////////////////////////////////////////////////////////////////////////////////////
// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the
// instruction can be encoded as ModRM alone.
static __forceinline bool NeedsSibMagic( const ModSibBase& info )
@@ -288,13 +321,13 @@ const MovExtendImplAll<true> xMOVSX;
const DwordShiftImplAll<false> xSHLD;
const DwordShiftImplAll<true> xSHRD;
const Group8ImplAll<G8Type_BT> xBT;
const Group8ImplAll<G8Type_BTR> xBTR;
const Group8ImplAll<G8Type_BTS> xBTS;
const Group8ImplAll<G8Type_BTC> xBTC;
const Group8Impl<G8Type_BT> xBT;
const Group8Impl<G8Type_BTR> xBTR;
const Group8Impl<G8Type_BTS> xBTS;
const Group8Impl<G8Type_BTC> xBTC;
const BitScanImplAll<false> xBSF;
const BitScanImplAll<true> xBSR;
const BitScanImpl<0xbc> xBSF;
const BitScanImpl<0xbd> xBSR;
// ------------------------------------------------------------------------
const CMovImplGeneric xCMOV;
@@ -635,320 +668,4 @@ __emitinline void xBSWAP( const xRegister32& to )
write8( 0xC8 | to.Id );
}
//////////////////////////////////////////////////////////////////////////////////////////
// MMX / XMM Instructions
// (these will get put in their own file later)
// ------------------------------------------------------------------------
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4
// instructions). For any other lower value, the upper byte is assumed to be zero and ignored.
// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will
// generate an assertion.
//
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
{
const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a);
// If the lower byte is not a valid prefix and the upper byte is non-zero, it
// means we made a mistake!
if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 );
if( prefix != 0 )
{
if( is16BitOpcode )
xWrite<u32>( (opcode<<16) | 0x0f00 | prefix );
else
{
xWrite<u16>( 0x0f00 | prefix );
xWrite<u8>( opcode );
}
}
else
{
if( is16BitOpcode )
{
xWrite<u8>( 0x0f );
xWrite<u16>( opcode );
}
else
xWrite<u16>( (opcode<<8) | 0x0f );
}
}
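A standalone sketch (not part of the commit) confirming the little-endian packing above: for a 16-bit opcode such as PMOVSXBW, prefix 0x66 and opcode 0x2038 must come out on the wire as 66 0F 38 20.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    const uint8_t  prefix = 0x66;
    const uint16_t opcode = 0x2038;   // low byte 0x38 selects the 16-bit opcode path
    const uint32_t packed = ((uint32_t)opcode << 16) | 0x0f00 | prefix;

    uint8_t bytes[4];
    std::memcpy( bytes, &packed, sizeof(bytes) );   // x86 hosts are little-endian
    std::printf( "%02x %02x %02x %02x\n", bytes[0], bytes[1], bytes[2], bytes[3] );
    return 0;   // prints "66 0f 38 20" -- the encoding of pmovsxbw
}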
// [SSE-3]
const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
// [SSE-3]
const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS;
const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD;
const MovapsImplAll< 0x66, 0x10, 0x11 > xMOVUPD;
#ifdef ALWAYS_USE_MOVAPS
const MovapsImplAll< 0x66, 0x6f, 0x7f > xMOVDQA;
const MovapsImplAll< 0xf3, 0x6f, 0x7f > xMOVDQU;
#else
const MovapsImplAll< 0, 0x28, 0x29 > xMOVDQA;
const MovapsImplAll< 0, 0x10, 0x11 > xMOVDQU;
#endif
const MovhlImplAll<0x16> xMOVH;
const MovhlImplAll<0x12> xMOVL;
const MovhlImpl_RtoR<0x16> xMOVLH;
const MovhlImpl_RtoR<0x12> xMOVHL;
const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
const SimdImpl_DestRegEither<0x66,0xef> xPXOR;
const SimdImpl_AndNot xANDN;
const SimdImpl_UcomI<0x66,0x2e> xUCOMI;
const SimdImpl_rSqrt<0x53> xRCP;
const SimdImpl_rSqrt<0x52> xRSQRT;
const SimdImpl_Sqrt<0x51> xSQRT;
const SimdImpl_MinMax<0x5f> xMAX;
const SimdImpl_MinMax<0x5d> xMIN;
const SimdImpl_Shuffle<0xc6> xSHUF;
// ------------------------------------------------------------------------
const SimdImpl_Compare<SSE2_Equal> xCMPEQ;
const SimdImpl_Compare<SSE2_Less> xCMPLT;
const SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
const SimdImpl_Compare<SSE2_Unordered> xCMPUNORD;
const SimdImpl_Compare<SSE2_NotEqual> xCMPNE;
const SimdImpl_Compare<SSE2_NotLess> xCMPNLT;
const SimdImpl_Compare<SSE2_NotLessOrEqual> xCMPNLE;
const SimdImpl_Compare<SSE2_Ordered> xCMPORD;
// ------------------------------------------------------------------------
// SSE Conversion Operations, as looney as they are.
//
// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
//
const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD;
const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS;
const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ;
const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI;
const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS;
const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD;
const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS;
const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ;
const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD;
const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI;
const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI;
const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS;
const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD;
const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS;
const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD;
const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI;
const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ;
const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI;
const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ;
const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI;
const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI;
const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI;
// ------------------------------------------------------------------------
const SimdImpl_Shift<0xd0, 2> xPSRL;
const SimdImpl_Shift<0xf0, 6> xPSLL;
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
const SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
const SimdImpl_PMinMax<0xde,0x3c> xPMAX;
const SimdImpl_PMinMax<0xda,0x38> xPMIN;
const SimdImpl_PMul xPMUL;
const SimdImpl_PCompare xPCMP;
const SimdImpl_PShuffle xPSHUF;
const SimdImpl_PUnpack xPUNPCK;
const SimdImpl_Unpack xUNPCK;
const SimdImpl_Pack xPACK;
const SimdImpl_PAbsolute xPABS;
const SimdImpl_PSign xPSIGN;
const SimdImpl_PInsert xPINSR;
const SimdImpl_PExtract xPEXTR;
const SimdImpl_PMultAdd xPMADD;
const SimdImpl_HorizAdd xHADD;
//////////////////////////////////////////////////////////////////////////////////////////
//
__emitinline void xEMMS()
{
xWrite<u16>( 0x770F );
}
// Store Streaming SIMD Extension Control/Status to Mem32.
__emitinline void xSTMXCSR( u32* dest )
{
SimdPrefix( 0, 0xae );
xWriteDisp( 3, dest );
}
// Load Streaming SIMD Extension Control/Status from Mem32.
__emitinline void xLDMXCSR( const u32* src )
{
SimdPrefix( 0, 0xae );
xWriteDisp( 2, src );
}
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
// being cleared to zero.
__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, to, from ); }
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
// being cleared to zero.
__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); }
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
// being cleared to zero.
__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); }
// Moves lower quad of XMM to ptr64 (no bits are cleared)
__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); }
// Moves lower quad of XMM to ptr64 (no bits are cleared)
__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); }
__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); }
__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); }
__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); }
__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); }
__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); }
// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ'
__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); }
// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q'
__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from )
{
// Manual implementation of this form of MOVQ, since its parameters are unique in a way
// that breaks the template inference of writeXMMop();
SimdPrefix( 0xf2, 0xd6 );
ModRM_Direct( to.Id, from.Id );
}
//////////////////////////////////////////////////////////////////////////////////////////
//
#define IMPLEMENT_xMOVS( ssd, prefix ) \
__forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) writeXMMop( prefix, 0x10, to, from ); } \
__forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { writeXMMop( prefix, 0x10, to, from ); } \
__forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { writeXMMop( prefix, 0x10, to, from ); } \
__forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } \
__forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); }
IMPLEMENT_xMOVS( SS, 0xf3 )
IMPLEMENT_xMOVS( SD, 0xf2 )
//////////////////////////////////////////////////////////////////////////////////////////
// MOVNTDQA only supports a register as a target (ie, load form only, no stores);
// the remaining non-temporal movs below are store forms.
//
__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from )
{
xWrite<u32>( 0x2A380f66 );
xWriteDisp( to.Id, from );
}
__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from )
{
xWrite<u32>( 0x2A380f66 );
EmitSibMagic( to.Id, from );
}
__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); }
__forceinline void xMOVNTDQ( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); }
__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); }
__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); }
__forceinline void xMOVNTPS( void* to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); }
__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); }
__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); }
__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); }
__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x50, to, from ); }
__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); }
//////////////////////////////////////////////////////////////////////////////////////////
// INSERTPS / EXTRACTPS [SSE4.1 only!]
//
// [TODO] these might be served better as classes, especially if other instructions use
// the M32,sse,imm form (I forget offhand if any do).
// [SSE-4.1] Insert a single-precision floating-point value from src into a specified
// location in dest, and selectively zero out the data elements in dest according to
// the mask field in the immediate byte. The source operand can be a memory location
// (32 bits) or an XMM register (lower 32 bits used).
//
// Imm8 provides three fields:
// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if
// the source is a memory operand.
// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest.
// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written
// with 0.0 if set to 1.
//
__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 )
{
writeXMMop( 0x66, 0x213a, to, from );
xWrite<u8>( imm8 );
}
__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 )
{
writeXMMop( 0x66, 0x213a, to, from );
xWrite<u8>( imm8 );
}
__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 )
{
writeXMMop( 0x66, 0x213a, to, from );
xWrite<u8>( imm8 );
}
// [SSE-4.1] Extract a single-precision floating-point value from src at an offset
// determined by imm8[1:0]*32. The extracted single-precision floating-point value
// is stored into the low 32-bits of dest (or at a 32-bit memory pointer).
//
__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 )
{
writeXMMop( 0x66, 0x173a, to, from, true );
xWrite<u8>( imm8 );
}
__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 )
{
writeXMMop( 0x66, 0x173a, from, dest, true );
xWrite<u8>( imm8 );
}
__emitinline void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 )
{
writeXMMop( 0x66, 0x173a, from, dest, true );
xWrite<u8>( imm8 );
}
}


@@ -86,16 +86,16 @@ namespace x86Emitter
extern const Internal::DwordShiftImplAll<false> xSHLD;
extern const Internal::DwordShiftImplAll<true> xSHRD;
extern const Internal::Group8ImplAll<Internal::G8Type_BT> xBT;
extern const Internal::Group8ImplAll<Internal::G8Type_BTR> xBTR;
extern const Internal::Group8ImplAll<Internal::G8Type_BTS> xBTS;
extern const Internal::Group8ImplAll<Internal::G8Type_BTC> xBTC;
extern const Internal::Group8Impl<Internal::G8Type_BT> xBT;
extern const Internal::Group8Impl<Internal::G8Type_BTR> xBTR;
extern const Internal::Group8Impl<Internal::G8Type_BTS> xBTS;
extern const Internal::Group8Impl<Internal::G8Type_BTC> xBTC;
extern const Internal::JmpCallImplAll<true> xJMP;
extern const Internal::JmpCallImplAll<false> xCALL;
extern const Internal::BitScanImplAll<false> xBSF;
extern const Internal::BitScanImplAll<true> xBSR;
extern const Internal::BitScanImpl<0xbc> xBSF;
extern const Internal::BitScanImpl<0xbd> xBSR;
// ------------------------------------------------------------------------
extern const Internal::CMovImplGeneric xCMOV;
@@ -299,95 +299,28 @@ namespace x86Emitter
typedef xForwardJPO<s8> xForwardJPO8;
typedef xForwardJPO<s32> xForwardJPO32;
//////////////////////////////////////////////////////////////////////////////////////////
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
//
// Notes:
// * Some of the functions have been renamed to more clearly reflect what they actually
// do. Namely we've affixed "ZX" to several MOVs that take a register as a destination
// since that's what they do (MOVD clears upper 32/96 bits, etc).
//
// ------------------------------------------------------------------------
// MOVD has valid forms for MMX and XMM registers.
//
template< typename T >
__emitinline void xMOVDZX( const xRegisterSIMD<T>& to, const xRegister32& from )
{
Internal::writeXMMop( 0x66, 0x6e, to, from );
}
template< typename T >
__emitinline void xMOVDZX( const xRegisterSIMD<T>& to, const void* src )
{
Internal::writeXMMop( 0x66, 0x6e, to, src );
}
template< typename T >
void xMOVDZX( const xRegisterSIMD<T>& to, const ModSibBase& src )
{
Internal::writeXMMop( 0x66, 0x6e, to, src );
}
template< typename T >
__emitinline void xMOVD( const xRegister32& to, const xRegisterSIMD<T>& from )
{
Internal::writeXMMop( 0x66, 0x7e, from, to );
}
template< typename T >
__emitinline void xMOVD( void* dest, const xRegisterSIMD<T>& from )
{
Internal::writeXMMop( 0x66, 0x7e, from, dest );
}
template< typename T >
void xMOVD( const ModSibBase& dest, const xRegisterSIMD<T>& from )
{
Internal::writeXMMop( 0x66, 0x7e, from, dest );
}
// ------------------------------------------------------------------------
// xMASKMOV:
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.
// The default memory location is specified by DS:EDI. The most significant bit in each byte
// of the mask operand determines whether the corresponding byte in the source operand is
// written to the corresponding byte location in memory.
template< typename T >
static __forceinline void xMASKMOV( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); }
// xPMOVMSKB:
// Creates a mask made up of the most significant bit of each byte of the source
// operand and stores the result in the low byte or word of the destination operand.
// Upper bits of the destination are cleared to zero.
//
// When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on
// 128-bit (SSE) source, the byte mask is 16-bits.
//
template< typename T >
static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD<T>& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); }
// [SSSE-3] Concatenates dest and source operands into an intermediate composite,
// shifts the composite at byte granularity to the right by a constant immediate,
// and extracts the right-aligned result into the destination.
//
template< typename T >
static __forceinline void xPALIGNR( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from, u8 imm8 )
{
Internal::writeXMMop( 0x66, 0x0f3a, to, from );
xWrite<u8>( imm8 );
}
// ------------------------------------------------------------------------
extern void xEMMS();
extern void xSTMXCSR( u32* dest );
extern void xLDMXCSR( const u32* src );
extern void xMOVDZX( const xRegisterSSE& to, const xRegister32& from );
extern void xMOVDZX( const xRegisterSSE& to, const void* src );
extern void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src );
extern void xMOVDZX( const xRegisterMMX& to, const xRegister32& from );
extern void xMOVDZX( const xRegisterMMX& to, const void* src );
extern void xMOVDZX( const xRegisterMMX& to, const ModSibBase& src );
extern void xMOVD( const xRegister32& to, const xRegisterSSE& from );
extern void xMOVD( void* dest, const xRegisterSSE& from );
extern void xMOVD( const ModSibBase& dest, const xRegisterSSE& from );
extern void xMOVD( const xRegister32& to, const xRegisterMMX& from );
extern void xMOVD( void* dest, const xRegisterMMX& from );
extern void xMOVD( const ModSibBase& dest, const xRegisterMMX& from );
extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from );
extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from );
extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from );
@@ -430,31 +363,28 @@ namespace x86Emitter
extern void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from );
extern void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from );
extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
extern void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 );
extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 );
extern void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 );
extern void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 );
extern void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 );
extern void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from );
extern void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from );
extern void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from );
extern void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from );
extern void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
extern void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 );
// ------------------------------------------------------------------------
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS;
extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS;
extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> xMOVAPD;
extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> xMOVUPD;
extern const Internal::SimdImpl_MoveSSE<0x00,true> xMOVAPS;
extern const Internal::SimdImpl_MoveSSE<0x00,false> xMOVUPS;
#ifdef ALWAYS_USE_MOVAPS
extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> xMOVDQA;
extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> xMOVDQU;
extern const Internal::SimdImpl_MoveSSE<0,true> xMOVDQA;
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVDQU;
extern const Internal::SimdImpl_MoveSSE<0,true> xMOVAPD;
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVUPD;
#else
extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVDQA;
extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVDQU;
extern const Internal::SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA;
extern const Internal::SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU;
extern const Internal::SimdImpl_MoveSSE<0x66,true> xMOVAPD;
extern const Internal::SimdImpl_MoveSSE<0x66,false> xMOVUPD;
#endif
extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH;
@@ -463,6 +393,17 @@ namespace x86Emitter
extern const Internal::MovhlImplAll<0x16> xMOVH;
extern const Internal::MovhlImplAll<0x12> xMOVL;
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
extern void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 );
extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 );
extern void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 );
extern void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 );
extern void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 );
// ------------------------------------------------------------------------
extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND;
@@ -483,6 +424,8 @@ namespace x86Emitter
// ------------------------------------------------------------------------
extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST;
extern const Internal::SimdImpl_Compare<SSE2_Equal> xCMPEQ;
extern const Internal::SimdImpl_Compare<SSE2_Less> xCMPLT;
extern const Internal::SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
@@ -527,8 +470,8 @@ namespace x86Emitter
// ------------------------------------------------------------------------
extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL;
extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
@@ -550,5 +493,12 @@ namespace x86Emitter
extern const Internal::SimdImpl_PMultAdd xPMADD;
extern const Internal::SimdImpl_HorizAdd xHADD;
extern const Internal::SimdImpl_Blend xBLEND;
extern const Internal::SimdImpl_DotProduct xDP;
extern const Internal::SimdImpl_Round xROUND;
extern const Internal::SimdImpl_PMove<true> xPMOVSX;
extern const Internal::SimdImpl_PMove<false> xPMOVZX;
}


@@ -1,124 +0,0 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "PrecompiledHeader.h"
#include "ix86_legacy_internal.h"
//------------------------------------------------------------------
// MMX instructions
//
// note: r64 = mm
//------------------------------------------------------------------
using namespace x86Emitter;
emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); }
emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); }
emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); }
emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); }
emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); }
emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { xMOVDZX( xRegisterMMX(to), (void*)from ); }
emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { xMOVD( (void*)to, xRegisterMMX(from) ); }
emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { xMOVDZX( xRegisterMMX(to), xRegister32(from) ); }
emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVDZX( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); }
emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( xRegister32(to), xRegisterMMX(from) ); }
emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); }
emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); }
emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); }
#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \
emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \
emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) { xP##mod( xRegisterMMX(to), (void*)from ); } \
emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod( xRegisterSSE(to), (void*)from ); }
#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \
emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \
emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \
emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); }
#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \
emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \
emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \
emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) { xP##mod.sub( xRegisterMMX(to), imm ); } \
emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.sub( xRegisterSSE(to), imm ); }
#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \
DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \
DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \
DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \
emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); }
DEFINE_LEGACY_LOGIC_OPCODE( AND )
DEFINE_LEGACY_LOGIC_OPCODE( ANDN )
DEFINE_LEGACY_LOGIC_OPCODE( OR )
DEFINE_LEGACY_LOGIC_OPCODE( XOR )
DEFINE_LEGACY_SHIFT_OPCODE( SLL )
DEFINE_LEGACY_SHIFT_OPCODE( SRL )
DEFINE_LEGACY_SHIFT_STUFF( SRA, D )
DEFINE_LEGACY_SHIFT_STUFF( SRA, W )
DEFINE_LEGACY_ARITHMETIC( ADD, B )
DEFINE_LEGACY_ARITHMETIC( ADD, W )
DEFINE_LEGACY_ARITHMETIC( ADD, D )
DEFINE_LEGACY_ARITHMETIC( ADD, Q )
DEFINE_LEGACY_ARITHMETIC( ADD, SB )
DEFINE_LEGACY_ARITHMETIC( ADD, SW )
DEFINE_LEGACY_ARITHMETIC( ADD, USB )
DEFINE_LEGACY_ARITHMETIC( ADD, USW )
DEFINE_LEGACY_ARITHMETIC( SUB, B )
DEFINE_LEGACY_ARITHMETIC( SUB, W )
DEFINE_LEGACY_ARITHMETIC( SUB, D )
DEFINE_LEGACY_ARITHMETIC( SUB, Q )
DEFINE_LEGACY_ARITHMETIC( SUB, SB )
DEFINE_LEGACY_ARITHMETIC( SUB, SW )
DEFINE_LEGACY_ARITHMETIC( SUB, USB )
DEFINE_LEGACY_ARITHMETIC( SUB, USW )
DEFINE_LEGACY_ARITHMETIC( CMP, EQB );
DEFINE_LEGACY_ARITHMETIC( CMP, EQW );
DEFINE_LEGACY_ARITHMETIC( CMP, EQD );
DEFINE_LEGACY_ARITHMETIC( CMP, GTB );
DEFINE_LEGACY_ARITHMETIC( CMP, GTW );
DEFINE_LEGACY_ARITHMETIC( CMP, GTD );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD );
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { xPMUL.UDQ( xRegisterMMX( to ), (void*)from ); }
emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) ); }
emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 ); }
emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); }
emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 ); }
emitterT void EMMS() { xEMMS(); }


@@ -22,35 +22,109 @@
using namespace x86Emitter;
// ------------------------------------------------------------------------
// MMX / SSE Mixed Bag
// ------------------------------------------------------------------------
//------------------------------------------------------------------
// SSE instructions
//------------------------------------------------------------------
emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); }
emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); }
emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); }
emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); }
emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); }
#define SSEMtoR( code, overb ) \
assert( to < iREGCNT_XMM ), \
RexR(0, to), \
write16( code ), \
ModRM( 0, to, DISP32 ), \
write32( MEMADDR(from, 4 + overb) )
emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { xMOVDZX( xRegisterMMX(to), (void*)from ); }
emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { xMOVD( (void*)to, xRegisterMMX(from) ); }
emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { xMOVDZX( xRegisterMMX(to), xRegister32(from) ); }
emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVDZX( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); }
emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( xRegister32(to), xRegisterMMX(from) ); }
emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); }
#define SSERtoR( code ) \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
RexRB(0, to, from), \
write16( code ), \
ModRM( 3, to, from )
emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); }
emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); }
#define SSEMtoR66( code ) \
write8( 0x66 ), \
SSEMtoR( code, 0 )
#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \
emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \
emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) { xP##mod( xRegisterMMX(to), (void*)from ); } \
emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod( xRegisterSSE(to), (void*)from ); }
#define SSERtoM66( code ) \
write8( 0x66 ), \
SSERtoM( code, 0 )
#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \
emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \
emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \
emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); }
#define SSERtoR66( code ) \
write8( 0x66 ), \
SSERtoR( code )
#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \
emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \
emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \
emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) { xP##mod.sub( xRegisterMMX(to), imm ); } \
emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.sub( xRegisterSSE(to), imm ); }
#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \
DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \
DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \
DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \
emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); }
DEFINE_LEGACY_LOGIC_OPCODE( AND )
DEFINE_LEGACY_LOGIC_OPCODE( ANDN )
DEFINE_LEGACY_LOGIC_OPCODE( OR )
DEFINE_LEGACY_LOGIC_OPCODE( XOR )
DEFINE_LEGACY_SHIFT_OPCODE( SLL )
DEFINE_LEGACY_SHIFT_OPCODE( SRL )
DEFINE_LEGACY_SHIFT_STUFF( SRA, D )
DEFINE_LEGACY_SHIFT_STUFF( SRA, W )
DEFINE_LEGACY_ARITHMETIC( ADD, B )
DEFINE_LEGACY_ARITHMETIC( ADD, W )
DEFINE_LEGACY_ARITHMETIC( ADD, D )
DEFINE_LEGACY_ARITHMETIC( ADD, Q )
DEFINE_LEGACY_ARITHMETIC( ADD, SB )
DEFINE_LEGACY_ARITHMETIC( ADD, SW )
DEFINE_LEGACY_ARITHMETIC( ADD, USB )
DEFINE_LEGACY_ARITHMETIC( ADD, USW )
DEFINE_LEGACY_ARITHMETIC( SUB, B )
DEFINE_LEGACY_ARITHMETIC( SUB, W )
DEFINE_LEGACY_ARITHMETIC( SUB, D )
DEFINE_LEGACY_ARITHMETIC( SUB, Q )
DEFINE_LEGACY_ARITHMETIC( SUB, SB )
DEFINE_LEGACY_ARITHMETIC( SUB, SW )
DEFINE_LEGACY_ARITHMETIC( SUB, USB )
DEFINE_LEGACY_ARITHMETIC( SUB, USW )
DEFINE_LEGACY_ARITHMETIC( CMP, EQB );
DEFINE_LEGACY_ARITHMETIC( CMP, EQW );
DEFINE_LEGACY_ARITHMETIC( CMP, EQD );
DEFINE_LEGACY_ARITHMETIC( CMP, GTB );
DEFINE_LEGACY_ARITHMETIC( CMP, GTW );
DEFINE_LEGACY_ARITHMETIC( CMP, GTD );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW );
DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD );
DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD );
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { xPMUL.UDQ( xRegisterMMX( to ), (void*)from ); }
emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) ); }
emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 ); }
emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); }
emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 ); }
emitterT void EMMS() { xEMMS(); }
// ------------------------------------------------------------------------
// Begin SSE-Only Part!
// ------------------------------------------------------------------------
#define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \
emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \
@@ -290,73 +364,17 @@ emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 im
emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xINSERTPS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { xEXTRACTPS( xRegister32(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xDP.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) { xDP.PS( xRegisterSSE(to), (void*)from, imm8 ); }
emitterT void SSE4_BLENDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xBLEND.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xBLEND.VPS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) { xBLEND.VPS( xRegisterSSE(to), (void*)from ); }
emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMOVSX.DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); }
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
// SSE4.1
emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
write8(0x66);
write24(0x403A0F);
ModRM(3, to, from);
write8(imm8);
}
emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
write8(0x66);
write24(0x403A0F);
ModRM(0, to, DISP32);
write32(MEMADDR(from, 4));
write8(imm8);
}
emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8)
{
write8(0x66);
RexRB(0, to, from);
write24(0x0C3A0F);
ModRM(3, to, from);
write8(imm8);
}
emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x14380F);
ModRM(3, to, from);
}
emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from)
{
write8(0x66);
RexR(0, to);
write24(0x14380F);
ModRM(0, to, DISP32);
write32(MEMADDR(from, 4));
}
emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x25380F);
ModRM(3, to, from);
}
emitterT void SSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x35380F);
ModRM(3, to, from);
}
//////////////////////////////////////////////////////////////////////////////////////////
// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions)


@@ -0,0 +1,388 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "PrecompiledHeader.h"
#include "System.h"
#include "ix86_internal.h"
namespace x86Emitter {
using namespace Internal;
// ------------------------------------------------------------------------
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4
// instructions). Any other lower value assumes the upper value is 0 and ignored.
// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will
// generate an assertion.
//
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
{
const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a);
// If the lower byte is not a valid prefix and the upper byte is non-zero, it
// means we made a mistake!
if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 );
if( prefix != 0 )
{
if( is16BitOpcode )
xWrite<u32>( (opcode<<16) | 0x0f00 | prefix );
else
{
xWrite<u16>( 0x0f00 | prefix );
xWrite<u8>( opcode );
}
}
else
{
if( is16BitOpcode )
{
xWrite<u8>( 0x0f );
xWrite<u16>( opcode );
}
else
xWrite<u16>( (opcode<<8) | 0x0f );
}
}
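// For reference, a sketch of the byte sequences the above produces (opcodes taken
// from the implementations further down; illustrative only):
//   SimdPrefix( 0x66, 0x1738 );   // emits 66 0F 38 17  (PTEST's prefixed 16-bit opcode)
//   SimdPrefix( 0xf3, 0x7e );     // emits F3 0F 7E     (MOVQ xmm, xmm/m64)
//   SimdPrefix( 0x00, 0xae );     // emits 0F AE        (the STMXCSR/LDMXCSR group)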
// [SSE-3]
const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
// [SSE-3]
const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
const SimdImpl_MoveSSE<0x00,true> xMOVAPS;
// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is a simple displacement,
// which can be checked for alignment at runtime.
const SimdImpl_MoveSSE<0x00,false> xMOVUPS;
#ifdef ALWAYS_USE_MOVAPS
const SimdImpl_MoveSSE<0,true> xMOVDQA;
const SimdImpl_MoveSSE<0,true> xMOVAPD;
// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is a simple displacement,
// which can be checked for alignment at runtime.
const SimdImpl_MoveSSE<0,false> xMOVDQU;
const SimdImpl_MoveSSE<0,false> xMOVUPD;
#else
const SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA;
const SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU;
const SimdImpl_MoveSSE<0x66,true> xMOVAPD;
const SimdImpl_MoveSSE<0x66,false> xMOVUPD;
#endif
const MovhlImplAll<0x16> xMOVH;
const MovhlImplAll<0x12> xMOVL;
const MovhlImpl_RtoR<0x16> xMOVLH;
const MovhlImpl_RtoR<0x12> xMOVHL;
const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
const SimdImpl_DestRegEither<0x66,0xef> xPXOR;
const SimdImpl_AndNot xANDN;
const SimdImpl_UcomI<0x66,0x2e> xUCOMI;
const SimdImpl_rSqrt<0x53> xRCP;
const SimdImpl_rSqrt<0x52> xRSQRT;
const SimdImpl_Sqrt<0x51> xSQRT;
const SimdImpl_MinMax<0x5f> xMAX;
const SimdImpl_MinMax<0x5d> xMIN;
const SimdImpl_Shuffle<0xc6> xSHUF;
// ------------------------------------------------------------------------
// [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag
// only if all bits in the result are 0. PTEST also sets the CF flag according
// to the following condition: (xmm2/m128 AND NOT xmm1) == 0;
extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST;
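// Usage sketch (assumes the xmm register aliases and a forward-jump helper such as
// xForwardJZ8, declared elsewhere in the emitter headers):
//   xPTEST( xmm0, xmm1 );     // ZF=1 when (xmm0 AND xmm1) == 0
//   xForwardJZ8 allZero;      // branch taken when no bits overlap
//   // ... nonzero path ...
//   allZero.SetTarget();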
const SimdImpl_Compare<SSE2_Equal> xCMPEQ;
const SimdImpl_Compare<SSE2_Less> xCMPLT;
const SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
const SimdImpl_Compare<SSE2_Unordered> xCMPUNORD;
const SimdImpl_Compare<SSE2_NotEqual> xCMPNE;
const SimdImpl_Compare<SSE2_NotLess> xCMPNLT;
const SimdImpl_Compare<SSE2_NotLessOrEqual> xCMPNLE;
const SimdImpl_Compare<SSE2_Ordered> xCMPORD;
// ------------------------------------------------------------------------
// SSE Conversion Operations, as looney as they are.
//
// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
//
const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD;
const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS;
const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ;
const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI;
const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS;
const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD;
const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS;
const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ;
const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD;
const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI;
const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI;
const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS;
const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SD;
const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS;
const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD;
const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI;
const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ;
const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI;
const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ;
const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI;
const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI;
const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI;
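// Example of the pointer strictness in practice (a sketch; 'pValue' is a hypothetical
// storage location, and the eax/xmm aliases come from the emitter headers):
//   xCVTSI2SS( xmm0, eax );             // signed int32 in eax -> float in xmm0
//   xCVTTSS2SI( eax, ptr32[pValue] );   // truncating float -> int32; the m32 form needs ptr32[]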
// ------------------------------------------------------------------------
const SimdImpl_Shift<0xd0, 2> xPSRL;
const SimdImpl_Shift<0xf0, 6> xPSLL;
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
const SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
const SimdImpl_PMinMax<0xde,0x3c> xPMAX;
const SimdImpl_PMinMax<0xda,0x38> xPMIN;
const SimdImpl_PMul xPMUL;
const SimdImpl_PCompare xPCMP;
const SimdImpl_PShuffle xPSHUF;
const SimdImpl_PUnpack xPUNPCK;
const SimdImpl_Unpack xUNPCK;
const SimdImpl_Pack xPACK;
const SimdImpl_PAbsolute xPABS;
const SimdImpl_PSign xPSIGN;
const SimdImpl_PInsert xPINSR;
const SimdImpl_PExtract xPEXTR;
const SimdImpl_PMultAdd xPMADD;
const SimdImpl_HorizAdd xHADD;
const SimdImpl_Blend xBLEND;
const SimdImpl_DotProduct xDP;
const SimdImpl_Round xROUND;
const SimdImpl_PMove<true> xPMOVSX;
const SimdImpl_PMove<false> xPMOVZX;
//////////////////////////////////////////////////////////////////////////////////////////
//
__emitinline void xEMMS()
{
xWrite<u16>( 0x770F );
}
// Store Streaming SIMD Extension Control/Status to Mem32.
__emitinline void xSTMXCSR( u32* dest )
{
SimdPrefix( 0, 0xae );
xWriteDisp( 3, dest );
}
// Load Streaming SIMD Extension Control/Status from Mem32.
__emitinline void xLDMXCSR( const u32* src )
{
SimdPrefix( 0, 0xae );
xWriteDisp( 2, src );
}
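// Usage sketch: generated code that saves and later restores the MXCSR. The
// 'saved_mxcsr' variable is hypothetical; any static u32 works:
//   static u32 saved_mxcsr;
//   xSTMXCSR( &saved_mxcsr );   // the emitted code stores MXCSR to this address
//   xLDMXCSR( &saved_mxcsr );   // ...and this emitted code reloads it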
//////////////////////////////////////////////////////////////////////////////////////////
// MMX/SSE Mov Instructions (MOVD, MOVQ, MOVSS/MOVSD).
//
// Notes:
// * Some of the functions have been renamed to more clearly reflect what they actually
// do. Namely we've affixed "ZX" to several MOVs that take a register as a destination
// since that's what they do (MOVD clears upper 32/96 bits, etc).
//
// * MOVD has valid forms for MMX and XMM registers.
//
__forceinline void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ) { xOpWrite0F( 0x66, 0x6e, to, from ); }
__forceinline void xMOVDZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0x66, 0x6e, to, src ); }
__forceinline void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src ) { xOpWrite0F( 0x66, 0x6e, to, src ); }
__forceinline void xMOVDZX( const xRegisterMMX& to, const xRegister32& from ) { xOpWrite0F( 0x6e, to, from ); }
__forceinline void xMOVDZX( const xRegisterMMX& to, const void* src ) { xOpWrite0F( 0x6e, to, src ); }
__forceinline void xMOVDZX( const xRegisterMMX& to, const ModSibBase& src ) { xOpWrite0F( 0x6e, to, src ); }
__forceinline void xMOVD( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, to ); }
__forceinline void xMOVD( void* dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); }
__forceinline void xMOVD( const ModSibBase& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); }
__forceinline void xMOVD( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, to ); }
__forceinline void xMOVD( void* dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); }
__forceinline void xMOVD( const ModSibBase& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); }
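// A sketch of the ZX behavior (assumes the eax/xmm0/mm0 register aliases):
//   xMOVDZX( xmm0, eax );   // xmm0[31:0] = eax, xmm0[127:32] cleared to zero
//   xMOVDZX( mm0, eax );    // mm0[31:0] = eax, mm0[63:32] cleared to zero
//   xMOVD( eax, xmm0 );     // eax = xmm0[31:0] (nothing to clear)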
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
// being cleared to zero.
__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0xf3, 0x7e, to, from ); }
// Loads 64 bits from memory into the low quad of the destination, with the
// *upper 64 bits* of the destination register being cleared to zero.
__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); }
// Loads 64 bits from memory into the low quad of the destination, with the
// *upper 64 bits* of the destination register being cleared to zero.
__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); }
// Moves lower quad of XMM to ptr64 (no bits are cleared)
__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); }
// Moves lower quad of XMM to ptr64 (no bits are cleared)
__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); }
__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) xOpWrite0F( 0x6f, to, from ); }
__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { xOpWrite0F( 0x6f, to, src ); }
__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { xOpWrite0F( 0x6f, to, src ); }
__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); }
__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); }
// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ'
__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf3, 0xd6, to, from ); }
// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q'
__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from )
{
// Manual implementation of this form of MOVQ, since its parameters are unique in a way
// that breaks the template inference of xOpWrite0F();
SimdPrefix( 0xf2, 0xd6 );
ModRM_Direct( to.Id, from.Id );
}
//////////////////////////////////////////////////////////////////////////////////////////
//
#define IMPLEMENT_xMOVS( ssd, prefix ) \
__forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) xOpWrite0F( prefix, 0x10, to, from ); } \
__forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { xOpWrite0F( prefix, 0x10, to, from ); } \
__forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { xOpWrite0F( prefix, 0x10, to, from ); } \
__forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); } \
__forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); }
IMPLEMENT_xMOVS( SS, 0xf3 )
IMPLEMENT_xMOVS( SD, 0xf2 )
//////////////////////////////////////////////////////////////////////////////////////////
// Non-temporal movs: MOVNTDQA is load-only (register destination), while the
// remaining MOVNT* forms are store-only (memory destination).
//
__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from )
{
xWrite<u32>( 0x2A380f66 );
xWriteDisp( to.Id, from );
}
__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from )
{
xWrite<u32>( 0x2A380f66 );
EmitSibMagic( to.Id, from );
}
__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); }
__forceinline void xMOVNTDQ( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); }
__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); }
__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); }
__forceinline void xMOVNTPS( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); }
__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); }
__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); }
__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); }
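// Streaming-store sketch ('dst' is a hypothetical 16-byte-aligned pointer; the
// xmm0 alias is assumed):
//   xMOVNTPS( dst, xmm0 );   // write-combining store that bypasses the cache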
// ------------------------------------------------------------------------
__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x50, to, from ); }
__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x66, 0x50, to, from, true ); }
// xMASKMOV:
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.
// The default memory location is specified by DS:EDI. The most significant bit in each byte
// of the mask operand determines whether the corresponding byte in the source operand is
// written to the corresponding byte location in memory.
__forceinline void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xf7, to, from ); }
__forceinline void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf7, to, from ); }
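// Usage sketch: EDI must point at the destination before the generated MASKMOV runs
// ('dstBuffer' is hypothetical, and the xMOV immediate form is assumed from the emitter):
//   xMOV( edi, (uptr)dstBuffer );   // set up the implied DS:EDI destination
//   xMASKMOV( xmm0, xmm1 );         // store bytes of xmm0 where xmm1's byte MSBs are set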
// xPMOVMSKB:
// Creates a mask made up of the most significant bit of each byte of the source
// operand and stores the result in the low byte or word of the destination operand.
// Upper bits of the destination are cleared to zero.
//
// When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on
// 128-bit (SSE) source, the byte mask is 16-bits.
//
__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd7, to, from ); }
__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0xd7, to, from ); }
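// Sketch: checking whether any byte of xmm0 has its sign bit set (register aliases
// and the emitter's integer xTEST implementation assumed):
//   xPMOVMSKB( eax, xmm0 );   // eax[15:0] = the 16 byte-MSBs of xmm0
//   xTEST( eax, eax );        // ZF=0 if any mask bit was set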
// [SSSE-3] Concatenates dest and source operands into an intermediate composite,
// shifts the composite at byte granularity to the right by a constant immediate,
// and extracts the right-aligned result into the destination.
//
__forceinline void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x0f3a, to, from, imm8 ); }
__forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); }
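// Sketch: with imm8=4 the result is the low 16 bytes of the (to:from) composite
// shifted right 4 bytes -- ie, from's bytes 4..15 followed by to's bytes 0..3:
//   xPALIGNR( xmm0, xmm1, 4 );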
//////////////////////////////////////////////////////////////////////////////////////////
// INSERTPS / EXTRACTPS [SSE4.1 only!]
//
// [TODO] these might be served better as classes, especially if other instructions use
// the M32,sse,imm form (I forget offhand if any do).
// [SSE-4.1] Insert a single-precision floating-point value from src into a specified
// location in dest, and selectively zero out the data elements in dest according to
// the mask field in the immediate byte. The source operand can be a memory location
// (32 bits) or an XMM register (lower 32 bits used).
//
// Imm8 provides three fields:
// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if
// the source is a memory operand.
// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest.
// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written
// with 0.0 if set to 1.
//
__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); }
__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); }
__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); }
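// Imm8 sketch: take dword 1 of src, insert it as dword 2 of dest, and zero dest's
// dword 0 (register aliases assumed):
//   xINSERTPS( xmm0, xmm1, (1<<6) | (2<<4) | 0x1 );   // imm8 = 0x61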
// [SSE-4.1] Extract a single-precision floating-point value from src at an offset
// determined by imm8[1:0]*32. The extracted single-precision floating-point value
// is stored into the low 32-bits of dest (or at a 32-bit memory pointer).
//
__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); }
__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); }
__emitinline void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); }
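// Sketch: pull element 2 (the third float) out of xmm0 into a GPR (aliases assumed):
//   xEXTRACTPS( eax, xmm0, 2 );   // eax = raw bits of xmm0's dword 2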
}


@@ -48,4 +48,3 @@ extern void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from );
extern void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
extern void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from );
extern void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
extern void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );


@@ -252,28 +252,6 @@ namespace x86Emitter
}
};
//////////////////////////////////////////////////////////////////////////////////////////
//
template< typename OperandType >
class xRegisterSIMD : public xRegister<OperandType>
{
public:
static const xRegisterSIMD Empty; // defined as an empty/unused value (-1)
public:
xRegisterSIMD(): xRegister<OperandType>() {}
xRegisterSIMD( const xRegisterSIMD& src ) : xRegister<OperandType>( src.Id ) {}
xRegisterSIMD( const xRegister<OperandType>& src ) : xRegister<OperandType>( src ) {}
explicit xRegisterSIMD( int regId ) : xRegister<OperandType>( regId ) {}
xRegisterSIMD<OperandType>& operator=( const xRegisterSIMD<OperandType>& src )
{
xRegister<OperandType>::Id = src.Id;
return *this;
}
};
// ------------------------------------------------------------------------
// Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which
// means it finds undeclared variables when MSVC does not (Since MSVC compiles templates
@@ -282,8 +260,8 @@ namespace x86Emitter
// all about the the templated code in haphazard fashion. Yay.. >_<
//
typedef xRegisterSIMD<u128> xRegisterSSE;
typedef xRegisterSIMD<u64> xRegisterMMX;
typedef xRegister<u128> xRegisterSSE;
typedef xRegister<u64> xRegisterMMX;
typedef xRegister<u32> xRegister32;
typedef xRegister<u16> xRegister16;
typedef xRegister<u8> xRegister8;