mirror of https://github.com/PCSX2/pcsx2.git
Finished the emitter, complete with code cleanups! :) (added the last few SSE instructions, and inserted placeholders for some future additions to the x86 portion, regarding xchg/xadd/etc).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1047 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
ac0768e9a3
commit
ef565303a5
|
@ -905,14 +905,6 @@
|
|||
<Filter
|
||||
Name="Misc"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\HashMap.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\HashTools.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\Dump.cpp"
|
||||
>
|
||||
|
@ -921,6 +913,14 @@
|
|||
RelativePath="..\..\Dump.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\HashMap.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\HashTools.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\Misc.cpp"
|
||||
>
|
||||
|
@ -2965,10 +2965,6 @@
|
|||
RelativePath="..\..\x86\ix86\ix86_legacy_internal.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\ix86\ix86_legacy_mmx.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\ix86\ix86_legacy_sse.cpp"
|
||||
>
|
||||
|
@ -2977,6 +2973,10 @@
|
|||
RelativePath="..\..\x86\ix86\ix86_legacy_types.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\ix86\ix86_simd.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\ix86\ix86_sse_helpers.h"
|
||||
>
|
||||
|
@ -3028,6 +3028,10 @@
|
|||
RelativePath="..\..\x86\ix86\implement\test.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\ix86\implement\xchg.h"
|
||||
>
|
||||
</File>
|
||||
<Filter
|
||||
Name="xmm"
|
||||
>
|
||||
|
|
|
@ -935,6 +935,8 @@ void psxRecompileNextInstruction(int delayslot)
|
|||
#ifdef _DEBUG
|
||||
static void printfn()
|
||||
{
|
||||
extern void iDumpPsxRegisters(u32 startpc, u32 temp);
|
||||
|
||||
static int lastrec = 0;
|
||||
static int curcount = 0;
|
||||
const int skip = 0;
|
||||
|
@ -962,6 +964,8 @@ void iopRecRecompile(u32 startpc)
|
|||
u32 willbranch3 = 0;
|
||||
|
||||
#ifdef _DEBUG
|
||||
extern void iDumpPsxRegisters(u32 startpc, u32 temp);
|
||||
|
||||
if( psxdump & 4 )
|
||||
iDumpPsxRegisters(startpc, 0);
|
||||
#endif
|
||||
|
|
|
@ -316,7 +316,7 @@ void recMFHILO1(int hi)
|
|||
|
||||
if( reghi >= 0 ) {
|
||||
if( regd >= 0 ) {
|
||||
SSEX_MOVHLPS_XMM_to_XMM(regd, reghi);
|
||||
SSE_MOVHLPS_XMM_to_XMM(regd, reghi);
|
||||
xmmregs[regd].mode |= MODE_WRITE;
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -32,152 +32,39 @@ enum G8Type
|
|||
G8Type_BTC,
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Notes: Bit Test instructions are valid on 16/32 bit operands only.
|
||||
//
|
||||
template< G8Type InstType, typename ImmType >
|
||||
class Group8Impl
|
||||
{
|
||||
protected:
|
||||
static const uint OperandSize = sizeof(ImmType);
|
||||
|
||||
static void prefix16() { if( OperandSize == 2 ) xWrite<u8>( 0x66 ); }
|
||||
|
||||
public:
|
||||
Group8Impl() {} // For the love of GCC.
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __emitinline void Emit( const xRegister<ImmType>& bitbase, const xRegister<ImmType>& bitoffset )
|
||||
{
|
||||
prefix16();
|
||||
xWrite<u8>( 0x0f );
|
||||
xWrite<u8>( 0xa3 | (InstType << 2) );
|
||||
ModRM_Direct( bitoffset.Id, bitbase.Id );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __emitinline void Emit( void* bitbase, const xRegister<ImmType>& bitoffset )
|
||||
{
|
||||
prefix16();
|
||||
xWrite<u8>( 0x0f );
|
||||
xWrite<u8>( 0xa3 | (InstType << 2) );
|
||||
xWriteDisp( bitoffset.Id, bitbase );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __emitinline void Emit( const ModSibBase& bitbase, const xRegister<ImmType>& bitoffset )
|
||||
{
|
||||
prefix16();
|
||||
xWrite<u8>( 0x0f );
|
||||
xWrite<u8>( 0xa3 | (InstType << 2) );
|
||||
EmitSibMagic( bitoffset.Id, bitbase );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __emitinline void Emit( const xRegister<ImmType>& bitbase, u8 immoffset )
|
||||
{
|
||||
prefix16();
|
||||
xWrite<u16>( 0xba0f );
|
||||
ModRM_Direct( InstType, bitbase.Id );
|
||||
xWrite<u8>( immoffset );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __emitinline void Emit( const ModSibStrict<ImmType>& bitbase, u8 immoffset )
|
||||
{
|
||||
prefix16();
|
||||
xWrite<u16>( 0xba0f );
|
||||
EmitSibMagic( InstType, bitbase );
|
||||
xWrite<u8>( immoffset );
|
||||
}
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
//
|
||||
template< G8Type InstType >
|
||||
class Group8ImplAll
|
||||
{
|
||||
protected:
|
||||
typedef Group8Impl<InstType,u32> m_32;
|
||||
typedef Group8Impl<InstType,u32> m_16;
|
||||
|
||||
public:
|
||||
__forceinline void operator()( const xRegister32& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
|
||||
__forceinline void operator()( const xRegister16& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
|
||||
__forceinline void operator()( void* bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
|
||||
__forceinline void operator()( void* bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
|
||||
__noinline void operator()( const ModSibBase& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
|
||||
__noinline void operator()( const ModSibBase& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
|
||||
|
||||
__noinline void operator()( const ModSibStrict<u32>& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
|
||||
__noinline void operator()( const ModSibStrict<u16>& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
|
||||
void operator()( const xRegister<u32>& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); }
|
||||
void operator()( const xRegister<u16>& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); }
|
||||
|
||||
Group8ImplAll() {}
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BSF / BSR -- 16/32 operands supported only.
|
||||
//
|
||||
template< bool isReverse, typename ImmType >
|
||||
// 0xbc [fwd] / 0xbd [rev]
|
||||
//
|
||||
template< u16 Opcode >
|
||||
class BitScanImpl
|
||||
{
|
||||
protected:
|
||||
static const uint OperandSize = sizeof(ImmType);
|
||||
static void prefix16() { if( OperandSize == 2 ) xWrite<u8>( 0x66 ); }
|
||||
static void emitbase()
|
||||
{
|
||||
prefix16();
|
||||
xWrite<u8>( 0x0f );
|
||||
xWrite<u8>( isReverse ? 0xbd : 0xbc );
|
||||
}
|
||||
|
||||
public:
|
||||
BitScanImpl() {} // For the love of GCC.
|
||||
BitScanImpl() {}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __emitinline void Emit( const xRegister<ImmType>& to, const xRegister<ImmType>& from )
|
||||
{
|
||||
emitbase();
|
||||
ModRM_Direct( to.Id, from.Id );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __emitinline void Emit( const xRegister<ImmType>& to, const void* src )
|
||||
{
|
||||
emitbase();
|
||||
xWriteDisp( to.Id, src );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __emitinline void Emit( const xRegister<ImmType>& to, const ModSibBase& sibsrc )
|
||||
{
|
||||
emitbase();
|
||||
EmitSibMagic( to.Id, sibsrc );
|
||||
}
|
||||
__forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegister32& to, const void* src ) const { xOpWrite0F( Opcode, to, src ); }
|
||||
__forceinline void operator()( const xRegister16& to, const void* src ) const { xOpWrite0F( 0x66, Opcode, to, src ); }
|
||||
__forceinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { xOpWrite0F( Opcode, to, sibsrc ); }
|
||||
__forceinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { xOpWrite0F( 0x66, Opcode, to, sibsrc ); }
|
||||
};
|
||||
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// BSF/BSR -- 16 and 32 bit operand forms only!
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Bit Test Instructions - Valid on 16/32 bit operands only.
|
||||
//
|
||||
template< bool isReverse >
|
||||
class BitScanImplAll
|
||||
template< G8Type InstType >
|
||||
class Group8Impl : public BitScanImpl<0xa3 | (InstType << 2)>
|
||||
{
|
||||
protected:
|
||||
typedef BitScanImpl<isReverse,u32> m_32;
|
||||
typedef BitScanImpl<isReverse,u32> m_16;
|
||||
|
||||
public:
|
||||
__forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { m_32::Emit( to, from ); }
|
||||
__forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { m_16::Emit( to, from ); }
|
||||
__forceinline void operator()( const xRegister32& to, const void* src ) const { m_32::Emit( to, src ); }
|
||||
__forceinline void operator()( const xRegister16& to, const void* src ) const { m_16::Emit( to, src ); }
|
||||
__noinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); }
|
||||
__noinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); }
|
||||
using BitScanImpl<0xa3 | (InstType << 2)>::operator();
|
||||
|
||||
BitScanImplAll() {}
|
||||
__forceinline void operator()( const ModSibStrict<u32>& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite<u8>( bitoffset ); }
|
||||
__forceinline void operator()( const ModSibStrict<u16>& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite<u8>( bitoffset ); }
|
||||
void operator()( const xRegister<u32>& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite<u8>( bitoffset ); }
|
||||
void operator()( const xRegister<u16>& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite<u8>( bitoffset ); }
|
||||
|
||||
Group8Impl() {}
|
||||
};
|
||||
|
||||
|
|
|
@ -167,9 +167,9 @@ class xImpl_G1Compare : xImpl_Group1< G1Type_CMP >
|
|||
protected:
|
||||
template< u8 Prefix > struct Woot
|
||||
{
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
|
||||
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( cmptype ); }
|
||||
Woot() {}
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
/* Pcsx2 - Pc Ps2 Emulator
|
||||
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// This header file is intended to be the future home of xchg, cmpxchg, xadd, and
|
||||
// other threading-related exchange instructions.
|
|
@ -28,19 +28,25 @@ class _SimdShiftHelper
|
|||
public:
|
||||
_SimdShiftHelper() {}
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( 0x66, Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); }
|
||||
|
||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { writeXMMop( Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const void* from ) const { writeXMMop( Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { writeXMMop( Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const void* from ) const { xOpWrite0F( Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode1, to, from ); }
|
||||
|
||||
|
||||
template< typename OperandType >
|
||||
__emitinline void operator()( const xRegisterSIMD<OperandType>& to, u8 imm8 ) const
|
||||
__emitinline void operator()( const xRegisterSSE& to, u8 imm8 ) const
|
||||
{
|
||||
SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm );
|
||||
SimdPrefix( 0x66, OpcodeImm );
|
||||
ModRM( 3, (int)Modcode, to.Id );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
__emitinline void operator()( const xRegisterMMX& to, u8 imm8 ) const
|
||||
{
|
||||
SimdPrefix( 0x00, OpcodeImm );
|
||||
ModRM( 3, (int)Modcode, to.Id );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
@ -68,11 +74,11 @@ class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
|
|||
public:
|
||||
const _SimdShiftHelper<OpcodeBase1+3,0x73,Modcode> Q;
|
||||
|
||||
void DQ( const xRegisterSSE& to, u8 imm ) const
|
||||
void DQ( const xRegisterSSE& to, u8 imm8 ) const
|
||||
{
|
||||
SimdPrefix( 0x66, 0x73 );
|
||||
ModRM( 3, (int)Modcode+1, to.Id );
|
||||
xWrite<u8>( imm );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
SimdImpl_Shift() {}
|
||||
|
@ -156,8 +162,8 @@ template< u16 OpcodeSSE >
|
|||
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
|
||||
{
|
||||
public:
|
||||
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
|
||||
SimdImpl_Sqrt() {}
|
||||
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -165,9 +171,9 @@ public:
|
|||
class SimdImpl_AndNot
|
||||
{
|
||||
public:
|
||||
SimdImpl_AndNot() {}
|
||||
const SimdImpl_DestRegSSE<0x00,0x55> PS;
|
||||
const SimdImpl_DestRegSSE<0x66,0x55> PD;
|
||||
SimdImpl_AndNot() {}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -282,3 +288,87 @@ public:
|
|||
// *src* stores the result in the high quadword of dest.
|
||||
const SimdImpl_DestRegSSE<0x66, 0x7c> PD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// DotProduct calculation (SSE4.1 only!)
|
||||
//
|
||||
class SimdImpl_DotProduct
|
||||
{
|
||||
public:
|
||||
SimdImpl_DotProduct() {}
|
||||
|
||||
// [SSE-4.1] Conditionally multiplies the packed single precision floating-point
|
||||
// values in dest with the packed single-precision floats in src depending on a
|
||||
// mask extracted from the high 4 bits of the immediate byte. If a condition mask
|
||||
// bit in Imm8[7:4] is zero, the corresponding multiplication is replaced by a value
|
||||
// of 0.0. The four resulting single-precision values are summed into an inter-
|
||||
// mediate result.
|
||||
//
|
||||
// The intermediate result is conditionally broadcasted to the destination using a
|
||||
// broadcast mask specified by bits [3:0] of the immediate byte. If a broadcast
|
||||
// mask bit is 1, the intermediate result is copied to the corresponding dword
|
||||
// element in dest. If a broadcast mask bit is zero, the corresponding element in
|
||||
// the destination is set to zero.
|
||||
//
|
||||
SimdImpl_DestRegImmSSE<0x66,0x403a> PS;
|
||||
|
||||
// [SSE-4.1]
|
||||
SimdImpl_DestRegImmSSE<0x66,0x413a> PD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Rounds floating point values (packed or single scalar) by an arbitrary rounding mode.
|
||||
// (SSE4.1 only!)
|
||||
class SimdImpl_Round
|
||||
{
|
||||
public:
|
||||
SimdImpl_Round() {}
|
||||
|
||||
// [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest.
|
||||
//
|
||||
// Imm8 specifies control fields for the rounding operation:
|
||||
// Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact)
|
||||
// Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
|
||||
// Bits 1:0 - Specifies a rounding mode for this instruction only.
|
||||
//
|
||||
// Rounding Mode Reference:
|
||||
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
|
||||
//
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x083a> PS;
|
||||
|
||||
// [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest.
|
||||
//
|
||||
// Imm8 specifies control fields for the rounding operation:
|
||||
// Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact)
|
||||
// Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
|
||||
// Bits 1:0 - Specifies a rounding mode for this instruction only.
|
||||
//
|
||||
// Rounding Mode Reference:
|
||||
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
|
||||
//
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x093a> PD;
|
||||
|
||||
// [SSE-4.1] Rounds the single-precision src value and stores in dest.
|
||||
//
|
||||
// Imm8 specifies control fields for the rounding operation:
|
||||
// Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact)
|
||||
// Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
|
||||
// Bits 1:0 - Specifies a rounding mode for this instruction only.
|
||||
//
|
||||
// Rounding Mode Reference:
|
||||
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
|
||||
//
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x0a3a> SS;
|
||||
|
||||
// [SSE-4.1] Rounds the double-precision src value and stores in dest.
|
||||
//
|
||||
// Imm8 specifies control fields for the rounding operation:
|
||||
// Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact)
|
||||
// Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
|
||||
// Bits 1:0 - Specifies a rounding mode for this instruction only.
|
||||
//
|
||||
// Rounding Mode Reference:
|
||||
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
|
||||
//
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x0b3a> SD;
|
||||
};
|
||||
|
|
|
@ -23,57 +23,106 @@
|
|||
|
||||
extern void SimdPrefix( u8 prefix, u16 opcode );
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// xmm emitter helpers for xmm instruction with prefixes.
|
||||
// These functions also support deducing the use of the prefix from the template parameters,
|
||||
// since most xmm instructions use a prefix and most mmx instructions do not. (some mov
|
||||
// instructions violate this "guideline.")
|
||||
//
|
||||
template< typename T, typename T2 >
|
||||
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
|
||||
extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib );
|
||||
extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data );
|
||||
extern void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib );
|
||||
extern void xOpWrite0F( u16 opcode, int instId, const void* data );
|
||||
|
||||
template< typename T2 > __emitinline
|
||||
void xOpWrite0F( u8 prefix, u16 opcode, int instId, const xRegister<T2>& from )
|
||||
{
|
||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||
ModRM_Direct( to.Id, from.Id );
|
||||
SimdPrefix( prefix, opcode );
|
||||
ModRM_Direct( instId, from.Id );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
|
||||
template< typename T2 > __emitinline
|
||||
void xOpWrite0F( u16 opcode, int instId, const xRegister<T2>& from )
|
||||
{
|
||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||
EmitSibMagic( reg.Id, sib );
|
||||
xOpWrite0F( 0, opcode, instId, from );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
|
||||
template< typename T, typename T2 > __emitinline
|
||||
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
|
||||
{
|
||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||
xWriteDisp( reg.Id, data );
|
||||
xOpWrite0F( prefix, opcode, to.Id, from );
|
||||
}
|
||||
|
||||
template< typename T > __noinline
|
||||
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
|
||||
{
|
||||
xOpWrite0F( prefix, opcode, reg.Id, sib );
|
||||
}
|
||||
|
||||
template< typename T > __emitinline
|
||||
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
|
||||
{
|
||||
xOpWrite0F( prefix, opcode, reg.Id, data );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// xmm emitter helpers for xmm instructions *without* prefixes.
|
||||
// These are normally used for special instructions that have MMX forms only (non-SSE), however
|
||||
// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
|
||||
//
|
||||
template< typename T, typename T2 >
|
||||
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
|
||||
template< typename T, typename T2 > __emitinline
|
||||
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, u8 imm8 )
|
||||
{
|
||||
SimdPrefix( 0, opcode );
|
||||
ModRM_Direct( to.Id, from.Id );
|
||||
xOpWrite0F( prefix, opcode, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
|
||||
template< typename T > __noinline
|
||||
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, u8 imm8 )
|
||||
{
|
||||
SimdPrefix( 0, opcode );
|
||||
EmitSibMagic( reg.Id, sib );
|
||||
xOpWrite0F( prefix, opcode, reg, sib );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
|
||||
template< typename T > __emitinline
|
||||
void xOpWrite0F( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, u8 imm8 )
|
||||
{
|
||||
SimdPrefix( 0, opcode );
|
||||
xWriteDisp( reg.Id, data );
|
||||
xOpWrite0F( prefix, opcode, reg, data );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
template< typename T, typename T2 > __emitinline
|
||||
void xOpWrite0F( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
|
||||
{
|
||||
xOpWrite0F( 0, opcode, to, from );
|
||||
}
|
||||
|
||||
template< typename T > __noinline
|
||||
void xOpWrite0F( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
|
||||
{
|
||||
xOpWrite0F( 0, opcode, reg, sib );
|
||||
}
|
||||
|
||||
template< typename T > __emitinline
|
||||
void xOpWrite0F( u16 opcode, const xRegister<T>& reg, const void* data )
|
||||
{
|
||||
xOpWrite0F( 0, opcode, reg, data );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
template< typename T, typename T2 > __emitinline
|
||||
void xOpWrite0F( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, u8 imm8 )
|
||||
{
|
||||
xOpWrite0F( opcode, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
template< typename T > __noinline
|
||||
void xOpWrite0F( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, u8 imm8 )
|
||||
{
|
||||
xOpWrite0F( opcode, reg, sib );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
template< typename T > __emitinline
|
||||
void xOpWrite0F( u16 opcode, const xRegister<T>& reg, const void* data, u8 imm8 )
|
||||
{
|
||||
xOpWrite0F( opcode, reg, data );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -84,9 +133,9 @@ template< u8 Prefix, u16 Opcode >
|
|||
class SimdImpl_DestRegSSE
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
|
||||
SimdImpl_DestRegSSE() {} //GCWho?
|
||||
};
|
||||
|
@ -99,9 +148,9 @@ template< u8 Prefix, u16 Opcode >
|
|||
class SimdImpl_DestRegImmSSE
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
|
||||
|
||||
SimdImpl_DestRegImmSSE() {} //GCWho?
|
||||
};
|
||||
|
@ -110,9 +159,9 @@ template< u8 Prefix, u16 Opcode >
|
|||
class SimdImpl_DestRegImmMMX
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
|
||||
|
||||
SimdImpl_DestRegImmMMX() {} //GCWho?
|
||||
};
|
||||
|
@ -125,27 +174,33 @@ template< u8 Prefix, u16 Opcode >
|
|||
class SimdImpl_DestRegEither
|
||||
{
|
||||
public:
|
||||
template< typename T > __forceinline
|
||||
void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
template< typename T > __forceinline
|
||||
void operator()( const xRegisterSIMD<T>& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
template< typename T > __forceinline
|
||||
void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
|
||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const void* from ) const { xOpWrite0F( Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode, to, from ); }
|
||||
|
||||
SimdImpl_DestRegEither() {} //GCWho?
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing MMX/SSE operations which the destination *must* be a register, but the source
|
||||
// can be regDirect or ModRM (indirect).
|
||||
// For implementing MMX/SSE operations where the destination *must* be a register, but the
|
||||
// source can be Direct or Indirect (ModRM/SibSB). The SrcOperandType template parameter
|
||||
// is used to enforce type strictness of the (void*) parameter and ModSib<> parameter, so
|
||||
// that the programmer must be explicit in specifying desired operand size.
|
||||
//
|
||||
// IMPORTANT: This helper assumes the prefix opcode is written *always* -- regardless of
|
||||
// MMX or XMM register status.
|
||||
//
|
||||
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
|
||||
class SimdImpl_DestRegStrict
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
|
||||
SimdImpl_DestRegStrict() {} //GCWho?
|
||||
};
|
||||
|
|
|
@ -41,9 +41,9 @@ class SimdImpl_Compare
|
|||
protected:
|
||||
template< u8 Prefix > struct Woot
|
||||
{
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||
Woot() {}
|
||||
};
|
||||
|
||||
|
@ -128,4 +128,3 @@ public:
|
|||
// packed min/max values in dest. (SSE operands only)
|
||||
const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
|
||||
};
|
||||
|
||||
|
|
|
@ -30,10 +30,10 @@ protected:
|
|||
struct Woot
|
||||
{
|
||||
Woot() {}
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); }
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -51,26 +51,104 @@ template< u16 Opcode >
|
|||
class MovhlImpl_RtoR
|
||||
{
|
||||
public:
|
||||
__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); }
|
||||
__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
||||
__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Opcode, to, from ); }
|
||||
__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); }
|
||||
|
||||
MovhlImpl_RtoR() {} //GCC.
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
|
||||
class MovapsImplAll
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
|
||||
//
|
||||
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||
// which can be checked for alignment at runtime.
|
||||
//
|
||||
template< u8 Prefix, bool isAligned >
|
||||
class SimdImpl_MoveSSE
|
||||
{
|
||||
public:
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); }
|
||||
static const u16 OpcodeA = 0x28; // Aligned [aps] form
|
||||
static const u16 OpcodeU = 0x10; // unaligned [ups] form
|
||||
|
||||
MovapsImplAll() {} //GCC.
|
||||
public:
|
||||
SimdImpl_MoveSSE() {} //GCC.
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
|
||||
{
|
||||
if( to != from ) xOpWrite0F( Prefix, OpcodeA, to, from );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
|
||||
{
|
||||
xOpWrite0F( Prefix, (isAligned || ((uptr)from & 0x0f) == 0) ? OpcodeA : OpcodeU, to, from );
|
||||
}
|
||||
|
||||
__forceinline void operator()( void* to, const xRegisterSSE& from ) const
|
||||
{
|
||||
xOpWrite0F( Prefix, (isAligned || ((uptr)to & 0x0f) == 0) ? OpcodeA+1 : OpcodeU+1, from, to );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
|
||||
{
|
||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||
bool isReallyAligned = isAligned || ( ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty() );
|
||||
xOpWrite0F( Prefix, isReallyAligned ? OpcodeA : OpcodeU, to, from );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
|
||||
{
|
||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
|
||||
xOpWrite0F( Prefix, isReallyAligned ? OpcodeA+1 : OpcodeU+1, from, to );
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Implementations for MOVDQA / MOVDQU
|
||||
//
|
||||
template< u8 Prefix, bool isAligned >
|
||||
class SimdImpl_MoveDQ
|
||||
{
|
||||
static const u8 PrefixA = 0x66; // Aligned [aps] form
|
||||
static const u8 PrefixU = 0xf3; // unaligned [ups] form
|
||||
|
||||
static const u16 Opcode = 0x6f;
|
||||
static const u16 Opcode_Alt = 0x7f; // alternate ModRM encoding (reverse src/dst)
|
||||
|
||||
public:
|
||||
SimdImpl_MoveDQ() {} //GCC.
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
|
||||
{
|
||||
if( to != from ) xOpWrite0F( PrefixA, Opcode, to, from );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
|
||||
{
|
||||
xOpWrite0F( (isAligned || (from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode, to, from );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const
|
||||
{
|
||||
xOpWrite0F( (isAligned || (from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode_Alt, to, from );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
|
||||
{
|
||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||
bool isReallyAligned = isAligned || ( (from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty() );
|
||||
xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode, to, from );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
|
||||
{
|
||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
|
||||
xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode_Alt, to, from );
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
template< u8 AltPrefix, u16 OpcodeSSE >
|
||||
|
@ -83,12 +161,79 @@ public:
|
|||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Blend - Conditional copying of values in src into dest.
|
||||
//
|
||||
class SimdImpl_Blend
|
||||
{
|
||||
public:
|
||||
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
|
||||
// mask bits in the immediate operand (bits [3:0]). Each mask bit corresponds to a
|
||||
// dword element in a 128-bit operand.
|
||||
//
|
||||
// If a mask bit is 1, then the corresponding dword in the source operand is copied
|
||||
// to dest, else the dword element in dest is left unchanged.
|
||||
//
|
||||
SimdImpl_DestRegImmSSE<0x66,0x0c3a> PS;
|
||||
|
||||
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
|
||||
// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
|
||||
// quadword element in a 128-bit operand.
|
||||
//
|
||||
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
|
||||
// to dest, else the quadword element in dest is left unchanged.
|
||||
//
|
||||
SimdImpl_DestRegImmSSE<0x66,0x0d3a> PD;
|
||||
|
||||
SimdImpl_DestRegImmSSE<0x66,0x1438> VPS;
|
||||
SimdImpl_DestRegImmSSE<0x66,0x1538> VPD;
|
||||
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
|
||||
// mask (bits [3:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds
|
||||
// to a dword element in the 128-bit operand.
|
||||
//
|
||||
// If a mask bit is 1, then the corresponding dword in the source operand is copied
|
||||
// to dest, else the dword element in dest is left unchanged.
|
||||
//
|
||||
SimdImpl_DestRegSSE<0x66,0x1438> VPS;
|
||||
|
||||
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
|
||||
// mask (bits [1:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds
|
||||
// to a quadword element in the 128-bit operand.
|
||||
//
|
||||
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
|
||||
// to dest, else the quadword element in dest is left unchanged.
|
||||
//
|
||||
SimdImpl_DestRegSSE<0x66,0x1538> VPD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed Move with Sign or Zero extension.
|
||||
//
|
||||
template< bool SignExtend >
|
||||
class SimdImpl_PMove
|
||||
{
|
||||
static const u16 OpcodeBase = SignExtend ? 0x2038 : 0x3038;
|
||||
|
||||
public:
|
||||
// [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers
|
||||
// and store them in dest.
|
||||
SimdImpl_DestRegStrict<0x66,OpcodeBase,xRegisterSSE,xRegisterSSE,u64> BW;
|
||||
|
||||
// [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers
|
||||
// and store them in dest.
|
||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x100,xRegisterSSE,xRegisterSSE,u32> BD;
|
||||
|
||||
// [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers
|
||||
// and store them in dest.
|
||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x200,xRegisterSSE,xRegisterSSE,u16> BQ;
|
||||
|
||||
// [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers
|
||||
// and store them in dest.
|
||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x300,xRegisterSSE,xRegisterSSE,u64> WD;
|
||||
|
||||
// [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers
|
||||
// and store them in dest.
|
||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x400,xRegisterSSE,xRegisterSSE,u32> WQ;
|
||||
|
||||
// [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers
|
||||
// and store them in dest.
|
||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x500,xRegisterSSE,xRegisterSSE,u64> DQ;
|
||||
};
|
||||
|
||||
|
|
|
@ -26,9 +26,9 @@ class SimdImpl_Shuffle
|
|||
protected:
|
||||
template< u8 Prefix > struct Woot
|
||||
{
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||
Woot() {}
|
||||
};
|
||||
|
||||
|
@ -182,20 +182,17 @@ protected:
|
|||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -203,28 +200,13 @@ public:
|
|||
SimdImpl_PInsert() {}
|
||||
|
||||
// Operation can be performed on either MMX or SSE src operands.
|
||||
template< typename T >
|
||||
__forceinline void W( const xRegisterSIMD<T>& to, const xRegister32& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, 0xc4, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
__forceinline void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
|
||||
__forceinline void W( const xRegisterSSE& to, const void* from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
|
||||
__forceinline void W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
|
||||
|
||||
// Operation can be performed on either MMX or SSE src operands.
|
||||
template< typename T >
|
||||
__forceinline void W( const xRegisterSIMD<T>& to, const void* from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, 0xc4, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
// Operation can be performed on either MMX or SSE src operands.
|
||||
template< typename T >
|
||||
__forceinline void W( const xRegisterSIMD<T>& to, const ModSibBase& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, 0xc4, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
__forceinline void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
|
||||
__forceinline void W( const xRegisterMMX& to, const void* from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
|
||||
__forceinline void W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
|
||||
|
||||
// [SSE-4.1]
|
||||
const ByteDwordForms<0x20> B;
|
||||
|
@ -250,20 +232,17 @@ protected:
|
|||
|
||||
__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
|
||||
}
|
||||
|
||||
__forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
|
||||
xWrite<u8>( imm8 );
|
||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 );
|
||||
}
|
||||
|
||||
__forceinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
|
||||
xWrite<u8>( imm8 );
|
||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 );
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -276,24 +255,11 @@ public:
|
|||
//
|
||||
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
|
||||
//
|
||||
template< typename T >
|
||||
__forceinline void W( const xRegister32& to, const xRegisterSIMD<T>& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, 0xc5, to, from, true );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
__forceinline void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc5, to, from, imm8 ); }
|
||||
__forceinline void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const { xOpWrite0F( 0xc5, to, from, imm8 ); }
|
||||
|
||||
__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, 0x153a, from, dest );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
__forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
|
||||
{
|
||||
writeXMMop( 0x66, 0x153a, from, dest );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
|
||||
__forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
|
||||
|
||||
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
|
||||
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||
|
|
|
@ -161,7 +161,40 @@ namespace Internal
|
|||
xWriteDisp( regfield, (s32)address );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// emitter helpers for xmm instruction with prefixes, most of which are using
|
||||
// the basic opcode format (items inside braces denote optional or conditional
|
||||
// emission):
|
||||
//
|
||||
// [Prefix] / 0x0f / [OpcodePrefix] / Opcode / ModRM+[SibSB]
|
||||
//
|
||||
// Prefixes are typically 0x66, 0xf2, or 0xf3. OpcodePrefixes are either 0x38 or
|
||||
// 0x3a [any other value will result in an assertion failure].
|
||||
//
|
||||
__emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib )
|
||||
{
|
||||
SimdPrefix( prefix, opcode );
|
||||
EmitSibMagic( instId, sib );
|
||||
}
|
||||
|
||||
__emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data )
|
||||
{
|
||||
SimdPrefix( prefix, opcode );
|
||||
xWriteDisp( instId, data );
|
||||
}
|
||||
|
||||
__emitinline void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib )
|
||||
{
|
||||
xOpWrite0F( 0, opcode, instId, sib );
|
||||
}
|
||||
|
||||
__emitinline void xOpWrite0F( u16 opcode, int instId, const void* data )
|
||||
{
|
||||
xOpWrite0F( 0, opcode, instId, data );
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the
|
||||
// instruction can be encoded as ModRm alone.
|
||||
static __forceinline bool NeedsSibMagic( const ModSibBase& info )
|
||||
|
@ -288,13 +321,13 @@ const MovExtendImplAll<true> xMOVSX;
|
|||
const DwordShiftImplAll<false> xSHLD;
|
||||
const DwordShiftImplAll<true> xSHRD;
|
||||
|
||||
const Group8ImplAll<G8Type_BT> xBT;
|
||||
const Group8ImplAll<G8Type_BTR> xBTR;
|
||||
const Group8ImplAll<G8Type_BTS> xBTS;
|
||||
const Group8ImplAll<G8Type_BTC> xBTC;
|
||||
const Group8Impl<G8Type_BT> xBT;
|
||||
const Group8Impl<G8Type_BTR> xBTR;
|
||||
const Group8Impl<G8Type_BTS> xBTS;
|
||||
const Group8Impl<G8Type_BTC> xBTC;
|
||||
|
||||
const BitScanImplAll<false> xBSF;
|
||||
const BitScanImplAll<true> xBSR;
|
||||
const BitScanImpl<0xbc> xBSF;
|
||||
const BitScanImpl<0xbd> xBSR;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
const CMovImplGeneric xCMOV;
|
||||
|
@ -635,320 +668,4 @@ __emitinline void xBSWAP( const xRegister32& to )
|
|||
write8( 0xC8 | to.Id );
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MMX / XMM Instructions
|
||||
// (these will get put in their own file later)
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
|
||||
// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4
|
||||
// instructions). Any other lower value assumes the upper value is 0 and ignored.
|
||||
// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will
|
||||
// generate an assertion.
|
||||
//
|
||||
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
{
	// A low byte of 0x38 or 0x3a marks the opcode as a two-byte (extended) opcode.
	const u8 lowByte = opcode & 0xff;
	const bool isExtended = (lowByte == 0x38) || (lowByte == 0x3a);

	// If the lower byte is not a valid prefix and the upper byte is non-zero, the
	// caller passed a malformed opcode.
	if( !isExtended ) jASSUME( (opcode >> 8) == 0 );

	if( isExtended )
	{
		if( prefix != 0 )
		{
			// Prefix, 0x0f escape and both opcode bytes packed into a single 32-bit write.
			xWrite<u32>( (opcode<<16) | 0x0f00 | prefix );
		}
		else
		{
			xWrite<u8>( 0x0f );
			xWrite<u16>( opcode );
		}
	}
	else
	{
		if( prefix != 0 )
		{
			xWrite<u16>( 0x0f00 | prefix );
			xWrite<u8>( opcode );
		}
		else
		{
			// No prefix: the 0x0f escape and the single opcode byte go out as one 16-bit write.
			xWrite<u16>( (opcode<<8) | 0x0f );
		}
	}
}
|
||||
|
||||
// [SSE-3]
|
||||
const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
|
||||
// [SSE-3]
|
||||
const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
|
||||
|
||||
const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS;
|
||||
const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
|
||||
const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD;
|
||||
const MovapsImplAll< 0x66, 0x10, 0x11 > xMOVUPD;
|
||||
|
||||
#ifdef ALWAYS_USE_MOVAPS
|
||||
const MovapsImplAll< 0x66, 0x6f, 0x7f > xMOVDQA;
|
||||
const MovapsImplAll< 0xf3, 0x6f, 0x7f > xMOVDQU;
|
||||
#else
|
||||
const MovapsImplAll< 0, 0x28, 0x29 > xMOVDQA;
|
||||
const MovapsImplAll< 0, 0x10, 0x11 > xMOVDQU;
|
||||
#endif
|
||||
|
||||
const MovhlImplAll<0x16> xMOVH;
|
||||
const MovhlImplAll<0x12> xMOVL;
|
||||
const MovhlImpl_RtoR<0x16> xMOVLH;
|
||||
const MovhlImpl_RtoR<0x12> xMOVHL;
|
||||
|
||||
const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
|
||||
const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
|
||||
const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
|
||||
const SimdImpl_DestRegEither<0x66,0xef> xPXOR;
|
||||
|
||||
const SimdImpl_AndNot xANDN;
|
||||
|
||||
const SimdImpl_UcomI<0x66,0x2e> xUCOMI;
|
||||
const SimdImpl_rSqrt<0x53> xRCP;
|
||||
const SimdImpl_rSqrt<0x52> xRSQRT;
|
||||
const SimdImpl_Sqrt<0x51> xSQRT;
|
||||
|
||||
const SimdImpl_MinMax<0x5f> xMAX;
|
||||
const SimdImpl_MinMax<0x5d> xMIN;
|
||||
const SimdImpl_Shuffle<0xc6> xSHUF;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
const SimdImpl_Compare<SSE2_Equal> xCMPEQ;
|
||||
const SimdImpl_Compare<SSE2_Less> xCMPLT;
|
||||
const SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
|
||||
const SimdImpl_Compare<SSE2_Unordered> xCMPUNORD;
|
||||
const SimdImpl_Compare<SSE2_NotEqual> xCMPNE;
|
||||
const SimdImpl_Compare<SSE2_NotLess> xCMPNLT;
|
||||
const SimdImpl_Compare<SSE2_NotLessOrEqual> xCMPNLE;
|
||||
const SimdImpl_Compare<SSE2_Ordered> xCMPORD;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// SSE Conversion Operations, as looney as they are.
|
||||
//
|
||||
// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
|
||||
// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
|
||||
//
|
||||
const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD;
|
||||
const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS;
|
||||
|
||||
const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ;
|
||||
const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI;
|
||||
const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS;
|
||||
|
||||
const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD;
|
||||
const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS;
|
||||
|
||||
const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ;
|
||||
const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD;
|
||||
const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI;
|
||||
|
||||
const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI;
|
||||
const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS;
|
||||
// CVTSI2SD converts a 32-bit integer (register or m32) and writes the result to an XMM
// register, so the destination type is xRegisterSSE (as for xCVTSI2SS).
const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SD;
|
||||
const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS;
|
||||
|
||||
const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD;
|
||||
const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI;
|
||||
|
||||
const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ;
|
||||
const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI;
|
||||
const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ;
|
||||
const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI;
|
||||
|
||||
const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI;
|
||||
const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
const SimdImpl_Shift<0xd0, 2> xPSRL;
|
||||
const SimdImpl_Shift<0xf0, 6> xPSLL;
|
||||
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||
|
||||
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||
const SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
|
||||
const SimdImpl_PMinMax<0xde,0x3c> xPMAX;
|
||||
const SimdImpl_PMinMax<0xda,0x38> xPMIN;
|
||||
|
||||
const SimdImpl_PMul xPMUL;
|
||||
const SimdImpl_PCompare xPCMP;
|
||||
const SimdImpl_PShuffle xPSHUF;
|
||||
const SimdImpl_PUnpack xPUNPCK;
|
||||
const SimdImpl_Unpack xUNPCK;
|
||||
const SimdImpl_Pack xPACK;
|
||||
|
||||
const SimdImpl_PAbsolute xPABS;
|
||||
const SimdImpl_PSign xPSIGN;
|
||||
const SimdImpl_PInsert xPINSR;
|
||||
const SimdImpl_PExtract xPEXTR;
|
||||
const SimdImpl_PMultAdd xPMADD;
|
||||
const SimdImpl_HorizAdd xHADD;
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
||||
__emitinline void xEMMS()
|
||||
{
|
||||
xWrite<u16>( 0x770F );
|
||||
}
|
||||
|
||||
// Store Streaming SIMD Extension Control/Status to Mem32.
|
||||
__emitinline void xSTMXCSR( u32* dest )
|
||||
{
|
||||
SimdPrefix( 0, 0xae );
|
||||
xWriteDisp( 3, dest );
|
||||
}
|
||||
|
||||
// Load Streaming SIMD Extension Control/Status from Mem32.
|
||||
__emitinline void xLDMXCSR( const u32* src )
|
||||
{
|
||||
SimdPrefix( 0, 0xae );
|
||||
xWriteDisp( 2, src );
|
||||
}
|
||||
|
||||
|
||||
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, to, from ); }
|
||||
|
||||
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); }
|
||||
|
||||
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); }
|
||||
|
||||
// Moves lower quad of XMM to ptr64 (no bits are cleared)
|
||||
__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); }
|
||||
// Moves lower quad of XMM to ptr64 (no bits are cleared)
|
||||
__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); }
|
||||
|
||||
__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); }
|
||||
__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); }
|
||||
__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); }
|
||||
__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); }
|
||||
__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); }
|
||||
|
||||
// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ'
|
||||
__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); }
|
||||
|
||||
// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q'
|
||||
__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from )
|
||||
{
|
||||
// Manual implementation of this form of MOVQ, since its parameters are unique in a way
|
||||
// that breaks the template inference of writeXMMop();
|
||||
|
||||
SimdPrefix( 0xf2, 0xd6 );
|
||||
ModRM_Direct( to.Id, from.Id );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
||||
#define IMPLEMENT_xMOVS( ssd, prefix ) \
|
||||
__forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) writeXMMop( prefix, 0x10, to, from ); } \
|
||||
__forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { writeXMMop( prefix, 0x10, to, from ); } \
|
||||
__forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { writeXMMop( prefix, 0x10, to, from ); } \
|
||||
__forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } \
|
||||
__forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); }
|
||||
|
||||
IMPLEMENT_xMOVS( SS, 0xf3 )
|
||||
IMPLEMENT_xMOVS( SD, 0xf2 )
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Non-temporal movs: MOVNTDQA is the load form (register destination only); MOVNTDQ, MOVNTPD, MOVNTPS and MOVNTQ are the store forms (memory destination only).
|
||||
//
|
||||
|
||||
// Non-temporal load from a direct memory address into an XMM register.
__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from )
{
	// Prefix and opcode bytes packed into a single 32-bit write.
	const u32 opcodeBytes = 0x2A380f66;
	xWrite<u32>( opcodeBytes );
	xWriteDisp( to.Id, from );
}
|
||||
|
||||
// Non-temporal load from a ModSib-addressed memory operand into an XMM register.
__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from )
{
	// Prefix and opcode bytes packed into a single 32-bit write.
	const u32 opcodeBytes = 0x2A380f66;
	xWrite<u32>( opcodeBytes );
	EmitSibMagic( to.Id, from );
}
|
||||
|
||||
// Non-temporal store of an XMM register to a direct memory address.
__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from )
{
	writeXMMop( 0x66, 0xe7, from, to );
}
|
||||
// Non-temporal store of an XMM register to a ModSib-addressed memory operand.
// NOTE(review): this emits the same 0x66 / 0xe7 encoding as the xMOVNTDQ( void*, ... ) store
// overload, so the name is presumably meant to be xMOVNTDQ rather than xMOVNTDQA -- confirm
// against the header declaration and callers before renaming.
__forceinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); }
|
||||
|
||||
// Non-temporal MOVNTPD stores (prefix 0x66, opcode 0x2b): direct-address and ModSib forms.
__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from )
{
	writeXMMop( 0x66, 0x2b, from, to );
}

__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from )
{
	writeXMMop( 0x66, 0x2b, from, to );
}
|
||||
// Non-temporal MOVNTPS stores (opcode 0x2b, no prefix): direct-address and ModSib forms.
__forceinline void xMOVNTPS( void* to, const xRegisterSSE& from )
{
	writeXMMop( 0x2b, from, to );
}

__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from )
{
	writeXMMop( 0x2b, from, to );
}
|
||||
|
||||
// Non-temporal MOVNTQ stores of an MMX register (opcode 0xe7): direct-address and ModSib forms.
__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from )
{
	writeXMMop( 0xe7, from, to );
}

__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from )
{
	writeXMMop( 0xe7, from, to );
}
|
||||
|
||||
// MOVMSKPS: opcode 0x50 with no prefix, XMM source into a 32-bit general purpose register.
__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from)
{
	writeXMMop( 0x50, to, from );
}

// MOVMSKPD: same opcode with the 0x66 prefix.  Unlike the PS form, this passes `true` as
// the trailing writeXMMop argument.
__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from)
{
	writeXMMop( 0x66, 0x50, to, from, true );
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// INSERTPS / EXTRACTPS [SSE4.1 only!]
|
||||
//
|
||||
// [TODO] these might be served better as classes, especially if other instructions use
|
||||
// the M32,sse,imm form (I forget offhand if any do).
|
||||
|
||||
|
||||
// [SSE-4.1] Insert a single-precision floating-point value from src into a specified
|
||||
// location in dest, and selectively zero out the data elements in dest according to
|
||||
// the mask field in the immediate byte. The source operand can be a memory location
|
||||
// (32 bits) or an XMM register (lower 32 bits used).
|
||||
//
|
||||
// Imm8 provides three fields:
|
||||
// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if
|
||||
// the source is a memory operand.
|
||||
// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest.
|
||||
// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written
|
||||
// with 0.0 if set to 1.
|
||||
//
|
||||
__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 )
|
||||
{
|
||||
writeXMMop( 0x66, 0x213a, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
// [SSE-4.1] INSERTPS with a 32-bit source at a direct memory address.  imm8 is written
// after the operands.
__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 )
{
	writeXMMop( 0x66, 0x213a, to, from );
	xWrite<u8>( imm8 );
}
|
||||
|
||||
// [SSE-4.1] INSERTPS with a 32-bit source addressed through a strictly-typed ModSib operand.
// imm8 is written after the operands.
__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 )
{
	writeXMMop( 0x66, 0x213a, to, from );
	xWrite<u8>( imm8 );
}
|
||||
|
||||
// [SSE-4.1] Extract a single-precision floating-point value from src at an offset
|
||||
// determined by imm8[1-0]*32. The extracted single precision floating-point value
|
||||
// is stored into the low 32-bits of dest (or at a 32-bit memory pointer).
|
||||
//
|
||||
__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 )
|
||||
{
|
||||
writeXMMop( 0x66, 0x173a, to, from, true );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
// [SSE-4.1] EXTRACTPS storing the selected float to the 32-bit location at dest.  imm8 is
// written after the operands.
__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 )
{
	writeXMMop( 0x66, 0x173a, from, dest, true );
	xWrite<u8>( imm8 );
}
|
||||
|
||||
// [SSE-4.1] EXTRACTPS storing the selected float to a 32-bit location addressed through a
// strictly-typed ModSib operand.  imm8 is written after the operands.
__emitinline void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 )
{
	writeXMMop( 0x66, 0x173a, from, dest, true );
	xWrite<u8>( imm8 );
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -86,16 +86,16 @@ namespace x86Emitter
|
|||
extern const Internal::DwordShiftImplAll<false> xSHLD;
|
||||
extern const Internal::DwordShiftImplAll<true> xSHRD;
|
||||
|
||||
extern const Internal::Group8ImplAll<Internal::G8Type_BT> xBT;
|
||||
extern const Internal::Group8ImplAll<Internal::G8Type_BTR> xBTR;
|
||||
extern const Internal::Group8ImplAll<Internal::G8Type_BTS> xBTS;
|
||||
extern const Internal::Group8ImplAll<Internal::G8Type_BTC> xBTC;
|
||||
extern const Internal::Group8Impl<Internal::G8Type_BT> xBT;
|
||||
extern const Internal::Group8Impl<Internal::G8Type_BTR> xBTR;
|
||||
extern const Internal::Group8Impl<Internal::G8Type_BTS> xBTS;
|
||||
extern const Internal::Group8Impl<Internal::G8Type_BTC> xBTC;
|
||||
|
||||
extern const Internal::JmpCallImplAll<true> xJMP;
|
||||
extern const Internal::JmpCallImplAll<false> xCALL;
|
||||
|
||||
extern const Internal::BitScanImplAll<false> xBSF;
|
||||
extern const Internal::BitScanImplAll<true> xBSR;
|
||||
extern const Internal::BitScanImpl<0xbc> xBSF;
|
||||
extern const Internal::BitScanImpl<0xbd> xBSR;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
extern const Internal::CMovImplGeneric xCMOV;
|
||||
|
@ -299,95 +299,28 @@ namespace x86Emitter
|
|||
typedef xForwardJPO<s8> xForwardJPO8;
|
||||
typedef xForwardJPO<s32> xForwardJPO32;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
|
||||
//
|
||||
// Notes:
|
||||
// * Some of the functions have been renamed to more clearly reflect what they actually
|
||||
// do. Namely we've affixed "ZX" to several MOVs that take a register as a destination
|
||||
// since that's what they do (MOVD clears upper 32/96 bits, etc).
|
||||
//
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// MOVD has valid forms for MMX and XMM registers.
|
||||
//
|
||||
template< typename T >
|
||||
__emitinline void xMOVDZX( const xRegisterSIMD<T>& to, const xRegister32& from )
|
||||
{
|
||||
Internal::writeXMMop( 0x66, 0x6e, to, from );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__emitinline void xMOVDZX( const xRegisterSIMD<T>& to, const void* src )
|
||||
{
|
||||
Internal::writeXMMop( 0x66, 0x6e, to, src );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
void xMOVDZX( const xRegisterSIMD<T>& to, const ModSibBase& src )
|
||||
{
|
||||
Internal::writeXMMop( 0x66, 0x6e, to, src );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__emitinline void xMOVD( const xRegister32& to, const xRegisterSIMD<T>& from )
|
||||
{
|
||||
Internal::writeXMMop( 0x66, 0x7e, from, to );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
__emitinline void xMOVD( void* dest, const xRegisterSIMD<T>& from )
|
||||
{
|
||||
Internal::writeXMMop( 0x66, 0x7e, from, dest );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
void xMOVD( const ModSibBase& dest, const xRegisterSIMD<T>& from )
|
||||
{
|
||||
Internal::writeXMMop( 0x66, 0x7e, from, dest );
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// xMASKMOV:
|
||||
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.
|
||||
// The default memory location is specified by DS:EDI. The most significant bit in each byte
|
||||
// of the mask operand determines whether the corresponding byte in the source operand is
|
||||
// written to the corresponding byte location in memory.
|
||||
|
||||
template< typename T >
|
||||
static __forceinline void xMASKMOV( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); }
|
||||
|
||||
// xPMOVMSKB:
|
||||
// Creates a mask made up of the most significant bit of each byte of the source
|
||||
// operand and stores the result in the low byte or word of the destination operand.
|
||||
// Upper bits of the destination are cleared to zero.
|
||||
//
|
||||
// When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on
|
||||
// 128-bit (SSE) source, the byte mask is 16-bits.
|
||||
//
|
||||
template< typename T >
|
||||
static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD<T>& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); }
|
||||
|
||||
// [sSSE-3] Concatenates dest and source operands into an intermediate composite,
|
||||
// shifts the composite at byte granularity to the right by a constant immediate,
|
||||
// and extracts the right-aligned result into the destination.
|
||||
//
|
||||
template< typename T >
|
||||
static __forceinline void xPALIGNR( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from, u8 imm8 )
|
||||
{
|
||||
Internal::writeXMMop( 0x66, 0x0f3a, to, from );
|
||||
xWrite<u8>( imm8 );
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern void xEMMS();
|
||||
extern void xSTMXCSR( u32* dest );
|
||||
extern void xLDMXCSR( const u32* src );
|
||||
|
||||
extern void xMOVDZX( const xRegisterSSE& to, const xRegister32& from );
|
||||
extern void xMOVDZX( const xRegisterSSE& to, const void* src );
|
||||
extern void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src );
|
||||
|
||||
extern void xMOVDZX( const xRegisterMMX& to, const xRegister32& from );
|
||||
extern void xMOVDZX( const xRegisterMMX& to, const void* src );
|
||||
extern void xMOVDZX( const xRegisterMMX& to, const ModSibBase& src );
|
||||
|
||||
extern void xMOVD( const xRegister32& to, const xRegisterSSE& from );
|
||||
extern void xMOVD( void* dest, const xRegisterSSE& from );
|
||||
extern void xMOVD( const ModSibBase& dest, const xRegisterSSE& from );
|
||||
|
||||
extern void xMOVD( const xRegister32& to, const xRegisterMMX& from );
|
||||
extern void xMOVD( void* dest, const xRegisterMMX& from );
|
||||
extern void xMOVD( const ModSibBase& dest, const xRegisterMMX& from );
|
||||
|
||||
extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from );
|
||||
extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from );
|
||||
extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from );
|
||||
|
@ -430,31 +363,28 @@ namespace x86Emitter
|
|||
extern void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from );
|
||||
extern void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from );
|
||||
|
||||
extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
|
||||
extern void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 );
|
||||
extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 );
|
||||
|
||||
extern void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 );
|
||||
extern void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 );
|
||||
extern void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 );
|
||||
extern void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from );
|
||||
extern void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from );
|
||||
extern void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from );
|
||||
extern void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from );
|
||||
extern void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
|
||||
extern void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 );
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
|
||||
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
|
||||
|
||||
extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS;
|
||||
extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS;
|
||||
|
||||
extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> xMOVAPD;
|
||||
extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> xMOVUPD;
|
||||
extern const Internal::SimdImpl_MoveSSE<0x00,true> xMOVAPS;
|
||||
extern const Internal::SimdImpl_MoveSSE<0x00,false> xMOVUPS;
|
||||
|
||||
#ifdef ALWAYS_USE_MOVAPS
|
||||
extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> xMOVDQA;
|
||||
extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> xMOVDQU;
|
||||
extern const Internal::SimdImpl_MoveSSE<0,true> xMOVDQA;
|
||||
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVDQU;
|
||||
extern const Internal::SimdImpl_MoveSSE<0,true> xMOVAPD;
|
||||
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVUPD;
|
||||
#else
|
||||
extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVDQA;
|
||||
extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVDQU;
|
||||
extern const Internal::SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA;
|
||||
extern const Internal::SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU;
|
||||
extern const Internal::SimdImpl_MoveSSE<0x66,true> xMOVAPD;
|
||||
extern const Internal::SimdImpl_MoveSSE<0x66,false> xMOVUPD;
|
||||
#endif
|
||||
|
||||
extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH;
|
||||
|
@ -463,6 +393,17 @@ namespace x86Emitter
|
|||
extern const Internal::MovhlImplAll<0x16> xMOVH;
|
||||
extern const Internal::MovhlImplAll<0x12> xMOVL;
|
||||
|
||||
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
|
||||
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
|
||||
|
||||
extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
|
||||
extern void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 );
|
||||
extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 );
|
||||
|
||||
extern void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 );
|
||||
extern void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 );
|
||||
extern void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 );
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND;
|
||||
|
@ -483,6 +424,8 @@ namespace x86Emitter
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST;
|
||||
|
||||
extern const Internal::SimdImpl_Compare<SSE2_Equal> xCMPEQ;
|
||||
extern const Internal::SimdImpl_Compare<SSE2_Less> xCMPLT;
|
||||
extern const Internal::SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
|
||||
|
@ -527,8 +470,8 @@ namespace x86Emitter
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
|
||||
extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL;
|
||||
extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
|
||||
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||
|
||||
extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||
|
@ -550,5 +493,12 @@ namespace x86Emitter
|
|||
extern const Internal::SimdImpl_PMultAdd xPMADD;
|
||||
extern const Internal::SimdImpl_HorizAdd xHADD;
|
||||
|
||||
extern const Internal::SimdImpl_Blend xBLEND;
|
||||
extern const Internal::SimdImpl_DotProduct xDP;
|
||||
extern const Internal::SimdImpl_Round xROUND;
|
||||
|
||||
extern const Internal::SimdImpl_PMove<true> xPMOVSX;
|
||||
extern const Internal::SimdImpl_PMove<false> xPMOVZX;
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -1,124 +0,0 @@
|
|||
/* Pcsx2 - Pc Ps2 Emulator
|
||||
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#include "PrecompiledHeader.h"
|
||||
#include "ix86_legacy_internal.h"
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MMX instructions
|
||||
//
|
||||
// note: r64 = mm
|
||||
//------------------------------------------------------------------
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
// Legacy MOVQ entry points for MMX registers.  Each one wraps its raw register indexes /
// addresses in the new emitter's operand types and forwards to the matching xMOVQ overload.
emitterT void MOVQMtoR( x86MMXRegType to, uptr from )
{
	xMOVQ( xRegisterMMX(to), (void*)from );
}

emitterT void MOVQRtoM( uptr to, x86MMXRegType from )
{
	xMOVQ( (void*)to, xRegisterMMX(from) );
}

emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from )
{
	xMOVQ( xRegisterMMX(to), xRegisterMMX(from) );
}

emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset )
{
	xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] );
}

emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset )
{
	xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) );
}
|
||||
|
||||
// Legacy MOVD entry points for MMX registers.  Loads into an MMX register forward to
// xMOVDZX; moves out of an MMX register forward to xMOVD.
emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from )
{
	xMOVDZX( xRegisterMMX(to), (void*)from );
}

emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from )
{
	xMOVD( (void*)to, xRegisterMMX(from) );
}

emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from )
{
	xMOVDZX( xRegisterMMX(to), xRegister32(from) );
}

emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset )
{
	xMOVDZX( xRegisterMMX(to), ptr[xAddressReg(from)+offset] );
}

emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from )
{
	xMOVD( xRegister32(to), xRegisterMMX(from) );
}

emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset )
{
	xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) );
}
|
||||
|
||||
// Legacy MMX wrappers that forward to xPMOVMSKB and xMASKMOV respectively.
emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from)
{
	xPMOVMSKB( xRegister32(to), xRegisterMMX(from) );
}

emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from)
{
	xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) );
}
|
||||
|
||||
// Generates the four legacy wrappers (MMX reg/reg, MMX reg/mem, SSE2 reg/reg, SSE2 reg/mem)
// for a packed logic op, each forwarding to xP<mod>.
#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \
	emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) \
	{ \
		xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); \
	} \
	emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) \
	{ \
		xP##mod( xRegisterMMX(to), (void*)from ); \
	} \
	emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) \
	{ \
		xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); \
	} \
	emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) \
	{ \
		xP##mod( xRegisterSSE(to), (void*)from ); \
	}
|
||||
|
||||
// Generates the four legacy wrappers (MMX reg/reg, MMX reg/mem, SSE2 reg/reg, SSE2 reg/mem)
// for one sub-form of a packed op, each forwarding to xP<mod>.<sub>.
#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \
	emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); \
	} \
	emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), (void*)from ); \
	} \
	emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); \
	} \
	emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), (void*)from ); \
	}
|
||||
|
||||
// Generates the six legacy wrappers for one sub-form of a packed shift: register, memory
// and immediate shift-count variants, for both MMX and SSE2 destinations.  Each forwards
// to xP<mod>.<sub>.
#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \
	emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); \
	} \
	emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), (void*)from ); \
	} \
	emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), imm ); \
	} \
	emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); \
	} \
	emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), (void*)from ); \
	} \
	emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), imm ); \
	}
|
||||
|
||||
// Generates the Q, D and W legacy shift wrappers for xP<mod>, plus the SSE2-only DQ
// immediate form.  The DQ wrapper builds an xRegisterSSE from `to`, so the operand is typed
// x86SSERegType like every other SSE2_* wrapper (it was previously spelled x86MMXRegType).
#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \
	DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \
	DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \
	DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \
	emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86SSERegType to, u8 imm ) \
	{ \
		xP##mod.DQ( xRegisterSSE(to), imm ); \
	}
|
||||
|
||||
DEFINE_LEGACY_LOGIC_OPCODE( AND )
|
||||
DEFINE_LEGACY_LOGIC_OPCODE( ANDN )
|
||||
DEFINE_LEGACY_LOGIC_OPCODE( OR )
|
||||
DEFINE_LEGACY_LOGIC_OPCODE( XOR )
|
||||
|
||||
DEFINE_LEGACY_SHIFT_OPCODE( SLL )
|
||||
DEFINE_LEGACY_SHIFT_OPCODE( SRL )
|
||||
DEFINE_LEGACY_SHIFT_STUFF( SRA, D )
|
||||
DEFINE_LEGACY_SHIFT_STUFF( SRA, W )
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, B )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, W )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, D )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, Q )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, SB )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, SW )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, USB )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, USW )
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, B )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, W )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, D )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, Q )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, SB )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, SW )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, USB )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, USW )
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQB );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQW );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQD );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTB );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTW );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTD );
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW );
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD );
|
||||
|
||||
|
||||
// Legacy MMX PMULUDQ wrappers (memory and register source), forwarding to xPMUL.UDQ.
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from )
{
	xPMUL.UDQ( xRegisterMMX( to ), (void*)from );
}

emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
	xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) );
}
|
||||
|
||||
// Legacy MMX PSHUFW wrappers (register and memory source), forwarding to xPSHUF.W.
emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
{
	xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 );
}

emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
{
	xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 );
}
|
||||
|
||||
// Legacy MMX PINSRW wrapper, forwarding to xPINSR.W.  The source is a 32-bit general
// purpose register (it is wrapped in xRegister32), so it is typed x86IntRegType rather
// than the x86SSERegType the signature previously spelled.
emitterT void PINSRWRtoMMX( x86MMXRegType to, x86IntRegType from, u8 imm8 )
{
	xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 );
}
|
||||
|
||||
// Legacy name for xEMMS.
emitterT void EMMS()
{
	xEMMS();
}
|
|
@ -22,35 +22,109 @@
|
|||
|
||||
using namespace x86Emitter;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// MMX / SSE Mixed Bag
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// SSE instructions
|
||||
//------------------------------------------------------------------
|
||||
// Legacy MMX MOVQ wrappers: raw register indexes and addresses are converted to emitter
// operand types and handed to the corresponding xMOVQ overload.
emitterT void MOVQMtoR( x86MMXRegType to, uptr from )
{
	xMOVQ( xRegisterMMX(to), (void*)from );
}

emitterT void MOVQRtoM( uptr to, x86MMXRegType from )
{
	xMOVQ( (void*)to, xRegisterMMX(from) );
}

emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from )
{
	xMOVQ( xRegisterMMX(to), xRegisterMMX(from) );
}

emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset )
{
	xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] );
}

emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset )
{
	xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) );
}
|
||||
|
||||
// Old raw-encoding helper for a memory-source SSE op.  It expands to a single comma
// expression and uses `to` and `from` from the scope where it is expanded.
#define SSEMtoR( code, overb ) \
	assert( to < iREGCNT_XMM ), \
	RexR(0, to), \
	write16( code ), \
	ModRM( 0, to, DISP32 ), \
	write32( MEMADDR(from, 4 + overb) )
|
||||
// Legacy MMX MOVD wrappers.  Moves into an MMX register go through xMOVDZX; moves out of
// an MMX register go through xMOVD.
emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from )
{
	xMOVDZX( xRegisterMMX(to), (void*)from );
}

emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from )
{
	xMOVD( (void*)to, xRegisterMMX(from) );
}

emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from )
{
	xMOVDZX( xRegisterMMX(to), xRegister32(from) );
}

emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset )
{
	xMOVDZX( xRegisterMMX(to), ptr[xAddressReg(from)+offset] );
}

emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from )
{
	xMOVD( xRegister32(to), xRegisterMMX(from) );
}

emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset )
{
	xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) );
}
|
||||
|
||||
// Old raw-encoding helper for a register-to-register SSE op.  It expands to a single comma
// expression and uses `to` and `from` from the scope where it is expanded.
#define SSERtoR( code ) \
	assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
	RexRB(0, to, from), \
	write16( code ), \
	ModRM( 3, to, from )
|
||||
// Legacy MMX wrappers forwarding to xPMOVMSKB and xMASKMOV.
emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from)
{
	xPMOVMSKB( xRegister32(to), xRegisterMMX(from) );
}

emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from)
{
	xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) );
}
|
||||
|
||||
// SSEMtoR preceded by a 0x66 byte.
#define SSEMtoR66( code ) \
	write8( 0x66 ), \
	SSEMtoR( code, 0 )
|
||||
// Emits the legacy wrapper set for one packed logic op: MMX reg/reg, MMX reg/mem,
// SSE2 reg/reg and SSE2 reg/mem.  All of them forward to xP<mod>.
#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \
	emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) \
	{ \
		xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); \
	} \
	emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) \
	{ \
		xP##mod( xRegisterMMX(to), (void*)from ); \
	} \
	emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) \
	{ \
		xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); \
	} \
	emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) \
	{ \
		xP##mod( xRegisterSSE(to), (void*)from ); \
	}
|
||||
|
||||
// SSERtoM preceded by a 0x66 byte.
#define SSERtoM66( code ) \
	write8( 0x66 ), \
	SSERtoM( code, 0 )
|
||||
// Emits the legacy wrapper set for one sub-form of a packed op: MMX reg/reg, MMX reg/mem,
// SSE2 reg/reg and SSE2 reg/mem.  All of them forward to xP<mod>.<sub>.
#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \
	emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); \
	} \
	emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), (void*)from ); \
	} \
	emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); \
	} \
	emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), (void*)from ); \
	}
|
||||
|
||||
// SSERtoR preceded by a 0x66 byte.
#define SSERtoR66( code ) \
	write8( 0x66 ), \
	SSERtoR( code )
|
||||
// Emits the legacy wrapper set for one sub-form of a packed shift: register, memory and
// immediate shift-count variants for MMX and for SSE2.  All forward to xP<mod>.<sub>.
#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \
	emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); \
	} \
	emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), (void*)from ); \
	} \
	emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) \
	{ \
		xP##mod.sub( xRegisterMMX(to), imm ); \
	} \
	emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); \
	} \
	emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), (void*)from ); \
	} \
	emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) \
	{ \
		xP##mod.sub( xRegisterSSE(to), imm ); \
	}
|
||||
|
||||
// Generates the Q, D and W legacy shift wrappers for xP<mod>, plus the SSE2-only DQ
// immediate form.  The DQ wrapper builds an xRegisterSSE from `to`, so the operand is typed
// x86SSERegType like every other SSE2_* wrapper (it was previously spelled x86MMXRegType).
#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \
	DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \
	DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \
	DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \
	emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86SSERegType to, u8 imm ) \
	{ \
		xP##mod.DQ( xRegisterSSE(to), imm ); \
	}
|
||||
|
||||
DEFINE_LEGACY_LOGIC_OPCODE( AND )
|
||||
DEFINE_LEGACY_LOGIC_OPCODE( ANDN )
|
||||
DEFINE_LEGACY_LOGIC_OPCODE( OR )
|
||||
DEFINE_LEGACY_LOGIC_OPCODE( XOR )
|
||||
|
||||
DEFINE_LEGACY_SHIFT_OPCODE( SLL )
|
||||
DEFINE_LEGACY_SHIFT_OPCODE( SRL )
|
||||
DEFINE_LEGACY_SHIFT_STUFF( SRA, D )
|
||||
DEFINE_LEGACY_SHIFT_STUFF( SRA, W )
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, B )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, W )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, D )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, Q )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, SB )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, SW )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, USB )
|
||||
DEFINE_LEGACY_ARITHMETIC( ADD, USW )
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, B )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, W )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, D )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, Q )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, SB )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, SW )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, USB )
|
||||
DEFINE_LEGACY_ARITHMETIC( SUB, USW )
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQB );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQW );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, EQD );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTB );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTW );
|
||||
DEFINE_LEGACY_ARITHMETIC( CMP, GTD );
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW );
|
||||
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD );
|
||||
DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD );
|
||||
|
||||
|
||||
// Legacy MMX wrappers for PMULUDQ with a memory or register source; both call xPMUL.UDQ.
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from )
{
	xPMUL.UDQ( xRegisterMMX( to ), (void*)from );
}

emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
	xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) );
}
|
||||
|
||||
// Legacy MMX wrappers for PSHUFW with a register or memory source; both call xPSHUF.W.
emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
{
	xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 );
}

emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
{
	xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 );
}
|
||||
|
||||
// Legacy MMX PINSRW wrapper, forwarding to xPINSR.W.  The source is a 32-bit general
// purpose register (it is wrapped in xRegister32), so it is typed x86IntRegType rather
// than the x86SSERegType the signature previously spelled.
emitterT void PINSRWRtoMMX( x86MMXRegType to, x86IntRegType from, u8 imm8 )
{
	xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 );
}
|
||||
|
||||
// Legacy name for xEMMS.
emitterT void EMMS()
{
	xEMMS();
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Begin SSE-Only Part!
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
#define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \
|
||||
emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \
|
||||
|
@ -290,73 +364,17 @@ emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 im
|
|||
emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xINSERTPS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
||||
emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { xEXTRACTPS( xRegister32(to), xRegisterSSE(from), imm8 ); }
|
||||
|
||||
emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xDP.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
||||
emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) { xDP.PS( xRegisterSSE(to), (void*)from, imm8 ); }
|
||||
|
||||
emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) { xBLEND.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
||||
emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xBLEND.VPS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) { xBLEND.VPS( xRegisterSSE(to), (void*)from ); }
|
||||
|
||||
emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMOVSX.DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||
|
||||
emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// SSE4.1
|
||||
|
||||
emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
|
||||
{
|
||||
write8(0x66);
|
||||
write24(0x403A0F);
|
||||
ModRM(3, to, from);
|
||||
write8(imm8);
|
||||
}
|
||||
|
||||
emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
|
||||
{
|
||||
write8(0x66);
|
||||
write24(0x403A0F);
|
||||
ModRM(0, to, DISP32);
|
||||
write32(MEMADDR(from, 4));
|
||||
write8(imm8);
|
||||
}
|
||||
|
||||
emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x0C3A0F);
|
||||
ModRM(3, to, from);
|
||||
write8(imm8);
|
||||
}
|
||||
|
||||
emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x14380F);
|
||||
ModRM(3, to, from);
|
||||
}
|
||||
|
||||
emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexR(0, to);
|
||||
write24(0x14380F);
|
||||
ModRM(0, to, DISP32);
|
||||
write32(MEMADDR(from, 4));
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x25380F);
|
||||
ModRM(3, to, from);
|
||||
}
|
||||
|
||||
emitterT void SSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
||||
{
|
||||
write8(0x66);
|
||||
RexRB(0, to, from);
|
||||
write24(0x35380F);
|
||||
ModRM(3, to, from);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions)
|
||||
|
|
|
@ -0,0 +1,388 @@
|
|||
/* Pcsx2 - Pc Ps2 Emulator
|
||||
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#include "PrecompiledHeader.h"
|
||||
|
||||
#include "System.h"
|
||||
#include "ix86_internal.h"
|
||||
|
||||
namespace x86Emitter {
|
||||
|
||||
using namespace Internal;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
|
||||
// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4
|
||||
// instructions). Any other lower value assumes the upper value is 0 and ignored.
|
||||
// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will
|
||||
// generate an assertion.
|
||||
//
|
||||
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
|
||||
{
|
||||
const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a);
|
||||
|
||||
// If the lower byte is not a valid prefix and the upper byte is non-zero it
|
||||
// means we made a mistake!
|
||||
if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 );
|
||||
|
||||
if( prefix != 0 )
|
||||
{
|
||||
if( is16BitOpcode )
|
||||
xWrite<u32>( (opcode<<16) | 0x0f00 | prefix );
|
||||
else
|
||||
{
|
||||
xWrite<u16>( 0x0f00 | prefix );
|
||||
xWrite<u8>( opcode );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( is16BitOpcode )
|
||||
{
|
||||
xWrite<u8>( 0x0f );
|
||||
xWrite<u16>( opcode );
|
||||
}
|
||||
else
|
||||
xWrite<u16>( (opcode<<8) | 0x0f );
|
||||
}
|
||||
}
|
||||
|
||||
// [SSE-3]
|
||||
const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
|
||||
// [SSE-3]
|
||||
const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
|
||||
|
||||
const SimdImpl_MoveSSE<0x00,true> xMOVAPS;
|
||||
|
||||
// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||
// which can be checked for alignment at runtime.
|
||||
const SimdImpl_MoveSSE<0x00,false> xMOVUPS;
|
||||
|
||||
#ifdef ALWAYS_USE_MOVAPS
|
||||
const SimdImpl_MoveSSE<0,true> xMOVDQA;
|
||||
const SimdImpl_MoveSSE<0,true> xMOVAPD;
|
||||
|
||||
// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||
// which can be checked for alignment at runtime.
|
||||
const SimdImpl_MoveSSE<0,false> xMOVDQU;
|
||||
const SimdImpl_MoveSSE<0,false> xMOVUPD;
|
||||
#else
|
||||
const SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA;
|
||||
const SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU;
|
||||
const SimdImpl_MoveSSE<0x66,true> xMOVAPD;
|
||||
const SimdImpl_MoveSSE<0x66,false> xMOVUPD;
|
||||
#endif
|
||||
|
||||
const MovhlImplAll<0x16> xMOVH;
|
||||
const MovhlImplAll<0x12> xMOVL;
|
||||
const MovhlImpl_RtoR<0x16> xMOVLH;
|
||||
const MovhlImpl_RtoR<0x12> xMOVHL;
|
||||
|
||||
const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
|
||||
const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
|
||||
const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
|
||||
const SimdImpl_DestRegEither<0x66,0xef> xPXOR;
|
||||
|
||||
const SimdImpl_AndNot xANDN;
|
||||
|
||||
const SimdImpl_UcomI<0x66,0x2e> xUCOMI;
|
||||
const SimdImpl_rSqrt<0x53> xRCP;
|
||||
const SimdImpl_rSqrt<0x52> xRSQRT;
|
||||
const SimdImpl_Sqrt<0x51> xSQRT;
|
||||
|
||||
const SimdImpl_MinMax<0x5f> xMAX;
|
||||
const SimdImpl_MinMax<0x5d> xMIN;
|
||||
const SimdImpl_Shuffle<0xc6> xSHUF;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag
|
||||
// only if all bits in the result are 0. PTEST also sets the CF flag according
|
||||
// to the following condition: (xmm2/m128 AND NOT xmm1) == 0;
|
||||
// Defined here, like the other emitter objects in this file. The previous 'extern' made this
// line a declaration only, which provides no definition for xPTEST in this translation unit.
const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST;
|
||||
|
||||
const SimdImpl_Compare<SSE2_Equal> xCMPEQ;
|
||||
const SimdImpl_Compare<SSE2_Less> xCMPLT;
|
||||
const SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
|
||||
const SimdImpl_Compare<SSE2_Unordered> xCMPUNORD;
|
||||
const SimdImpl_Compare<SSE2_NotEqual> xCMPNE;
|
||||
const SimdImpl_Compare<SSE2_NotLess> xCMPNLT;
|
||||
const SimdImpl_Compare<SSE2_NotLessOrEqual> xCMPNLE;
|
||||
const SimdImpl_Compare<SSE2_Ordered> xCMPORD;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// SSE Conversion Operations, as looney as they are.
|
||||
//
|
||||
// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
|
||||
// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
|
||||
//
|
||||
const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD;
|
||||
const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS;
|
||||
|
||||
const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ;
|
||||
const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI;
|
||||
const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS;
|
||||
|
||||
const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD;
|
||||
const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS;
|
||||
|
||||
const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ;
|
||||
const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD;
|
||||
const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI;
|
||||
|
||||
const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI;
|
||||
const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS;
|
||||
// NOTE(review): CVTSI2SD (F2 0F 2A) writes an XMM register according to the Intel SDM, yet the
// destination type given here is xRegisterMMX (xCVTSI2SS uses xRegisterSSE). Presumably a typo;
// confirm against any matching extern declaration before changing the type.
const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD;
|
||||
const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS;
|
||||
|
||||
const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD;
|
||||
const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI;
|
||||
|
||||
const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ;
|
||||
const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI;
|
||||
const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ;
|
||||
const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI;
|
||||
|
||||
const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI;
|
||||
const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
const SimdImpl_Shift<0xd0, 2> xPSRL;
|
||||
const SimdImpl_Shift<0xf0, 6> xPSLL;
|
||||
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||
|
||||
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||
const SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
|
||||
const SimdImpl_PMinMax<0xde,0x3c> xPMAX;
|
||||
const SimdImpl_PMinMax<0xda,0x38> xPMIN;
|
||||
|
||||
const SimdImpl_PMul xPMUL;
|
||||
const SimdImpl_PCompare xPCMP;
|
||||
const SimdImpl_PShuffle xPSHUF;
|
||||
const SimdImpl_PUnpack xPUNPCK;
|
||||
const SimdImpl_Unpack xUNPCK;
|
||||
const SimdImpl_Pack xPACK;
|
||||
|
||||
const SimdImpl_PAbsolute xPABS;
|
||||
const SimdImpl_PSign xPSIGN;
|
||||
const SimdImpl_PInsert xPINSR;
|
||||
const SimdImpl_PExtract xPEXTR;
|
||||
const SimdImpl_PMultAdd xPMADD;
|
||||
const SimdImpl_HorizAdd xHADD;
|
||||
|
||||
const SimdImpl_Blend xBLEND;
|
||||
const SimdImpl_DotProduct xDP;
|
||||
const SimdImpl_Round xROUND;
|
||||
|
||||
const SimdImpl_PMove<true> xPMOVSX;
|
||||
const SimdImpl_PMove<false> xPMOVZX;
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
||||
__emitinline void xEMMS()
|
||||
{
|
||||
xWrite<u16>( 0x770F );
|
||||
}
|
||||
|
||||
// Store Streaming SIMD Extension Control/Status to Mem32.
|
||||
__emitinline void xSTMXCSR( u32* dest )
|
||||
{
|
||||
SimdPrefix( 0, 0xae );
|
||||
xWriteDisp( 3, dest );
|
||||
}
|
||||
|
||||
// Load Streaming SIMD Extension Control/Status from Mem32.
|
||||
__emitinline void xLDMXCSR( const u32* src )
|
||||
{
|
||||
SimdPrefix( 0, 0xae );
|
||||
xWriteDisp( 2, src );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
|
||||
//
|
||||
// Notes:
|
||||
// * Some of the functions have been renamed to more clearly reflect what they actually
|
||||
// do. Namely we've affixed "ZX" to several MOVs that take a register as a destination
|
||||
// since that's what they do (MOVD clears upper 32/96 bits, etc).
|
||||
//
|
||||
// * MOVD has valid forms for MMX and XMM registers.
|
||||
//
|
||||
|
||||
__forceinline void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ) { xOpWrite0F( 0x66, 0x6e, to, from ); }
|
||||
__forceinline void xMOVDZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0x66, 0x6e, to, src ); }
|
||||
__forceinline void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src ) { xOpWrite0F( 0x66, 0x6e, to, src ); }
|
||||
|
||||
__forceinline void xMOVDZX( const xRegisterMMX& to, const xRegister32& from ) { xOpWrite0F( 0x6e, to, from ); }
|
||||
__forceinline void xMOVDZX( const xRegisterMMX& to, const void* src ) { xOpWrite0F( 0x6e, to, src ); }
|
||||
__forceinline void xMOVDZX( const xRegisterMMX& to, const ModSibBase& src ) { xOpWrite0F( 0x6e, to, src ); }
|
||||
|
||||
__forceinline void xMOVD( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, to ); }
|
||||
__forceinline void xMOVD( void* dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); }
|
||||
__forceinline void xMOVD( const ModSibBase& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); }
|
||||
|
||||
__forceinline void xMOVD( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, to ); }
|
||||
__forceinline void xMOVD( void* dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); }
|
||||
__forceinline void xMOVD( const ModSibBase& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); }
|
||||
|
||||
|
||||
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0xf3, 0x7e, to, from ); }
|
||||
|
||||
// Moves 64 bits from memory to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); }
|
||||
|
||||
// Moves 64 bits from memory to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); }
|
||||
|
||||
// Moves lower quad of XMM to ptr64 (no bits are cleared)
|
||||
__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); }
|
||||
// Moves lower quad of XMM to ptr64 (no bits are cleared)
|
||||
__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); }
|
||||
|
||||
__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) xOpWrite0F( 0x6f, to, from ); }
|
||||
__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { xOpWrite0F( 0x6f, to, src ); }
|
||||
__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { xOpWrite0F( 0x6f, to, src ); }
|
||||
__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); }
|
||||
__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); }
|
||||
|
||||
// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ'
|
||||
__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf3, 0xd6, to, from ); }
|
||||
|
||||
// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q'
|
||||
__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from )
|
||||
{
|
||||
// Manual implementation of this form of MOVQ, since its parameters are unique in a way
|
||||
// that breaks the template inference of writeXMMop();
|
||||
|
||||
SimdPrefix( 0xf2, 0xd6 );
|
||||
ModRM_Direct( to.Id, from.Id );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
||||
#define IMPLEMENT_xMOVS( ssd, prefix ) \
|
||||
__forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) xOpWrite0F( prefix, 0x10, to, from ); } \
|
||||
__forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { xOpWrite0F( prefix, 0x10, to, from ); } \
|
||||
__forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { xOpWrite0F( prefix, 0x10, to, from ); } \
|
||||
__forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); } \
|
||||
__forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); }
|
||||
|
||||
IMPLEMENT_xMOVS( SS, 0xf3 )
|
||||
IMPLEMENT_xMOVS( SD, 0xf2 )
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Non-temporal movs: xMOVNTDQA is load-only (register target); xMOVNTDQ/PD/PS/Q are store-only (memory target).
|
||||
//
|
||||
|
||||
__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from )
|
||||
{
|
||||
xWrite<u32>( 0x2A380f66 );
|
||||
xWriteDisp( to.Id, from );
|
||||
}
|
||||
|
||||
__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from )
|
||||
{
|
||||
xWrite<u32>( 0x2A380f66 );
|
||||
EmitSibMagic( to.Id, from );
|
||||
}
|
||||
|
||||
__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); }
|
||||
// NOTE(review): this overload emits the 0x66/0xe7 store opcode, identical to xMOVNTDQ( void*, ... ),
// so the name is presumably a typo for xMOVNTDQ. Renaming it requires updating the declaration
// and any callers at the same time.
__forceinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); }
|
||||
|
||||
__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); }
|
||||
__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); }
|
||||
__forceinline void xMOVNTPS( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); }
|
||||
__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); }
|
||||
|
||||
__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); }
|
||||
__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); }
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x50, to, from ); }
|
||||
// MOVMSKPD is encoded as 66 0F 50 /r and carries no immediate byte, so only the prefix, opcode
// and the two registers are passed (same call shape as the SSE form of xPMOVMSKB).
__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x66, 0x50, to, from ); }
|
||||
|
||||
// xMASKMOV:
|
||||
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.
|
||||
// The default memory location is specified by DS:EDI. The most significant bit in each byte
|
||||
// of the mask operand determines whether the corresponding byte in the source operand is
|
||||
// written to the corresponding byte location in memory.
|
||||
__forceinline void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xf7, to, from ); }
|
||||
__forceinline void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf7, to, from ); }
|
||||
|
||||
// xPMOVMSKB:
|
||||
// Creates a mask made up of the most significant bit of each byte of the source
|
||||
// operand and stores the result in the low byte or word of the destination operand.
|
||||
// Upper bits of the destination are cleared to zero.
|
||||
//
|
||||
// When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on
|
||||
// 128-bit (SSE) source, the byte mask is 16-bits.
|
||||
//
|
||||
__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd7, to, from ); }
|
||||
__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0xd7, to, from ); }
|
||||
|
||||
// [sSSE-3] Concatenates dest and source operands into an intermediate composite,
|
||||
// shifts the composite at byte granularity to the right by a constant immediate,
|
||||
// and extracts the right-aligned result into the destination.
|
||||
//
|
||||
__forceinline void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x0f3a, to, from, imm8 ); }
|
||||
__forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); }
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// INSERTPS / EXTRACTPS [SSE4.1 only!]
|
||||
//
|
||||
// [TODO] these might be served better as classes, especially if other instructions use
|
||||
// the M32,sse,imm form (I forget offhand if any do).
|
||||
|
||||
|
||||
// [SSE-4.1] Insert a single-precision floating-point value from src into a specified
|
||||
// location in dest, and selectively zero out the data elements in dest according to
|
||||
// the mask field in the immediate byte. The source operand can be a memory location
|
||||
// (32 bits) or an XMM register (lower 32 bits used).
|
||||
//
|
||||
// Imm8 provides three fields:
|
||||
// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if
|
||||
// the source is a memory operand.
|
||||
// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest.
|
||||
// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written
|
||||
// with 0.0 if set to 1.
|
||||
//
|
||||
__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); }
|
||||
__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); }
|
||||
__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); }
|
||||
|
||||
// [SSE-4.1] Extract a single-precision floating-point value from src at an offset
|
||||
// determined by imm8[1-0]*32. The extracted single precision floating-point value
|
||||
// is stored into the low 32-bits of dest (or at a 32-bit memory pointer).
|
||||
//
|
||||
// Register-destination form. EXTRACTPS encodes its destination in ModRM.r/m and the XMM source
// in ModRM.reg, so the source register is passed first -- the same operand order used by the
// memory-destination overloads of xEXTRACTPS and by xMOVD when writing to a 32-bit register.
__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, from, to, imm8 ); }
|
||||
__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); }
|
||||
__emitinline void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); }
|
||||
|
||||
}
|
|
@ -48,4 +48,3 @@ extern void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from );
|
|||
extern void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
|
||||
extern void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from );
|
||||
extern void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
|
||||
extern void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
|
|
@ -252,28 +252,6 @@ namespace x86Emitter
|
|||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
template< typename OperandType >
|
||||
class xRegisterSIMD : public xRegister<OperandType>
|
||||
{
|
||||
public:
|
||||
static const xRegisterSIMD Empty; // defined as an empty/unused value (-1)
|
||||
|
||||
public:
|
||||
xRegisterSIMD(): xRegister<OperandType>() {}
|
||||
xRegisterSIMD( const xRegisterSIMD& src ) : xRegister<OperandType>( src.Id ) {}
|
||||
xRegisterSIMD( const xRegister<OperandType>& src ) : xRegister<OperandType>( src ) {}
|
||||
explicit xRegisterSIMD( int regId ) : xRegister<OperandType>( regId ) {}
|
||||
|
||||
xRegisterSIMD<OperandType>& operator=( const xRegisterSIMD<OperandType>& src )
|
||||
{
|
||||
xRegister<OperandType>::Id = src.Id;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which
|
||||
// means it finds undeclared variables when MSVC does not (Since MSVC compiles templates
|
||||
|
@ -282,8 +260,8 @@ namespace x86Emitter
|
|||
// all about the the templated code in haphazard fashion. Yay.. >_<
|
||||
//
|
||||
|
||||
typedef xRegisterSIMD<u128> xRegisterSSE;
|
||||
typedef xRegisterSIMD<u64> xRegisterMMX;
|
||||
typedef xRegister<u128> xRegisterSSE;
|
||||
typedef xRegister<u64> xRegisterMMX;
|
||||
typedef xRegister<u32> xRegister32;
|
||||
typedef xRegister<u16> xRegister16;
|
||||
typedef xRegister<u8> xRegister8;
|
||||
|
|
Loading…
Reference in New Issue