mirror of https://github.com/PCSX2/pcsx2.git
Emitter Rewrite, Part 3 of 5: Finished all SIMD instructions, except those embedded into base instruction groups (CMPSS/SD, DIVSS/SD, etc).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2135 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
04c86ea6d3
commit
fcdb429bb9
|
@ -332,14 +332,6 @@
|
||||||
<Filter
|
<Filter
|
||||||
Name="Implement_Simd"
|
Name="Implement_Simd"
|
||||||
>
|
>
|
||||||
<File
|
|
||||||
RelativePath="..\..\include\x86emitter\implement\xmm\moremovs.h"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
|
||||||
RelativePath="..\..\include\x86emitter\implement\xmm\shufflepack.h"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\include\x86emitter\implement\simd_arithmetic.h"
|
RelativePath="..\..\include\x86emitter\implement\simd_arithmetic.h"
|
||||||
>
|
>
|
||||||
|
@ -352,6 +344,14 @@
|
||||||
RelativePath="..\..\include\x86emitter\implement\simd_helpers.h"
|
RelativePath="..\..\include\x86emitter\implement\simd_helpers.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\include\x86emitter\implement\simd_moremovs.h"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\include\x86emitter\implement\simd_shufflepack.h"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\include\x86emitter\implement\simd_templated_helpers.h"
|
RelativePath="..\..\include\x86emitter\implement\simd_templated_helpers.h"
|
||||||
>
|
>
|
||||||
|
|
|
@ -23,7 +23,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#define OpWriteSSE( pre, op ) xOpWrite0F( pre, op, to, from )
|
#define OpWriteSSE( pre, op ) xOpWrite0F( pre, op, to, from )
|
||||||
#define OpWriteMMX( op ) xOpWrite0F( op, to, from )
|
|
||||||
|
|
||||||
extern void SimdPrefix( u8 prefix, u16 opcode );
|
extern void SimdPrefix( u8 prefix, u16 opcode );
|
||||||
extern void EmitSibMagic( uint regfield, const void* address );
|
extern void EmitSibMagic( uint regfield, const void* address );
|
||||||
|
|
|
@ -18,10 +18,11 @@
|
||||||
// Header: ix86_impl_movs.h -- covers mov, cmov, movsx/movzx, and SETcc (which shares
|
// Header: ix86_impl_movs.h -- covers mov, cmov, movsx/movzx, and SETcc (which shares
|
||||||
// with cmov many similarities).
|
// with cmov many similarities).
|
||||||
|
|
||||||
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
// --------------------------------------------------------------------------------------
|
||||||
// MOV instruction Implementation
|
// MovImplAll
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// MOV instruction Implementation, plus many SIMD sub-mov variants.
|
||||||
|
|
||||||
class MovImplAll
|
class MovImplAll
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1,174 @@
|
||||||
|
/* PCSX2 - PS2 Emulator for PCs
|
||||||
|
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||||
|
*
|
||||||
|
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||||
|
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||||
|
* ation, either version 3 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||||
|
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE. See the GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace x86Emitter {
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// xImplSimd_MovHL
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// Moves to/from high/low portions of an xmm register.
|
||||||
|
// These instructions cannot be used in reg/reg form.
|
||||||
|
//
|
||||||
|
struct xImplSimd_MovHL
|
||||||
|
{
|
||||||
|
u16 Opcode;
|
||||||
|
|
||||||
|
void PS( const xRegisterSSE& to, const ModSibBase& from ) const;
|
||||||
|
void PS( const ModSibBase& to, const xRegisterSSE& from ) const;
|
||||||
|
|
||||||
|
void PD( const xRegisterSSE& to, const ModSibBase& from ) const;
|
||||||
|
void PD( const ModSibBase& to, const xRegisterSSE& from ) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// xImplSimd_MovHL_RtoR
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
|
||||||
|
// do something kinda different! Fun!
|
||||||
|
//
|
||||||
|
struct xImplSimd_MovHL_RtoR
|
||||||
|
{
|
||||||
|
u16 Opcode;
|
||||||
|
|
||||||
|
void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// xImplSimd_MoveSSE
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
|
||||||
|
//
|
||||||
|
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||||
|
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||||
|
// which can be checked for alignment at runtime.
|
||||||
|
//
|
||||||
|
struct xImplSimd_MoveSSE
|
||||||
|
{
|
||||||
|
u8 Prefix;
|
||||||
|
bool isAligned;
|
||||||
|
|
||||||
|
void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
|
||||||
|
void operator()( const ModSibBase& to, const xRegisterSSE& from ) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// xImplSimd_MoveDQ
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// Implementations for MOVDQA / MOVDQU
|
||||||
|
//
|
||||||
|
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||||
|
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||||
|
// which can be checked for alignment at runtime.
|
||||||
|
|
||||||
|
struct xImplSimd_MoveDQ
|
||||||
|
{
|
||||||
|
u8 Prefix;
|
||||||
|
bool isAligned;
|
||||||
|
|
||||||
|
void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
|
||||||
|
void operator()( const ModSibBase& to, const xRegisterSSE& from ) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// xImplSimd_Blend
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// Blend - Conditional copying of values in src into dest.
|
||||||
|
//
|
||||||
|
struct xImplSimd_Blend
|
||||||
|
{
|
||||||
|
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
|
||||||
|
// mask bits in the immediate operand (bits [3:0]). Each mask bit corresponds to a
|
||||||
|
// dword element in a 128-bit operand.
|
||||||
|
//
|
||||||
|
// If a mask bit is 1, then the corresponding dword in the source operand is copied
|
||||||
|
// to dest, else the dword element in dest is left unchanged.
|
||||||
|
//
|
||||||
|
xImplSimd_DestRegImmSSE PS;
|
||||||
|
|
||||||
|
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
|
||||||
|
// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
|
||||||
|
// quadword element in a 128-bit operand.
|
||||||
|
//
|
||||||
|
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
|
||||||
|
// to dest, else the quadword element in dest is left unchanged.
|
||||||
|
//
|
||||||
|
xImplSimd_DestRegImmSSE PD;
|
||||||
|
|
||||||
|
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
|
||||||
|
// mask in XMM0 (yes, the fixed register), taken from the most significant bit of each dword. Each mask bit corresponds
|
||||||
|
// to a dword element in the 128-bit operand.
|
||||||
|
//
|
||||||
|
// If a mask bit is 1, then the corresponding dword in the source operand is copied
|
||||||
|
// to dest, else the dword element in dest is left unchanged.
|
||||||
|
//
|
||||||
|
xImplSimd_DestRegSSE VPS;
|
||||||
|
|
||||||
|
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
|
||||||
|
// mask in XMM0 (yes, the fixed register), taken from the most significant bit of each quadword. Each mask bit corresponds
|
||||||
|
// to a quadword element in the 128-bit operand.
|
||||||
|
//
|
||||||
|
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
|
||||||
|
// to dest, else the quadword element in dest is left unchanged.
|
||||||
|
//
|
||||||
|
xImplSimd_DestRegSSE VPD;
|
||||||
|
};
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// xImplSimd_PMove
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// Packed Move with Sign or Zero extension.
|
||||||
|
//
|
||||||
|
struct xImplSimd_PMove
|
||||||
|
{
|
||||||
|
u16 OpcodeBase;
|
||||||
|
|
||||||
|
// [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers
|
||||||
|
// and store them in dest.
|
||||||
|
void BW( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void BW( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const;
|
||||||
|
|
||||||
|
// [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers
|
||||||
|
// and store them in dest.
|
||||||
|
void BD( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void BD( const xRegisterSSE& to, const ModSibStrict<u32>& from ) const;
|
||||||
|
|
||||||
|
// [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers
|
||||||
|
// and store them in dest.
|
||||||
|
void BQ( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void BQ( const xRegisterSSE& to, const ModSibStrict<u16>& from ) const;
|
||||||
|
|
||||||
|
// [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers
|
||||||
|
// and store them in dest.
|
||||||
|
void WD( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void WD( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const;
|
||||||
|
|
||||||
|
// [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers
|
||||||
|
// and store them in dest.
|
||||||
|
void WQ( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void WQ( const xRegisterSSE& to, const ModSibStrict<u32>& from ) const;
|
||||||
|
|
||||||
|
// [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers
|
||||||
|
// and store them in dest.
|
||||||
|
void DQ( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||||
|
void DQ( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -15,50 +15,44 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
namespace x86Emitter {
|
||||||
//
|
|
||||||
template< u16 OpcodeSSE >
|
// --------------------------------------------------------------------------------------
|
||||||
class SimdImpl_Shuffle
|
// xImplSimd_Shuffle
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
struct xImplSimd_Shuffle
|
||||||
{
|
{
|
||||||
protected:
|
inline void _selector_assertion_check( u8 selector ) const;
|
||||||
template< u8 Prefix > struct Woot
|
|
||||||
{
|
void PS( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const;
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite8( cmptype ); }
|
void PS( const xRegisterSSE& to, const ModSibBase& from, u8 selector ) const;
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite8( cmptype ); }
|
|
||||||
Woot() {}
|
void PD( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const;
|
||||||
|
void PD( const xRegisterSSE& to, const ModSibBase& from, u8 selector ) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
// --------------------------------------------------------------------------------------
|
||||||
const Woot<0x00> PS;
|
// xImplSimd_PShuffle
|
||||||
const Woot<0x66> PD;
|
// --------------------------------------------------------------------------------------
|
||||||
|
struct xImplSimd_PShuffle
|
||||||
SimdImpl_Shuffle() {} //GCWhat?
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
class SimdImpl_PShuffle
|
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
SimdImpl_PShuffle() {}
|
|
||||||
|
|
||||||
// Copies words from src and inserts them into dest at word locations selected with
|
// Copies words from src and inserts them into dest at word locations selected with
|
||||||
// the order operand (8 bit immediate).
|
// the order operand (8 bit immediate).
|
||||||
const SimdImpl_DestRegImmMMX<0x00,0x70> W;
|
const xImplSimd_DestRegImmMMX W;
|
||||||
|
|
||||||
// Copies doublewords from src and inserts them into dest at dword locations selected
|
// Copies doublewords from src and inserts them into dest at dword locations selected
|
||||||
// with the order operand (8 bit immediate).
|
// with the order operand (8 bit immediate).
|
||||||
const SimdImpl_DestRegImmSSE<0x66,0x70> D;
|
const xImplSimd_DestRegImmSSE D;
|
||||||
|
|
||||||
// Copies words from the low quadword of src and inserts them into the low quadword
|
// Copies words from the low quadword of src and inserts them into the low quadword
|
||||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||||
// The high quadword of src is copied to the high quadword of dest.
|
// The high quadword of src is copied to the high quadword of dest.
|
||||||
const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;
|
const xImplSimd_DestRegImmSSE LW;
|
||||||
|
|
||||||
// Copies words from the high quadword of src and inserts them into the high quadword
|
// Copies words from the high quadword of src and inserts them into the high quadword
|
||||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||||
// The low quadword of src is copied to the low quadword of dest.
|
// The low quadword of src is copied to the low quadword of dest.
|
||||||
const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;
|
const xImplSimd_DestRegImmSSE HW;
|
||||||
|
|
||||||
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
||||||
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
||||||
|
@ -68,68 +62,62 @@ public:
|
||||||
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
||||||
//
|
//
|
||||||
// Operands can be MMX or XMM registers.
|
// Operands can be MMX or XMM registers.
|
||||||
const SimdImpl_DestRegEither<0x66,0x0038> B;
|
const xImplSimd_DestRegEither B;
|
||||||
};
|
};
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
// --------------------------------------------------------------------------------------
|
||||||
//
|
// SimdImpl_PUnpack
|
||||||
class SimdImpl_PUnpack
|
// --------------------------------------------------------------------------------------
|
||||||
|
struct SimdImpl_PUnpack
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
SimdImpl_PUnpack() {}
|
|
||||||
|
|
||||||
// Unpack and interleave low-order bytes from src and dest into dest.
|
// Unpack and interleave low-order bytes from src and dest into dest.
|
||||||
const SimdImpl_DestRegEither<0x66,0x60> LBW;
|
const xImplSimd_DestRegEither LBW;
|
||||||
// Unpack and interleave low-order words from src and dest into dest.
|
// Unpack and interleave low-order words from src and dest into dest.
|
||||||
const SimdImpl_DestRegEither<0x66,0x61> LWD;
|
const xImplSimd_DestRegEither LWD;
|
||||||
// Unpack and interleave low-order doublewords from src and dest into dest.
|
// Unpack and interleave low-order doublewords from src and dest into dest.
|
||||||
const SimdImpl_DestRegEither<0x66,0x62> LDQ;
|
const xImplSimd_DestRegEither LDQ;
|
||||||
// Unpack and interleave low-order quadwords from src and dest into dest.
|
// Unpack and interleave low-order quadwords from src and dest into dest.
|
||||||
const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;
|
const xImplSimd_DestRegSSE LQDQ;
|
||||||
|
|
||||||
// Unpack and interleave high-order bytes from src and dest into dest.
|
// Unpack and interleave high-order bytes from src and dest into dest.
|
||||||
const SimdImpl_DestRegEither<0x66,0x68> HBW;
|
const xImplSimd_DestRegEither HBW;
|
||||||
// Unpack and interleave high-order words from src and dest into dest.
|
// Unpack and interleave high-order words from src and dest into dest.
|
||||||
const SimdImpl_DestRegEither<0x66,0x69> HWD;
|
const xImplSimd_DestRegEither HWD;
|
||||||
// Unpack and interleave high-order doublewords from src and dest into dest.
|
// Unpack and interleave high-order doublewords from src and dest into dest.
|
||||||
const SimdImpl_DestRegEither<0x66,0x6a> HDQ;
|
const xImplSimd_DestRegEither HDQ;
|
||||||
// Unpack and interleave high-order quadwords from src and dest into dest.
|
// Unpack and interleave high-order quadwords from src and dest into dest.
|
||||||
const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
|
const xImplSimd_DestRegSSE HQDQ;
|
||||||
};
|
};
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
// --------------------------------------------------------------------------------------
|
||||||
|
// SimdImpl_Pack
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
// Pack with Signed or Unsigned Saturation
|
// Pack with Signed or Unsigned Saturation
|
||||||
//
|
//
|
||||||
class SimdImpl_Pack
|
struct SimdImpl_Pack
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
SimdImpl_Pack() {}
|
|
||||||
|
|
||||||
// Converts packed signed word integers from src and dest into packed signed
|
// Converts packed signed word integers from src and dest into packed signed
|
||||||
// byte integers in dest, using signed saturation.
|
// byte integers in dest, using signed saturation.
|
||||||
const SimdImpl_DestRegEither<0x66,0x63> SSWB;
|
const xImplSimd_DestRegEither SSWB;
|
||||||
|
|
||||||
// Converts packed signed dword integers from src and dest into packed signed
|
// Converts packed signed dword integers from src and dest into packed signed
|
||||||
// word integers in dest, using signed saturation.
|
// word integers in dest, using signed saturation.
|
||||||
const SimdImpl_DestRegEither<0x66,0x6b> SSDW;
|
const xImplSimd_DestRegEither SSDW;
|
||||||
|
|
||||||
// Converts packed unsigned word integers from src and dest into packed unsigned
|
// Converts packed unsigned word integers from src and dest into packed unsigned
|
||||||
// byte integers in dest, using unsigned saturation.
|
// byte integers in dest, using unsigned saturation.
|
||||||
const SimdImpl_DestRegEither<0x66,0x67> USWB;
|
const xImplSimd_DestRegEither USWB;
|
||||||
|
|
||||||
// [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed
|
// [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed
|
||||||
// unsigned word integers in dest, using unsigned saturation.
|
// unsigned word integers in dest, using unsigned saturation.
|
||||||
const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
|
const xImplSimd_DestRegSSE USDW;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
// xImplSimd_Unpack
|
||||||
//
|
// --------------------------------------------------------------------------------------
|
||||||
class SimdImpl_Unpack
|
struct xImplSimd_Unpack
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
SimdImpl_Unpack() {}
|
|
||||||
|
|
||||||
// Unpacks the high doubleword [single-precision] values from src and dest into
|
// Unpacks the high doubleword [single-precision] values from src and dest into
|
||||||
// dest, such that the result of dest looks like this:
|
// dest, such that the result of dest looks like this:
|
||||||
// dest[0] <- dest[2]
|
// dest[0] <- dest[2]
|
||||||
|
@ -137,14 +125,14 @@ public:
|
||||||
// dest[2] <- dest[3]
|
// dest[2] <- dest[3]
|
||||||
// dest[3] <- src[3]
|
// dest[3] <- src[3]
|
||||||
//
|
//
|
||||||
const SimdImpl_DestRegSSE<0x00,0x15> HPS;
|
const xImplSimd_DestRegSSE HPS;
|
||||||
|
|
||||||
// Unpacks the high quadword [double-precision] values from src and dest into
|
// Unpacks the high quadword [double-precision] values from src and dest into
|
||||||
// dest, such that the result of dest looks like this:
|
// dest, such that the result of dest looks like this:
|
||||||
// dest.lo <- dest.hi
|
// dest.lo <- dest.hi
|
||||||
// dest.hi <- src.hi
|
// dest.hi <- src.hi
|
||||||
//
|
//
|
||||||
const SimdImpl_DestRegSSE<0x66,0x15> HPD;
|
const xImplSimd_DestRegSSE HPD;
|
||||||
|
|
||||||
// Unpacks the low doubleword [single-precision] values from src and dest into
|
// Unpacks the low doubleword [single-precision] values from src and dest into
|
||||||
// dest, such that the result of dest looks like this:
|
// dest, such that the result of dest looks like this:
|
||||||
|
@ -153,7 +141,7 @@ public:
|
||||||
// dest[1] <- src[0]
|
// dest[1] <- src[0]
|
||||||
// dest[0] <- dest[0]
|
// dest[0] <- dest[0]
|
||||||
//
|
//
|
||||||
const SimdImpl_DestRegSSE<0x00,0x14> LPS;
|
const xImplSimd_DestRegSSE LPS;
|
||||||
|
|
||||||
// Unpacks the low quadword [double-precision] values from src and dest into
|
// Unpacks the low quadword [double-precision] values from src and dest into
|
||||||
// dest, effectively moving the low portion of src into the upper portion of dest.
|
// dest, effectively moving the low portion of src into the upper portion of dest.
|
||||||
|
@ -161,47 +149,39 @@ public:
|
||||||
// dest.hi <- src.lo
|
// dest.hi <- src.lo
|
||||||
// dest.lo <- dest.lo [remains unchanged!]
|
// dest.lo <- dest.lo [remains unchanged!]
|
||||||
//
|
//
|
||||||
const SimdImpl_DestRegSSE<0x66,0x14> LPD;
|
const xImplSimd_DestRegSSE LPD;
|
||||||
};
|
};
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
struct xImplSimd_InsertExtractHelper
|
||||||
|
{
|
||||||
|
u16 Opcode;
|
||||||
|
|
||||||
|
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
|
||||||
|
void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const;
|
||||||
|
|
||||||
|
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
|
||||||
|
void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// xImplSimd_PInsert
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
// PINSRW/B/D [all but Word form are SSE4.1 only!]
|
// PINSRW/B/D [all but Word form are SSE4.1 only!]
|
||||||
//
|
//
|
||||||
class SimdImpl_PInsert
|
struct xImplSimd_PInsert
|
||||||
{
|
{
|
||||||
protected:
|
void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const;
|
||||||
template< u16 Opcode >
|
void W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const;
|
||||||
class ByteDwordForms
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ByteDwordForms() {}
|
|
||||||
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
|
void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const;
|
||||||
{
|
void W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const;
|
||||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
|
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
|
||||||
{
|
xImplSimd_InsertExtractHelper B;
|
||||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
|
||||||
SimdImpl_PInsert() {}
|
xImplSimd_InsertExtractHelper D;
|
||||||
|
|
||||||
// Operation can be performed on either MMX or SSE src operands.
|
|
||||||
__forceinline void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
|
|
||||||
__forceinline void W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
|
|
||||||
|
|
||||||
__forceinline void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
|
|
||||||
__forceinline void W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
|
|
||||||
|
|
||||||
// [SSE-4.1]
|
|
||||||
const ByteDwordForms<0x20> B;
|
|
||||||
|
|
||||||
// [SSE-4.1]
|
|
||||||
const ByteDwordForms<0x22> D;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -210,47 +190,26 @@ public:
|
||||||
//
|
//
|
||||||
// Note: Word form's indirect memory form is only available in SSE4.1.
|
// Note: Word form's indirect memory form is only available in SSE4.1.
|
||||||
//
|
//
|
||||||
class SimdImpl_PExtract
|
struct SimdImpl_PExtract
|
||||||
{
|
{
|
||||||
protected:
|
|
||||||
template< u16 Opcode >
|
|
||||||
class ByteDwordForms
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ByteDwordForms() {}
|
|
||||||
|
|
||||||
__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
|
|
||||||
{
|
|
||||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
|
|
||||||
{
|
|
||||||
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
SimdImpl_PExtract() {}
|
|
||||||
|
|
||||||
// Copies the word element specified by imm8 from src to dest. The upper bits
|
// Copies the word element specified by imm8 from src to dest. The upper bits
|
||||||
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||||
// word value from src into an x86 32 bit register.
|
// word value from src into an x86 32 bit register.
|
||||||
//
|
//
|
||||||
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
|
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
|
||||||
//
|
//
|
||||||
__forceinline void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc5, to, from, imm8 ); }
|
void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const;
|
||||||
__forceinline void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const { xOpWrite0F( 0xc5, to, from, imm8 ); }
|
void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const;
|
||||||
|
void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const;
|
||||||
__forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
|
|
||||||
|
|
||||||
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
|
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
|
||||||
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||||
// byte value from src into an x86 32 bit register.
|
// byte value from src into an x86 32 bit register.
|
||||||
const ByteDwordForms<0x14> B;
|
const xImplSimd_InsertExtractHelper B;
|
||||||
|
|
||||||
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
|
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
|
||||||
// used to extract any single packed dword value from src into an x86 32 bit register.
|
// used to extract any single packed dword value from src into an x86 32 bit register.
|
||||||
const ByteDwordForms<0x16> D;
|
const xImplSimd_InsertExtractHelper D;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
}
|
|
@ -36,64 +36,3 @@ public:
|
||||||
|
|
||||||
SimdImpl_DestRegSSE() {} //GCWho?
|
SimdImpl_DestRegSSE() {} //GCWho?
|
||||||
};
|
};
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
|
|
||||||
// (PSHUFD / PSHUFHW / etc).
|
|
||||||
//
|
|
||||||
template< u8 Prefix, u16 Opcode >
|
|
||||||
class SimdImpl_DestRegImmSSE
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegImmSSE() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
template< u8 Prefix, u16 Opcode >
|
|
||||||
class SimdImpl_DestRegImmMMX
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
|
|
||||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegImmMMX() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For implementing MMX/SSE operations that have reg,reg/rm forms only,
|
|
||||||
// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops).
|
|
||||||
//
|
|
||||||
template< u8 Prefix, u16 Opcode >
|
|
||||||
class SimdImpl_DestRegEither
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
|
||||||
|
|
||||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode, to, from ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegEither() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For implementing MMX/SSE operations where the destination *must* be a register, but the
|
|
||||||
// source can be Direct or Indirect (ModRM/SibSB). The SrcOperandType template parameter
|
|
||||||
// is used to enforce type strictness of the (void*) parameter and ModSib<> parameter, so
|
|
||||||
// that the programmer must be explicit in specifying desired operand size.
|
|
||||||
//
|
|
||||||
// IMPORTANT: This helper assumes the prefix opcode is written *always* -- regardless of
|
|
||||||
// MMX or XMM register status.
|
|
||||||
//
|
|
||||||
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
|
|
||||||
class SimdImpl_DestRegStrict
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegStrict() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
|
@ -1,211 +0,0 @@
|
||||||
/* PCSX2 - PS2 Emulator for PCs
|
|
||||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
|
||||||
*
|
|
||||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
|
||||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
|
||||||
* ation, either version 3 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
||||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE. See the GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
|
||||||
* If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Moves to/from high/low portions of an xmm register.
|
|
||||||
// These instructions cannot be used in reg/reg form.
|
|
||||||
//
|
|
||||||
template< u16 Opcode >
|
|
||||||
class MovhlImplAll
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
template< u8 Prefix >
|
|
||||||
struct Woot
|
|
||||||
{
|
|
||||||
Woot() {}
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); }
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
const Woot<0x00> PS;
|
|
||||||
const Woot<0x66> PD;
|
|
||||||
|
|
||||||
MovhlImplAll() {} //GCC.
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
|
|
||||||
// do something kinda different! Fun!
|
|
||||||
//
|
|
||||||
template< u16 Opcode >
|
|
||||||
class MovhlImpl_RtoR
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Opcode, to, from ); }
|
|
||||||
__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); }
|
|
||||||
|
|
||||||
MovhlImpl_RtoR() {} //GCC.
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
|
|
||||||
//
|
|
||||||
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
|
||||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
|
||||||
// which can be checked for alignment at runtime.
|
|
||||||
//
|
|
||||||
|
|
||||||
template< u8 Prefix, bool isAligned >
|
|
||||||
class SimdImpl_MoveSSE
|
|
||||||
{
|
|
||||||
static const u16 OpcodeA = 0x28; // Aligned [aps] form
|
|
||||||
static const u16 OpcodeU = 0x10; // unaligned [ups] form
|
|
||||||
|
|
||||||
public:
|
|
||||||
SimdImpl_MoveSSE() {} //GCC.
|
|
||||||
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
|
|
||||||
{
|
|
||||||
if( to != from ) xOpWrite0F( Prefix, OpcodeA, to, from );
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
|
|
||||||
{
|
|
||||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
|
||||||
bool isReallyAligned = isAligned || ( ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty() );
|
|
||||||
u16 opcode;
|
|
||||||
|
|
||||||
// See previous comment.
|
|
||||||
if (isReallyAligned)
|
|
||||||
opcode = OpcodeA;
|
|
||||||
else
|
|
||||||
opcode = OpcodeU;
|
|
||||||
|
|
||||||
xOpWrite0F( Prefix, opcode, to, from );
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
|
|
||||||
{
|
|
||||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
|
||||||
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
|
|
||||||
xOpWrite0F( Prefix, isReallyAligned ? OpcodeA+1 : OpcodeU+1, from, to );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Implementations for MOVDQA / MOVDQU
|
|
||||||
//
|
|
||||||
template< u8 Prefix, bool isAligned >
|
|
||||||
class SimdImpl_MoveDQ
|
|
||||||
{
|
|
||||||
static const u8 PrefixA = 0x66; // Aligned [aps] form
|
|
||||||
static const u8 PrefixU = 0xf3; // unaligned [ups] form
|
|
||||||
|
|
||||||
static const u16 Opcode = 0x6f;
|
|
||||||
static const u16 Opcode_Alt = 0x7f; // alternate ModRM encoding (reverse src/dst)
|
|
||||||
|
|
||||||
public:
|
|
||||||
SimdImpl_MoveDQ() {} //GCC.
|
|
||||||
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
|
|
||||||
{
|
|
||||||
if( to != from ) xOpWrite0F( PrefixA, Opcode, to, from );
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
|
|
||||||
{
|
|
||||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
|
||||||
bool isReallyAligned = isAligned || ( (from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty() );
|
|
||||||
xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode, to, from );
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
|
|
||||||
{
|
|
||||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
|
||||||
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
|
|
||||||
xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode_Alt, from, to );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Blend - Conditional copying of values in src into dest.
|
|
||||||
//
|
|
||||||
class SimdImpl_Blend
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
|
|
||||||
// mask bits in the immediate operand (bits [3:0]). Each mask bit corresponds to a
|
|
||||||
// dword element in a 128-bit operand.
|
|
||||||
//
|
|
||||||
// If a mask bit is 1, then the corresponding dword in the source operand is copied
|
|
||||||
// to dest, else the dword element in dest is left unchanged.
|
|
||||||
//
|
|
||||||
SimdImpl_DestRegImmSSE<0x66,0x0c3a> PS;
|
|
||||||
|
|
||||||
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
|
|
||||||
// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
|
|
||||||
// quadword element in a 128-bit operand.
|
|
||||||
//
|
|
||||||
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
|
|
||||||
// to dest, else the quadword element in dest is left unchanged.
|
|
||||||
//
|
|
||||||
SimdImpl_DestRegImmSSE<0x66,0x0d3a> PD;
|
|
||||||
|
|
||||||
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
|
|
||||||
// mask (bits [3:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds
|
|
||||||
// to a dword element in the 128-bit operand.
|
|
||||||
//
|
|
||||||
// If a mask bit is 1, then the corresponding dword in the source operand is copied
|
|
||||||
// to dest, else the dword element in dest is left unchanged.
|
|
||||||
//
|
|
||||||
SimdImpl_DestRegSSE<0x66,0x1438> VPS;
|
|
||||||
|
|
||||||
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
|
|
||||||
// mask (bits [1:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds
|
|
||||||
// to a quadword element in the 128-bit operand.
|
|
||||||
//
|
|
||||||
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
|
|
||||||
// to dest, else the quadword element in dest is left unchanged.
|
|
||||||
//
|
|
||||||
SimdImpl_DestRegSSE<0x66,0x1538> VPD;
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Packed Move with Sign or Zero extension.
|
|
||||||
//
|
|
||||||
template< bool SignExtend >
|
|
||||||
class SimdImpl_PMove
|
|
||||||
{
|
|
||||||
static const u16 OpcodeBase = SignExtend ? 0x2038 : 0x3038;
|
|
||||||
|
|
||||||
public:
|
|
||||||
// [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers
|
|
||||||
// and store them in dest.
|
|
||||||
SimdImpl_DestRegStrict<0x66,OpcodeBase,xRegisterSSE,xRegisterSSE,u64> BW;
|
|
||||||
|
|
||||||
// [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers
|
|
||||||
// and store them in dest.
|
|
||||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x100,xRegisterSSE,xRegisterSSE,u32> BD;
|
|
||||||
|
|
||||||
// [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers
|
|
||||||
// and store them in dest.
|
|
||||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x200,xRegisterSSE,xRegisterSSE,u16> BQ;
|
|
||||||
|
|
||||||
// [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers
|
|
||||||
// and store them in dest.
|
|
||||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x300,xRegisterSSE,xRegisterSSE,u64> WD;
|
|
||||||
|
|
||||||
// [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers
|
|
||||||
// and store them in dest.
|
|
||||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x400,xRegisterSSE,xRegisterSSE,u32> WQ;
|
|
||||||
|
|
||||||
// [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers
|
|
||||||
// and store them in dest.
|
|
||||||
SimdImpl_DestRegStrict<0x66,OpcodeBase+0x500,xRegisterSSE,xRegisterSSE,u64> DQ;
|
|
||||||
};
|
|
||||||
|
|
|
@ -393,29 +393,30 @@ namespace x86Emitter
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
extern const Internal::SimdImpl_MoveSSE<0x00,true> xMOVAPS;
|
extern const xImplSimd_MoveSSE xMOVAPS;
|
||||||
extern const Internal::SimdImpl_MoveSSE<0x00,false> xMOVUPS;
|
extern const xImplSimd_MoveSSE xMOVUPS;
|
||||||
|
extern const xImplSimd_MoveSSE xMOVAPD;
|
||||||
|
extern const xImplSimd_MoveSSE xMOVUPD;
|
||||||
|
|
||||||
#ifdef ALWAYS_USE_MOVAPS
|
#ifdef ALWAYS_USE_MOVAPS
|
||||||
extern const Internal::SimdImpl_MoveSSE<0,true> xMOVDQA;
|
extern const xImplSimd_MoveSSE xMOVDQA;
|
||||||
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVDQU;
|
extern const xImplSimd_MoveSSE xMOVDQU;
|
||||||
extern const Internal::SimdImpl_MoveSSE<0,true> xMOVAPD;
|
|
||||||
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVUPD;
|
|
||||||
#else
|
#else
|
||||||
extern const Internal::SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA;
|
extern const xImplSimd_MoveDQ xMOVDQA;
|
||||||
extern const Internal::SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU;
|
extern const xImplSimd_MoveDQ xMOVDQU;
|
||||||
extern const Internal::SimdImpl_MoveSSE<0x66,true> xMOVAPD;
|
|
||||||
extern const Internal::SimdImpl_MoveSSE<0x66,false> xMOVUPD;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH;
|
extern const xImplSimd_MovHL xMOVH;
|
||||||
extern const Internal::MovhlImpl_RtoR<0x12> xMOVHL;
|
extern const xImplSimd_MovHL xMOVL;
|
||||||
|
extern const xImplSimd_MovHL_RtoR xMOVLH;
|
||||||
|
extern const xImplSimd_MovHL_RtoR xMOVHL;
|
||||||
|
|
||||||
extern const Internal::MovhlImplAll<0x16> xMOVH;
|
extern const xImplSimd_Blend xBLEND;
|
||||||
extern const Internal::MovhlImplAll<0x12> xMOVL;
|
extern const xImplSimd_PMove xPMOVSX;
|
||||||
|
extern const xImplSimd_PMove xPMOVZX;
|
||||||
|
|
||||||
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
|
extern const xImplSimd_DestRegSSE xMOVSLDUP;
|
||||||
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
|
extern const xImplSimd_DestRegSSE xMOVSHDUP;
|
||||||
|
|
||||||
extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
|
extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
|
||||||
extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 );
|
extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 );
|
||||||
|
@ -425,16 +426,16 @@ namespace x86Emitter
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND;
|
extern const xImplSimd_DestRegEither xPAND;
|
||||||
extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
|
extern const xImplSimd_DestRegEither xPANDN;
|
||||||
extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR;
|
extern const xImplSimd_DestRegEither xPOR;
|
||||||
extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR;
|
extern const xImplSimd_DestRegEither xPXOR;
|
||||||
|
|
||||||
extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF;
|
extern const xImplSimd_Shuffle xSHUF;
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST;
|
extern const xImplSimd_DestRegSSE xPTEST;
|
||||||
|
|
||||||
extern const xImplSimd_MinMax xMIN;
|
extern const xImplSimd_MinMax xMIN;
|
||||||
extern const xImplSimd_MinMax xMAX;
|
extern const xImplSimd_MinMax xMAX;
|
||||||
|
@ -526,16 +527,12 @@ namespace x86Emitter
|
||||||
extern const xImplSimd_DotProduct xDP;
|
extern const xImplSimd_DotProduct xDP;
|
||||||
extern const xImplSimd_Round xROUND;
|
extern const xImplSimd_Round xROUND;
|
||||||
|
|
||||||
extern const Internal::SimdImpl_PShuffle xPSHUF;
|
extern const xImplSimd_PShuffle xPSHUF;
|
||||||
extern const Internal::SimdImpl_PUnpack xPUNPCK;
|
extern const SimdImpl_PUnpack xPUNPCK;
|
||||||
extern const Internal::SimdImpl_Unpack xUNPCK;
|
extern const xImplSimd_Unpack xUNPCK;
|
||||||
extern const Internal::SimdImpl_Pack xPACK;
|
extern const SimdImpl_Pack xPACK;
|
||||||
extern const Internal::SimdImpl_PInsert xPINSR;
|
extern const xImplSimd_PInsert xPINSR;
|
||||||
extern const Internal::SimdImpl_PExtract xPEXTR;
|
extern const SimdImpl_PExtract xPEXTR;
|
||||||
extern const Internal::SimdImpl_Blend xBLEND;
|
|
||||||
|
|
||||||
extern const Internal::SimdImpl_PMove<true> xPMOVSX;
|
|
||||||
extern const Internal::SimdImpl_PMove<false> xPMOVZX;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -703,8 +703,6 @@ __forceinline void xWrite( T val )
|
||||||
{
|
{
|
||||||
#include "implement/helpers.h"
|
#include "implement/helpers.h"
|
||||||
#include "implement/simd_templated_helpers.h"
|
#include "implement/simd_templated_helpers.h"
|
||||||
#include "implement/xmm/moremovs.h"
|
|
||||||
#include "implement/xmm/shufflepack.h"
|
|
||||||
#include "implement/group1.h"
|
#include "implement/group1.h"
|
||||||
#include "implement/group2.h"
|
#include "implement/group2.h"
|
||||||
#include "implement/group3.h"
|
#include "implement/group3.h"
|
||||||
|
@ -737,7 +735,9 @@ __forceinline void xWrite( T val )
|
||||||
}
|
}
|
||||||
|
|
||||||
#include "implement/simd_helpers.h"
|
#include "implement/simd_helpers.h"
|
||||||
|
#include "implement/simd_moremovs.h"
|
||||||
#include "implement/simd_arithmetic.h"
|
#include "implement/simd_arithmetic.h"
|
||||||
#include "implement/simd_comparisons.h"
|
#include "implement/simd_comparisons.h"
|
||||||
|
#include "implement/simd_shufflepack.h"
|
||||||
|
|
||||||
#include "inlines.inl"
|
#include "inlines.inl"
|
||||||
|
|
|
@ -106,56 +106,19 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// [SSE-3]
|
const xImplSimd_DestRegEither xPAND = { 0x66,0xdb };
|
||||||
const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
|
const xImplSimd_DestRegEither xPANDN = { 0x66,0xdf };
|
||||||
// [SSE-3]
|
const xImplSimd_DestRegEither xPOR = { 0x66,0xeb };
|
||||||
const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
|
const xImplSimd_DestRegEither xPXOR = { 0x66,0xef };
|
||||||
|
|
||||||
const SimdImpl_MoveSSE<0x00,true> xMOVAPS;
|
|
||||||
|
|
||||||
// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
|
||||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
|
||||||
// which can be checked for alignment at runtime.
|
|
||||||
const SimdImpl_MoveSSE<0x00,false> xMOVUPS;
|
|
||||||
|
|
||||||
#ifdef ALWAYS_USE_MOVAPS
|
|
||||||
const SimdImpl_MoveSSE<0,true> xMOVDQA;
|
|
||||||
const SimdImpl_MoveSSE<0,true> xMOVAPD;
|
|
||||||
|
|
||||||
// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
|
||||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
|
||||||
// which can be checked for alignment at runtime.
|
|
||||||
const SimdImpl_MoveSSE<0,false> xMOVDQU;
|
|
||||||
const SimdImpl_MoveSSE<0,false> xMOVUPD;
|
|
||||||
#else
|
|
||||||
const SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA;
|
|
||||||
const SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU;
|
|
||||||
const SimdImpl_MoveSSE<0x66,true> xMOVAPD;
|
|
||||||
const SimdImpl_MoveSSE<0x66,false> xMOVUPD;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const MovhlImplAll<0x16> xMOVH;
|
|
||||||
const MovhlImplAll<0x12> xMOVL;
|
|
||||||
const MovhlImpl_RtoR<0x16> xMOVLH;
|
|
||||||
const MovhlImpl_RtoR<0x12> xMOVHL;
|
|
||||||
|
|
||||||
const SimdImpl_Shuffle<0xc6> xSHUF;
|
|
||||||
|
|
||||||
const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
|
|
||||||
const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
|
|
||||||
const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
|
|
||||||
const SimdImpl_DestRegEither<0x66,0xef> xPXOR;
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
|
|
||||||
// [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag
|
// [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag
|
||||||
// only if all bits in the result are 0. PTEST also sets the CF flag according
|
// only if all bits in the result are 0. PTEST also sets the CF flag according
|
||||||
// to the following condition: (xmm2/m128 AND NOT xmm1) == 0;
|
// to the following condition: (xmm2/m128 AND NOT xmm1) == 0;
|
||||||
const SimdImpl_DestRegSSE<0x66,0x1738> xPTEST;
|
const xImplSimd_DestRegSSE xPTEST = { 0x66,0x1738 };
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// =====================================================================================================
|
||||||
// SSE Conversion Operations, as looney as they are.
|
// SSE Conversion Operations, as looney as they are.
|
||||||
//
|
// =====================================================================================================
|
||||||
// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
|
// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
|
||||||
// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
|
// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
|
||||||
//
|
//
|
||||||
|
@ -227,8 +190,8 @@ void xImplSimd_DestRegImmMMX::operator()( const xRegisterMMX& to, const ModSibBa
|
||||||
void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||||
void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||||
|
|
||||||
void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteMMX( Opcode ); }
|
void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteSSE( 0x00, Opcode ); }
|
||||||
void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteMMX( Opcode ); }
|
void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteSSE( 0x00, Opcode ); }
|
||||||
|
|
||||||
// =====================================================================================================
|
// =====================================================================================================
|
||||||
// SIMD Arithmetic Instructions
|
// SIMD Arithmetic Instructions
|
||||||
|
@ -237,8 +200,8 @@ void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const ModSibBa
|
||||||
void _SimdShiftHelper::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
void _SimdShiftHelper::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||||
void _SimdShiftHelper::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
void _SimdShiftHelper::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||||
|
|
||||||
void _SimdShiftHelper::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteMMX( Opcode ); }
|
void _SimdShiftHelper::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteSSE( 0x00, Opcode ); }
|
||||||
void _SimdShiftHelper::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteMMX( Opcode ); }
|
void _SimdShiftHelper::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteSSE( 0x00, Opcode ); }
|
||||||
|
|
||||||
void _SimdShiftHelper::operator()( const xRegisterSSE& to, u8 imm8 ) const
|
void _SimdShiftHelper::operator()( const xRegisterSSE& to, u8 imm8 ) const
|
||||||
{
|
{
|
||||||
|
@ -471,64 +434,231 @@ const xImplSimd_PMinMax xPMAX =
|
||||||
{ 0x66, 0x3f38 }, // UD
|
{ 0x66, 0x3f38 }, // UD
|
||||||
};
|
};
|
||||||
|
|
||||||
const SimdImpl_PShuffle xPSHUF;
|
// =====================================================================================================
|
||||||
const SimdImpl_PUnpack xPUNPCK;
|
// SIMD Shuffle/Pack (Shuffle puck?)
|
||||||
const SimdImpl_Unpack xUNPCK;
|
// =====================================================================================================
|
||||||
const SimdImpl_Pack xPACK;
|
|
||||||
const SimdImpl_PInsert xPINSR;
|
|
||||||
const SimdImpl_PExtract xPEXTR;
|
|
||||||
const SimdImpl_Blend xBLEND;
|
|
||||||
|
|
||||||
const SimdImpl_PMove<true> xPMOVSX;
|
__forceinline void xImplSimd_Shuffle::_selector_assertion_check( u8 selector ) const
|
||||||
const SimdImpl_PMove<false> xPMOVZX;
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
|
|
||||||
// Converts from MMX register mode to FPU register mode. The cpu enters MMX register mode
|
|
||||||
// whenever MMX instructions are run, and if FPU instructions are run without using EMMS,
|
|
||||||
// the FPU results will be invalid.
|
|
||||||
__forceinline void xEMMS() { xWrite16( 0x770F ); }
|
|
||||||
|
|
||||||
// [3DNow] Same as EMMS, but an AMD special version which may (or may not) leave MMX regs
|
|
||||||
// in an undefined state (which is fine, since presumably you're done using them anyway).
|
|
||||||
// This instruction is thus faster than EMMS on K8s, but all newer AMD cpus use the same
|
|
||||||
// logic for either EMMS or FEMMS.
|
|
||||||
// Conclusion: Obsolete. Just use EMMS instead.
|
|
||||||
__forceinline void xFEMMS() { xWrite16( 0x0E0F ); }
|
|
||||||
|
|
||||||
|
|
||||||
// Store Streaming SIMD Extension Control/Status to Mem32.
|
|
||||||
__emitinline void xSTMXCSR( const ModSib32& dest )
|
|
||||||
{
|
{
|
||||||
SimdPrefix( 0, 0xae );
|
pxAssertMsg( (selector & ~3) == 0,
|
||||||
EmitSibMagic( 3, dest );
|
"Invalid immediate operand on SSE Shuffle: Upper 6 bits of the SSE Shuffle-PD Selector are reserved and must be zero."
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load Streaming SIMD Extension Control/Status from Mem32.
|
void xImplSimd_Shuffle::PS( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const
|
||||||
__emitinline void xLDMXCSR( const ModSib32& src )
|
|
||||||
{
|
{
|
||||||
SimdPrefix( 0, 0xae );
|
xOpWrite0F( 0xc6, to, from, selector );
|
||||||
EmitSibMagic( 2, src );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save x87 FPU, MMX Technology, and SSE State to buffer
|
void xImplSimd_Shuffle::PS( const xRegisterSSE& to, const ModSibBase& from, u8 selector ) const
|
||||||
// Target buffer must be at least 512 bytes in length to hold the result.
|
|
||||||
__emitinline void xFXSAVE( const ModSibBase& dest )
|
|
||||||
{
|
{
|
||||||
SimdPrefix( 0, 0xae );
|
xOpWrite0F( 0xc6, to, from, selector );
|
||||||
EmitSibMagic( 0, dest );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restore x87 FPU, MMX , XMM, and MXCSR State.
|
void xImplSimd_Shuffle::PD( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const
|
||||||
// Source buffer should be 512 bytes in length.
|
|
||||||
__emitinline void xFXRSTOR( const ModSibBase& src )
|
|
||||||
{
|
{
|
||||||
SimdPrefix( 0, 0xae );
|
_selector_assertion_check( selector );
|
||||||
EmitSibMagic( 1, src );
|
xOpWrite0F( 0x66, 0xc6, to, from, selector & 0x3 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void xImplSimd_Shuffle::PD( const xRegisterSSE& to, const ModSibBase& from, u8 selector ) const
|
||||||
|
{
|
||||||
|
_selector_assertion_check( selector );
|
||||||
|
xOpWrite0F( 0x66, 0xc6, to, from, selector & 0x3 );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xImplSimd_InsertExtractHelper::operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
xOpWrite0F( 0x66, Opcode, to, from, imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xImplSimd_InsertExtractHelper::operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
xOpWrite0F( 0x66, Opcode, to, from, imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xImplSimd_PInsert::W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
|
||||||
|
void xImplSimd_PInsert::W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
|
||||||
|
void xImplSimd_PInsert::W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
|
||||||
|
void xImplSimd_PInsert::W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
|
||||||
|
|
||||||
|
void SimdImpl_PExtract::W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc5, to, from, imm8 ); }
|
||||||
|
void SimdImpl_PExtract::W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const { xOpWrite0F( 0xc5, to, from, imm8 ); }
|
||||||
|
void SimdImpl_PExtract::W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
|
||||||
|
|
||||||
|
const xImplSimd_Shuffle xSHUF;
|
||||||
|
|
||||||
|
const xImplSimd_PShuffle xPSHUF =
|
||||||
|
{
|
||||||
|
{ 0x00, 0x70 }, // W
|
||||||
|
{ 0x66, 0x70 }, // D
|
||||||
|
{ 0xf2, 0x70 }, // LW
|
||||||
|
{ 0xf3, 0x70 }, // HW
|
||||||
|
|
||||||
|
{ 0x66, 0x0038 }, // B
|
||||||
|
};
|
||||||
|
|
||||||
|
const SimdImpl_PUnpack xPUNPCK =
|
||||||
|
{
|
||||||
|
{ 0x66, 0x60 }, // LBW
|
||||||
|
{ 0x66, 0x61 }, // LWD
|
||||||
|
{ 0x66, 0x62 }, // LDQ
|
||||||
|
{ 0x66, 0x6c }, // LQDQ
|
||||||
|
|
||||||
|
{ 0x66, 0x68 }, // HBW
|
||||||
|
{ 0x66, 0x69 }, // HWD
|
||||||
|
{ 0x66, 0x6a }, // HDQ
|
||||||
|
{ 0x66, 0x6d }, // HQDQ
|
||||||
|
};
|
||||||
|
|
||||||
|
const SimdImpl_Pack xPACK =
|
||||||
|
{
|
||||||
|
{ 0x66, 0x63 }, // SSWB
|
||||||
|
{ 0x66, 0x6b }, // SSDW
|
||||||
|
{ 0x66, 0x67 }, // USWB
|
||||||
|
{ 0x66, 0x2b38 }, // USDW
|
||||||
|
};
|
||||||
|
|
||||||
|
const xImplSimd_Unpack xUNPCK =
|
||||||
|
{
|
||||||
|
{ 0x00, 0x15 }, // HPS
|
||||||
|
{ 0x66, 0x15 }, // HPD
|
||||||
|
{ 0x00, 0x14 }, // LPS
|
||||||
|
{ 0x66, 0x14 }, // LPD
|
||||||
|
};
|
||||||
|
|
||||||
|
const xImplSimd_PInsert xPINSR =
|
||||||
|
{
|
||||||
|
{ 0x203a }, // B
|
||||||
|
{ 0x223a }, // D
|
||||||
|
};
|
||||||
|
|
||||||
|
const SimdImpl_PExtract xPEXTR =
|
||||||
|
{
|
||||||
|
{ 0x143a }, // B
|
||||||
|
{ 0x163a }, // D
|
||||||
|
};
|
||||||
|
|
||||||
|
// =====================================================================================================
|
||||||
|
// SIMD Move And Blend Instructions
|
||||||
|
// =====================================================================================================
|
||||||
|
|
||||||
|
void xImplSimd_MovHL::PS( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Opcode, to, from ); }
|
||||||
|
void xImplSimd_MovHL::PS( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( Opcode+1, from, to ); }
|
||||||
|
|
||||||
|
void xImplSimd_MovHL::PD( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); }
|
||||||
|
void xImplSimd_MovHL::PD( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode+1, from, to ); }
|
||||||
|
|
||||||
|
// Register-to-register packed-single form: emits the instance's Opcode with no prefix.
void xImplSimd_MovHL_RtoR::PS( const xRegisterSSE& to, const xRegisterSSE& from ) const
{
	xOpWrite0F( Opcode, to, from );
}
|
||||||
|
// Register-to-register packed-double form: emits the instance's Opcode with the
// 0x66 prefix.
void xImplSimd_MovHL_RtoR::PD( const xRegisterSSE& to, const xRegisterSSE& from ) const
{
	xOpWrite0F( 0x66, Opcode, to, from );
}
|
||||||
|
|
||||||
|
// Load opcodes shared by the xImplSimd_MoveSSE forms.  The matching store opcode
// is always the load opcode plus one.
static const u16 MovPS_OpAligned	= 0x28;		// aligned form   (MOVAPS / MOVAPD)
static const u16 MovPS_OpUnaligned	= 0x10;		// unaligned form (MOVUPS / MOVUPD)
|
||||||
|
|
||||||
|
void xImplSimd_MoveSSE::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
|
||||||
|
{
|
||||||
|
if( to != from ) xOpWrite0F( Prefix, MovPS_OpAligned, to, from );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xImplSimd_MoveSSE::operator()( const xRegisterSSE& to, const ModSibBase& from ) const
|
||||||
|
{
|
||||||
|
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||||
|
bool isReallyAligned = isAligned || ( ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty() );
|
||||||
|
|
||||||
|
xOpWrite0F( Prefix, isReallyAligned ? MovPS_OpAligned : MovPS_OpUnaligned, to, from );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xImplSimd_MoveSSE::operator()( const ModSibBase& to, const xRegisterSSE& from ) const
|
||||||
|
{
|
||||||
|
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||||
|
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
|
||||||
|
xOpWrite0F( Prefix, isReallyAligned ? MovPS_OpAligned+1 : MovPS_OpUnaligned+1, from, to );
|
||||||
|
}
|
||||||
|
|
||||||
|
// MOVDQA and MOVDQU share their opcodes (0x6f load / 0x7f store) and are told apart
// only by the mandatory prefix byte.
static const u8 MovDQ_PrefixAligned		= 0x66;		// aligned form   (MOVDQA)
static const u8 MovDQ_PrefixUnaligned	= 0xf3;		// unaligned form (MOVDQU)
|
||||||
|
|
||||||
|
void xImplSimd_MoveDQ::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
|
||||||
|
{
|
||||||
|
if( to != from ) xOpWrite0F( MovDQ_PrefixAligned, 0x6f, to, from );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xImplSimd_MoveDQ::operator()( const xRegisterSSE& to, const ModSibBase& from ) const
|
||||||
|
{
|
||||||
|
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||||
|
bool isReallyAligned = isAligned || ( (from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty() );
|
||||||
|
xOpWrite0F( isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x6f, to, from );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xImplSimd_MoveDQ::operator()( const ModSibBase& to, const xRegisterSSE& from ) const
|
||||||
|
{
|
||||||
|
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||||
|
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
|
||||||
|
|
||||||
|
// use opcode 0x7f : alternate ModRM encoding (reverse src/dst)
|
||||||
|
xOpWrite0F( isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x7f, from, to );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xImplSimd_PMove::BW( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase ); }
|
||||||
|
void xImplSimd_PMove::BW( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const { OpWriteSSE( 0x66, OpcodeBase ); }
|
||||||
|
|
||||||
|
void xImplSimd_PMove::BD( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x100 ); }
|
||||||
|
void xImplSimd_PMove::BD( const xRegisterSSE& to, const ModSibStrict<u32>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x100 ); }
|
||||||
|
|
||||||
|
void xImplSimd_PMove::BQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x200 ); }
|
||||||
|
void xImplSimd_PMove::BQ( const xRegisterSSE& to, const ModSibStrict<u16>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x200 ); }
|
||||||
|
|
||||||
|
void xImplSimd_PMove::WD( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x300 ); }
|
||||||
|
void xImplSimd_PMove::WD( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x300 ); }
|
||||||
|
|
||||||
|
void xImplSimd_PMove::WQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x400 ); }
|
||||||
|
void xImplSimd_PMove::WQ( const xRegisterSSE& to, const ModSibStrict<u32>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x400 ); }
|
||||||
|
|
||||||
|
void xImplSimd_PMove::DQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x500 ); }
|
||||||
|
void xImplSimd_PMove::DQ( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x500 ); }
|
||||||
|
|
||||||
|
|
||||||
|
// Packed-single moves: no prefix byte (0x00).  The second initializer selects the
// aligned (true) or unaligned (false) opcode.
// NOTE(review): initializer order assumed to be { Prefix, isAligned } -- confirm against the struct.
const xImplSimd_MoveSSE xMOVAPS = { 0x00, true };
const xImplSimd_MoveSSE xMOVUPS = { 0x00, false };
|
||||||
|
|
||||||
|
#ifdef ALWAYS_USE_MOVAPS
|
||||||
|
const xImplSimd_MoveSSE xMOVDQA = { 0x00, true };
|
||||||
|
const xImplSimd_MoveSSE xMOVAPD = { 0x00, true };
|
||||||
|
|
||||||
|
const xImplSimd_MoveSSE xMOVDQU = { 0x00, false };
|
||||||
|
const xImplSimd_MoveSSE xMOVUPD = { 0x00, false };
|
||||||
|
#else
|
||||||
|
const xImplSimd_MoveDQ xMOVDQA = { 0x66, true };
|
||||||
|
const xImplSimd_MoveSSE xMOVAPD = { 0x66, true };
|
||||||
|
|
||||||
|
const xImplSimd_MoveDQ xMOVDQU = { 0xf3, false };
|
||||||
|
const xImplSimd_MoveSSE xMOVUPD = { 0x66, false };
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// High/low half moves between a register and memory.  The value is the load opcode;
// the member functions add one for the store form.
const xImplSimd_MovHL xMOVH = { 0x16 };		// MOVHPS / MOVHPD
const xImplSimd_MovHL xMOVL = { 0x12 };		// MOVLPS / MOVLPD
|
||||||
|
|
||||||
|
// Register-to-register half moves.  These reuse the 0x16 / 0x12 opcodes of the
// memory forms.
const xImplSimd_MovHL_RtoR xMOVLH = { 0x16 };	// MOVLHPS
const xImplSimd_MovHL_RtoR xMOVHL = { 0x12 };	// MOVHLPS
|
||||||
|
|
||||||
|
// Blend group, initialized as { SIMD prefix, opcode }.  All entries use the 0x66 prefix.
const xImplSimd_Blend xBLEND =
{
	{ 0x66, 0x0c3a },	// PS  : BLENDPS  (66 0F 3A 0C)
	{ 0x66, 0x0d3a },	// PD  : BLENDPD  (66 0F 3A 0D)
	{ 0x66, 0x1438 },	// VPS : BLENDVPS (66 0F 38 14)
	{ 0x66, 0x1538 },	// VPD : BLENDVPD (66 0F 38 15)
};
|
||||||
|
|
||||||
|
// Packed move with sign / zero extension.  The value is the opcode base of the BW
// form; the other member functions add a multiple of 0x100 to reach their own opcode.
const xImplSimd_PMove xPMOVSX = { 0x2038 };		// sign extend (0F 38 20 .. 25)
const xImplSimd_PMove xPMOVZX = { 0x3038 };		// zero extend (0F 38 30 .. 35)
|
||||||
|
|
||||||
|
// [SSE-3]
|
||||||
|
const xImplSimd_DestRegSSE xMOVSLDUP = { 0xf3,0x12 };
|
||||||
|
|
||||||
|
// [SSE-3]
|
||||||
|
const xImplSimd_DestRegSSE xMOVSHDUP = { 0xf3,0x16 };
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
|
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
|
||||||
//
|
//
|
||||||
|
@ -645,9 +775,9 @@ __forceinline void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u
|
||||||
__forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); }
|
__forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); }
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
// --------------------------------------------------------------------------------------
|
||||||
// INSERTPS / EXTRACTPS [SSE4.1 only!]
|
// INSERTPS / EXTRACTPS [SSE4.1 only!]
|
||||||
//
|
// --------------------------------------------------------------------------------------
|
||||||
// [TODO] these might be served better as classes, especially if other instructions use
|
// [TODO] these might be served better as classes, especially if other instructions use
|
||||||
// the M32,sse,imm form (I forget offhand if any do).
|
// the M32,sse,imm form (I forget offhand if any do).
|
||||||
|
|
||||||
|
@ -674,4 +804,52 @@ __emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& fr
|
||||||
__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); }
|
__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); }
|
||||||
__emitinline void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); }
|
__emitinline void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); }
|
||||||
|
|
||||||
|
|
||||||
|
// =====================================================================================================
|
||||||
|
// Ungrouped Instructions!
|
||||||
|
// =====================================================================================================
|
||||||
|
|
||||||
|
// Converts from MMX register mode to FPU register mode. The cpu enters MMX register mode
|
||||||
|
// when ever MMX instructions are run, and if FPU instructions are run without using EMMS,
|
||||||
|
// the FPU results will be invalid.
|
||||||
|
__forceinline void xEMMS() { xWrite16( 0x770F ); }
|
||||||
|
|
||||||
|
// [3DNow] Same as EMMS, but an AMD special version which may (or may not) leave MMX regs
|
||||||
|
// in an undefined state (which is fine, since presumably you're done using them anyway).
|
||||||
|
// This instruction is thus faster than EMMS on K8s, but all newer AMD cpus use the same
|
||||||
|
// logic for either EMMS or FEMMS.
|
||||||
|
// Conclusion: Obsolete. Just use EMMS instead.
|
||||||
|
__forceinline void xFEMMS() { xWrite16( 0x0E0F ); }
|
||||||
|
|
||||||
|
|
||||||
|
// Store Streaming SIMD Extension Control/Status to Mem32.
|
||||||
|
__emitinline void xSTMXCSR( const ModSib32& dest )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, 0xae );
|
||||||
|
EmitSibMagic( 3, dest );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load Streaming SIMD Extension Control/Status from Mem32.
|
||||||
|
__emitinline void xLDMXCSR( const ModSib32& src )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, 0xae );
|
||||||
|
EmitSibMagic( 2, src );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save x87 FPU, MMX Technology, and SSE State to buffer
|
||||||
|
// Target buffer must be at least 512 bytes in length to hold the result.
|
||||||
|
__emitinline void xFXSAVE( const ModSibBase& dest )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, 0xae );
|
||||||
|
EmitSibMagic( 0, dest );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore x87 FPU, MMX , XMM, and MXCSR State.
|
||||||
|
// Source buffer should be 512 bytes in length.
|
||||||
|
__emitinline void xFXRSTOR( const ModSibBase& src )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, 0xae );
|
||||||
|
EmitSibMagic( 1, src );
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
Loading…
Reference in New Issue