Emitter Rewrite, Part 3 of 5: Finished all SIMD instructions, except those embedded into base instruction groups (CMPSS/SD, DIVSS/SD, etc).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2135 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-11-05 23:39:45 +00:00
parent 04c86ea6d3
commit fcdb429bb9
10 changed files with 570 additions and 534 deletions

View File

@ -332,14 +332,6 @@
<Filter <Filter
Name="Implement_Simd" Name="Implement_Simd"
> >
<File
RelativePath="..\..\include\x86emitter\implement\xmm\moremovs.h"
>
</File>
<File
RelativePath="..\..\include\x86emitter\implement\xmm\shufflepack.h"
>
</File>
<File <File
RelativePath="..\..\include\x86emitter\implement\simd_arithmetic.h" RelativePath="..\..\include\x86emitter\implement\simd_arithmetic.h"
> >
@ -352,6 +344,14 @@
RelativePath="..\..\include\x86emitter\implement\simd_helpers.h" RelativePath="..\..\include\x86emitter\implement\simd_helpers.h"
> >
</File> </File>
<File
RelativePath="..\..\include\x86emitter\implement\simd_moremovs.h"
>
</File>
<File
RelativePath="..\..\include\x86emitter\implement\simd_shufflepack.h"
>
</File>
<File <File
RelativePath="..\..\include\x86emitter\implement\simd_templated_helpers.h" RelativePath="..\..\include\x86emitter\implement\simd_templated_helpers.h"
> >

View File

@ -23,7 +23,6 @@
#pragma once #pragma once
#define OpWriteSSE( pre, op ) xOpWrite0F( pre, op, to, from ) #define OpWriteSSE( pre, op ) xOpWrite0F( pre, op, to, from )
#define OpWriteMMX( op ) xOpWrite0F( op, to, from )
extern void SimdPrefix( u8 prefix, u16 opcode ); extern void SimdPrefix( u8 prefix, u16 opcode );
extern void EmitSibMagic( uint regfield, const void* address ); extern void EmitSibMagic( uint regfield, const void* address );

View File

@ -18,10 +18,11 @@
// Header: ix86_impl_movs.h -- covers mov, cmov, movsx/movzx, and SETcc (which shares // Header: ix86_impl_movs.h -- covers mov, cmov, movsx/movzx, and SETcc (which shares
// with cmov many similarities). // with cmov many similarities).
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
////////////////////////////////////////////////////////////////////////////////////////// // --------------------------------------------------------------------------------------
// MOV instruction Implementation // MovImplAll
// --------------------------------------------------------------------------------------
// MOV instruction Implementation, plus many SIMD sub-mov variants.
class MovImplAll class MovImplAll
{ {

View File

@ -0,0 +1,174 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
namespace x86Emitter {
// --------------------------------------------------------------------------------------
// xImplSimd_MovHL
// --------------------------------------------------------------------------------------
// Moves to/from high/low portions of an xmm register.
// These instructions cannot be used in reg/reg form.
//
struct xImplSimd_MovHL
{
// Base opcode for this instruction. No constructor is declared, so the value is
// provided by whatever initializes the instance.
u16 Opcode;
// PS variant, load direction: destination is an xmm register, source is a memory operand.
void PS( const xRegisterSSE& to, const ModSibBase& from ) const;
// PS variant, store direction: destination is a memory operand, source is an xmm register.
void PS( const ModSibBase& to, const xRegisterSSE& from ) const;
// PD variant, load direction: destination is an xmm register, source is a memory operand.
void PD( const xRegisterSSE& to, const ModSibBase& from ) const;
// PD variant, store direction: destination is a memory operand, source is an xmm register.
void PD( const ModSibBase& to, const xRegisterSSE& from ) const;
};
// --------------------------------------------------------------------------------------
// xImplSimd_MovHL_RtoR
// --------------------------------------------------------------------------------------
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
// do something kinda different! Fun!
//
struct xImplSimd_MovHL_RtoR
{
// Opcode for this instruction. No constructor is declared, so the value is
// provided by whatever initializes the instance.
u16 Opcode;
// PS variant; both operands are xmm registers (no memory form is declared here).
void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const;
// PD variant; both operands are xmm registers (no memory form is declared here).
void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const;
};
// --------------------------------------------------------------------------------------
// xImplSimd_MoveSSE
// --------------------------------------------------------------------------------------
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
//
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
// which can be checked for alignment at runtime.
//
struct xImplSimd_MoveSSE
{
// Prefix byte associated with this instruction instance.
// NOTE(review): presumably selects between the PS and PD encodings -- confirm in the implementation.
u8 Prefix;
// Indicates whether this instance represents the aligned flavor of the mov
// (see the aligned/unaligned note in the header comment for this struct).
bool isAligned;
// Register-to-register form.
void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
// Load form: xmm register destination, memory operand source.
void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
// Store form: memory operand destination, xmm register source.
void operator()( const ModSibBase& to, const xRegisterSSE& from ) const;
};
// --------------------------------------------------------------------------------------
// xImplSimd_MoveDQ
// --------------------------------------------------------------------------------------
// Implementations for MOVDQA / MOVDQU
//
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
// which can be checked for alignment at runtime.
struct xImplSimd_MoveDQ
{
// Prefix byte associated with this instruction instance.
// NOTE(review): how this interacts with the aligned/unaligned prefix choice is
// decided in the implementation, which is not visible here -- confirm there.
u8 Prefix;
// Indicates whether this instance represents the aligned flavor (MOVDQA) rather
// than the unaligned one (MOVDQU).
bool isAligned;
// Register-to-register form.
void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
// Load form: xmm register destination, memory operand source.
void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
// Store form: memory operand destination, xmm register source.
void operator()( const ModSibBase& to, const xRegisterSSE& from ) const;
};
// --------------------------------------------------------------------------------------
// xImplSimd_Blend
// --------------------------------------------------------------------------------------
// Blend - Conditional copying of values in src into dest.
//
struct xImplSimd_Blend
{
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
// mask bits in the immediate operand (bits [3:0]). Each mask bit corresponds to a
// dword element in a 128-bit operand.
//
// If a mask bit is 1, then the corresponding dword in the source operand is copied
// to dest, else the dword element in dest is left unchanged.
//
xImplSimd_DestRegImmSSE PS;
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
// quadword element in a 128-bit operand.
//
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
// to dest, else the quadword element in dest is left unchanged.
//
xImplSimd_DestRegImmSSE PD;
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
// mask (bits [3:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds
// to a dword element in the 128-bit operand.
//
// If a mask bit is 1, then the corresponding dword in the source operand is copied
// to dest, else the dword element in dest is left unchanged.
//
xImplSimd_DestRegSSE VPS;
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
// mask (bits [1:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds
// to a quadword element in the 128-bit operand.
//
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
// to dest, else the quadword element in dest is left unchanged.
//
xImplSimd_DestRegSSE VPD;
};
// --------------------------------------------------------------------------------------
// xImplSimd_PMove
// --------------------------------------------------------------------------------------
// Packed Move with Sign or Zero extension.
//
struct xImplSimd_PMove
{
// Base opcode for this instruction family. No constructor is declared, so the value
// is provided by whatever initializes the instance.
// NOTE(review): presumably each variant below derives its opcode from this base, and
// the base distinguishes the sign-extending from the zero-extending family -- confirm
// in the implementation.
u16 OpcodeBase;
// [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers
// and store them in dest.
// The memory form requires a u64-typed operand (ModSibStrict<u64>).
void BW( const xRegisterSSE& to, const xRegisterSSE& from ) const;
void BW( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const;
// [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers
// and store them in dest.
// The memory form requires a u32-typed operand (ModSibStrict<u32>).
void BD( const xRegisterSSE& to, const xRegisterSSE& from ) const;
void BD( const xRegisterSSE& to, const ModSibStrict<u32>& from ) const;
// [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers
// and store them in dest.
// The memory form requires a u16-typed operand (ModSibStrict<u16>).
void BQ( const xRegisterSSE& to, const xRegisterSSE& from ) const;
void BQ( const xRegisterSSE& to, const ModSibStrict<u16>& from ) const;
// [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers
// and store them in dest.
// The memory form requires a u64-typed operand (ModSibStrict<u64>).
void WD( const xRegisterSSE& to, const xRegisterSSE& from ) const;
void WD( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const;
// [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers
// and store them in dest.
// The memory form requires a u32-typed operand (ModSibStrict<u32>).
void WQ( const xRegisterSSE& to, const xRegisterSSE& from ) const;
void WQ( const xRegisterSSE& to, const ModSibStrict<u32>& from ) const;
// [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers
// and store them in dest.
// The memory form requires a u64-typed operand (ModSibStrict<u64>).
void DQ( const xRegisterSSE& to, const xRegisterSSE& from ) const;
void DQ( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const;
};
}

View File

@ -15,50 +15,44 @@
#pragma once #pragma once
////////////////////////////////////////////////////////////////////////////////////////// namespace x86Emitter {
//
template< u16 OpcodeSSE > // --------------------------------------------------------------------------------------
class SimdImpl_Shuffle // xImplSimd_Shuffle
// --------------------------------------------------------------------------------------
struct xImplSimd_Shuffle
{ {
protected: inline void _selector_assertion_check( u8 selector ) const;
template< u8 Prefix > struct Woot
{ void PS( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const;
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite8( cmptype ); } void PS( const xRegisterSSE& to, const ModSibBase& from, u8 selector ) const;
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite8( cmptype ); }
Woot() {} void PD( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const;
void PD( const xRegisterSSE& to, const ModSibBase& from, u8 selector ) const;
}; };
public: // --------------------------------------------------------------------------------------
const Woot<0x00> PS; // xImplSimd_PShuffle
const Woot<0x66> PD; // --------------------------------------------------------------------------------------
struct xImplSimd_PShuffle
SimdImpl_Shuffle() {} //GCWhat?
};
//////////////////////////////////////////////////////////////////////////////////////////
//
class SimdImpl_PShuffle
{ {
public:
SimdImpl_PShuffle() {}
// Copies words from src and inserts them into dest at word locations selected with // Copies words from src and inserts them into dest at word locations selected with
// the order operand (8 bit immediate). // the order operand (8 bit immediate).
const SimdImpl_DestRegImmMMX<0x00,0x70> W; const xImplSimd_DestRegImmMMX W;
// Copies doublewords from src and inserts them into dest at dword locations selected // Copies doublewords from src and inserts them into dest at dword locations selected
// with the order operand (8 bit immediate). // with the order operand (8 bit immediate).
const SimdImpl_DestRegImmSSE<0x66,0x70> D; const xImplSimd_DestRegImmSSE D;
// Copies words from the low quadword of src and inserts them into the low quadword // Copies words from the low quadword of src and inserts them into the low quadword
// of dest at word locations selected with the order operand (8 bit immediate). // of dest at word locations selected with the order operand (8 bit immediate).
// The high quadword of src is copied to the high quadword of dest. // The high quadword of src is copied to the high quadword of dest.
const SimdImpl_DestRegImmSSE<0xf2,0x70> LW; const xImplSimd_DestRegImmSSE LW;
// Copies words from the high quadword of src and inserts them into the high quadword // Copies words from the high quadword of src and inserts them into the high quadword
// of dest at word locations selected with the order operand (8 bit immediate). // of dest at word locations selected with the order operand (8 bit immediate).
// The low quadword of src is copied to the low quadword of dest. // The low quadword of src is copied to the low quadword of dest.
const SimdImpl_DestRegImmSSE<0xf3,0x70> HW; const xImplSimd_DestRegImmSSE HW;
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
// control mask in src. If the most significant bit (bit[7]) of each byte of the // control mask in src. If the most significant bit (bit[7]) of each byte of the
@ -68,68 +62,62 @@ public:
// operation) or 3 bits (64-bit operation) of the shuffle control byte. // operation) or 3 bits (64-bit operation) of the shuffle control byte.
// //
// Operands can be MMX or XMM registers. // Operands can be MMX or XMM registers.
const SimdImpl_DestRegEither<0x66,0x0038> B; const xImplSimd_DestRegEither B;
}; };
////////////////////////////////////////////////////////////////////////////////////////// // --------------------------------------------------------------------------------------
// // SimdImpl_PUnpack
class SimdImpl_PUnpack // --------------------------------------------------------------------------------------
struct SimdImpl_PUnpack
{ {
public:
SimdImpl_PUnpack() {}
// Unpack and interleave low-order bytes from src and dest into dest. // Unpack and interleave low-order bytes from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x60> LBW; const xImplSimd_DestRegEither LBW;
// Unpack and interleave low-order words from src and dest into dest. // Unpack and interleave low-order words from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x61> LWD; const xImplSimd_DestRegEither LWD;
// Unpack and interleave low-order doublewords from src and dest into dest. // Unpack and interleave low-order doublewords from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x62> LDQ; const xImplSimd_DestRegEither LDQ;
// Unpack and interleave low-order quadwords from src and dest into dest. // Unpack and interleave low-order quadwords from src and dest into dest.
const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ; const xImplSimd_DestRegSSE LQDQ;
// Unpack and interleave high-order bytes from src and dest into dest. // Unpack and interleave high-order bytes from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x68> HBW; const xImplSimd_DestRegEither HBW;
// Unpack and interleave high-order words from src and dest into dest. // Unpack and interleave high-order words from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x69> HWD; const xImplSimd_DestRegEither HWD;
// Unpack and interleave high-order doublewords from src and dest into dest. // Unpack and interleave high-order doublewords from src and dest into dest.
const SimdImpl_DestRegEither<0x66,0x6a> HDQ; const xImplSimd_DestRegEither HDQ;
// Unpack and interleave high-order quadwords from src and dest into dest. // Unpack and interleave high-order quadwords from src and dest into dest.
const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ; const xImplSimd_DestRegSSE HQDQ;
}; };
////////////////////////////////////////////////////////////////////////////////////////// // --------------------------------------------------------------------------------------
// SimdImpl_Pack
// --------------------------------------------------------------------------------------
// Pack with Signed or Unsigned Saturation // Pack with Signed or Unsigned Saturation
// //
class SimdImpl_Pack struct SimdImpl_Pack
{ {
public:
SimdImpl_Pack() {}
// Converts packed signed word integers from src and dest into packed signed // Converts packed signed word integers from src and dest into packed signed
// byte integers in dest, using signed saturation. // byte integers in dest, using signed saturation.
const SimdImpl_DestRegEither<0x66,0x63> SSWB; const xImplSimd_DestRegEither SSWB;
// Converts packed signed dword integers from src and dest into packed signed // Converts packed signed dword integers from src and dest into packed signed
// word integers in dest, using signed saturation. // word integers in dest, using signed saturation.
const SimdImpl_DestRegEither<0x66,0x6b> SSDW; const xImplSimd_DestRegEither SSDW;
// Converts packed unsigned word integers from src and dest into packed unsigned // Converts packed unsigned word integers from src and dest into packed unsigned
// byte integers in dest, using unsigned saturation. // byte integers in dest, using unsigned saturation.
const SimdImpl_DestRegEither<0x66,0x67> USWB; const xImplSimd_DestRegEither USWB;
// [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed // [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed
// unsigned word integers in dest, using signed saturation. // unsigned word integers in dest, using signed saturation.
const SimdImpl_DestRegSSE<0x66,0x2b38> USDW; const xImplSimd_DestRegSSE USDW;
}; };
// --------------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////////////// // SimdImpl_Unpack
// // --------------------------------------------------------------------------------------
class SimdImpl_Unpack struct xImplSimd_Unpack
{ {
public:
SimdImpl_Unpack() {}
// Unpacks the high doubleword [single-precision] values from src and dest into // Unpacks the high doubleword [single-precision] values from src and dest into
// dest, such that the result of dest looks like this: // dest, such that the result of dest looks like this:
// dest[0] <- dest[2] // dest[0] <- dest[2]
@ -137,14 +125,14 @@ public:
// dest[2] <- dest[3] // dest[2] <- dest[3]
// dest[3] <- src[3] // dest[3] <- src[3]
// //
const SimdImpl_DestRegSSE<0x00,0x15> HPS; const xImplSimd_DestRegSSE HPS;
// Unpacks the high quadword [double-precision] values from src and dest into // Unpacks the high quadword [double-precision] values from src and dest into
// dest, such that the result of dest looks like this: // dest, such that the result of dest looks like this:
// dest.lo <- dest.hi // dest.lo <- dest.hi
// dest.hi <- src.hi // dest.hi <- src.hi
// //
const SimdImpl_DestRegSSE<0x66,0x15> HPD; const xImplSimd_DestRegSSE HPD;
// Unpacks the low doubleword [single-precision] values from src and dest into // Unpacks the low doubleword [single-precision] values from src and dest into
// dest, such that the result of dest looks like this: // dest, such that the result of dest looks like this:
@ -153,7 +141,7 @@ public:
// dest[1] <- src[0] // dest[1] <- src[0]
// dest[0] <- dest[0] // dest[0] <- dest[0]
// //
const SimdImpl_DestRegSSE<0x00,0x14> LPS; const xImplSimd_DestRegSSE LPS;
// Unpacks the low quadword [double-precision] values from src and dest into // Unpacks the low quadword [double-precision] values from src and dest into
// dest, effectively moving the low portion of src into the upper portion of dest. // dest, effectively moving the low portion of src into the upper portion of dest.
@ -161,47 +149,39 @@ public:
// dest.hi <- src.lo // dest.hi <- src.lo
// dest.lo <- dest.lo [remains unchanged!] // dest.lo <- dest.lo [remains unchanged!]
// //
const SimdImpl_DestRegSSE<0x66,0x14> LPD; const xImplSimd_DestRegSSE LPD;
}; };
//////////////////////////////////////////////////////////////////////////////////////////
struct xImplSimd_InsertExtractHelper
{
u16 Opcode;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const;
};
// --------------------------------------------------------------------------------------
// SimdImpl_PInsert
// --------------------------------------------------------------------------------------
// PINSRW/B/D [all but Word form are SSE4.1 only!] // PINSRW/B/D [all but Word form are SSE4.1 only!]
// //
class SimdImpl_PInsert struct xImplSimd_PInsert
{ {
protected: void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const;
template< u16 Opcode > void W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const;
class ByteDwordForms
{
public:
ByteDwordForms() {}
__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const;
{ void W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const;
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
}
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
{ xImplSimd_InsertExtractHelper B;
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
}
};
public: // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
SimdImpl_PInsert() {} xImplSimd_InsertExtractHelper D;
// Operation can be performed on either MMX or SSE src operands.
__forceinline void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
__forceinline void W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
__forceinline void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
__forceinline void W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
// [SSE-4.1]
const ByteDwordForms<0x20> B;
// [SSE-4.1]
const ByteDwordForms<0x22> D;
}; };
@ -210,47 +190,26 @@ public:
// //
// Note: Word form's indirect memory form is only available in SSE4.1. // Note: Word form's indirect memory form is only available in SSE4.1.
// //
class SimdImpl_PExtract struct SimdImpl_PExtract
{ {
protected:
template< u16 Opcode >
class ByteDwordForms
{
public:
ByteDwordForms() {}
__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
{
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
}
__forceinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
{
xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 );
}
};
public:
SimdImpl_PExtract() {}
// Copies the word element specified by imm8 from src to dest. The upper bits // Copies the word element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed // of dest are zero-extended (cleared). This can be used to extract any single packed
// word value from src into an x86 32 bit register. // word value from src into an x86 32 bit register.
// //
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension! // [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
// //
__forceinline void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc5, to, from, imm8 ); } void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const;
__forceinline void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const { xOpWrite0F( 0xc5, to, from, imm8 ); } void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const;
void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const;
__forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits // [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed // of dest are zero-extended (cleared). This can be used to extract any single packed
// byte value from src into an x86 32 bit register. // byte value from src into an x86 32 bit register.
const ByteDwordForms<0x14> B; const xImplSimd_InsertExtractHelper B;
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be // [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
// used to extract any single packed dword value from src into an x86 32 bit register. // used to extract any single packed dword value from src into an x86 32 bit register.
const ByteDwordForms<0x16> D; const xImplSimd_InsertExtractHelper D;
}; };
}

View File

@ -36,64 +36,3 @@ public:
SimdImpl_DestRegSSE() {} //GCWho? SimdImpl_DestRegSSE() {} //GCWho?
}; };
// ------------------------------------------------------------------------
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
// (PSHUFD / PSHUFHW / etc).
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmSSE
{
public:
	// Empty user-provided constructor, kept as in the original (the "GCWho?" tag
	// suggests it is there for GCC's benefit).
	SimdImpl_DestRegImmSSE() {} //GCWho?

	// xmm, xmm, imm8 form: hands Prefix, Opcode, both registers and the immediate
	// to xOpWrite0F.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const
	{
		xOpWrite0F( Prefix, Opcode, to, from, imm );
	}

	// xmm, mem, imm8 form: same as above with a memory source operand.
	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const
	{
		xOpWrite0F( Prefix, Opcode, to, from, imm );
	}
};
// MMX counterpart of the reg,reg/rm,imm helper. The Prefix template argument is
// accepted but never used: every call goes through the prefix-less xOpWrite0F overload.
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmMMX
{
public:
	// Empty user-provided constructor, kept as in the original (the "GCWho?" tag
	// suggests it is there for GCC's benefit).
	SimdImpl_DestRegImmMMX() {} //GCWho?

	// mm, mm, imm8 form.
	__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const
	{
		xOpWrite0F( Opcode, to, from, imm );
	}

	// mm, mem, imm8 form.
	__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const
	{
		xOpWrite0F( Opcode, to, from, imm );
	}
};
// ------------------------------------------------------------------------
// For implementing MMX/SSE operations that have reg,reg/rm forms only,
// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops).
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegEither
{
public:
	// Empty user-provided constructor, kept as in the original (the "GCWho?" tag
	// suggests it is there for GCC's benefit).
	SimdImpl_DestRegEither() {} //GCWho?

	// ---- XMM destinations: Prefix is passed along with Opcode ----

	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		xOpWrite0F( Prefix, Opcode, to, from );
	}

	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
	{
		xOpWrite0F( Prefix, Opcode, to, from );
	}

	// ---- MMX destinations: only Opcode is passed (no prefix argument) ----

	__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const
	{
		xOpWrite0F( Opcode, to, from );
	}

	__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const
	{
		xOpWrite0F( Opcode, to, from );
	}
};
// ------------------------------------------------------------------------
// For implementing MMX/SSE operations where the destination *must* be a register, but the
// source can be Direct or Indirect (ModRM/SibSB). The SrcOperandType template parameter
// is used to enforce type strictness of the (void*) parameter and ModSib<> parameter, so
// that the programmer must be explicit in specifying desired operand size.
//
// IMPORTANT: This helper assumes the prefix opcode is written *always* -- regardless of
// MMX or XMM register status.
//
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
class SimdImpl_DestRegStrict
{
public:
	// Empty user-provided constructor, kept as in the original (the "GCWho?" tag
	// suggests it is there for GCC's benefit).
	SimdImpl_DestRegStrict() {} //GCWho?

	// Direct form: register destination, register source. Prefix is always passed.
	__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const
	{
		xOpWrite0F( Prefix, Opcode, to, from );
	}

	// Indirect form: the source must be a ModSibStrict of exactly SrcOperandType,
	// so the caller has to spell out the operand size. Prefix is always passed.
	__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const
	{
		xOpWrite0F( Prefix, Opcode, to, from );
	}
};

View File

@ -1,211 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////
// Moves to/from high/low portions of an xmm register.
// These instructions cannot be used in reg/reg form.
//
template< u16 Opcode >
class MovhlImplAll
{
protected:
	// Functor providing the load and store forms for one prefix value.
	// Instantiated below with 0x00 (PS) and 0x66 (PD).
	template< u8 Prefix >
	struct Woot
	{
		Woot() {}

		// Load form (xmm <- mem): uses the base Opcode.
		__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
		{
			xOpWrite0F( Prefix, Opcode, to, from );
		}

		// Store form (mem <- xmm): uses Opcode+1, with the register passed first
		// and the memory operand second.
		__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
		{
			xOpWrite0F( Prefix, Opcode+1, from, to );
		}
	};

public:
	MovhlImplAll() {} //GCC.

	const Woot<0x00> PS;
	const Woot<0x66> PD;
};
// ------------------------------------------------------------------------
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
// do something kinda different! Fun!
//
template< u16 Opcode >
class MovhlImpl_RtoR
{
public:
	MovhlImpl_RtoR() {} //GCC.

	// PS form: written through the prefix-less xOpWrite0F overload.
	__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		xOpWrite0F( Opcode, to, from );
	}

	// PD form: same opcode, written with the 0x66 prefix.
	__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		xOpWrite0F( 0x66, Opcode, to, from );
	}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
//
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
// which can be checked for alignment at runtime.
//
template< u8 Prefix, bool isAligned >
class SimdImpl_MoveSSE
{
	static const u16 OpcodeA = 0x28;	// aligned [aps] load opcode; the store form is OpcodeA+1
	static const u16 OpcodeU = 0x10;	// unaligned [ups] load opcode; the store form is OpcodeU+1

public:
	SimdImpl_MoveSSE() {} //GCC.

	// Register-to-register move. Nothing is written when both operands are the
	// same register; otherwise the aligned opcode is always used.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		if( to != from )
		{
			xOpWrite0F( Prefix, OpcodeA, to, from );
		}
	}

	// Load (xmm <- mem).
	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
	{
		// Even for the unaligned instruction, the aligned opcode is chosen when the
		// address is displacement-only and that displacement is 16-byte aligned.
		bool useAligned = isAligned;
		if( !useAligned )
			useAligned = ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty();

		// Picked via plain assignment from the constants (by value).
		u16 opcode = OpcodeU;
		if( useAligned )
			opcode = OpcodeA;

		xOpWrite0F( Prefix, opcode, to, from );
	}

	// Store (mem <- xmm). Same alignment rule as the load form, using the +1 opcodes
	// and passing the register operand first.
	__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
	{
		bool useAligned = isAligned;
		if( !useAligned )
			useAligned = ((to.Displacement & 0x0f) == 0) && to.Index.IsEmpty() && to.Base.IsEmpty();

		xOpWrite0F( Prefix, useAligned ? OpcodeA+1 : OpcodeU+1, from, to );
	}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Implementations for MOVDQA / MOVDQU
//
// Note: the Prefix template argument is accepted for interface compatibility but is
// not used; the prefix actually written is always PrefixA or PrefixU (see below).
template< u8 Prefix, bool isAligned >
class SimdImpl_MoveDQ
{
	static const u8 PrefixA = 0x66;		// prefix for the aligned form (MOVDQA)
	static const u8 PrefixU = 0xf3;		// prefix for the unaligned form (MOVDQU)
	static const u16 Opcode = 0x6f;
	static const u16 Opcode_Alt = 0x7f; // alternate ModRM encoding (reverse src/dst)

	// Chooses the prefix for a memory operand. The aligned prefix is used when this is
	// the aligned instruction, or when the address is displacement-only and the
	// displacement is 16-byte aligned.
	//
	// The constants are copied into a local by value instead of being selected with
	// `cond ? PrefixA : PrefixU`. That conditional yields an lvalue referring to the
	// static const members, which (under pre-C++14 rules) odr-uses them and requires
	// out-of-class definitions this class does not provide.
	static __forceinline u8 _selectPrefix( const ModSibBase& addr )
	{
		const bool isReallyAligned = isAligned || ( (addr.Displacement & 0x0f) == 0 && addr.Index.IsEmpty() && addr.Base.IsEmpty() );

		u8 prefix = PrefixU;
		if( isReallyAligned )
			prefix = PrefixA;
		return prefix;
	}

public:
	SimdImpl_MoveDQ() {} //GCC.

	// Register-to-register move. Nothing is written when both operands are the same
	// register; otherwise the aligned prefix is always used.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		if( to != from ) xOpWrite0F( PrefixA, Opcode, to, from );
	}

	// Load (xmm <- mem).
	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
	{
		xOpWrite0F( _selectPrefix( from ), Opcode, to, from );
	}

	// Store (mem <- xmm): uses the alternate opcode and passes the register operand first.
	__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
	{
		xOpWrite0F( _selectPrefix( to ), Opcode_Alt, from, to );
	}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Blend - Conditional copying of values in src into dest.
//
class SimdImpl_Blend
{
// Groups the four SSE-4.1 blend forms as callable members: PS / PD take the
// selection mask as an immediate operand, while VPS / VPD take it implicitly
// from XMM0.
public:
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
// mask bits in the immediate operand (bits [3:0]). Each mask bit corresponds to a
// dword element in a 128-bit operand.
//
// If a mask bit is 1, then the corresponding dword in the source operand is copied
// to dest, else the dword element in dest is left unchanged.
//
SimdImpl_DestRegImmSSE<0x66,0x0c3a> PS;
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
// quadword element in a 128-bit operand.
//
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
// to dest, else the quadword element in dest is left unchanged.
//
SimdImpl_DestRegImmSSE<0x66,0x0d3a> PD;
// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
// mask held in XMM0 (yes, the fixed register). The mask bit for each dword element
// of the 128-bit operand is the most significant bit of the corresponding dword
// in XMM0.
//
// If a mask bit is 1, then the corresponding dword in the source operand is copied
// to dest, else the dword element in dest is left unchanged.
//
SimdImpl_DestRegSSE<0x66,0x1438> VPS;
// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
// mask held in XMM0 (yes, the fixed register). The mask bit for each quadword
// element of the 128-bit operand is the most significant bit of the corresponding
// quadword in XMM0.
//
// If a mask bit is 1, then the corresponding quadword in the source operand is copied
// to dest, else the quadword element in dest is left unchanged.
//
SimdImpl_DestRegSSE<0x66,0x1538> VPD;
};
//////////////////////////////////////////////////////////////////////////////////////////
// Packed Move with Sign or Zero extension.
//
template< bool SignExtend >
class SimdImpl_PMove
{
	// Opcode of the first (BW) form: 0x2038 for the sign-extending flavor,
	// 0x3038 for the zero-extending one.  Each following form adds 0x100.
	static const u16 OpcodeBase = SignExtend ? 0x2038 : 0x3038;

	static const u16 Opcode_BW = OpcodeBase;
	static const u16 Opcode_BD = OpcodeBase + 0x100;
	static const u16 Opcode_BQ = OpcodeBase + 0x200;
	static const u16 Opcode_WD = OpcodeBase + 0x300;
	static const u16 Opcode_WQ = OpcodeBase + 0x400;
	static const u16 Opcode_DQ = OpcodeBase + 0x500;

public:
	// All forms below are [SSE-4.1].  Each one widens the low elements of src,
	// by zero- or sign-extension according to the SignExtend template argument,
	// and stores the widened elements in dest.  The last template argument of
	// each member is the operand type required for the indirect (memory) form.

	// byte -> word
	SimdImpl_DestRegStrict<0x66,Opcode_BW,xRegisterSSE,xRegisterSSE,u64> BW;

	// byte -> dword
	SimdImpl_DestRegStrict<0x66,Opcode_BD,xRegisterSSE,xRegisterSSE,u32> BD;

	// byte -> qword
	SimdImpl_DestRegStrict<0x66,Opcode_BQ,xRegisterSSE,xRegisterSSE,u16> BQ;

	// word -> dword
	SimdImpl_DestRegStrict<0x66,Opcode_WD,xRegisterSSE,xRegisterSSE,u64> WD;

	// word -> qword
	SimdImpl_DestRegStrict<0x66,Opcode_WQ,xRegisterSSE,xRegisterSSE,u32> WQ;

	// dword -> qword
	SimdImpl_DestRegStrict<0x66,Opcode_DQ,xRegisterSSE,xRegisterSSE,u64> DQ;
};

View File

@ -393,29 +393,30 @@ namespace x86Emitter
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
extern const Internal::SimdImpl_MoveSSE<0x00,true> xMOVAPS; extern const xImplSimd_MoveSSE xMOVAPS;
extern const Internal::SimdImpl_MoveSSE<0x00,false> xMOVUPS; extern const xImplSimd_MoveSSE xMOVUPS;
extern const xImplSimd_MoveSSE xMOVAPD;
extern const xImplSimd_MoveSSE xMOVUPD;
#ifdef ALWAYS_USE_MOVAPS #ifdef ALWAYS_USE_MOVAPS
extern const Internal::SimdImpl_MoveSSE<0,true> xMOVDQA; extern const xImplSimd_MoveSSE xMOVDQA;
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVDQU; extern const xImplSimd_MoveSSE xMOVDQU;
extern const Internal::SimdImpl_MoveSSE<0,true> xMOVAPD;
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVUPD;
#else #else
extern const Internal::SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA; extern const xImplSimd_MoveDQ xMOVDQA;
extern const Internal::SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU; extern const xImplSimd_MoveDQ xMOVDQU;
extern const Internal::SimdImpl_MoveSSE<0x66,true> xMOVAPD;
extern const Internal::SimdImpl_MoveSSE<0x66,false> xMOVUPD;
#endif #endif
extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH; extern const xImplSimd_MovHL xMOVH;
extern const Internal::MovhlImpl_RtoR<0x12> xMOVHL; extern const xImplSimd_MovHL xMOVL;
extern const xImplSimd_MovHL_RtoR xMOVLH;
extern const xImplSimd_MovHL_RtoR xMOVHL;
extern const Internal::MovhlImplAll<0x16> xMOVH; extern const xImplSimd_Blend xBLEND;
extern const Internal::MovhlImplAll<0x12> xMOVL; extern const xImplSimd_PMove xPMOVSX;
extern const xImplSimd_PMove xPMOVZX;
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; extern const xImplSimd_DestRegSSE xMOVSLDUP;
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; extern const xImplSimd_DestRegSSE xMOVSHDUP;
extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ); extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 );
extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 ); extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& from, u8 imm8 );
@ -425,16 +426,16 @@ namespace x86Emitter
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND; extern const xImplSimd_DestRegEither xPAND;
extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN; extern const xImplSimd_DestRegEither xPANDN;
extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR; extern const xImplSimd_DestRegEither xPOR;
extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR; extern const xImplSimd_DestRegEither xPXOR;
extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF; extern const xImplSimd_Shuffle xSHUF;
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; extern const xImplSimd_DestRegSSE xPTEST;
extern const xImplSimd_MinMax xMIN; extern const xImplSimd_MinMax xMIN;
extern const xImplSimd_MinMax xMAX; extern const xImplSimd_MinMax xMAX;
@ -526,16 +527,12 @@ namespace x86Emitter
extern const xImplSimd_DotProduct xDP; extern const xImplSimd_DotProduct xDP;
extern const xImplSimd_Round xROUND; extern const xImplSimd_Round xROUND;
extern const Internal::SimdImpl_PShuffle xPSHUF; extern const xImplSimd_PShuffle xPSHUF;
extern const Internal::SimdImpl_PUnpack xPUNPCK; extern const SimdImpl_PUnpack xPUNPCK;
extern const Internal::SimdImpl_Unpack xUNPCK; extern const xImplSimd_Unpack xUNPCK;
extern const Internal::SimdImpl_Pack xPACK; extern const SimdImpl_Pack xPACK;
extern const Internal::SimdImpl_PInsert xPINSR; extern const xImplSimd_PInsert xPINSR;
extern const Internal::SimdImpl_PExtract xPEXTR; extern const SimdImpl_PExtract xPEXTR;
extern const Internal::SimdImpl_Blend xBLEND;
extern const Internal::SimdImpl_PMove<true> xPMOVSX;
extern const Internal::SimdImpl_PMove<false> xPMOVZX;
} }

View File

@ -703,8 +703,6 @@ __forceinline void xWrite( T val )
{ {
#include "implement/helpers.h" #include "implement/helpers.h"
#include "implement/simd_templated_helpers.h" #include "implement/simd_templated_helpers.h"
#include "implement/xmm/moremovs.h"
#include "implement/xmm/shufflepack.h"
#include "implement/group1.h" #include "implement/group1.h"
#include "implement/group2.h" #include "implement/group2.h"
#include "implement/group3.h" #include "implement/group3.h"
@ -737,7 +735,9 @@ __forceinline void xWrite( T val )
} }
#include "implement/simd_helpers.h" #include "implement/simd_helpers.h"
#include "implement/simd_moremovs.h"
#include "implement/simd_arithmetic.h" #include "implement/simd_arithmetic.h"
#include "implement/simd_comparisons.h" #include "implement/simd_comparisons.h"
#include "implement/simd_shufflepack.h"
#include "inlines.inl" #include "inlines.inl"

View File

@ -106,56 +106,19 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
} }
} }
// [SSE-3] const xImplSimd_DestRegEither xPAND = { 0x66,0xdb };
const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; const xImplSimd_DestRegEither xPANDN = { 0x66,0xdf };
// [SSE-3] const xImplSimd_DestRegEither xPOR = { 0x66,0xeb };
const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; const xImplSimd_DestRegEither xPXOR = { 0x66,0xef };
const SimdImpl_MoveSSE<0x00,true> xMOVAPS;
// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
// which can be checked for alignment at runtime.
const SimdImpl_MoveSSE<0x00,false> xMOVUPS;
#ifdef ALWAYS_USE_MOVAPS
const SimdImpl_MoveSSE<0,true> xMOVDQA;
const SimdImpl_MoveSSE<0,true> xMOVAPD;
// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
// which can be checked for alignment at runtime.
const SimdImpl_MoveSSE<0,false> xMOVDQU;
const SimdImpl_MoveSSE<0,false> xMOVUPD;
#else
const SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA;
const SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU;
const SimdImpl_MoveSSE<0x66,true> xMOVAPD;
const SimdImpl_MoveSSE<0x66,false> xMOVUPD;
#endif
const MovhlImplAll<0x16> xMOVH;
const MovhlImplAll<0x12> xMOVL;
const MovhlImpl_RtoR<0x16> xMOVLH;
const MovhlImpl_RtoR<0x12> xMOVHL;
const SimdImpl_Shuffle<0xc6> xSHUF;
const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
const SimdImpl_DestRegEither<0x66,0xef> xPXOR;
// ------------------------------------------------------------------------
// [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag // [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag
// only if all bits in the result are 0. PTEST also sets the CF flag according // only if all bits in the result are 0. PTEST also sets the CF flag according
// to the following condition: (xmm2/m128 AND NOT xmm1) == 0; // to the following condition: (xmm2/m128 AND NOT xmm1) == 0;
const SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; const xImplSimd_DestRegSSE xPTEST = { 0x66,0x1738 };
// ------------------------------------------------------------------------ // =====================================================================================================
// SSE Conversion Operations, as looney as they are. // SSE Conversion Operations, as looney as they are.
// // =====================================================================================================
// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing // These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). // nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
// //
@ -227,8 +190,8 @@ void xImplSimd_DestRegImmMMX::operator()( const xRegisterMMX& to, const ModSibBa
void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); } void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); } void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteMMX( Opcode ); } void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteSSE( 0x00, Opcode ); }
void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteMMX( Opcode ); } void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteSSE( 0x00, Opcode ); }
// ===================================================================================================== // =====================================================================================================
// SIMD Arithmetic Instructions // SIMD Arithmetic Instructions
@ -237,8 +200,8 @@ void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const ModSibBa
void _SimdShiftHelper::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); } void _SimdShiftHelper::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
void _SimdShiftHelper::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); } void _SimdShiftHelper::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
void _SimdShiftHelper::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteMMX( Opcode ); } void _SimdShiftHelper::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteSSE( 0x00, Opcode ); }
void _SimdShiftHelper::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteMMX( Opcode ); } void _SimdShiftHelper::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteSSE( 0x00, Opcode ); }
void _SimdShiftHelper::operator()( const xRegisterSSE& to, u8 imm8 ) const void _SimdShiftHelper::operator()( const xRegisterSSE& to, u8 imm8 ) const
{ {
@ -471,64 +434,231 @@ const xImplSimd_PMinMax xPMAX =
{ 0x66, 0x3f38 }, // UD { 0x66, 0x3f38 }, // UD
}; };
const SimdImpl_PShuffle xPSHUF; // =====================================================================================================
const SimdImpl_PUnpack xPUNPCK; // SIMD Shuffle/Pack (Shuffle puck?)
const SimdImpl_Unpack xUNPCK; // =====================================================================================================
const SimdImpl_Pack xPACK;
const SimdImpl_PInsert xPINSR;
const SimdImpl_PExtract xPEXTR;
const SimdImpl_Blend xBLEND;
const SimdImpl_PMove<true> xPMOVSX; __forceinline void xImplSimd_Shuffle::_selector_assertion_check( u8 selector ) const
const SimdImpl_PMove<false> xPMOVZX;
//////////////////////////////////////////////////////////////////////////////////////////
//
// Converts from MMX register mode to FPU register mode. The cpu enters MMX register mode
// when ever MMX instructions are run, and if FPU instructions are run without using EMMS,
// the FPU results will be invalid.
__forceinline void xEMMS() { xWrite16( 0x770F ); }
// [3DNow] Same as EMMS, but an AMD special version which may (or may not) leave MMX regs
// in an undefined state (which is fine, since presumably you're done using them anyway).
// This instruction is thus faster than EMMS on K8s, but all newer AMD cpus use the same
// logic for either EMMS or FEMMS.
// Conclusion: Obsolete. Just use EMMS instead.
__forceinline void xFEMMS() { xWrite16( 0x0E0F ); }
// Store Streaming SIMD Extension Control/Status to Mem32.
__emitinline void xSTMXCSR( const ModSib32& dest )
{ {
SimdPrefix( 0, 0xae ); pxAssertMsg( (selector & ~3) == 0,
EmitSibMagic( 3, dest ); "Invalid immediate operand on SSE Shuffle: Upper 6 bits of the SSE Shuffle-PD Selector are reserved and must be zero."
);
} }
// Load Streaming SIMD Extension Control/Status from Mem32. void xImplSimd_Shuffle::PS( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const
__emitinline void xLDMXCSR( const ModSib32& src )
{ {
SimdPrefix( 0, 0xae ); xOpWrite0F( 0xc6, to, from, selector );
EmitSibMagic( 2, src );
} }
// Save x87 FPU, MMX Technology, and SSE State to buffer void xImplSimd_Shuffle::PS( const xRegisterSSE& to, const ModSibBase& from, u8 selector ) const
// Target buffer must be at least 512 bytes in length to hold the result.
__emitinline void xFXSAVE( const ModSibBase& dest )
{ {
SimdPrefix( 0, 0xae ); xOpWrite0F( 0xc6, to, from, selector );
EmitSibMagic( 0, dest );
} }
// Restore x87 FPU, MMX , XMM, and MXCSR State. void xImplSimd_Shuffle::PD( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const
// Source buffer should be 512 bytes in length.
__emitinline void xFXRSTOR( const ModSibBase& src )
{ {
SimdPrefix( 0, 0xae ); _selector_assertion_check( selector );
EmitSibMagic( 1, src ); xOpWrite0F( 0x66, 0xc6, to, from, selector & 0x3 );
} }
void xImplSimd_Shuffle::PD( const xRegisterSSE& to, const ModSibBase& from, u8 selector ) const
{
_selector_assertion_check( selector );
xOpWrite0F( 0x66, 0xc6, to, from, selector & 0x3 );
}
void xImplSimd_InsertExtractHelper::operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
{
xOpWrite0F( 0x66, Opcode, to, from, imm8 );
}
void xImplSimd_InsertExtractHelper::operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
{
xOpWrite0F( 0x66, Opcode, to, from, imm8 );
}
void xImplSimd_PInsert::W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
void xImplSimd_PInsert::W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); }
void xImplSimd_PInsert::W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
void xImplSimd_PInsert::W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0xc4, to, from, imm8 ); }
void SimdImpl_PExtract::W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc5, to, from, imm8 ); }
void SimdImpl_PExtract::W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const { xOpWrite0F( 0xc5, to, from, imm8 ); }
void SimdImpl_PExtract::W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
const xImplSimd_Shuffle xSHUF;
const xImplSimd_PShuffle xPSHUF =
{
{ 0x00, 0x70 }, // W
{ 0x66, 0x70 }, // D
{ 0xf2, 0x70 }, // LW
{ 0xf3, 0x70 }, // HW
{ 0x66, 0x0038 }, // B
};
const SimdImpl_PUnpack xPUNPCK =
{
{ 0x66, 0x60 }, // LBW
{ 0x66, 0x61 }, // LWD
{ 0x66, 0x62 }, // LDQ
{ 0x66, 0x6c }, // LQDQ
{ 0x66, 0x68 }, // HBW
{ 0x66, 0x69 }, // HWD
{ 0x66, 0x6a }, // HDQ
{ 0x66, 0x6d }, // HQDQ
};
const SimdImpl_Pack xPACK =
{
{ 0x66, 0x63 }, // SSWB
{ 0x66, 0x6b }, // SSDW
{ 0x66, 0x67 }, // USWB
{ 0x66, 0x2b38 }, // USDW
};
const xImplSimd_Unpack xUNPCK =
{
{ 0x00, 0x15 }, // HPS
{ 0x66, 0x15 }, // HPD
{ 0x00, 0x14 }, // LPS
{ 0x66, 0x14 }, // LPD
};
const xImplSimd_PInsert xPINSR =
{
{ 0x203a }, // B
{ 0x223a }, // D
};
const SimdImpl_PExtract xPEXTR =
{
{ 0x143a }, // B
{ 0x163a }, // D
};
// =====================================================================================================
// SIMD Move And Blend Instructions
// =====================================================================================================
void xImplSimd_MovHL::PS( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Opcode, to, from ); }
void xImplSimd_MovHL::PS( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( Opcode+1, from, to ); }
void xImplSimd_MovHL::PD( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); }
void xImplSimd_MovHL::PD( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode+1, from, to ); }
void xImplSimd_MovHL_RtoR::PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Opcode, to, from ); }
void xImplSimd_MovHL_RtoR::PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); }
static const u16 MovPS_OpAligned = 0x28; // Aligned [aps] form
static const u16 MovPS_OpUnaligned = 0x10; // unaligned [ups] form
void xImplSimd_MoveSSE::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
{
if( to != from ) xOpWrite0F( Prefix, MovPS_OpAligned, to, from );
}
void xImplSimd_MoveSSE::operator()( const xRegisterSSE& to, const ModSibBase& from ) const
{
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
bool isReallyAligned = isAligned || ( ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty() );
xOpWrite0F( Prefix, isReallyAligned ? MovPS_OpAligned : MovPS_OpUnaligned, to, from );
}
void xImplSimd_MoveSSE::operator()( const ModSibBase& to, const xRegisterSSE& from ) const
{
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
xOpWrite0F( Prefix, isReallyAligned ? MovPS_OpAligned+1 : MovPS_OpUnaligned+1, from, to );
}
static const u8 MovDQ_PrefixAligned = 0x66; // Aligned [dqa] form
static const u8 MovDQ_PrefixUnaligned = 0xf3; // unaligned [dqu] form
void xImplSimd_MoveDQ::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
{
if( to != from ) xOpWrite0F( MovDQ_PrefixAligned, 0x6f, to, from );
}
void xImplSimd_MoveDQ::operator()( const xRegisterSSE& to, const ModSibBase& from ) const
{
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
bool isReallyAligned = isAligned || ( (from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty() );
xOpWrite0F( isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x6f, to, from );
}
void xImplSimd_MoveDQ::operator()( const ModSibBase& to, const xRegisterSSE& from ) const
{
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
// use opcode 0x7f : alternate ModRM encoding (reverse src/dst)
xOpWrite0F( isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x7f, from, to );
}
void xImplSimd_PMove::BW( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase ); }
void xImplSimd_PMove::BW( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const { OpWriteSSE( 0x66, OpcodeBase ); }
void xImplSimd_PMove::BD( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x100 ); }
void xImplSimd_PMove::BD( const xRegisterSSE& to, const ModSibStrict<u32>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x100 ); }
void xImplSimd_PMove::BQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x200 ); }
void xImplSimd_PMove::BQ( const xRegisterSSE& to, const ModSibStrict<u16>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x200 ); }
void xImplSimd_PMove::WD( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x300 ); }
void xImplSimd_PMove::WD( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x300 ); }
void xImplSimd_PMove::WQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x400 ); }
void xImplSimd_PMove::WQ( const xRegisterSSE& to, const ModSibStrict<u32>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x400 ); }
void xImplSimd_PMove::DQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x500 ); }
void xImplSimd_PMove::DQ( const xRegisterSSE& to, const ModSibStrict<u64>& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x500 ); }
const xImplSimd_MoveSSE xMOVAPS = { 0x00, true };
const xImplSimd_MoveSSE xMOVUPS = { 0x00, false };
#ifdef ALWAYS_USE_MOVAPS
const xImplSimd_MoveSSE xMOVDQA = { 0x00, true };
const xImplSimd_MoveSSE xMOVAPD = { 0x00, true };
const xImplSimd_MoveSSE xMOVDQU = { 0x00, false };
const xImplSimd_MoveSSE xMOVUPD = { 0x00, false };
#else
const xImplSimd_MoveDQ xMOVDQA = { 0x66, true };
const xImplSimd_MoveSSE xMOVAPD = { 0x66, true };
const xImplSimd_MoveDQ xMOVDQU = { 0xf3, false };
const xImplSimd_MoveSSE xMOVUPD = { 0x66, false };
#endif
const xImplSimd_MovHL xMOVH = { 0x16 };
const xImplSimd_MovHL xMOVL = { 0x12 };
const xImplSimd_MovHL_RtoR xMOVLH = { 0x16 };
const xImplSimd_MovHL_RtoR xMOVHL = { 0x12 };
const xImplSimd_Blend xBLEND =
{
{ 0x66, 0x0c3a }, // PS
{ 0x66, 0x0d3a }, // PD
{ 0x66, 0x1438 }, // VPS
{ 0x66, 0x1538 }, // VPD
};
const xImplSimd_PMove xPMOVSX = { 0x2038 };
const xImplSimd_PMove xPMOVZX = { 0x3038 };
// [SSE-3]
const xImplSimd_DestRegSSE xMOVSLDUP = { 0xf3,0x12 };
// [SSE-3]
const xImplSimd_DestRegSSE xMOVSHDUP = { 0xf3,0x16 };
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// MMX Mov Instructions (MOVD, MOVQ, MOVSS). // MMX Mov Instructions (MOVD, MOVQ, MOVSS).
// //
@ -645,9 +775,9 @@ __forceinline void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u
__forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); } __forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); }
////////////////////////////////////////////////////////////////////////////////////////// // --------------------------------------------------------------------------------------
// INSERTPS / EXTRACTPS [SSE4.1 only!] // INSERTPS / EXTRACTPS [SSE4.1 only!]
// // --------------------------------------------------------------------------------------
// [TODO] these might be served better as classes, especially if other instructions use // [TODO] these might be served better as classes, especially if other instructions use
// the M32,sse,imm form (I forget offhand if any do). // the M32,sse,imm form (I forget offhand if any do).
@ -674,4 +804,52 @@ __emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict<u32>& fr
__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); } __emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); }
__emitinline void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); } __emitinline void xEXTRACTPS( const ModSibStrict<u32>& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); }
// =====================================================================================================
// Ungrouped Instructions!
// =====================================================================================================
// Converts from MMX register mode to FPU register mode. The cpu enters MMX register mode
// when ever MMX instructions are run, and if FPU instructions are run without using EMMS,
// the FPU results will be invalid.
__forceinline void xEMMS() { xWrite16( 0x770F ); }
// [3DNow] Same as EMMS, but an AMD special version which may (or may not) leave MMX regs
// in an undefined state (which is fine, since presumably you're done using them anyway).
// This instruction is thus faster than EMMS on K8s, but all newer AMD cpus use the same
// logic for either EMMS or FEMMS.
// Conclusion: Obsolete. Just use EMMS instead.
__forceinline void xFEMMS() { xWrite16( 0x0E0F ); }
// Store Streaming SIMD Extension Control/Status to Mem32.
__emitinline void xSTMXCSR( const ModSib32& dest )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 3, dest );
}
// Load Streaming SIMD Extension Control/Status from Mem32.
__emitinline void xLDMXCSR( const ModSib32& src )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 2, src );
}
// Save x87 FPU, MMX Technology, and SSE State to buffer
// Target buffer must be at least 512 bytes in length to hold the result.
__emitinline void xFXSAVE( const ModSibBase& dest )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 0, dest );
}
// Restore x87 FPU, MMX , XMM, and MXCSR State.
// Source buffer should be 512 bytes in length.
__emitinline void xFXRSTOR( const ModSibBase& src )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 1, src );
}
} }