mirror of https://github.com/PCSX2/pcsx2.git
Emitter rewrite, part 1 of 5 (or so...): Re-tooled SSE arithmetic instructions to be class/template free.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2067 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
e8d858c675
commit
6fdaea2c79
|
@ -6,6 +6,7 @@
|
|||
ProjectGUID="{A51123F5-9505-4EAE-85E7-D320290A272C}"
|
||||
RootNamespace="x86emitter"
|
||||
Keyword="Win32Proj"
|
||||
TargetFrameworkVersion="0"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
|
@ -331,14 +332,6 @@
|
|||
<Filter
|
||||
Name="Implement_Simd"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\include\x86emitter\implement\xmm\arithmetic.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\include\x86emitter\implement\xmm\basehelpers.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\include\x86emitter\implement\xmm\comparisons.h"
|
||||
>
|
||||
|
@ -351,6 +344,18 @@
|
|||
RelativePath="..\..\include\x86emitter\implement\xmm\shufflepack.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\include\x86emitter\implement\simd_arithmetic.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\include\x86emitter\implement\simd_helpers.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\include\x86emitter\implement\simd_templated_helpers.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
</Filter>
|
||||
<Filter
|
||||
|
|
|
@ -20,9 +20,11 @@
|
|||
// that all members contained within are in said namespace.
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#define OpWriteSSE( pre, op ) xOpWrite0F( pre, op, to, from )
|
||||
#define OpWriteMMX( op ) xOpWrite0F( op, to, from )
|
||||
|
||||
extern void SimdPrefix( u8 prefix, u16 opcode );
|
||||
extern void EmitSibMagic( uint regfield, const void* address );
|
||||
extern void EmitSibMagic( uint regfield, const ModSibBase& info );
|
||||
|
|
|
@ -15,101 +15,81 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ShiftHelper -- It's out here because C++ child class template semantics are generally
|
||||
// not cross-compiler friendly.
|
||||
//
|
||||
template< u16 Opcode1, u16 OpcodeImm, u8 Modcode >
|
||||
class _SimdShiftHelper
|
||||
namespace x86Emitter {
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// _SimdShiftHelper
|
||||
// --------------------------------------------------------------------------------------
|
||||
struct _SimdShiftHelper
|
||||
{
|
||||
public:
|
||||
_SimdShiftHelper() {}
|
||||
u8 Prefix;
|
||||
u16 Opcode;
|
||||
u16 OpcodeImm;
|
||||
u8 Modcode;
|
||||
|
||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); }
|
||||
void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||
void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
|
||||
|
||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode1, to, from ); }
|
||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode1, to, from ); }
|
||||
void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const;
|
||||
void operator()( const xRegisterMMX& to, const ModSibBase& from ) const;
|
||||
|
||||
|
||||
__emitinline void operator()( const xRegisterSSE& to, u8 imm8 ) const
|
||||
{
|
||||
SimdPrefix( 0x66, OpcodeImm );
|
||||
EmitSibMagic( (int)Modcode, to );
|
||||
xWrite8( imm8 );
|
||||
}
|
||||
|
||||
__emitinline void operator()( const xRegisterMMX& to, u8 imm8 ) const
|
||||
{
|
||||
SimdPrefix( 0x00, OpcodeImm );
|
||||
EmitSibMagic( (int)Modcode, to );
|
||||
xWrite8( imm8 );
|
||||
}
|
||||
void operator()( const xRegisterSSE& to, u8 imm8 ) const;
|
||||
void operator()( const xRegisterMMX& to, u8 imm8 ) const;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_Shift / xImplSimd_ShiftWithoutQ
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
||||
// Used for PSRA, which lacks the Q form.
|
||||
//
|
||||
template< u16 OpcodeBase1, u8 Modcode >
|
||||
class SimdImpl_ShiftWithoutQ
|
||||
struct xImplSimd_ShiftWithoutQ
|
||||
{
|
||||
public:
|
||||
const _SimdShiftHelper<OpcodeBase1+1,0x71,Modcode> W;
|
||||
const _SimdShiftHelper<OpcodeBase1+2,0x72,Modcode> D;
|
||||
|
||||
SimdImpl_ShiftWithoutQ() {}
|
||||
const _SimdShiftHelper W;
|
||||
const _SimdShiftHelper D;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Implements PSRL and PSLL
|
||||
//
|
||||
template< u16 OpcodeBase1, u8 Modcode >
|
||||
class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
|
||||
struct xImplSimd_Shift
|
||||
{
|
||||
public:
|
||||
const _SimdShiftHelper<OpcodeBase1+3,0x73,Modcode> Q;
|
||||
|
||||
__forceinline void DQ( const xRegisterSSE& to, u8 imm8 ) const { xOpWrite0F( 0x66, 0x73, (int)Modcode+1, to, imm8 ); }
|
||||
|
||||
SimdImpl_Shift() {}
|
||||
};
|
||||
const _SimdShiftHelper W;
|
||||
const _SimdShiftHelper D;
|
||||
const _SimdShiftHelper Q;
|
||||
|
||||
void DQ( const xRegisterSSE& to, u8 imm8 ) const;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
template< u16 OpcodeB, u16 OpcodeQ >
|
||||
class SimdImpl_AddSub
|
||||
struct xImplSimd_AddSub
|
||||
{
|
||||
public:
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;
|
||||
const xImplSimd_DestRegEither B;
|
||||
const xImplSimd_DestRegEither W;
|
||||
const xImplSimd_DestRegEither D;
|
||||
const xImplSimd_DestRegEither Q;
|
||||
|
||||
// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;
|
||||
const xImplSimd_DestRegEither SB;
|
||||
|
||||
// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;
|
||||
const xImplSimd_DestRegEither SW;
|
||||
|
||||
// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB> USB;
|
||||
const xImplSimd_DestRegEither USB;
|
||||
|
||||
// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
|
||||
const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
|
||||
|
||||
SimdImpl_AddSub() {}
|
||||
const xImplSimd_DestRegEither USW;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
class SimdImpl_PMul
|
||||
struct xImplSimd_PMul
|
||||
{
|
||||
public:
|
||||
const SimdImpl_DestRegEither<0x66,0xd5> LW;
|
||||
const SimdImpl_DestRegEither<0x66,0xe5> HW;
|
||||
const SimdImpl_DestRegEither<0x66,0xe4> HUW;
|
||||
const SimdImpl_DestRegEither<0x66,0xf4> UDQ;
|
||||
const xImplSimd_DestRegEither LW;
|
||||
const xImplSimd_DestRegEither HW;
|
||||
const xImplSimd_DestRegEither HUW;
|
||||
const xImplSimd_DestRegEither UDQ;
|
||||
|
||||
// [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
|
||||
// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
|
||||
|
@ -121,112 +101,95 @@ public:
|
|||
//
|
||||
// Both operands can be MMX or XMM registers. Source can be register or memory.
|
||||
//
|
||||
const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;
|
||||
const xImplSimd_DestRegEither HRSW;
|
||||
|
||||
// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
|
||||
// the low 32 bits of each product in dest.
|
||||
const SimdImpl_DestRegSSE<0x66,0x4038> LD;
|
||||
const xImplSimd_DestRegSSE LD;
|
||||
|
||||
// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
|
||||
const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
|
||||
|
||||
SimdImpl_PMul() {}
|
||||
const xImplSimd_DestRegSSE DQ;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
|
||||
//
|
||||
template< u16 OpcodeSSE >
|
||||
class SimdImpl_rSqrt
|
||||
struct xImplSimd_rSqrt
|
||||
{
|
||||
public:
|
||||
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;
|
||||
const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;
|
||||
SimdImpl_rSqrt() {}
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE SS;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SQRT has PS/SS/SD forms, but not the PD form.
|
||||
//
|
||||
template< u16 OpcodeSSE >
|
||||
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
|
||||
struct xImplSimd_Sqrt
|
||||
{
|
||||
public:
|
||||
SimdImpl_Sqrt() {}
|
||||
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE SS;
|
||||
const xImplSimd_DestRegSSE SD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
class SimdImpl_AndNot
|
||||
struct xImplSimd_AndNot
|
||||
{
|
||||
public:
|
||||
SimdImpl_AndNot() {}
|
||||
const SimdImpl_DestRegSSE<0x00,0x55> PS;
|
||||
const SimdImpl_DestRegSSE<0x66,0x55> PD;
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE PD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed absolute value. [sSSE3 only]
|
||||
//
|
||||
class SimdImpl_PAbsolute
|
||||
struct xImplSimd_PAbsolute
|
||||
{
|
||||
public:
|
||||
SimdImpl_PAbsolute() {}
|
||||
|
||||
// [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
|
||||
// in dest, as UNSIGNED.
|
||||
const SimdImpl_DestRegEither<0x66, 0x1c38> B;
|
||||
const xImplSimd_DestRegEither B;
|
||||
|
||||
// [sSSE-3] Computes the absolute value of word in the src, and stores the result
|
||||
// in dest, as UNSIGNED.
|
||||
const SimdImpl_DestRegEither<0x66, 0x1d38> W;
|
||||
const xImplSimd_DestRegEither W;
|
||||
|
||||
// [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
|
||||
// result in dest, as UNSIGNED.
|
||||
const SimdImpl_DestRegEither<0x66, 0x1e38> D;
|
||||
const xImplSimd_DestRegEither D;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the
|
||||
// corresponding sign in src.
|
||||
//
|
||||
class SimdImpl_PSign
|
||||
struct xImplSimd_PSign
|
||||
{
|
||||
public:
|
||||
SimdImpl_PSign() {}
|
||||
|
||||
// [sSSE-3] negates each byte element of dest if the signed integer value of the
|
||||
// corresponding data element in src is less than zero. If the signed integer value
|
||||
// of a data element in src is positive, the corresponding data element in dest is
|
||||
// unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const SimdImpl_DestRegEither<0x66, 0x0838> B;
|
||||
const xImplSimd_DestRegEither B;
|
||||
|
||||
// [sSSE-3] negates each word element of dest if the signed integer value of the
|
||||
// corresponding data element in src is less than zero. If the signed integer value
|
||||
// of a data element in src is positive, the corresponding data element in dest is
|
||||
// unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const SimdImpl_DestRegEither<0x66, 0x0938> W;
|
||||
const xImplSimd_DestRegEither W;
|
||||
|
||||
// [sSSE-3] negates each doubleword element of dest if the signed integer value
|
||||
// of the corresponding data element in src is less than zero. If the signed integer
|
||||
// value of a data element in src is positive, the corresponding data element in dest
|
||||
// is unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const SimdImpl_DestRegEither<0x66, 0x0a38> D;
|
||||
const xImplSimd_DestRegEither D;
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed Multiply and Add!!
|
||||
//
|
||||
class SimdImpl_PMultAdd
|
||||
struct xImplSimd_PMultAdd
|
||||
{
|
||||
public:
|
||||
SimdImpl_PMultAdd() {}
|
||||
|
||||
// Multiplies the individual signed words of dest by the corresponding signed words
|
||||
// of src, producing temporary signed, doubleword results. The adjacent doubleword
|
||||
// results are then summed and stored in the destination operand.
|
||||
|
@ -235,7 +198,7 @@ public:
|
|||
// DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] );
|
||||
// [.. repeat in the case of XMM src/dest operands ..]
|
||||
//
|
||||
const SimdImpl_DestRegEither<0x66, 0xf5> WD;
|
||||
const xImplSimd_DestRegEither WD;
|
||||
|
||||
// [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding
|
||||
// signed byte of src, producing intermediate signed 16-bit integers. Each adjacent
|
||||
|
@ -251,17 +214,14 @@ public:
|
|||
// DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] );
|
||||
// [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..]
|
||||
//
|
||||
const SimdImpl_DestRegEither<0x66, 0xf438> UBSW;
|
||||
const xImplSimd_DestRegEither UBSW;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed Horizontal Add [SSE3 only]
|
||||
//
|
||||
class SimdImpl_HorizAdd
|
||||
struct xImplSimd_HorizAdd
|
||||
{
|
||||
public:
|
||||
SimdImpl_HorizAdd() {}
|
||||
|
||||
// [SSE-3] Horizontal Add of Packed Data. A three step process:
|
||||
// * Adds the single-precision floating-point values in the first and second dwords of
|
||||
// dest and stores the result in the first dword of dest.
|
||||
|
@ -269,24 +229,21 @@ public:
|
|||
// stores the result in the second dword of dest.
|
||||
// * Adds single-precision floating-point values in the first and second dword of *src*
|
||||
// and stores the result in the third dword of dest.
|
||||
const SimdImpl_DestRegSSE<0xf2, 0x7c> PS;
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
|
||||
// [SSE-3] Horizontal Add of Packed Data. A two step process:
|
||||
// * Adds the double-precision floating-point values in the high and low quadwords of
|
||||
// dest and stores the result in the low quadword of dest.
|
||||
// * Adds the double-precision floating-point values in the high and low quadwords of
|
||||
// *src* and stores the result in the high quadword of dest.
|
||||
const SimdImpl_DestRegSSE<0x66, 0x7c> PD;
|
||||
const xImplSimd_DestRegSSE PD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// DotProduct calculation (SSE4.1 only!)
|
||||
//
|
||||
class SimdImpl_DotProduct
|
||||
struct xImplSimd_DotProduct
|
||||
{
|
||||
public:
|
||||
SimdImpl_DotProduct() {}
|
||||
|
||||
// [SSE-4.1] Conditionally multiplies the packed single precision floating-point
|
||||
// values in dest with the packed single-precision floats in src depending on a
|
||||
// mask extracted from the high 4 bits of the immediate byte. If a condition mask
|
||||
|
@ -300,20 +257,17 @@ public:
|
|||
// element in dest. If a broadcast mask bit is zero, the corresponding element in
|
||||
// the destination is set to zero.
|
||||
//
|
||||
SimdImpl_DestRegImmSSE<0x66,0x403a> PS;
|
||||
xImplSimd_DestRegImmSSE PS;
|
||||
|
||||
// [SSE-4.1]
|
||||
SimdImpl_DestRegImmSSE<0x66,0x413a> PD;
|
||||
xImplSimd_DestRegImmSSE PD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Rounds floating point values (packed or single scalar) by an arbitrary rounding mode.
|
||||
// (SSE4.1 only!)
|
||||
class SimdImpl_Round
|
||||
struct xImplSimd_Round
|
||||
{
|
||||
public:
|
||||
SimdImpl_Round() {}
|
||||
|
||||
// [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest.
|
||||
//
|
||||
// Imm8 specifies control fields for the rounding operation:
|
||||
|
@ -324,7 +278,7 @@ public:
|
|||
// Rounding Mode Reference:
|
||||
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
|
||||
//
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x083a> PS;
|
||||
const xImplSimd_DestRegImmSSE PS;
|
||||
|
||||
// [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest.
|
||||
//
|
||||
|
@ -336,7 +290,7 @@ public:
|
|||
// Rounding Mode Reference:
|
||||
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
|
||||
//
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x093a> PD;
|
||||
const xImplSimd_DestRegImmSSE PD;
|
||||
|
||||
// [SSE-4.1] Rounds the single-precision src value and stores in dest.
|
||||
//
|
||||
|
@ -348,7 +302,7 @@ public:
|
|||
// Rounding Mode Reference:
|
||||
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
|
||||
//
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x0a3a> SS;
|
||||
const xImplSimd_DestRegImmSSE SS;
|
||||
|
||||
// [SSE-4.1] Rounds the double-precision src value and stores in dest.
|
||||
//
|
||||
|
@ -360,5 +314,8 @@ public:
|
|||
// Rounding Mode Reference:
|
||||
// 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
|
||||
//
|
||||
const SimdImpl_DestRegImmSSE<0x66,0x0b3a> SD;
|
||||
const xImplSimd_DestRegImmSSE SD;
|
||||
};
|
||||
|
||||
} // End namespace x86Emitter
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter {
|
||||
|
||||
// =====================================================================================================
|
||||
// xImpl_SIMD Types (template free!)
|
||||
// =====================================================================================================
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
|
||||
// like ANDPS/ANDPD
|
||||
//
|
||||
// Plain aggregate with no constructors: the tables in this changeset fill it in with
// brace initializers of the form { Prefix, Opcode }.
struct xImplSimd_DestRegSSE
|
||||
{
|
||||
// First value handed to xOpWrite0F by both overloads. The tables in this changeset
// use 0x00, 0x66, 0xf2 and 0xf3 here.
u8 Prefix;
|
||||
// Second value handed to xOpWrite0F. Both single-byte (e.g. 0x52) and two-byte
// (e.g. 0x4038) values are used. NOTE(review): how a two-byte value is split into
// emitted bytes is decided inside xOpWrite0F, which is not shown here.
u16 Opcode;
|
||||
|
||||
// Emits the instruction with an XMM register source.
void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||
// Emits the instruction with a memory (ModSib) source.
void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
|
||||
// (PSHUFD / PSHUFHW / etc).
|
||||
//
|
||||
// Plain aggregate with no constructors, initialized as { Prefix, Opcode }. Same shape as
// xImplSimd_DestRegSSE, but every emitter takes a trailing 8-bit immediate.
struct xImplSimd_DestRegImmSSE
|
||||
{
|
||||
// First value handed to xOpWrite0F by both overloads.
u8 Prefix;
|
||||
// Second value handed to xOpWrite0F (the tables in this changeset use values such as 0x403a).
u16 Opcode;
|
||||
|
||||
// XMM register source; imm is forwarded unchanged as the last argument of xOpWrite0F.
void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const;
|
||||
// Memory (ModSib) source; imm is forwarded unchanged as the last argument of xOpWrite0F.
void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const;
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
// MMX counterpart of xImplSimd_DestRegImmSSE: mmreg,mmreg/rm,imm forms only.
// Plain aggregate with no constructors, laid out as { Prefix, Opcode }.
//
struct xImplSimd_DestRegImmMMX
|
||||
{
|
||||
// Kept so the layout matches the SSE variant. The operator() definitions in this
// changeset call xOpWrite0F( Opcode, to, from, imm ) and never pass Prefix.
u8 Prefix;
|
||||
// Opcode value handed to xOpWrite0F.
u16 Opcode;
|
||||
|
||||
// MMX register source, followed by an 8-bit immediate.
void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const;
|
||||
// Memory (ModSib) source, followed by an 8-bit immediate.
void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const;
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing MMX/SSE operations that have reg,reg/rm forms only,
|
||||
// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops).
|
||||
//
|
||||
// Plain aggregate with no constructors, initialized as { Prefix, Opcode }. One instance
// serves both register files: overload resolution on the destination type picks the
// SSE or the MMX emitter.
struct xImplSimd_DestRegEither
|
||||
{
|
||||
// Passed to xOpWrite0F by the xRegisterSSE overloads only; the xRegisterMMX overloads
// pass Opcode alone.
u8 Prefix;
|
||||
// Opcode value shared by the SSE and MMX overloads.
u16 Opcode;
|
||||
|
||||
// SSE forms: XMM destination, XMM register or memory source.
void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
|
||||
void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
|
||||
|
||||
// MMX forms: MM destination, MM register or memory source.
void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const;
|
||||
void operator()( const xRegisterMMX& to, const ModSibBase& from ) const;
|
||||
};
|
||||
|
||||
} // end namespace x86Emitter
|
||||
|
|
@ -428,13 +428,9 @@ namespace x86Emitter
|
|||
extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR;
|
||||
extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR;
|
||||
|
||||
extern const Internal::SimdImpl_AndNot xANDN;
|
||||
|
||||
extern const Internal::SimdImpl_COMI<true> xCOMI;
|
||||
extern const Internal::SimdImpl_COMI<false> xUCOMI;
|
||||
extern const Internal::SimdImpl_rSqrt<0x53> xRCP;
|
||||
extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT;
|
||||
extern const Internal::SimdImpl_Sqrt<0x51> xSQRT;
|
||||
|
||||
extern const Internal::SimdImpl_MinMax<0x5f> xMAX;
|
||||
extern const Internal::SimdImpl_MinMax<0x5d> xMIN;
|
||||
|
@ -488,32 +484,36 @@ namespace x86Emitter
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL;
|
||||
extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
|
||||
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||
extern const xImplSimd_AndNot xANDN;
|
||||
extern const xImplSimd_rSqrt xRCP;
|
||||
extern const xImplSimd_rSqrt xRSQRT;
|
||||
extern const xImplSimd_Sqrt xSQRT;
|
||||
|
||||
extern const xImplSimd_Shift xPSLL;
|
||||
extern const xImplSimd_Shift xPSRL;
|
||||
extern const xImplSimd_ShiftWithoutQ xPSRA;
|
||||
extern const xImplSimd_AddSub xPADD;
|
||||
extern const xImplSimd_AddSub xPSUB;
|
||||
extern const xImplSimd_PMul xPMUL;
|
||||
extern const xImplSimd_PAbsolute xPABS;
|
||||
extern const xImplSimd_PSign xPSIGN;
|
||||
extern const xImplSimd_PMultAdd xPMADD;
|
||||
extern const xImplSimd_HorizAdd xHADD;
|
||||
extern const xImplSimd_DotProduct xDP;
|
||||
extern const xImplSimd_Round xROUND;
|
||||
|
||||
|
||||
extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||
extern const Internal::SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
|
||||
extern const Internal::SimdImpl_PMinMax<0xde,0x3c> xPMAX;
|
||||
extern const Internal::SimdImpl_PMinMax<0xda,0x38> xPMIN;
|
||||
|
||||
extern const Internal::SimdImpl_PMul xPMUL;
|
||||
extern const Internal::SimdImpl_PCompare xPCMP;
|
||||
extern const Internal::SimdImpl_PShuffle xPSHUF;
|
||||
extern const Internal::SimdImpl_PUnpack xPUNPCK;
|
||||
extern const Internal::SimdImpl_Unpack xUNPCK;
|
||||
extern const Internal::SimdImpl_Pack xPACK;
|
||||
|
||||
extern const Internal::SimdImpl_PAbsolute xPABS;
|
||||
extern const Internal::SimdImpl_PSign xPSIGN;
|
||||
extern const Internal::SimdImpl_PInsert xPINSR;
|
||||
extern const Internal::SimdImpl_PExtract xPEXTR;
|
||||
extern const Internal::SimdImpl_PMultAdd xPMADD;
|
||||
extern const Internal::SimdImpl_HorizAdd xHADD;
|
||||
|
||||
extern const Internal::SimdImpl_Blend xBLEND;
|
||||
extern const Internal::SimdImpl_DotProduct xDP;
|
||||
extern const Internal::SimdImpl_Round xROUND;
|
||||
|
||||
extern const Internal::SimdImpl_PMove<true> xPMOVSX;
|
||||
extern const Internal::SimdImpl_PMove<false> xPMOVZX;
|
||||
|
|
|
@ -694,9 +694,8 @@ __forceinline void xWrite( T val )
|
|||
namespace Internal
|
||||
{
|
||||
#include "implement/helpers.h"
|
||||
#include "implement/xmm/basehelpers.h"
|
||||
#include "implement/simd_templated_helpers.h"
|
||||
#include "implement/xmm/moremovs.h"
|
||||
#include "implement/xmm/arithmetic.h"
|
||||
#include "implement/xmm/comparisons.h"
|
||||
#include "implement/xmm/shufflepack.h"
|
||||
#include "implement/group1.h"
|
||||
|
@ -730,4 +729,7 @@ __forceinline void xWrite( T val )
|
|||
}
|
||||
}
|
||||
|
||||
#include "implement/simd_helpers.h"
|
||||
#include "implement/simd_arithmetic.h"
|
||||
|
||||
#include "inlines.inl"
|
||||
|
|
|
@ -90,12 +90,8 @@ const MovhlImplAll<0x12> xMOVL;
|
|||
const MovhlImpl_RtoR<0x16> xMOVLH;
|
||||
const MovhlImpl_RtoR<0x12> xMOVHL;
|
||||
|
||||
const SimdImpl_AndNot xANDN;
|
||||
const SimdImpl_COMI<true> xCOMI;
|
||||
const SimdImpl_COMI<false> xUCOMI;
|
||||
const SimdImpl_rSqrt<0x53> xRCP;
|
||||
const SimdImpl_rSqrt<0x52> xRSQRT;
|
||||
const SimdImpl_Sqrt<0x51> xSQRT;
|
||||
|
||||
const SimdImpl_MinMax<0x5f> xMAX;
|
||||
const SimdImpl_MinMax<0x5d> xMIN;
|
||||
|
@ -160,32 +156,186 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S
|
|||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
const SimdImpl_Shift<0xd0, 2> xPSRL;
|
||||
const SimdImpl_Shift<0xf0, 6> xPSLL;
|
||||
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||
// xImplSimd_DestRegSSE emitters. OpWriteSSE( pre, op ) is a macro that expands to
// xOpWrite0F( pre, op, to, from ), so it relies on the parameters being named exactly
// `to` and `from`.
void xImplSimd_DestRegSSE::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||
void xImplSimd_DestRegSSE::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||
|
||||
// xImplSimd_DestRegImmSSE emitters. These call xOpWrite0F directly (no macro) because the
// immediate has to be forwarded as a fifth argument.
void xImplSimd_DestRegImmSSE::operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
|
||||
void xImplSimd_DestRegImmSSE::operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); }
|
||||
|
||||
// xImplSimd_DestRegImmMMX emitters. Only Opcode is passed to xOpWrite0F; the struct's
// Prefix member is not used by the MMX forms.
void xImplSimd_DestRegImmMMX::operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
|
||||
void xImplSimd_DestRegImmMMX::operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); }
|
||||
|
||||
// xImplSimd_DestRegEither emitters, SSE destination: OpWriteSSE expands to
// xOpWrite0F( Prefix, Opcode, to, from ).
void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||
void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||
|
||||
// MMX destination: OpWriteMMX expands to xOpWrite0F( Opcode, to, from ), so Prefix is
// not passed for these forms.
void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteMMX( Opcode ); }
|
||||
void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteMMX( Opcode ); }
|
||||
|
||||
// =====================================================================================================
|
||||
// SIMD Arithmetic Instructions
|
||||
// =====================================================================================================
|
||||
|
||||
// _SimdShiftHelper, shift-count-in-operand forms (count comes from a register or memory).
// These use the Opcode member; the SSE overloads also pass Prefix via OpWriteSSE.
void _SimdShiftHelper::operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||
void _SimdShiftHelper::operator()( const xRegisterSSE& to, const ModSibBase& from ) const { OpWriteSSE( Prefix, Opcode ); }
|
||||
|
||||
// MMX overloads: OpWriteMMX passes Opcode only.
void _SimdShiftHelper::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteMMX( Opcode ); }
|
||||
void _SimdShiftHelper::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteMMX( Opcode ); }
|
||||
|
||||
// _SimdShiftHelper, immediate-count form for an XMM destination.
// Uses OpcodeImm (not Opcode) and a hard-coded 0x66 rather than the Prefix member.
void _SimdShiftHelper::operator()( const xRegisterSSE& to, u8 imm8 ) const
|
||||
{
|
||||
SimdPrefix( 0x66, OpcodeImm );
|
||||
// Modcode is passed as the `regfield` argument of EmitSibMagic, alongside the register.
EmitSibMagic( (int)Modcode, to );
|
||||
// The shift count is written last, as a single byte.
xWrite8( imm8 );
|
||||
}
|
||||
|
||||
// _SimdShiftHelper, immediate-count form for an MM destination.
// Identical to the XMM version except that SimdPrefix is given 0x00 instead of 0x66.
void _SimdShiftHelper::operator()( const xRegisterMMX& to, u8 imm8 ) const
|
||||
{
|
||||
SimdPrefix( 0x00, OpcodeImm );
|
||||
// Modcode is passed as the `regfield` argument of EmitSibMagic, alongside the register.
EmitSibMagic( (int)Modcode, to );
|
||||
// The shift count is written last, as a single byte.
xWrite8( imm8 );
|
||||
}
|
||||
|
||||
// DQ form, XMM destination and immediate count only.
// Always uses prefix 0x66 and opcode 0x73; the modcode is the Q row's Modcode plus one,
// so it follows whatever Modcode the instance's Q member was initialized with.
void xImplSimd_Shift::DQ( const xRegisterSSE& to, u8 imm8 ) const
|
||||
{
|
||||
xOpWrite0F( 0x66, 0x73, (int)Q.Modcode+1, to, imm8 );
|
||||
}
|
||||
|
||||
|
||||
// Encoding table for xPSRA. Each row initializes one _SimdShiftHelper in member order
// { Prefix, Opcode, OpcodeImm, Modcode }. There is no Q row: xImplSimd_ShiftWithoutQ
// only declares W and D.
const xImplSimd_ShiftWithoutQ xPSRA =
|
||||
{
|
||||
{ 0x66, 0xe1, 0x71, 4 }, // W
|
||||
{ 0x66, 0xe2, 0x72, 4 } // D
|
||||
};
|
||||
|
||||
// Encoding table for xPSRL. Each row initializes one _SimdShiftHelper in member order
// { Prefix, Opcode, OpcodeImm, Modcode }. xImplSimd_Shift::DQ reads Q.Modcode, so the
// Q row's last value (2) also determines the modcode DQ emits (2+1).
const xImplSimd_Shift xPSRL =
|
||||
{
|
||||
{ 0x66, 0xd1, 0x71, 2 }, // W
|
||||
{ 0x66, 0xd2, 0x72, 2 }, // D
|
||||
{ 0x66, 0xd3, 0x73, 2 }, // Q
|
||||
};
|
||||
|
||||
// Encoding table for xPSLL. Each row initializes one _SimdShiftHelper in member order
// { Prefix, Opcode, OpcodeImm, Modcode }. xImplSimd_Shift::DQ reads Q.Modcode, so the
// Q row's last value (6) also determines the modcode DQ emits (6+1).
const xImplSimd_Shift xPSLL =
|
||||
{
|
||||
{ 0x66, 0xf1, 0x71, 6 }, // W
|
||||
{ 0x66, 0xf2, 0x72, 6 }, // D
|
||||
{ 0x66, 0xf3, 0x73, 6 }, // Q
|
||||
};
|
||||
|
||||
|
||||
// Encoding table for xPADD. Rows are { Prefix, Opcode } and must stay in the member order
// of xImplSimd_AddSub (B, W, D, Q, SB, SW, USB, USW) because this is positional aggregate
// initialization. Opcodes are written as offsets from the USB opcode (0xdc), except Q.
const xImplSimd_AddSub xPADD =
|
||||
{
|
||||
{ 0x66, 0xdc+0x20 }, // B
|
||||
{ 0x66, 0xdc+0x21 }, // W
|
||||
{ 0x66, 0xdc+0x22 }, // D
|
||||
{ 0x66, 0xd4 }, // Q
|
||||
|
||||
{ 0x66, 0xdc+0x10 }, // SB
|
||||
{ 0x66, 0xdc+0x11 }, // SW
|
||||
{ 0x66, 0xdc }, // USB
|
||||
{ 0x66, 0xdc+1 }, // USW
|
||||
};
|
||||
|
||||
// Encoding table for xPSUB. Rows are { Prefix, Opcode } and must stay in the member order
// of xImplSimd_AddSub (B, W, D, Q, SB, SW, USB, USW) because this is positional aggregate
// initialization. Opcodes are written as offsets from the USB opcode (0xd8), except Q.
const xImplSimd_AddSub xPSUB =
|
||||
{
|
||||
{ 0x66, 0xd8+0x20 }, // B
|
||||
{ 0x66, 0xd8+0x21 }, // W
|
||||
{ 0x66, 0xd8+0x22 }, // D
|
||||
{ 0x66, 0xfb }, // Q
|
||||
|
||||
{ 0x66, 0xd8+0x10 }, // SB
|
||||
{ 0x66, 0xd8+0x11 }, // SW
|
||||
{ 0x66, 0xd8 }, // USB
|
||||
{ 0x66, 0xd8+1 }, // USW
|
||||
};
|
||||
|
||||
|
||||
// Encoding table for xPMUL. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_PMul. The first five rows (LW..HRSW) initialize xImplSimd_DestRegEither
// members; the last two (LD, DQ) initialize xImplSimd_DestRegSSE members.
const xImplSimd_PMul xPMUL =
|
||||
{
|
||||
{ 0x66, 0xd5 }, // LW
|
||||
{ 0x66, 0xe5 }, // HW
|
||||
{ 0x66, 0xe4 }, // HUW
|
||||
{ 0x66, 0xf4 }, // UDQ
|
||||
|
||||
|
||||
// Two-byte opcode values: the low byte is 0x38 in all three rows below.
{ 0x66, 0x0b38 }, // HRSW
|
||||
{ 0x66, 0x4038 }, // LD
|
||||
{ 0x66, 0x2838 }, // DQ
|
||||
};
|
||||
|
||||
// Encoding table for xRSQRT. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_rSqrt (PS, SS); both forms share opcode 0x52 and differ only in prefix.
const xImplSimd_rSqrt xRSQRT =
|
||||
{
|
||||
{ 0x00, 0x52 }, // PS
|
||||
{ 0xf3, 0x52 } // SS
|
||||
};
|
||||
|
||||
// Encoding table for xRCP. It reuses the xImplSimd_rSqrt layout (PS, SS); rows are
// { Prefix, Opcode }, both with opcode 0x53 and differing only in prefix.
const xImplSimd_rSqrt xRCP =
|
||||
{
|
||||
{ 0x00, 0x53 }, // PS
|
||||
{ 0xf3, 0x53 } // SS
|
||||
};
|
||||
|
||||
// Encoding table for xSQRT. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_Sqrt (PS, SS, SD); all three share opcode 0x51 and differ only in prefix.
const xImplSimd_Sqrt xSQRT =
|
||||
{
|
||||
{ 0x00, 0x51 }, // PS
|
||||
{ 0xf3, 0x51 }, // SS
|
||||
{ 0xf2, 0x51 } // SD
|
||||
};
|
||||
|
||||
// Encoding table for xANDN. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_AndNot (PS, PD); both share opcode 0x55 and differ only in prefix.
const xImplSimd_AndNot xANDN =
|
||||
{
|
||||
{ 0x00, 0x55 }, // PS
|
||||
{ 0x66, 0x55 } // PD
|
||||
};
|
||||
|
||||
// Encoding table for xPABS. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_PAbsolute (B, W, D). All three use two-byte opcode values whose low byte is 0x38.
const xImplSimd_PAbsolute xPABS =
|
||||
{
|
||||
{ 0x66, 0x1c38 }, // B
|
||||
{ 0x66, 0x1d38 }, // W
|
||||
{ 0x66, 0x1e38 } // D
|
||||
};
|
||||
|
||||
// Encoding table for xPSIGN. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_PSign (B, W, D). All three use two-byte opcode values whose low byte is 0x38.
const xImplSimd_PSign xPSIGN =
|
||||
{
|
||||
{ 0x66, 0x0838 }, // B
|
||||
{ 0x66, 0x0938 }, // W
|
||||
{ 0x66, 0x0a38 }, // D
|
||||
};
|
||||
|
||||
// Encoding table for xPMADD. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_PMultAdd (WD, UBSW).
const xImplSimd_PMultAdd xPMADD =
|
||||
{
|
||||
{ 0x66, 0xf5 }, // WD
|
||||
// PMADDUBSW is encoded 0F 38 04 /r. The other 0F 38 instructions in these tables put
// 0x38 in the low byte and the final opcode byte in the high byte (e.g. 0x0b38 for
// PMULHRSW), so the value here is 0x0438. The previous 0xf438 would have produced
// 0F 38 F4, which is not PMADDUBSW.
{ 0x66, 0x0438 }, // UBSW
|
||||
};
|
||||
|
||||
// Encoding table for xHADD. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_HorizAdd (PS, PD); both share opcode 0x7c and differ only in prefix.
const xImplSimd_HorizAdd xHADD =
|
||||
{
|
||||
{ 0xf2, 0x7c }, // PS
|
||||
{ 0x66, 0x7c }, // PD
|
||||
};
|
||||
|
||||
// Encoding table for xDP. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_DotProduct (PS, PD). Each row initializes an xImplSimd_DestRegImmSSE, so the
// emitted forms take a trailing imm8. Both opcode values have 0x3a as their low byte.
const xImplSimd_DotProduct xDP =
|
||||
{
|
||||
{ 0x66,0x403a }, // PS
|
||||
{ 0x66,0x413a }, // PD
|
||||
};
|
||||
|
||||
// Encoding table for xROUND. Rows are { Prefix, Opcode } in the member order of
// xImplSimd_Round (PS, PD, SS, SD). Each row initializes an xImplSimd_DestRegImmSSE, so
// the emitted forms take a trailing imm8. All four rows use prefix 0x66, and every
// opcode value has 0x3a as its low byte.
const xImplSimd_Round xROUND =
|
||||
{
|
||||
{ 0x66,0x083a }, // PS
|
||||
{ 0x66,0x093a }, // PD
|
||||
{ 0x66,0x0a3a }, // SS
|
||||
{ 0x66,0x0b3a }, // SD
|
||||
};
|
||||
|
||||
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||
const SimdImpl_AddSub<0xd8, 0xfb> xPSUB;
|
||||
const SimdImpl_PMinMax<0xde,0x3c> xPMAX;
|
||||
const SimdImpl_PMinMax<0xda,0x38> xPMIN;
|
||||
|
||||
const SimdImpl_PMul xPMUL;
|
||||
const SimdImpl_PCompare xPCMP;
|
||||
const SimdImpl_PShuffle xPSHUF;
|
||||
const SimdImpl_PUnpack xPUNPCK;
|
||||
const SimdImpl_Unpack xUNPCK;
|
||||
const SimdImpl_Pack xPACK;
|
||||
|
||||
const SimdImpl_PAbsolute xPABS;
|
||||
const SimdImpl_PSign xPSIGN;
|
||||
const SimdImpl_PInsert xPINSR;
|
||||
const SimdImpl_PExtract xPEXTR;
|
||||
const SimdImpl_PMultAdd xPMADD;
|
||||
const SimdImpl_HorizAdd xHADD;
|
||||
|
||||
const SimdImpl_Blend xBLEND;
|
||||
const SimdImpl_DotProduct xDP;
|
||||
const SimdImpl_Round xROUND;
|
||||
|
||||
const SimdImpl_PMove<true> xPMOVSX;
|
||||
const SimdImpl_PMove<false> xPMOVZX;
|
||||
|
|
Loading…
Reference in New Issue