mirror of https://github.com/PCSX2/pcsx2.git
Fixed a small bug from my last commit (mostly only affected debug builds), and implemented PALIGNR/MOVSLDUP/PABS/PSIGN/PEXTR/PINS.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1036 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
5c312c36c7
commit
deb642af43
|
@ -0,0 +1,230 @@
|
||||||
|
/* Pcsx2 - Pc Ps2 Emulator
|
||||||
|
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Used for PSRA, which lacks the Q form.
|
||||||
|
//
|
||||||
|
template< u16 OpcodeBase1, u8 Modcode >
|
||||||
|
class SimdImpl_ShiftWithoutQ
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
template< u16 Opcode1, u16 OpcodeImm, u8 Modcode >
|
||||||
|
class ShiftHelper
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ShiftHelper() {}
|
||||||
|
|
||||||
|
template< typename OperandType >
|
||||||
|
__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const xRegisterSIMD<OperandType>& from ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, Opcode1, to, from );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename OperandType >
|
||||||
|
__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const void* from ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, Opcode1, to, from );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename OperandType >
|
||||||
|
__noinline void operator()( const xRegisterSIMD<OperandType>& to, const ModSibBase& from ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, Opcode1, to, from );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename OperandType >
|
||||||
|
__emitinline void operator()( const xRegisterSIMD<OperandType>& to, u8 imm ) const
|
||||||
|
{
|
||||||
|
SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm );
|
||||||
|
ModRM( 3, (int)Modcode, to.Id );
|
||||||
|
xWrite<u8>( imm );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
const ShiftHelper<OpcodeBase1+1,0x71,Modcode> W;
|
||||||
|
const ShiftHelper<OpcodeBase1+2,0x72,Modcode> D;
|
||||||
|
|
||||||
|
SimdImpl_ShiftWithoutQ() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Implements PSRL and PSLL
|
||||||
|
//
|
||||||
|
template< u16 OpcodeBase1, u8 Modcode >
|
||||||
|
class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const ShiftHelper<OpcodeBase1+3,0x73,Modcode> Q;
|
||||||
|
|
||||||
|
void DQ( const xRegisterSSE& to, u8 imm ) const
|
||||||
|
{
|
||||||
|
SimdPrefix( 0x66, 0x73 );
|
||||||
|
ModRM( 3, (int)Modcode+1, to.Id );
|
||||||
|
xWrite<u8>( imm );
|
||||||
|
}
|
||||||
|
|
||||||
|
SimdImpl_Shift() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
template< u16 OpcodeB, u16 OpcodeQ >
|
||||||
|
class SimdImpl_AddSub
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;
|
||||||
|
const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;
|
||||||
|
const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;
|
||||||
|
const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;
|
||||||
|
|
||||||
|
// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
|
||||||
|
const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;
|
||||||
|
|
||||||
|
// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
|
||||||
|
const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;
|
||||||
|
|
||||||
|
// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
|
||||||
|
const SimdImpl_DestRegEither<0x66,OpcodeB> USB;
|
||||||
|
|
||||||
|
// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
|
||||||
|
const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
|
||||||
|
|
||||||
|
SimdImpl_AddSub() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
class SimdImpl_PMul
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const SimdImpl_DestRegEither<0x66,0xd5> LW;
|
||||||
|
const SimdImpl_DestRegEither<0x66,0xe5> HW;
|
||||||
|
const SimdImpl_DestRegEither<0x66,0xe4> HUW;
|
||||||
|
const SimdImpl_DestRegEither<0x66,0xf4> UDQ;
|
||||||
|
|
||||||
|
// [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
|
||||||
|
// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
|
||||||
|
// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
|
||||||
|
// bits. Rounding is always performed by adding 1 to the least significant bit of the
|
||||||
|
// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
|
||||||
|
// immediately to the right of the most significant bit of each 18-bit intermediate
|
||||||
|
// result and packed to the destination operand.
|
||||||
|
//
|
||||||
|
// Both operands can be MMX or XMM registers. Source can be register or memory.
|
||||||
|
//
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;
|
||||||
|
|
||||||
|
// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
|
||||||
|
// the low 32 bits of each product in xmm1.
|
||||||
|
const SimdImpl_DestRegSSE<0x66,0x4038> LD;
|
||||||
|
|
||||||
|
// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
|
||||||
|
const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
|
||||||
|
|
||||||
|
SimdImpl_PMul() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
|
||||||
|
//
|
||||||
|
template< u16 OpcodeSSE >
|
||||||
|
class SimdImpl_rSqrt
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;
|
||||||
|
const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;
|
||||||
|
SimdImpl_rSqrt() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// SQRT has PS/SS/SD forms, but not the PD form.
|
||||||
|
//
|
||||||
|
template< u16 OpcodeSSE >
|
||||||
|
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
|
||||||
|
SimdImpl_Sqrt() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
class SimdImpl_AndNot
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const SimdImpl_DestRegSSE<0x00,0x55> PS;
|
||||||
|
const SimdImpl_DestRegSSE<0x66,0x55> PD;
|
||||||
|
SimdImpl_AndNot() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Packed absolute value. [sSSE3 only]
|
||||||
|
//
|
||||||
|
class SimdImpl_PAbsolute
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SimdImpl_PAbsolute() {}
|
||||||
|
|
||||||
|
// [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
|
||||||
|
// in dest, as UNSIGNED.
|
||||||
|
const SimdImpl_DestRegEither<0x66, 0x1c38> B;
|
||||||
|
|
||||||
|
// [sSSE-3] Computes the absolute value of word in the src, and stores the result
|
||||||
|
// in dest, as UNSIGNED.
|
||||||
|
const SimdImpl_DestRegEither<0x66, 0x1d38> W;
|
||||||
|
|
||||||
|
// [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
|
||||||
|
// result in dest, as UNSIGNED.
|
||||||
|
const SimdImpl_DestRegEither<0x66, 0x1e38> D;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the
|
||||||
|
// corresponding sign in src.
|
||||||
|
//
|
||||||
|
class SimdImpl_PSign
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SimdImpl_PSign() {}
|
||||||
|
|
||||||
|
// [sSSE-3] negates each byte element of dest if the signed integer value of the
|
||||||
|
// corresponding data element in src is less than zero. If the signed integer value
|
||||||
|
// of a data element in src is positive, the corresponding data element in dest is
|
||||||
|
// unchanged. If a data element in src is zero, the corresponding data element in
|
||||||
|
// dest is set to zero.
|
||||||
|
const SimdImpl_DestRegEither<0x66, 0x0838> B;
|
||||||
|
|
||||||
|
// [sSSE-3] negates each word element of dest if the signed integer value of the
|
||||||
|
// corresponding data element in src is less than zero. If the signed integer value
|
||||||
|
// of a data element in src is positive, the corresponding data element in dest is
|
||||||
|
// unchanged. If a data element in src is zero, the corresponding data element in
|
||||||
|
// dest is set to zero.
|
||||||
|
const SimdImpl_DestRegEither<0x66, 0x0938> W;
|
||||||
|
|
||||||
|
// [sSSE-3] negates each doubleword element of dest if the signed integer value
|
||||||
|
// of the corresponding data element in src is less than zero. If the signed integer
|
||||||
|
// value of a data element in src is positive, the corresponding data element in dest
|
||||||
|
// is unchanged. If a data element in src is zero, the corresponding data element in
|
||||||
|
// dest is set to zero.
|
||||||
|
const SimdImpl_DestRegEither<0x66, 0x0a38> D;
|
||||||
|
|
||||||
|
};
|
|
@ -0,0 +1,152 @@
|
||||||
|
/* Pcsx2 - Pc Ps2 Emulator
|
||||||
|
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// MMX / SSE Helper Functions!
|
||||||
|
|
||||||
|
extern void SimdPrefix( u8 prefix, u16 opcode );
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// xmm emitter helpers for xmm instruction with prefixes.
|
||||||
|
// These functions also support deducing the use of the prefix from the template parameters,
|
||||||
|
// since most xmm instructions use a prefix and most mmx instructions do not. (some mov
|
||||||
|
// instructions violate this "guideline.")
|
||||||
|
//
|
||||||
|
template< typename T, typename T2 >
|
||||||
|
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
|
||||||
|
{
|
||||||
|
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||||
|
ModRM_Direct( to.Id, from.Id );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename T >
|
||||||
|
__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
|
||||||
|
{
|
||||||
|
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||||
|
EmitSibMagic( reg.Id, sib );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename T >
|
||||||
|
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
|
||||||
|
{
|
||||||
|
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
||||||
|
xWriteDisp( reg.Id, data );
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// xmm emitter helpers for xmm instructions *without* prefixes.
|
||||||
|
// These are normally used for special instructions that have MMX forms only (non-SSE), however
|
||||||
|
// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
|
||||||
|
//
|
||||||
|
template< typename T, typename T2 >
|
||||||
|
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, opcode );
|
||||||
|
ModRM_Direct( to.Id, from.Id );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename T >
|
||||||
|
__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, opcode );
|
||||||
|
EmitSibMagic( reg.Id, sib );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename T >
|
||||||
|
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, opcode );
|
||||||
|
xWriteDisp( reg.Id, data );
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
|
||||||
|
// like ANDPS/ANDPD
|
||||||
|
//
|
||||||
|
template< u8 Prefix, u16 Opcode >
|
||||||
|
class SimdImpl_DestRegSSE
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
|
||||||
|
SimdImpl_DestRegSSE() {} //GCWho?
|
||||||
|
};
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
|
||||||
|
// (PSHUFD / PSHUFHW / etc).
|
||||||
|
//
|
||||||
|
template< u8 Prefix, u16 Opcode >
|
||||||
|
class SimdImpl_DestRegImmSSE
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||||
|
|
||||||
|
SimdImpl_DestRegImmSSE() {} //GCWho?
|
||||||
|
};
|
||||||
|
|
||||||
|
template< u8 Prefix, u16 Opcode >
|
||||||
|
class SimdImpl_DestRegImmMMX
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||||
|
__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||||
|
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
||||||
|
|
||||||
|
SimdImpl_DestRegImmMMX() {} //GCWho?
|
||||||
|
};
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// For implementing MMX/SSE operations that have reg,reg/rm forms only,
|
||||||
|
// but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops).
|
||||||
|
//
|
||||||
|
template< u8 Prefix, u16 Opcode >
|
||||||
|
class SimdImpl_DestRegEither
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
template< typename T > __forceinline
|
||||||
|
void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
template< typename T > __forceinline
|
||||||
|
void operator()( const xRegisterSIMD<T>& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
template< typename T > __forceinline
|
||||||
|
void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
|
||||||
|
SimdImpl_DestRegEither() {} //GCWho?
|
||||||
|
};
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// For implementing MMX/SSE operations which the destination *must* be a register, but the source
|
||||||
|
// can be regDirect or ModRM (indirect).
|
||||||
|
//
|
||||||
|
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
|
||||||
|
class SimdImpl_DestRegStrict
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||||
|
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||||
|
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
||||||
|
|
||||||
|
SimdImpl_DestRegStrict() {} //GCWho?
|
||||||
|
};
|
||||||
|
|
|
@ -0,0 +1,131 @@
|
||||||
|
/* Pcsx2 - Pc Ps2 Emulator
|
||||||
|
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
template< u16 OpcodeSSE >
|
||||||
|
class SimdImpl_MinMax
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision
|
||||||
|
const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision
|
||||||
|
const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision
|
||||||
|
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision
|
||||||
|
|
||||||
|
SimdImpl_MinMax() {} //GChow?
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
template< SSE2_ComparisonType CType >
|
||||||
|
class SimdImpl_Compare
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
template< u8 Prefix > struct Woot
|
||||||
|
{
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||||
|
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
||||||
|
Woot() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
const Woot<0x00> PS;
|
||||||
|
const Woot<0x66> PD;
|
||||||
|
const Woot<0xf3> SS;
|
||||||
|
const Woot<0xf2> SD;
|
||||||
|
SimdImpl_Compare() {} //GCWhat?
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
class SimdImpl_PCompare
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SimdImpl_PCompare() {}
|
||||||
|
|
||||||
|
// Compare packed bytes for equality.
|
||||||
|
// If a data element in dest is equal to the corresponding date element src, the
|
||||||
|
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x74> EQB;
|
||||||
|
|
||||||
|
// Compare packed words for equality.
|
||||||
|
// If a data element in dest is equal to the corresponding date element src, the
|
||||||
|
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x75> EQW;
|
||||||
|
|
||||||
|
// Compare packed doublewords [32-bits] for equality.
|
||||||
|
// If a data element in dest is equal to the corresponding date element src, the
|
||||||
|
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x76> EQD;
|
||||||
|
|
||||||
|
// Compare packed signed bytes for greater than.
|
||||||
|
// If a data element in dest is greater than the corresponding date element src, the
|
||||||
|
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x64> GTB;
|
||||||
|
|
||||||
|
// Compare packed signed words for greater than.
|
||||||
|
// If a data element in dest is greater than the corresponding date element src, the
|
||||||
|
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x65> GTW;
|
||||||
|
|
||||||
|
// Compare packed signed doublewords [32-bits] for greater than.
|
||||||
|
// If a data element in dest is greater than the corresponding date element src, the
|
||||||
|
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x66> GTD;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
template< u8 Opcode1, u16 Opcode2 >
|
||||||
|
class SimdImpl_PMinMax
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SimdImpl_PMinMax() {}
|
||||||
|
|
||||||
|
// Compare packed unsigned byte integers in dest to src and store packed min/max
|
||||||
|
// values in dest.
|
||||||
|
// Operation can be performed on either MMX or SSE operands.
|
||||||
|
const SimdImpl_DestRegEither<0x66,Opcode1> UB;
|
||||||
|
|
||||||
|
// Compare packed signed word integers in dest to src and store packed min/max
|
||||||
|
// values in dest.
|
||||||
|
// Operation can be performed on either MMX or SSE operands.
|
||||||
|
const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW;
|
||||||
|
|
||||||
|
// [SSE-4.1] Compare packed signed byte integers in dest to src and store
|
||||||
|
// packed min/max values in dest. (SSE operands only)
|
||||||
|
const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB;
|
||||||
|
|
||||||
|
// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
|
||||||
|
// packed min/max values in dest. (SSE operands only)
|
||||||
|
const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD;
|
||||||
|
|
||||||
|
// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
|
||||||
|
// packed min/max values in dest. (SSE operands only)
|
||||||
|
const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW;
|
||||||
|
|
||||||
|
// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
|
||||||
|
// packed min/max values in dest. (SSE operands only)
|
||||||
|
const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
|
||||||
|
};
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
/* Pcsx2 - Pc Ps2 Emulator
|
||||||
|
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Moves to/from high/low portions of an xmm register.
|
||||||
|
// These instructions cannot be used in reg/reg form.
|
||||||
|
//
|
||||||
|
template< u16 Opcode >
|
||||||
|
class MovhlImplAll
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
template< u8 Prefix >
|
||||||
|
struct Woot
|
||||||
|
{
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); }
|
||||||
|
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
Woot<0x00> PS;
|
||||||
|
Woot<0x66> PD;
|
||||||
|
|
||||||
|
MovhlImplAll() {} //GCC.
|
||||||
|
};
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
|
||||||
|
// do something kinda different! Fun!
|
||||||
|
//
|
||||||
|
template< u16 Opcode >
|
||||||
|
class MovhlImpl_RtoR
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); }
|
||||||
|
__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
||||||
|
|
||||||
|
MovhlImpl_RtoR() {} //GCC.
|
||||||
|
};
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
|
||||||
|
class MovapsImplAll
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); }
|
||||||
|
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
||||||
|
__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); }
|
||||||
|
|
||||||
|
MovapsImplAll() {} //GCC.
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
template< u8 AltPrefix, u16 OpcodeSSE >
|
||||||
|
class SimdImpl_UcomI
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS;
|
||||||
|
const SimdImpl_DestRegSSE<AltPrefix,OpcodeSSE> SD;
|
||||||
|
SimdImpl_UcomI() {}
|
||||||
|
};
|
|
@ -1,646 +0,0 @@
|
||||||
/* Pcsx2 - Pc Ps2 Emulator
|
|
||||||
* Copyright (C) 2002-2009 Pcsx2 Team
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// MMX / SSE Helper Functions!
|
|
||||||
|
|
||||||
extern void SimdPrefix( u8 prefix, u16 opcode );
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// xmm emitter helpers for xmm instruction with prefixes.
|
|
||||||
// These functions also support deducing the use of the prefix from the template parameters,
|
|
||||||
// since most xmm instructions use a prefix and most mmx instructions do not. (some mov
|
|
||||||
// instructions violate this "guideline.")
|
|
||||||
//
|
|
||||||
template< typename T, typename T2 >
|
|
||||||
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
|
|
||||||
{
|
|
||||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
|
||||||
ModRM_Direct( to.Id, from.Id );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename T >
|
|
||||||
__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
|
|
||||||
{
|
|
||||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
|
||||||
EmitSibMagic( reg.Id, sib );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename T >
|
|
||||||
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
|
|
||||||
{
|
|
||||||
SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
|
|
||||||
xWriteDisp( reg.Id, data );
|
|
||||||
}
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// xmm emitter helpers for xmm instructions *without* prefixes.
|
|
||||||
// These are normally used for special instructions that have MMX forms only (non-SSE), however
|
|
||||||
// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
|
|
||||||
//
|
|
||||||
template< typename T, typename T2 >
|
|
||||||
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
|
|
||||||
{
|
|
||||||
SimdPrefix( 0, opcode );
|
|
||||||
ModRM_Direct( to.Id, from.Id );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename T >
|
|
||||||
__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
|
|
||||||
{
|
|
||||||
SimdPrefix( 0, opcode );
|
|
||||||
EmitSibMagic( reg.Id, sib );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename T >
|
|
||||||
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
|
|
||||||
{
|
|
||||||
SimdPrefix( 0, opcode );
|
|
||||||
xWriteDisp( reg.Id, data );
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Moves to/from high/low portions of an xmm register.
|
|
||||||
// These instructions cannot be used in reg/reg form.
|
|
||||||
//
|
|
||||||
template< u16 Opcode >
|
|
||||||
class MovhlImplAll
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
template< u8 Prefix >
|
|
||||||
struct Woot
|
|
||||||
{
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); }
|
|
||||||
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); }
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
Woot<0x00> PS;
|
|
||||||
Woot<0x66> PD;
|
|
||||||
|
|
||||||
MovhlImplAll() {} //GCC.
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
|
|
||||||
// do something kinda different! Fun!
|
|
||||||
//
|
|
||||||
template< u16 Opcode >
|
|
||||||
class MovhlImpl_RtoR
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); }
|
|
||||||
__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
|
||||||
|
|
||||||
MovhlImpl_RtoR() {} //GCC.
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
|
|
||||||
class MovapsImplAll
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); }
|
|
||||||
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); }
|
|
||||||
|
|
||||||
MovapsImplAll() {} //GCC.
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for
|
|
||||||
// a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms).
|
|
||||||
//
|
|
||||||
template< u16 Opcode >
|
|
||||||
class SimdImpl_PackedLogic
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
template< typename T > __forceinline
|
|
||||||
void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
|
||||||
template< typename T > __forceinline
|
|
||||||
void operator()( const xRegisterSIMD<T>& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
|
||||||
template< typename T > __forceinline
|
|
||||||
void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
|
|
||||||
|
|
||||||
SimdImpl_PackedLogic() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
|
|
||||||
// like ANDPS/ANDPD
|
|
||||||
//
|
|
||||||
template< u8 Prefix, u16 Opcode >
|
|
||||||
class SimdImpl_DestRegSSE
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegSSE() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
|
|
||||||
// (PSHUFD / PSHUFHW / etc).
|
|
||||||
//
|
|
||||||
template< u8 Prefix, u16 Opcode >
|
|
||||||
class SimdImpl_DestRegImmSSE
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegImmSSE() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
template< u8 Prefix, u16 Opcode >
|
|
||||||
class SimdImpl_DestRegImmMMX
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
|
||||||
__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
|
||||||
__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegImmMMX() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For implementing MMX/SSE operations that have reg,reg/rm forms only,
|
|
||||||
// but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops).
|
|
||||||
//
|
|
||||||
template< u8 Prefix, u16 Opcode >
|
|
||||||
class SimdImpl_DestRegEither
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
template< typename DestOperandType > __forceinline
|
|
||||||
void operator()( const xRegisterSIMD<DestOperandType>& to, const xRegisterSIMD<DestOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
template< typename DestOperandType > __forceinline
|
|
||||||
void operator()( const xRegisterSIMD<DestOperandType>& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
template< typename DestOperandType > __forceinline
|
|
||||||
void operator()( const xRegisterSIMD<DestOperandType>& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegEither() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For implementing MMX/SSE operations which the destination *must* be a register, but the source
|
|
||||||
// can be regDirect or ModRM (indirect).
|
|
||||||
//
|
|
||||||
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
|
|
||||||
class SimdImpl_DestRegStrict
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
|
||||||
__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
|
||||||
__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); }
|
|
||||||
|
|
||||||
SimdImpl_DestRegStrict() {} //GCWho?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
template< u16 OpcodeSSE >
|
|
||||||
class SimdImpl_PSPD_SSSD
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision
|
|
||||||
const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision
|
|
||||||
const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision
|
|
||||||
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision
|
|
||||||
|
|
||||||
SimdImpl_PSPD_SSSD() {} //GChow?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
template< u16 OpcodeSSE >
|
|
||||||
class SimdImpl_AndNot
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;
|
|
||||||
const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD;
|
|
||||||
SimdImpl_AndNot() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For instructions that have SS/SD form only (UCOMI, etc)
|
|
||||||
// AltPrefix - prefixed used for doubles (SD form).
|
|
||||||
template< u8 AltPrefix, u16 OpcodeSSE >
|
|
||||||
class SimdImpl_SS_SD
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS;
|
|
||||||
const SimdImpl_DestRegSSE<AltPrefix,OpcodeSSE> SD;
|
|
||||||
SimdImpl_SS_SD() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
|
|
||||||
template< u16 OpcodeSSE >
|
|
||||||
class SimdImpl_rSqrt
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;
|
|
||||||
const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;
|
|
||||||
SimdImpl_rSqrt() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// For instructions that have PS/SS/SD form only (most commonly Sqrt functions)
|
|
||||||
template< u16 OpcodeSSE >
|
|
||||||
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
|
|
||||||
SimdImpl_Sqrt() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
template< u16 OpcodeSSE >
|
|
||||||
class SimdImpl_Shuffle
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
template< u8 Prefix > struct Woot
|
|
||||||
{
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
|
||||||
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
|
||||||
Woot() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
const Woot<0x00> PS;
|
|
||||||
const Woot<0x66> PD;
|
|
||||||
|
|
||||||
SimdImpl_Shuffle() {} //GCWhat?
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
template< SSE2_ComparisonType CType >
|
|
||||||
class SimdImpl_Compare
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
template< u8 Prefix > struct Woot
|
|
||||||
{
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
|
||||||
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
|
||||||
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
|
|
||||||
Woot() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
const Woot<0x00> PS;
|
|
||||||
const Woot<0x66> PD;
|
|
||||||
const Woot<0xf3> SS;
|
|
||||||
const Woot<0xf2> SD;
|
|
||||||
SimdImpl_Compare() {} //GCWhat?
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
//
|
|
||||||
template< u16 Opcode1, u16 OpcodeImm, u8 Modcode >
|
|
||||||
class SimdImpl_Shift
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimdImpl_Shift() {}
|
|
||||||
|
|
||||||
template< typename OperandType >
|
|
||||||
__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const xRegisterSIMD<OperandType>& from ) const
|
|
||||||
{
|
|
||||||
writeXMMop( 0x66, Opcode1, to, from );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename OperandType >
|
|
||||||
__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const void* from ) const
|
|
||||||
{
|
|
||||||
writeXMMop( 0x66, Opcode1, to, from );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename OperandType >
|
|
||||||
__noinline void operator()( const xRegisterSIMD<OperandType>& to, const ModSibBase& from ) const
|
|
||||||
{
|
|
||||||
writeXMMop( 0x66, Opcode1, to, from );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename OperandType >
|
|
||||||
__emitinline void operator()( const xRegisterSIMD<OperandType>& to, u8 imm ) const
|
|
||||||
{
|
|
||||||
SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm );
|
|
||||||
ModRM( 3, (int)Modcode, to.Id );
|
|
||||||
xWrite<u8>( imm );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
// Used for PSRA
|
|
||||||
template< u16 OpcodeBase1, u8 Modcode >
|
|
||||||
class SimdImpl_ShiftWithoutQ
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_Shift<OpcodeBase1+1,0x71,Modcode> W;
|
|
||||||
const SimdImpl_Shift<OpcodeBase1+2,0x72,Modcode> D;
|
|
||||||
|
|
||||||
SimdImpl_ShiftWithoutQ() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
|
||||||
template< u16 OpcodeBase1, u8 Modcode >
|
|
||||||
class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_Shift<OpcodeBase1+3,0x73,Modcode> Q;
|
|
||||||
|
|
||||||
void DQ( const xRegisterSSE& to, u8 imm ) const
|
|
||||||
{
|
|
||||||
SimdPrefix( 0x66, 0x73 );
|
|
||||||
ModRM( 3, (int)Modcode+1, to.Id );
|
|
||||||
xWrite<u8>( imm );
|
|
||||||
}
|
|
||||||
|
|
||||||
SimdImpl_ShiftAll() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
template< u16 OpcodeB, u16 OpcodeQ >
|
|
||||||
class SimdImpl_AddSub
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;
|
|
||||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;
|
|
||||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;
|
|
||||||
const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;
|
|
||||||
|
|
||||||
// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
|
|
||||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;
|
|
||||||
|
|
||||||
// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
|
|
||||||
const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;
|
|
||||||
|
|
||||||
// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
|
|
||||||
const SimdImpl_DestRegEither<0x66,OpcodeB> USB;
|
|
||||||
|
|
||||||
// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
|
|
||||||
const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
|
|
||||||
|
|
||||||
SimdImpl_AddSub() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
class SimdImpl_PMul
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
const SimdImpl_DestRegEither<0x66,0xd5> LW;
|
|
||||||
const SimdImpl_DestRegEither<0x66,0xe5> HW;
|
|
||||||
const SimdImpl_DestRegEither<0x66,0xe4> HUW;
|
|
||||||
const SimdImpl_DestRegEither<0x66,0xf4> UDQ;
|
|
||||||
|
|
||||||
// [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
|
|
||||||
// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
|
|
||||||
// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
|
|
||||||
// bits. Rounding is always performed by adding 1 to the least significant bit of the
|
|
||||||
// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
|
|
||||||
// immediately to the right of the most significant bit of each 18-bit intermediate
|
|
||||||
// result and packed to the destination operand.
|
|
||||||
//
|
|
||||||
// Both operands can be MMX or XMM registers. Source can be register or memory.
|
|
||||||
//
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;
|
|
||||||
|
|
||||||
// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
|
|
||||||
// the low 32 bits of each product in xmm1.
|
|
||||||
const SimdImpl_DestRegSSE<0x66,0x4038> LD;
|
|
||||||
|
|
||||||
// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
|
|
||||||
const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
|
|
||||||
|
|
||||||
SimdImpl_PMul() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
class SimdImpl_PCompare
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimdImpl_PCompare() {}
|
|
||||||
|
|
||||||
// Compare packed bytes for equality.
|
|
||||||
// If a data element in dest is equal to the corresponding date element src, the
|
|
||||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x74> EQB;
|
|
||||||
|
|
||||||
// Compare packed words for equality.
|
|
||||||
// If a data element in dest is equal to the corresponding date element src, the
|
|
||||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x75> EQW;
|
|
||||||
|
|
||||||
// Compare packed doublewords [32-bits] for equality.
|
|
||||||
// If a data element in dest is equal to the corresponding date element src, the
|
|
||||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x76> EQD;
|
|
||||||
|
|
||||||
// Compare packed signed bytes for greater than.
|
|
||||||
// If a data element in dest is greater than the corresponding date element src, the
|
|
||||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x64> GTB;
|
|
||||||
|
|
||||||
// Compare packed signed words for greater than.
|
|
||||||
// If a data element in dest is greater than the corresponding date element src, the
|
|
||||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x65> GTW;
|
|
||||||
|
|
||||||
// Compare packed signed doublewords [32-bits] for greater than.
|
|
||||||
// If a data element in dest is greater than the corresponding date element src, the
|
|
||||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x66> GTD;
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
template< u8 Opcode1, u16 Opcode2 >
|
|
||||||
class SimdImpl_PMinMax
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimdImpl_PMinMax() {}
|
|
||||||
|
|
||||||
// Compare packed unsigned byte integers in dest to src and store packed min/max
|
|
||||||
// values in dest.
|
|
||||||
// Operation can be performed on either MMX or SSE operands.
|
|
||||||
const SimdImpl_DestRegEither<0x66,Opcode1> UB;
|
|
||||||
|
|
||||||
// Compare packed signed word integers in dest to src and store packed min/max
|
|
||||||
// values in dest.
|
|
||||||
// Operation can be performed on either MMX or SSE operands.
|
|
||||||
const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW;
|
|
||||||
|
|
||||||
// [SSE-4.1] Compare packed signed byte integers in dest to src and store
|
|
||||||
// packed min/max values in dest. (SSE operands only)
|
|
||||||
const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB;
|
|
||||||
|
|
||||||
// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
|
|
||||||
// packed min/max values in dest. (SSE operands only)
|
|
||||||
const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD;
|
|
||||||
|
|
||||||
// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
|
|
||||||
// packed min/max values in dest. (SSE operands only)
|
|
||||||
const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW;
|
|
||||||
|
|
||||||
// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
|
|
||||||
// packed min/max values in dest. (SSE operands only)
|
|
||||||
const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
class SimdImpl_PShuffle
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimdImpl_PShuffle() {}
|
|
||||||
|
|
||||||
// Copies words from src and inserts them into dest at word locations selected with
|
|
||||||
// the order operand (8 bit immediate).
|
|
||||||
const SimdImpl_DestRegImmMMX<0x00,0x70> W;
|
|
||||||
|
|
||||||
// Copies doublewords from src and inserts them into dest at dword locations selected
|
|
||||||
// with the order operand (8 bit immediate).
|
|
||||||
const SimdImpl_DestRegImmSSE<0x66,0x70> D;
|
|
||||||
|
|
||||||
// Copies words from the low quadword of src and inserts them into the low quadword
|
|
||||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
|
||||||
// The high quadword of src is copied to the high quadword of dest.
|
|
||||||
const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;
|
|
||||||
|
|
||||||
// Copies words from the high quadword of src and inserts them into the high quadword
|
|
||||||
// of dest at word locations selected with the order operand (8 bit immediate).
|
|
||||||
// The low quadword of src is copied to the low quadword of dest.
|
|
||||||
const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;
|
|
||||||
|
|
||||||
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
|
||||||
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
|
||||||
// shuffle control mask is set, then constant zero is written in the result byte.
|
|
||||||
// Each byte in the shuffle control mask forms an index to permute the corresponding
|
|
||||||
// byte in dest. The value of each index is the least significant 4 bits (128-bit
|
|
||||||
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
|
||||||
//
|
|
||||||
// Operands can be MMX or XMM registers.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x0038> B;
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
class SimdImpl_PUnpack
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimdImpl_PUnpack() {}
|
|
||||||
|
|
||||||
// Unpack and interleave low-order bytes from src and dest into dest.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x60> LBW;
|
|
||||||
// Unpack and interleave low-order words from src and dest into dest.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x61> LWD;
|
|
||||||
// Unpack and interleave low-order doublewords from src and dest into dest.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x62> LDQ;
|
|
||||||
// Unpack and interleave low-order quadwords from src and dest into dest.
|
|
||||||
const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;
|
|
||||||
|
|
||||||
// Unpack and interleave high-order bytes from src and dest into dest.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x68> HBW;
|
|
||||||
// Unpack and interleave high-order words from src and dest into dest.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x69> HWD;
|
|
||||||
// Unpack and interleave high-order doublewords from src and dest into dest.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x6a> HDQ;
|
|
||||||
// Unpack and interleave high-order quadwords from src and dest into dest.
|
|
||||||
const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Pack with Signed or Unsigned Saturation
|
|
||||||
//
|
|
||||||
class SimdImpl_Pack
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimdImpl_Pack() {}
|
|
||||||
|
|
||||||
// Converts packed signed word integers from src and dest into packed signed
|
|
||||||
// byte integers in dest, using signed saturation.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x63> SSWB;
|
|
||||||
|
|
||||||
// Converts packed signed dword integers from src and dest into packed signed
|
|
||||||
// word integers in dest, using signed saturation.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x6b> SSDW;
|
|
||||||
|
|
||||||
// Converts packed unsigned word integers from src and dest into packed unsigned
|
|
||||||
// byte integers in dest, using unsigned saturation.
|
|
||||||
const SimdImpl_DestRegEither<0x66,0x67> USWB;
|
|
||||||
|
|
||||||
// [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed
|
|
||||||
// unsigned word integers in dest, using signed saturation.
|
|
||||||
const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
class SimdImpl_Unpack
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SimdImpl_Unpack() {}
|
|
||||||
|
|
||||||
// Unpacks the high doubleword [single-precision] values from src and dest into
|
|
||||||
// dest, such that the result of dest looks like this:
|
|
||||||
// dest[0] <- dest[2]
|
|
||||||
// dest[1] <- src[2]
|
|
||||||
// dest[2] <- dest[3]
|
|
||||||
// dest[3] <- src[3]
|
|
||||||
//
|
|
||||||
const SimdImpl_DestRegSSE<0x00,0x15> HPS;
|
|
||||||
|
|
||||||
// Unpacks the high quadword [double-precision] values from src and dest into
|
|
||||||
// dest, such that the result of dest looks like this:
|
|
||||||
// dest.lo <- dest.hi
|
|
||||||
// dest.hi <- src.hi
|
|
||||||
//
|
|
||||||
const SimdImpl_DestRegSSE<0x66,0x15> HPD;
|
|
||||||
|
|
||||||
// Unpacks the low doubleword [single-precision] values from src and dest into
|
|
||||||
// dest, such that the result of dest looks like this:
|
|
||||||
// dest[3] <- src[1]
|
|
||||||
// dest[2] <- dest[1]
|
|
||||||
// dest[1] <- src[0]
|
|
||||||
// dest[0] <- dest[0]
|
|
||||||
//
|
|
||||||
const SimdImpl_DestRegSSE<0x00,0x14> LPS;
|
|
||||||
|
|
||||||
// Unpacks the low quadword [double-precision] values from src and dest into
|
|
||||||
// dest, effectively moving the low portion of src into the upper portion of dest.
|
|
||||||
// The result of dest is loaded as such:
|
|
||||||
// dest.hi <- src.lo
|
|
||||||
// dest.lo <- dest.lo [remains unchanged!]
|
|
||||||
//
|
|
||||||
const SimdImpl_DestRegSSE<0x66,0x14> LPD;
|
|
||||||
};
|
|
||||||
|
|
|
@ -0,0 +1,306 @@
|
||||||
|
/* Pcsx2 - Pc Ps2 Emulator
|
||||||
|
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
template< u16 OpcodeSSE >
|
||||||
|
class SimdImpl_Shuffle
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
template< u8 Prefix > struct Woot
|
||||||
|
{
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||||
|
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
|
||||||
|
Woot() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
const Woot<0x00> PS;
|
||||||
|
const Woot<0x66> PD;
|
||||||
|
|
||||||
|
SimdImpl_Shuffle() {} //GCWhat?
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
class SimdImpl_PShuffle
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SimdImpl_PShuffle() {}
|
||||||
|
|
||||||
|
// Copies words from src and inserts them into dest at word locations selected with
|
||||||
|
// the order operand (8 bit immediate).
|
||||||
|
const SimdImpl_DestRegImmMMX<0x00,0x70> W;
|
||||||
|
|
||||||
|
// Copies doublewords from src and inserts them into dest at dword locations selected
|
||||||
|
// with the order operand (8 bit immediate).
|
||||||
|
const SimdImpl_DestRegImmSSE<0x66,0x70> D;
|
||||||
|
|
||||||
|
// Copies words from the low quadword of src and inserts them into the low quadword
|
||||||
|
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||||
|
// The high quadword of src is copied to the high quadword of dest.
|
||||||
|
const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;
|
||||||
|
|
||||||
|
// Copies words from the high quadword of src and inserts them into the high quadword
|
||||||
|
// of dest at word locations selected with the order operand (8 bit immediate).
|
||||||
|
// The low quadword of src is copied to the low quadword of dest.
|
||||||
|
const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;
|
||||||
|
|
||||||
|
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
||||||
|
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
||||||
|
// shuffle control mask is set, then constant zero is written in the result byte.
|
||||||
|
// Each byte in the shuffle control mask forms an index to permute the corresponding
|
||||||
|
// byte in dest. The value of each index is the least significant 4 bits (128-bit
|
||||||
|
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
||||||
|
//
|
||||||
|
// Operands can be MMX or XMM registers.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x0038> B;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
class SimdImpl_PUnpack
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SimdImpl_PUnpack() {}
|
||||||
|
|
||||||
|
// Unpack and interleave low-order bytes from src and dest into dest.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x60> LBW;
|
||||||
|
// Unpack and interleave low-order words from src and dest into dest.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x61> LWD;
|
||||||
|
// Unpack and interleave low-order doublewords from src and dest into dest.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x62> LDQ;
|
||||||
|
// Unpack and interleave low-order quadwords from src and dest into dest.
|
||||||
|
const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;
|
||||||
|
|
||||||
|
// Unpack and interleave high-order bytes from src and dest into dest.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x68> HBW;
|
||||||
|
// Unpack and interleave high-order words from src and dest into dest.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x69> HWD;
|
||||||
|
// Unpack and interleave high-order doublewords from src and dest into dest.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x6a> HDQ;
|
||||||
|
// Unpack and interleave high-order quadwords from src and dest into dest.
|
||||||
|
const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Pack with Signed or Unsigned Saturation
|
||||||
|
//
|
||||||
|
class SimdImpl_Pack
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SimdImpl_Pack() {}
|
||||||
|
|
||||||
|
// Converts packed signed word integers from src and dest into packed signed
|
||||||
|
// byte integers in dest, using signed saturation.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x63> SSWB;
|
||||||
|
|
||||||
|
// Converts packed signed dword integers from src and dest into packed signed
|
||||||
|
// word integers in dest, using signed saturation.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x6b> SSDW;
|
||||||
|
|
||||||
|
// Converts packed unsigned word integers from src and dest into packed unsigned
|
||||||
|
// byte integers in dest, using unsigned saturation.
|
||||||
|
const SimdImpl_DestRegEither<0x66,0x67> USWB;
|
||||||
|
|
||||||
|
// [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed
|
||||||
|
// unsigned word integers in dest, using signed saturation.
|
||||||
|
const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
class SimdImpl_Unpack
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SimdImpl_Unpack() {}
|
||||||
|
|
||||||
|
// Unpacks the high doubleword [single-precision] values from src and dest into
|
||||||
|
// dest, such that the result of dest looks like this:
|
||||||
|
// dest[0] <- dest[2]
|
||||||
|
// dest[1] <- src[2]
|
||||||
|
// dest[2] <- dest[3]
|
||||||
|
// dest[3] <- src[3]
|
||||||
|
//
|
||||||
|
const SimdImpl_DestRegSSE<0x00,0x15> HPS;
|
||||||
|
|
||||||
|
// Unpacks the high quadword [double-precision] values from src and dest into
|
||||||
|
// dest, such that the result of dest looks like this:
|
||||||
|
// dest.lo <- dest.hi
|
||||||
|
// dest.hi <- src.hi
|
||||||
|
//
|
||||||
|
const SimdImpl_DestRegSSE<0x66,0x15> HPD;
|
||||||
|
|
||||||
|
// Unpacks the low doubleword [single-precision] values from src and dest into
|
||||||
|
// dest, such that the result of dest looks like this:
|
||||||
|
// dest[3] <- src[1]
|
||||||
|
// dest[2] <- dest[1]
|
||||||
|
// dest[1] <- src[0]
|
||||||
|
// dest[0] <- dest[0]
|
||||||
|
//
|
||||||
|
const SimdImpl_DestRegSSE<0x00,0x14> LPS;
|
||||||
|
|
||||||
|
// Unpacks the low quadword [double-precision] values from src and dest into
|
||||||
|
// dest, effectively moving the low portion of src into the upper portion of dest.
|
||||||
|
// The result of dest is loaded as such:
|
||||||
|
// dest.hi <- src.lo
|
||||||
|
// dest.lo <- dest.lo [remains unchanged!]
|
||||||
|
//
|
||||||
|
const SimdImpl_DestRegSSE<0x66,0x14> LPD;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// PINSRW/B/D [all forms except the Word form are SSE4.1 only!]
|
||||||
|
//
|
||||||
|
class SimdImpl_PInsert
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
template< u16 Opcode >
|
||||||
|
class ByteDwordForms
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ByteDwordForms() {}
|
||||||
|
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||||
|
xWrite<u8>( imm );
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||||
|
xWrite<u8>( imm );
|
||||||
|
}
|
||||||
|
|
||||||
|
__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||||
|
xWrite<u8>( imm );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
SimdImpl_PInsert() {}
|
||||||
|
|
||||||
|
// Operation can be performed on either MMX or SSE src operands.
|
||||||
|
template< typename T >
|
||||||
|
__forceinline void W( const xRegisterSIMD<T>& to, const xRegister32& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, 0xc4, to, from );
|
||||||
|
xWrite<u8>( imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Operation can be performed on either MMX or SSE src operands.
|
||||||
|
template< typename T >
|
||||||
|
__forceinline void W( const xRegisterSIMD<T>& to, const void* from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, 0xc4, to, from );
|
||||||
|
xWrite<u8>( imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Operation can be performed on either MMX or SSE src operands.
|
||||||
|
template< typename T >
|
||||||
|
__noinline void W( const xRegisterSIMD<T>& to, const ModSibBase& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, 0xc4, to, from );
|
||||||
|
xWrite<u8>( imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// [SSE-4.1]
|
||||||
|
const ByteDwordForms<0x20> B;
|
||||||
|
|
||||||
|
// [SSE-4.1]
|
||||||
|
const ByteDwordForms<0x22> D;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// PEXTRW/B/D [all but Word form are SSE4.1 only!]
|
||||||
|
//
|
||||||
|
// Note: Word form's indirect memory form is only available in SSE4.1.
|
||||||
|
//
|
||||||
|
class SimdImpl_PExtract
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
template< u16 Opcode >
|
||||||
|
class ByteDwordForms
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ByteDwordForms() {}
|
||||||
|
|
||||||
|
__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||||
|
xWrite<u8>( imm );
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||||
|
xWrite<u8>( imm );
|
||||||
|
}
|
||||||
|
|
||||||
|
__noinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
|
||||||
|
xWrite<u8>( imm );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
SimdImpl_PExtract() {}
|
||||||
|
|
||||||
|
// Copies the word element specified by imm8 from src to dest. The upper bits
|
||||||
|
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||||
|
// word value from src into an x86 32 bit register.
|
||||||
|
//
|
||||||
|
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
|
||||||
|
//
|
||||||
|
template< typename T >
|
||||||
|
__forceinline void W( const xRegister32& to, const xRegisterSIMD<T>& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, 0xc5, to, from, true );
|
||||||
|
xWrite<u8>( imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, 0x153a, from, dest );
|
||||||
|
xWrite<u8>( imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
__noinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
|
||||||
|
{
|
||||||
|
writeXMMop( 0x66, 0x153a, from, dest );
|
||||||
|
xWrite<u8>( imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
|
||||||
|
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||||
|
// byte value from src into an x86 32 bit register.
|
||||||
|
const ByteDwordForms<0x14> B;
|
||||||
|
|
||||||
|
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
|
||||||
|
// used to extract any single packed dword value from src into an x86 32 bit register.
|
||||||
|
const ByteDwordForms<0x16> D;
|
||||||
|
};
|
|
@ -641,18 +641,25 @@ __emitinline void xBSWAP( const xRegister32& to )
|
||||||
// MMX / XMM Instructions
|
// MMX / XMM Instructions
|
||||||
// (these will get put in their own file later)
|
// (these will get put in their own file later)
|
||||||
|
|
||||||
// If the upper 8 bits of opcode are zero, the opcode is treated as a u8.
|
// ------------------------------------------------------------------------
|
||||||
// The upper bits are non-zero, the opcode is assumed 16 bit (and the upper bits are checked aginst
|
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
|
||||||
// 0x38, which is the only valid high word for 16 bit opcodes as such)
|
// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4
|
||||||
|
// instructions).  For any other lower byte, the upper byte is assumed to be 0 and is ignored.
|
||||||
|
// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will
|
||||||
|
// generate an assertion.
|
||||||
|
//
|
||||||
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
|
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
|
||||||
{
|
{
|
||||||
|
const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a);
|
||||||
|
|
||||||
|
// If the lower byte is not a valid prefix and the upper byte is non-zero it
|
||||||
|
// means we made a mistake!
|
||||||
|
if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 );
|
||||||
|
|
||||||
if( prefix != 0 )
|
if( prefix != 0 )
|
||||||
{
|
{
|
||||||
if( (opcode & 0xff00) != 0 )
|
if( is16BitOpcode )
|
||||||
{
|
xWrite<u32>( (opcode<<16) | 0x0f00 | prefix );
|
||||||
jASSUME( (opcode & 0xff00) == 0x3800 );
|
|
||||||
xWrite<u32>( (opcode<<16) | (0x0f00 | prefix) );
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xWrite<u16>( 0x0f00 | prefix );
|
xWrite<u16>( 0x0f00 | prefix );
|
||||||
|
@ -661,9 +668,9 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if( (opcode & 0xff00) != 0 )
|
if( is16BitOpcode )
|
||||||
{
|
{
|
||||||
jASSUME( (opcode & 0xff00) == 0x3800 );
|
xWrite<u8>( 0x0f );
|
||||||
xWrite<u16>( opcode );
|
xWrite<u16>( opcode );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -671,6 +678,11 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// [SSE-3]
|
||||||
|
const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
|
||||||
|
// [SSE-3]
|
||||||
|
const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
|
||||||
|
|
||||||
const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS;
|
const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS;
|
||||||
const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
|
const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
|
||||||
const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD;
|
const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD;
|
||||||
|
@ -689,20 +701,20 @@ const MovhlImplAll<0x12> xMOVL;
|
||||||
const MovhlImpl_RtoR<0x16> xMOVLH;
|
const MovhlImpl_RtoR<0x16> xMOVLH;
|
||||||
const MovhlImpl_RtoR<0x12> xMOVHL;
|
const MovhlImpl_RtoR<0x12> xMOVHL;
|
||||||
|
|
||||||
const SimdImpl_PackedLogic<0xdb> xPAND;
|
const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
|
||||||
const SimdImpl_PackedLogic<0xdf> xPANDN;
|
const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
|
||||||
const SimdImpl_PackedLogic<0xeb> xPOR;
|
const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
|
||||||
const SimdImpl_PackedLogic<0xef> xPXOR;
|
const SimdImpl_DestRegEither<0x66,0xef> xPXOR;
|
||||||
|
|
||||||
const SimdImpl_AndNot<0x55> xANDN;
|
const SimdImpl_AndNot xANDN;
|
||||||
|
|
||||||
const SimdImpl_SS_SD<0x66,0x2e> xUCOMI;
|
const SimdImpl_UcomI<0x66,0x2e> xUCOMI;
|
||||||
const SimdImpl_rSqrt<0x53> xRCP;
|
const SimdImpl_rSqrt<0x53> xRCP;
|
||||||
const SimdImpl_rSqrt<0x52> xRSQRT;
|
const SimdImpl_rSqrt<0x52> xRSQRT;
|
||||||
const SimdImpl_Sqrt<0x51> xSQRT;
|
const SimdImpl_Sqrt<0x51> xSQRT;
|
||||||
|
|
||||||
const SimdImpl_PSPD_SSSD<0x5f> xMAX;
|
const SimdImpl_MinMax<0x5f> xMAX;
|
||||||
const SimdImpl_PSPD_SSSD<0x5d> xMIN;
|
const SimdImpl_MinMax<0x5d> xMIN;
|
||||||
const SimdImpl_Shuffle<0xc6> xSHUF;
|
const SimdImpl_Shuffle<0xc6> xSHUF;
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
@ -754,8 +766,8 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
const SimdImpl_ShiftAll<0xd0, 2> xPSRL;
|
const SimdImpl_Shift<0xd0, 2> xPSRL;
|
||||||
const SimdImpl_ShiftAll<0xf0, 6> xPSLL;
|
const SimdImpl_Shift<0xf0, 6> xPSLL;
|
||||||
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||||
|
|
||||||
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||||
|
@ -770,10 +782,29 @@ const SimdImpl_PUnpack xPUNPCK;
|
||||||
const SimdImpl_Unpack xUNPCK;
|
const SimdImpl_Unpack xUNPCK;
|
||||||
const SimdImpl_Pack xPACK;
|
const SimdImpl_Pack xPACK;
|
||||||
|
|
||||||
|
const SimdImpl_PAbsolute xPABS;
|
||||||
|
const SimdImpl_PSign xPSIGN;
|
||||||
|
const SimdImpl_PInsert xPINS;
|
||||||
|
const SimdImpl_PExtract xPEXTR;
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// Store Streaming SIMD Extension Control/Status to Mem32.
|
||||||
|
__emitinline void xSTMXCSR( u32* dest )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, 0xae );
|
||||||
|
xWriteDisp( 3, dest );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load Streaming SIMD Extension Control/Status from Mem32.
|
||||||
|
__emitinline void xLDMXCSR( const u32* src )
|
||||||
|
{
|
||||||
|
SimdPrefix( 0, 0xae );
|
||||||
|
xWriteDisp( 2, src );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
||||||
// being cleared to zero.
|
// being cleared to zero.
|
||||||
|
@ -851,5 +882,8 @@ __noinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { wri
|
||||||
__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); }
|
__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); }
|
||||||
__noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); }
|
__noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); }
|
||||||
|
|
||||||
|
// Emits opcode 0x50 with a 32 bit general purpose register destination and an SSE
// register source.
// NOTE(review): 'from' is a non-const reference, so a temporary such as
// xRegisterSSE(n) cannot bind to it -- confirm whether const was intended (the
// extern declaration would have to change with it).
__forceinline void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from)
{
	writeXMMop( 0x50, to, from );
}
|
||||||
|
// Emits opcode 0x50 under prefix 0x66 with a 32 bit general purpose register
// destination and an SSE register source.
// NOTE(review): 'from' is a non-const reference, so a temporary such as
// xRegisterSSE(n) cannot bind to it -- confirm whether const was intended (the
// extern declaration would have to change with it).
__forceinline void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from)
{
	writeXMMop( 0x66, 0x50, to, from, true );
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -370,8 +370,23 @@ namespace x86Emitter
|
||||||
template< typename T >
|
template< typename T >
|
||||||
static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD<T>& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); }
|
static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD<T>& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); }
|
||||||
|
|
||||||
|
// [sSSE-3] Concatenates dest and source operands into an intermediate composite,
|
||||||
|
// shifts the composite at byte granularity to the right by a constant immediate,
|
||||||
|
// and extracts the right-aligned result into the destination.
|
||||||
|
//
|
||||||
|
template< typename T >
|
||||||
|
static __forceinline void xPALIGNR( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from, u8 imm8 )
|
||||||
|
{
|
||||||
|
Internal::writeXMMop( 0x66, 0x0f3a, to, from );
|
||||||
|
xWrite<u8>( imm8 );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
extern void xSTMXCSR( u32* dest );
|
||||||
|
extern void xLDMXCSR( const u32* src );
|
||||||
|
|
||||||
extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from );
|
extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from );
|
||||||
extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from );
|
extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from );
|
||||||
extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from );
|
extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from );
|
||||||
|
@ -411,8 +426,14 @@ namespace x86Emitter
|
||||||
extern void xMOVNTQ( void* to, const xRegisterMMX& from );
|
extern void xMOVNTQ( void* to, const xRegisterMMX& from );
|
||||||
extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from );
|
extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from );
|
||||||
|
|
||||||
|
extern void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from );
|
||||||
|
extern void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from );
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
|
||||||
|
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
|
||||||
|
|
||||||
extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS;
|
extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS;
|
||||||
extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS;
|
extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS;
|
||||||
|
|
||||||
|
@ -435,29 +456,29 @@ namespace x86Emitter
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
extern const Internal::SimdImpl_PackedLogic<0xdb> xPAND;
|
extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND;
|
||||||
extern const Internal::SimdImpl_PackedLogic<0xdf> xPANDN;
|
extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
|
||||||
extern const Internal::SimdImpl_PackedLogic<0xeb> xPOR;
|
extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR;
|
||||||
extern const Internal::SimdImpl_PackedLogic<0xef> xPXOR;
|
extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR;
|
||||||
|
|
||||||
extern const Internal::SimdImpl_AndNot<0x55> xANDN;
|
extern const Internal::SimdImpl_AndNot xANDN;
|
||||||
|
|
||||||
extern const Internal::SimdImpl_SS_SD<0x66,0x2e> xUCOMI;
|
extern const Internal::SimdImpl_UcomI<0x66,0x2e> xUCOMI;
|
||||||
extern const Internal::SimdImpl_rSqrt<0x53> xRCP;
|
extern const Internal::SimdImpl_rSqrt<0x53> xRCP;
|
||||||
extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT;
|
extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT;
|
||||||
extern const Internal::SimdImpl_Sqrt<0x51> xSQRT;
|
extern const Internal::SimdImpl_Sqrt<0x51> xSQRT;
|
||||||
|
|
||||||
extern const Internal::SimdImpl_PSPD_SSSD<0x5f> xMAX;
|
extern const Internal::SimdImpl_MinMax<0x5f> xMAX;
|
||||||
extern const Internal::SimdImpl_PSPD_SSSD<0x5d> xMIN;
|
extern const Internal::SimdImpl_MinMax<0x5d> xMIN;
|
||||||
extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF;
|
extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF;
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
extern const Internal::SimdImpl_Compare<SSE2_Equal> xCMPEQ;
|
extern const Internal::SimdImpl_Compare<SSE2_Equal> xCMPEQ;
|
||||||
extern const Internal::SimdImpl_Compare<SSE2_Less> xCMPLT;
|
extern const Internal::SimdImpl_Compare<SSE2_Less> xCMPLT;
|
||||||
extern const Internal::SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
|
extern const Internal::SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
|
||||||
extern const Internal::SimdImpl_Compare<SSE2_Unordered> xCMPUNORD;
|
extern const Internal::SimdImpl_Compare<SSE2_Unordered> xCMPUNORD;
|
||||||
extern const Internal::SimdImpl_Compare<SSE2_NotEqual> xCMPNE;
|
extern const Internal::SimdImpl_Compare<SSE2_NotEqual> xCMPNE;
|
||||||
extern const Internal::SimdImpl_Compare<SSE2_NotLess> xCMPNLT;
|
extern const Internal::SimdImpl_Compare<SSE2_NotLess> xCMPNLT;
|
||||||
extern const Internal::SimdImpl_Compare<SSE2_NotLessOrEqual> xCMPNLE;
|
extern const Internal::SimdImpl_Compare<SSE2_NotLessOrEqual> xCMPNLE;
|
||||||
extern const Internal::SimdImpl_Compare<SSE2_Ordered> xCMPORD;
|
extern const Internal::SimdImpl_Compare<SSE2_Ordered> xCMPORD;
|
||||||
|
@ -497,8 +518,8 @@ namespace x86Emitter
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL;
|
extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
|
||||||
extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL;
|
extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL;
|
||||||
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
|
||||||
|
|
||||||
extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
|
||||||
|
@ -512,5 +533,11 @@ namespace x86Emitter
|
||||||
extern const Internal::SimdImpl_PUnpack xPUNPCK;
|
extern const Internal::SimdImpl_PUnpack xPUNPCK;
|
||||||
extern const Internal::SimdImpl_Unpack xUNPCK;
|
extern const Internal::SimdImpl_Unpack xUNPCK;
|
||||||
extern const Internal::SimdImpl_Pack xPACK;
|
extern const Internal::SimdImpl_Pack xPACK;
|
||||||
|
|
||||||
|
extern const Internal::SimdImpl_PAbsolute xPABS;
|
||||||
|
extern const Internal::SimdImpl_PSign xPSIGN;
|
||||||
|
extern const Internal::SimdImpl_PInsert xPINS;
|
||||||
|
extern const Internal::SimdImpl_PExtract xPEXTR;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1351,7 +1351,6 @@ extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from);
|
||||||
extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||||
extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||||
extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||||
extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8);
|
|
||||||
|
|
||||||
// SSE4.1
|
// SSE4.1
|
||||||
|
|
||||||
|
|
|
@ -95,9 +95,13 @@ using namespace x86Emitter;
|
||||||
emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \
|
emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \
|
||||||
emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
|
emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||||
|
|
||||||
#define DEFINE_LEGACY_OP128( mod, sub ) \
|
#define DEFINE_LEGACY_OP128( ssenum, mod, sub ) \
|
||||||
emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
|
emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
|
||||||
emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); }
|
emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); }
|
||||||
|
|
||||||
|
// Defines the legacy SSE<ssenum>_<mod><sub>_XMM_to_XMM and SSE<ssenum>_<mod><sub>_M128_to_XMM
// wrappers for an emitter that is invoked directly under the pasted name x<mod><sub>
// (for example xMOVSLDUP), rather than as a member x<mod>.<sub>.
#define DEFINE_LEGACY_MOV128( ssenum, mod, sub ) \
	emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) \
		{ x##mod##sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
	emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) \
		{ x##mod##sub( xRegisterSSE(to), (void*)from ); }
|
||||||
|
|
||||||
|
|
||||||
#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \
|
#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \
|
||||||
|
@ -136,23 +140,31 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP )
|
||||||
DEFINE_LEGACY_RSQRT_OPCODE( RSQRT )
|
DEFINE_LEGACY_RSQRT_OPCODE( RSQRT )
|
||||||
DEFINE_LEGACY_SQRT_OPCODE( SQRT )
|
DEFINE_LEGACY_SQRT_OPCODE( SQRT )
|
||||||
|
|
||||||
DEFINE_LEGACY_OP128( PMUL, LW )
|
DEFINE_LEGACY_OP128( 2, PMUL, LW )
|
||||||
DEFINE_LEGACY_OP128( PMUL, HW )
|
DEFINE_LEGACY_OP128( 2, PMUL, HW )
|
||||||
DEFINE_LEGACY_OP128( PMUL, UDQ )
|
DEFINE_LEGACY_OP128( 2, PMUL, UDQ )
|
||||||
|
|
||||||
DEFINE_LEGACY_OP128( PMAX, SW )
|
DEFINE_LEGACY_OP128( 2, PMAX, SW )
|
||||||
DEFINE_LEGACY_OP128( PMAX, UB )
|
DEFINE_LEGACY_OP128( 2, PMAX, UB )
|
||||||
DEFINE_LEGACY_OP128( PMIN, SW )
|
DEFINE_LEGACY_OP128( 2, PMIN, SW )
|
||||||
DEFINE_LEGACY_OP128( PMIN, UB )
|
DEFINE_LEGACY_OP128( 2, PMIN, UB )
|
||||||
|
|
||||||
DEFINE_LEGACY_OP128( UNPCK, LPS )
|
DEFINE_LEGACY_OP128( 2, UNPCK, LPS )
|
||||||
DEFINE_LEGACY_OP128( UNPCK, HPS )
|
DEFINE_LEGACY_OP128( 2, UNPCK, HPS )
|
||||||
DEFINE_LEGACY_OP128( PUNPCK, LQDQ )
|
DEFINE_LEGACY_OP128( 2, PUNPCK, LQDQ )
|
||||||
DEFINE_LEGACY_OP128( PUNPCK, HQDQ )
|
DEFINE_LEGACY_OP128( 2, PUNPCK, HQDQ )
|
||||||
|
|
||||||
DEFINE_LEGACY_OP128( PACK, SSWB )
|
DEFINE_LEGACY_OP128( 2, PACK, SSWB )
|
||||||
DEFINE_LEGACY_OP128( PACK, SSDW )
|
DEFINE_LEGACY_OP128( 2, PACK, SSDW )
|
||||||
DEFINE_LEGACY_OP128( PACK, USWB )
|
DEFINE_LEGACY_OP128( 2, PACK, USWB )
|
||||||
|
|
||||||
|
DEFINE_LEGACY_MOV128( 3, MOV, SLDUP )
|
||||||
|
DEFINE_LEGACY_MOV128( 3, MOV, SHDUP )
|
||||||
|
|
||||||
|
DEFINE_LEGACY_OP128( 4, PMAX, SD )
|
||||||
|
DEFINE_LEGACY_OP128( 4, PMIN, SD )
|
||||||
|
DEFINE_LEGACY_OP128( 4, PMAX, UD )
|
||||||
|
DEFINE_LEGACY_OP128( 4, PMIN, UD )
|
||||||
|
|
||||||
|
|
||||||
emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||||
|
@ -201,11 +213,11 @@ emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.P
|
||||||
emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); }
|
emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); }
|
||||||
emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); }
|
emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); }
|
||||||
|
|
||||||
emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||||
emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||||
|
|
||||||
emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); }
|
emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); }
|
||||||
emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); }
|
emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); }
|
||||||
|
|
||||||
emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
|
||||||
emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); }
|
emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); }
|
||||||
|
@ -247,16 +259,6 @@ emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )
|
||||||
// Legacy-API shim for PSHUFHW (xmm, xmm, imm8): wraps both register indices as xRegisterSSE
// operands and forwards them, together with the immediate, to xPSHUF.HW.
emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPSHUF.HW( dest, src, imm8 );
}
|
// Legacy-API shim for PSHUFHW (xmm, xmm, imm8): wraps both register indices as xRegisterSSE
// operands and forwards them, together with the immediate, to xPSHUF.HW.
emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPSHUF.HW( dest, src, imm8 );
}
|
||||||
// Legacy-API shim for PSHUFHW (xmm, m128, imm8): `from` is an absolute address that is cast
// to void* and handed to xPSHUF.HW as the memory source, along with the immediate.
emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )
{
	const xRegisterSSE dest( to );
	xPSHUF.HW( dest, (void*)from, imm8 );
}
|
// Legacy-API shim for PSHUFHW (xmm, m128, imm8): `from` is an absolute address that is cast
// to void* and handed to xPSHUF.HW as the memory source, along with the immediate.
emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )
{
	const xRegisterSSE dest( to );
	xPSHUF.HW( dest, (void*)from, imm8 );
}
|
||||||
|
|
||||||
// Legacy-API shim for PMAXSD (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPMAX.SD.
emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPMAX.SD( dest, src );
}
|
|
||||||
// Legacy-API shim for PMAXSD (xmm, m128): `from` is an absolute address that is cast to
// void* and handed to xPMAX.SD as the memory source.
emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from )
{
	const xRegisterSSE dest( to );
	xPMAX.SD( dest, (void*)from );
}
|
|
||||||
// Legacy-API shim for PMINSD (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPMIN.SD.
emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPMIN.SD( dest, src );
}
|
|
||||||
// Legacy-API shim for PMINSD (xmm, m128): `from` is an absolute address that is cast to
// void* and handed to xPMIN.SD as the memory source.
emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from )
{
	const xRegisterSSE dest( to );
	xPMIN.SD( dest, (void*)from );
}
|
|
||||||
|
|
||||||
// Legacy-API shim for PMAXUD (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPMAX.UD.
emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPMAX.UD( dest, src );
}
|
|
||||||
// Legacy-API shim for PMAXUD (xmm, m128): `from` is an absolute address that is cast to
// void* and handed to xPMAX.UD as the memory source.
emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from )
{
	const xRegisterSSE dest( to );
	xPMAX.UD( dest, (void*)from );
}
|
|
||||||
// Legacy-API shim for PMINUD (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPMIN.UD.
emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPMIN.UD( dest, src );
}
|
|
||||||
// Legacy-API shim for PMINUD (xmm, m128): `from` is an absolute address that is cast to
// void* and handed to xPMIN.UD as the memory source.
emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from )
{
	const xRegisterSSE dest( to );
	xPMIN.UD( dest, (void*)from );
}
|
|
||||||
|
|
||||||
// Legacy-API shim for PMULDQ (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPMUL.DQ.
emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPMUL.DQ( dest, src );
}
|
// Legacy-API shim for PMULDQ (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPMUL.DQ.
emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPMUL.DQ( dest, src );
}
|
||||||
|
|
||||||
// Legacy-API shim for UNPCKLPS (xmm, m128): `from` is an absolute address that is cast to
// void* and handed to xUNPCK.LPS as the memory source.
emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from )
{
	const xRegisterSSE dest( to );
	xUNPCK.LPS( dest, (void*)from );
}
|
// Legacy-API shim for UNPCKLPS (xmm, m128): `from` is an absolute address that is cast to
// void* and handed to xUNPCK.LPS as the memory source.
emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from )
{
	const xRegisterSSE dest( to );
	xUNPCK.LPS( dest, (void*)from );
}
|
||||||
|
@ -264,113 +266,35 @@ emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) {
|
||||||
// Legacy-API shim for UNPCKHPS (xmm, m128): `from` is an absolute address that is cast to
// void* and handed to xUNPCK.HPS as the memory source.
emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from )
{
	const xRegisterSSE dest( to );
	xUNPCK.HPS( dest, (void*)from );
}
|
// Legacy-API shim for UNPCKHPS (xmm, m128): `from` is an absolute address that is cast to
// void* and handed to xUNPCK.HPS as the memory source.
emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from )
{
	const xRegisterSSE dest( to );
	xUNPCK.HPS( dest, (void*)from );
}
|
||||||
// Legacy-API shim for UNPCKHPS (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xUNPCK.HPS.
emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xUNPCK.HPS( dest, src );
}
|
// Legacy-API shim for UNPCKHPS (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xUNPCK.HPS.
emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xUNPCK.HPS( dest, src );
}
|
||||||
|
|
||||||
|
// Legacy-API shim for MOVMSKPS (r32 <- xmm): wraps `to` as a 32-bit general register and
// `from` as an SSE register, then forwards both to xMOVMSKPS.
emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	const xRegister32 dest( to );
	const xRegisterSSE src( from );
	xMOVMSKPS( dest, src );
}
|
||||||
|
// Legacy-API shim for MOVMSKPD (r32 <- xmm): wraps `to` as a 32-bit general register and
// `from` as an SSE register, then forwards both to xMOVMSKPD.
emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	const xRegister32 dest( to );
	const xRegisterSSE src( from );
	xMOVMSKPD( dest, src );
}
|
||||||
|
|
||||||
|
// Legacy-API shim for PABSB (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPABS.B.
emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPABS.B( dest, src );
}
|
||||||
|
// Legacy-API shim for PABSW (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPABS.W.
emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPABS.W( dest, src );
}
|
||||||
|
// Legacy-API shim for PABSD (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPABS.D.
emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPABS.D( dest, src );
}
|
||||||
|
|
||||||
|
// Legacy-API shim for PSIGNB (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPSIGN.B.
emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPSIGN.B( dest, src );
}
|
||||||
|
// Legacy-API shim for PSIGNW (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPSIGN.W.
emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPSIGN.W( dest, src );
}
|
||||||
|
// Legacy-API shim for PSIGND (xmm, xmm): wraps both register indices as xRegisterSSE
// operands and forwards them to xPSIGN.D.
emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const xRegisterSSE dest( to );
	const xRegisterSSE src( from );
	xPSIGN.D( dest, src );
}
|
||||||
|
|
||||||
|
// Legacy-API shim for PEXTRW (r32 <- xmm, imm8): wraps `to` as a 32-bit general register and
// `from` as an SSE register, then forwards both plus the immediate to xPEXTR.W.
emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 )
{
	const xRegister32 dest( to );
	const xRegisterSSE src( from );
	xPEXTR.W( dest, src, imm8 );
}
|
||||||
|
// Legacy-API shim for PINSRW (xmm <- r32, imm8): wraps `to` as an SSE register and `from`
// as a 32-bit general register, then forwards both plus the immediate to xPINS.W.
emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 )
{
	const xRegisterSSE dest( to );
	const xRegister32 src( from );
	xPINS.W( dest, src, imm8 );
}
|
||||||
|
|
||||||
|
// Legacy-API shim for LDMXCSR: reinterprets the absolute address `from` as a u32 pointer
// and passes it to xLDMXCSR.
emitterT void SSE_LDMXCSR( uptr from )
{
	xLDMXCSR( (u32*)from );
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//**********************************************************************************/
|
|
||||||
//STMXCSR : Store Streaming SIMD Extension Control/Status *
|
|
||||||
//**********************************************************************************
|
|
||||||
// Old hand-encoded STMXCSR: writes the 16-bit opcode constant, a ModRM built from
// (0, 0x3, DISP32), and then a 32-bit displacement derived from `from` via MEMADDR.
// The three writes must stay in this order.  NOTE(review): MEMADDR may depend on the
// current emit position, so it is deliberately left inside the write32 call -- confirm.
emitterT void SSE_STMXCSR( uptr from ) {
|
|
||||||
write16( 0xAE0F );
|
|
||||||
ModRM( 0, 0x3, DISP32 );
|
|
||||||
write32( MEMADDR(from, 4) );
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//**********************************************************************************/
|
|
||||||
//LDMXCSR : Load Streaming SIMD Extension Control/Status *
|
|
||||||
//**********************************************************************************
|
|
||||||
// Old hand-encoded LDMXCSR: writes the 16-bit opcode constant, a ModRM built from
// (0, 0x2, DISP32), and then a 32-bit displacement derived from `from` via MEMADDR.
// The three writes must stay in this order.  NOTE(review): MEMADDR may depend on the
// current emit position, so it is deliberately left inside the write32 call -- confirm.
emitterT void SSE_LDMXCSR( uptr from ) {
|
|
||||||
write16( 0xAE0F );
|
|
||||||
ModRM( 0, 0x2, DISP32 );
|
|
||||||
write32( MEMADDR(from, 4) );
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
//**********************************************************************************/
|
//**********************************************************************************/
|
||||||
//PEXTRW,PINSRW: Packed Extract/Insert Word *
|
//PEXTRW,PINSRW: Packed Extract/Insert Word *
|
||||||
//**********************************************************************************
|
//**********************************************************************************
|
||||||
// Old-emitter PEXTRW (r32 <- xmm, imm8): invokes SSERtoR66 with opcode constant 0xC50F and
// then appends the imm8 byte.  NOTE(review): `to`/`from` are not passed explicitly, so
// SSERtoR66 presumably reads them from this scope -- confirm against its definition.
emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 )
{
	SSERtoR66(0xC50F);
	write8( imm8 );
}
|
|
||||||
// Old-emitter PINSRW (xmm <- r32, imm8): invokes SSERtoR66 with opcode constant 0xC40F and
// then appends the imm8 byte.  NOTE(review): `to`/`from` are not passed explicitly, so
// SSERtoR66 presumably reads them from this scope -- confirm against its definition.
emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 )
{
	SSERtoR66(0xC40F);
	write8( imm8 );
}
|
|
||||||
|
|
||||||
// Old-emitter MOVMSKPS (r32 <- xmm): invokes SSERtoR with opcode constant 0x500F.
// NOTE(review): `to`/`from` are not passed explicitly, so SSERtoR presumably reads them
// from this scope -- confirm against its definition.
emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	SSERtoR(0x500F);
}
|
|
||||||
// Old-emitter MOVMSKPD (r32 <- xmm): invokes SSERtoR66 with opcode constant 0x500F.
// NOTE(review): `to`/`from` are not passed explicitly, so SSERtoR66 presumably reads them
// from this scope -- confirm against its definition.
emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	SSERtoR66(0x500F);
}
|
|
||||||
|
|
||||||
// Old-emitter PMADDWD (xmm, xmm): invokes SSERtoR66 with opcode constant 0xF50F.
// NOTE(review): `to`/`from` are not passed explicitly, so SSERtoR66 presumably reads them
// from this scope -- confirm against its definition.
emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF50F);
}
|
// Old-emitter PMADDWD (xmm, xmm): invokes SSERtoR66 with opcode constant 0xF50F.
// NOTE(review): `to`/`from` are not passed explicitly, so SSERtoR66 presumably reads them
// from this scope -- confirm against its definition.
emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF50F);
}
|
||||||
|
|
||||||
// Old-emitter HADDPS (xmm, xmm): writes the 0xf2 prefix byte first, then invokes SSERtoR
// with opcode constant 0x7c0f.  NOTE(review): SSERtoR presumably reads `to`/`from` from
// this scope -- confirm against its definition.
emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	write8(0xf2);
	SSERtoR( 0x7c0f );
}
|
// Old-emitter HADDPS (xmm, xmm): writes the 0xf2 prefix byte first, then invokes SSERtoR
// with opcode constant 0x7c0f.  NOTE(review): SSERtoR presumably reads `to`/`from` from
// this scope -- confirm against its definition.
emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	write8(0xf2);
	SSERtoR( 0x7c0f );
}
|
||||||
// Old-emitter HADDPS (xmm, m128): writes the 0xf2 prefix byte first, then invokes SSEMtoR
// with opcode constant 0x7c0f and a second argument of 0.  NOTE(review): SSEMtoR
// presumably reads `to`/`from` from this scope -- confirm against its definition.
emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	write8(0xf2);
	SSEMtoR( 0x7c0f, 0 );
}
|
// Old-emitter HADDPS (xmm, m128): writes the 0xf2 prefix byte first, then invokes SSEMtoR
// with opcode constant 0x7c0f and a second argument of 0.  NOTE(review): SSEMtoR
// presumably reads `to`/`from` from this scope -- confirm against its definition.
emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	write8(0xf2);
	SSEMtoR( 0x7c0f, 0 );
}
|
||||||
|
|
||||||
// Old hand-encoded MOVSLDUP (xmm, xmm).  Emission order is: the 0xf3 prefix byte, whatever
// RexRB(0, to, from) produces, the 16-bit opcode constant 0x120f, and finally a ModRM
// built from (3, to, from).  NOTE(review): RexRB presumably emits a REX prefix only when
// needed -- confirm against its definition.
emitterT void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
|
||||||
write8(0xf3);
|
|
||||||
RexRB(0, to, from);
|
|
||||||
write16( 0x120f);
|
|
||||||
ModRM( 3, to, from );
|
|
||||||
}
|
|
||||||
|
|
||||||
// Old-emitter MOVSLDUP (xmm, m128): writes the 0xf3 prefix byte first, then invokes SSEMtoR
// with opcode constant 0x120f and a second argument of 0.  NOTE(review): SSEMtoR
// presumably reads `to`/`from` from this scope -- confirm against its definition.
emitterT void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from)
{
	write8(0xf3);
	SSEMtoR(0x120f, 0);
}
|
|
||||||
// Old-emitter MOVSHDUP (xmm, xmm): writes the 0xf3 prefix byte first, then invokes SSERtoR
// with opcode constant 0x160f.  NOTE(review): SSERtoR presumably reads `to`/`from` from
// this scope -- confirm against its definition.
emitterT void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	write8(0xf3);
	SSERtoR(0x160f);
}
|
|
||||||
// Old-emitter MOVSHDUP (xmm, m128): writes the 0xf3 prefix byte first, then invokes SSEMtoR
// with opcode constant 0x160f and a second argument of 0.  NOTE(review): SSEMtoR
// presumably reads `to`/`from` from this scope -- confirm against its definition.
emitterT void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from)
{
	write8(0xf3);
	SSEMtoR(0x160f, 0);
}
|
|
||||||
|
|
||||||
// SSSE3
|
|
||||||
|
|
||||||
// Old hand-encoded PABSB (xmm, xmm).  Emission order is: the 0x66 prefix byte, whatever
// RexRB(0, to, from) produces, the 24-bit opcode constant 0x1C380F, and a ModRM built
// from (3, to, from).  NOTE(review): write24 presumably emits the low byte first
// (0F 38 1C) -- confirm against its definition.
emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
|
||||||
{
|
|
||||||
write8(0x66);
|
|
||||||
RexRB(0, to, from);
|
|
||||||
write24(0x1C380F);
|
|
||||||
ModRM(3, to, from);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Old hand-encoded PABSW (xmm, xmm).  Emission order is: the 0x66 prefix byte, whatever
// RexRB(0, to, from) produces, the 24-bit opcode constant 0x1D380F, and a ModRM built
// from (3, to, from).  NOTE(review): write24 presumably emits the low byte first
// (0F 38 1D) -- confirm against its definition.
emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
|
||||||
{
|
|
||||||
write8(0x66);
|
|
||||||
RexRB(0, to, from);
|
|
||||||
write24(0x1D380F);
|
|
||||||
ModRM(3, to, from);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Old hand-encoded PABSD (xmm, xmm).  Emission order is: the 0x66 prefix byte, whatever
// RexRB(0, to, from) produces, the 24-bit opcode constant 0x1E380F, and a ModRM built
// from (3, to, from).  NOTE(review): write24 presumably emits the low byte first
// (0F 38 1E) -- confirm against its definition.
emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
|
||||||
{
|
|
||||||
write8(0x66);
|
|
||||||
RexRB(0, to, from);
|
|
||||||
write24(0x1E380F);
|
|
||||||
ModRM(3, to, from);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Old hand-encoded PALIGNR (xmm, xmm, imm8).  Emission order is: the 0x66 prefix byte,
// whatever RexRB(0, to, from) produces, the 24-bit opcode constant 0x0F3A0F, a ModRM built
// from (3, to, from), and finally the imm8 byte.  NOTE(review): write24 presumably emits
// the low byte first (0F 3A 0F) -- confirm against its definition.
emitterT void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
|
|
||||||
{
|
|
||||||
write8(0x66);
|
|
||||||
RexRB(0, to, from);
|
|
||||||
write24(0x0F3A0F);
|
|
||||||
ModRM(3, to, from);
|
|
||||||
write8(imm8);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Old hand-encoded PSIGNB (xmm, xmm).  Emission order is: the 0x66 prefix byte, whatever
// RexRB(0, to, from) produces, the 24-bit opcode constant 0x08380F, and a ModRM built
// from (3, to, from).  NOTE(review): write24 presumably emits the low byte first
// (0F 38 08) -- confirm against its definition.
emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
|
||||||
{
|
|
||||||
write8(0x66);
|
|
||||||
RexRB(0, to, from);
|
|
||||||
write24(0x08380F);
|
|
||||||
ModRM(3, to, from);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Old hand-encoded PSIGNW (xmm, xmm).  Emission order is: the 0x66 prefix byte, whatever
// RexRB(0, to, from) produces, the 24-bit opcode constant 0x09380F, and a ModRM built
// from (3, to, from).  NOTE(review): write24 presumably emits the low byte first
// (0F 38 09) -- confirm against its definition.
emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
|
||||||
{
|
|
||||||
write8(0x66);
|
|
||||||
RexRB(0, to, from);
|
|
||||||
write24(0x09380F);
|
|
||||||
ModRM(3, to, from);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Old hand-encoded PSIGND (xmm, xmm).  Emission order is: the 0x66 prefix byte, whatever
// RexRB(0, to, from) produces, the 24-bit opcode constant 0x0A380F, and a ModRM built
// from (3, to, from).  NOTE(review): write24 presumably emits the low byte first
// (0F 38 0A) -- confirm against its definition.
emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
|
|
||||||
{
|
|
||||||
write8(0x66);
|
|
||||||
RexRB(0, to, from);
|
|
||||||
write24(0x0A380F);
|
|
||||||
ModRM(3, to, from);
|
|
||||||
}
|
|
||||||
|
|
||||||
// SSE4.1
|
// SSE4.1
|
||||||
|
|
||||||
|
|
|
@ -697,7 +697,11 @@ namespace x86Emitter
|
||||||
// Reports whether the operand type T is exactly one byte wide.
template< typename T >
bool Is8BitOp()
{
	const bool isByteSized = ( 1 == sizeof(T) );
	return isByteSized;
}
|
// Reports whether the operand type T is exactly one byte wide.
template< typename T >
bool Is8BitOp()
{
	const bool isByteSized = ( 1 == sizeof(T) );
	return isByteSized;
}
|
||||||
// Writes the single byte 0x66 via xWrite<u8> when T is two bytes wide; for any other
// operand size nothing is written.
template< typename T >
void prefix16()
{
	if( sizeof(T) != 2 ) return;
	xWrite<u8>( 0x66 );
}
|
// Writes the single byte 0x66 via xWrite<u8> when T is two bytes wide; for any other
// operand size nothing is written.
template< typename T >
void prefix16()
{
	if( sizeof(T) != 2 ) return;
	xWrite<u8>( 0x66 );
}
|
||||||
|
|
||||||
#include "implement/xmm/movqss.h"
|
#include "implement/xmm/basehelpers.h"
|
||||||
|
#include "implement/xmm/moremovs.h"
|
||||||
|
#include "implement/xmm/arithmetic.h"
|
||||||
|
#include "implement/xmm/comparisons.h"
|
||||||
|
#include "implement/xmm/shufflepack.h"
|
||||||
#include "implement/group1.h"
|
#include "implement/group1.h"
|
||||||
#include "implement/group2.h"
|
#include "implement/group2.h"
|
||||||
#include "implement/group3.h"
|
#include "implement/group3.h"
|
||||||
|
|
Loading…
Reference in New Issue