Fixed a small bug from my last commit (mostly only affected debug builds), and implemented PALIGNR/MOVSLDUP/PABS/PSIGN/PEXTR/PINS.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1036 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-04-21 05:29:14 +00:00
parent 5c312c36c7
commit deb642af43
11 changed files with 1052 additions and 809 deletions

View File

@ -0,0 +1,230 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////
// Used for PSRA, which lacks the Q form.
//
// Provides the W and D shift emitters.  Each emitter accepts the shift count either as a
// SIMD register / memory operand (encoded with Opcode1) or as an 8-bit immediate (encoded
// with OpcodeImm, the modcode being handed to ModRM in place of a second register).
//
template< u16 OpcodeBase1, u8 Modcode >
class SimdImpl_ShiftWithoutQ
{
protected:
	// Note: the third parameter must not be named 'Modcode'.  Redeclaring a template
	// parameter of the enclosing class inside a nested template is ill-formed in
	// standard C++ (GCC rejects it), even though some compilers let it slide.
	template< u16 Opcode1, u16 OpcodeImm, u8 ImmModcode >
	class ShiftHelper
	{
	public:
		ShiftHelper() {}

		// Shift count taken from another SIMD register.
		template< typename OperandType >
		__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const xRegisterSIMD<OperandType>& from ) const
		{
			writeXMMop( 0x66, Opcode1, to, from );
		}

		// Shift count taken from a direct memory address.
		template< typename OperandType >
		__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const void* from ) const
		{
			writeXMMop( 0x66, Opcode1, to, from );
		}

		// Shift count taken from an indirect (ModSib) memory operand.
		template< typename OperandType >
		__noinline void operator()( const xRegisterSIMD<OperandType>& to, const ModSibBase& from ) const
		{
			writeXMMop( 0x66, Opcode1, to, from );
		}

		// Shift count given as an immediate.  The 0x66 prefix is requested only when the
		// operand type is 16 bytes wide; narrower operand types pass 0 instead.
		template< typename OperandType >
		__emitinline void operator()( const xRegisterSIMD<OperandType>& to, u8 imm ) const
		{
			SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm );
			ModRM( 3, (int)ImmModcode, to.Id );
			xWrite<u8>( imm );
		}
	};

public:
	const ShiftHelper<OpcodeBase1+1,0x71,Modcode> W;
	const ShiftHelper<OpcodeBase1+2,0x72,Modcode> D;

	SimdImpl_ShiftWithoutQ() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Implements PSRL and PSLL
//
// Adds the Q form and the immediate-only DQ form on top of the W/D forms inherited from
// SimdImpl_ShiftWithoutQ.
//
template< u16 OpcodeBase1, u8 Modcode >
class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
{
	typedef SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode> ShiftBase;

public:
	// ShiftHelper lives in a dependent base class, so it has to be named through the
	// base with 'typename ... ::template'; unqualified lookup does not search dependent
	// bases under standard two-phase name lookup.
	const typename ShiftBase::template ShiftHelper<OpcodeBase1+3,0x73,Modcode> Q;

	// Immediate-only form on an SSE register.  Uses opcode 0x73 with prefix 0x66 and
	// passes Modcode+1 to ModRM, followed by the immediate byte.
	void DQ( const xRegisterSSE& to, u8 imm ) const
	{
		SimdPrefix( 0x66, 0x73 );
		ModRM( 3, (int)Modcode+1, to.Id );
		xWrite<u8>( imm );
	}

	SimdImpl_Shift() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Packed integer add/subtract emitter group.  Every member is a SimdImpl_DestRegEither
// using prefix 0x66; the opcodes are derived from the two template parameters:
//   OpcodeB - base opcode.  The USB form uses it as-is, the other forms are fixed
//             offsets from it.
//   OpcodeQ - opcode of the Q form, which is supplied separately.
//
template< u16 OpcodeB, u16 OpcodeQ >
class SimdImpl_AddSub
{
public:
// Non-saturating forms (presumably byte / word / dword / qword, going by the names).
const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;
const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;
const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;
const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;
// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;
// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;
// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
const SimdImpl_DestRegEither<0x66,OpcodeB> USB;
// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
SimdImpl_AddSub() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Packed integer multiply emitter group.  All members use prefix 0x66.  The 16-bit
// opcodes with 0x38 in the low byte (HRSW / LD / DQ) are presumably the three-byte
// 0F 38 xx encodings -- confirm against SimdPrefix.
//
class SimdImpl_PMul
{
public:
// Opcodes 0xd5 / 0xe5 / 0xe4 / 0xf4 (presumably PMULLW / PMULHW / PMULHUW / PMULUDQ,
// going by the member names).  MMX or XMM destinations are accepted.
const SimdImpl_DestRegEither<0x66,0xd5> LW;
const SimdImpl_DestRegEither<0x66,0xe5> HW;
const SimdImpl_DestRegEither<0x66,0xe4> HUW;
const SimdImpl_DestRegEither<0x66,0xf4> UDQ;
// [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
// bits. Rounding is always performed by adding 1 to the least significant bit of the
// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
// immediately to the right of the most significant bit of each 18-bit intermediate
// result and packed to the destination operand.
//
// Both operands can be MMX or XMM registers. Source can be register or memory.
//
const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;
// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
// the low 32 bits of each product in xmm1.
const SimdImpl_DestRegSSE<0x66,0x4038> LD;
// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
SimdImpl_PMul() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
//
// Both members share OpcodeSSE; only the prefix argument differs.
//
template< u16 OpcodeSSE >
class SimdImpl_rSqrt
{
public:
// Packed single precision form (prefix argument 0x00).
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;
// Scalar single precision form (prefix 0xf3).
const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;
SimdImpl_rSqrt() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Exposes the PS/SS/SD forms of SQRT (PS and SS are inherited from SimdImpl_rSqrt).
// No PD member is provided here.
// NOTE(review): the x86 instruction set does define SQRTPD (66 0F 51), so the missing
// PD form is a limitation of this emitter rather than of the instruction -- confirm
// whether it was left out on purpose.
//
template< u16 OpcodeSSE >
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
{
public:
// Scalar double precision form (prefix 0xf2).
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
SimdImpl_Sqrt() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// AND-NOT emitter group (SSE registers only).  Both forms use opcode 0x55 and differ
// only in the prefix argument.
//
class SimdImpl_AndNot
{
public:
// Packed single precision form (prefix argument 0x00).
const SimdImpl_DestRegSSE<0x00,0x55> PS;
// Packed double precision form (prefix 0x66).
const SimdImpl_DestRegSSE<0x66,0x55> PD;
SimdImpl_AndNot() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Packed absolute value. [sSSE3 only]
//
// Each member is a SimdImpl_DestRegEither with prefix 0x66, so the destination may be
// any xRegisterSIMD type.  The opcodes carry 0x38 in the low byte (presumably the
// 0F 38 xx three-byte encoding -- confirm against SimdPrefix).
//
class SimdImpl_PAbsolute
{
public:
SimdImpl_PAbsolute() {}
// [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
// in dest, as UNSIGNED.
const SimdImpl_DestRegEither<0x66, 0x1c38> B;
// [sSSE-3] Computes the absolute value of word in the src, and stores the result
// in dest, as UNSIGNED.
const SimdImpl_DestRegEither<0x66, 0x1d38> W;
// [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
// result in dest, as UNSIGNED.
const SimdImpl_DestRegEither<0x66, 0x1e38> D;
};
//////////////////////////////////////////////////////////////////////////////////////////
// Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the
// corresponding sign in src.
//
// Each member is a SimdImpl_DestRegEither with prefix 0x66; the B/W/D forms differ only
// in their opcode (0x0838 / 0x0938 / 0x0a38).
//
class SimdImpl_PSign
{
public:
SimdImpl_PSign() {}
// [sSSE-3] negates each byte element of dest if the signed integer value of the
// corresponding data element in src is less than zero. If the signed integer value
// of a data element in src is positive, the corresponding data element in dest is
// unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero.
const SimdImpl_DestRegEither<0x66, 0x0838> B;
// [sSSE-3] negates each word element of dest if the signed integer value of the
// corresponding data element in src is less than zero. If the signed integer value
// of a data element in src is positive, the corresponding data element in dest is
// unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero.
const SimdImpl_DestRegEither<0x66, 0x0938> W;
// [sSSE-3] negates each doubleword element of dest if the signed integer value
// of the corresponding data element in src is less than zero. If the signed integer
// value of a data element in src is positive, the corresponding data element in dest
// is unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero.
const SimdImpl_DestRegEither<0x66, 0x0a38> D;
};

View File

@ -0,0 +1,152 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////
// MMX / SSE Helper Functions!
extern void SimdPrefix( u8 prefix, u16 opcode );
// ------------------------------------------------------------------------
// xmm emitter helpers for xmm instruction with prefixes.
// These functions also support deducing the use of the prefix from the template parameters,
// since most xmm instructions use a prefix and most mmx instructions do not. (some mov
// instructions violate this "guideline.")
//
// Register-to-register form.  'prefix' is forwarded to SimdPrefix only when forcePrefix
// is set or the destination operand type T is 16 bytes wide; otherwise 0 is passed.
template< typename T, typename T2 >
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
{
	const bool wantPrefix = forcePrefix || (sizeof( T ) == 16);
	SimdPrefix( wantPrefix ? prefix : 0, opcode );
	ModRM_Direct( to.Id, from.Id );
}
// Register / indirect-memory (ModSib) form.  'prefix' is forwarded to SimdPrefix only
// when forcePrefix is set or the operand type T is 16 bytes wide; otherwise 0 is passed.
template< typename T >
__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
{
	const bool wantPrefix = forcePrefix || (sizeof( T ) == 16);
	SimdPrefix( wantPrefix ? prefix : 0, opcode );
	EmitSibMagic( reg.Id, sib );
}
// Register / direct-address form.  'prefix' is forwarded to SimdPrefix only when
// forcePrefix is set or the operand type T is 16 bytes wide; otherwise 0 is passed.
template< typename T >
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
{
	const bool wantPrefix = forcePrefix || (sizeof( T ) == 16);
	SimdPrefix( wantPrefix ? prefix : 0, opcode );
	xWriteDisp( reg.Id, data );
}
// ------------------------------------------------------------------------
// xmm emitter helpers for xmm instructions *without* prefixes.
// These are normally used for special instructions that have MMX forms only (non-SSE), however
// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
//
// Unprefixed register-to-register form: SimdPrefix always receives 0 as the prefix.
template< typename T, typename T2 >
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
{
	const u8 noPrefix = 0;
	SimdPrefix( noPrefix, opcode );
	ModRM_Direct( to.Id, from.Id );
}
// Unprefixed register / indirect-memory (ModSib) form: SimdPrefix always receives 0.
template< typename T >
__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
{
	const u8 noPrefix = 0;
	SimdPrefix( noPrefix, opcode );
	EmitSibMagic( reg.Id, sib );
}
// Unprefixed register / direct-address form: SimdPrefix always receives 0.
template< typename T >
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
{
	const u8 noPrefix = 0;
	SimdPrefix( noPrefix, opcode );
	xWriteDisp( reg.Id, data );
}
// ------------------------------------------------------------------------
// Emitter for SSE-only operations that exist in xmmreg,xmmreg/rm form only
// (ANDPS/ANDPD and the like).  Every overload forwards to writeXMMop with the
// class's Prefix and Opcode.
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegSSE
{
public:
	SimdImpl_DestRegSSE() {} //GCWho?

	// Source is another SSE register.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Source is a direct memory address.
	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Source is an indirect (ModSib) memory operand.
	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}
};
// ------------------------------------------------------------------------
// Emitter for SSE-only operations that exist in xmmreg,reg/rm,imm form only
// (PSHUFD / PSHUFHW / etc).  Each overload writes the instruction through
// writeXMMop and then appends the 8-bit immediate.
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmSSE
{
public:
	SimdImpl_DestRegImmSSE() {} //GCWho?

	// Source is another SSE register.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// Source is a direct memory address.
	__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// Source is an indirect (ModSib) memory operand.
	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}
};
// MMX counterpart of the reg,reg/rm,imm emitter: each overload writes the instruction
// through writeXMMop and then appends the 8-bit immediate.
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmMMX
{
public:
	SimdImpl_DestRegImmMMX() {} //GCWho?

	// Source is another MMX register.
	__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// Source is a direct memory address.
	__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// Source is an indirect (ModSib) memory operand.
	__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}
};
// ------------------------------------------------------------------------
// Emitter for MMX/SSE operations that exist in reg,reg/rm form only but accept either
// MM or XMM destinations (most PADD/PSUB and other packed arithmetic ops).  The
// operand type is deduced from the destination register.
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegEither
{
public:
	SimdImpl_DestRegEither() {} //GCWho?

	// Source is a register of the same SIMD type as the destination.
	template< typename T >
	__forceinline void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Source is a direct memory address.
	template< typename T >
	__forceinline void operator()( const xRegisterSIMD<T>& to, const void* from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Source is an indirect (ModSib) memory operand.
	template< typename T >
	__forceinline void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}
};
// ------------------------------------------------------------------------
// Emitter for MMX/SSE operations whose destination *must* be a register while the
// source may be a register (regDirect) or memory (ModRM indirect).  All overloads
// pass forcePrefix=true to writeXMMop, so Prefix is used regardless of operand size.
//
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
class SimdImpl_DestRegStrict
{
public:
	SimdImpl_DestRegStrict() {} //GCWho?

	// Source is a register.
	__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from, true );
	}

	// Source is a typed direct memory address.
	__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const
	{
		writeXMMop( Prefix, Opcode, to, from, true );
	}

	// Source is a typed indirect (ModSib) memory operand.
	__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from, true );
	}
};

View File

@ -0,0 +1,131 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////
// Emitter group with all four floating point forms (PS/PD/SS/SD) of one SSE opcode.
// The forms share OpcodeSSE and differ only in the prefix argument.
//
template< u16 OpcodeSSE >
class SimdImpl_MinMax
{
public:
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision
const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision
const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision
const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision
SimdImpl_MinMax() {} //GChow?
};
//////////////////////////////////////////////////////////////////////////////////////////
// Floating point compare emitter group.  Every form uses opcode 0xc2 and appends the
// comparison type (CType) as a trailing immediate byte; the PS/PD/SS/SD forms differ
// only in the prefix argument.
//
template< SSE2_ComparisonType CType >
class SimdImpl_Compare
{
protected:
	template< u8 Prefix >
	struct Woot
	{
		Woot() {}

		// Source is another SSE register.
		__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
		{
			writeXMMop( Prefix, 0xc2, to, from );
			xWrite<u8>( CType );
		}

		// Source is a direct memory address.
		__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
		{
			writeXMMop( Prefix, 0xc2, to, from );
			xWrite<u8>( CType );
		}

		// Source is an indirect (ModSib) memory operand.
		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
		{
			writeXMMop( Prefix, 0xc2, to, from );
			xWrite<u8>( CType );
		}
	};

public:
	SimdImpl_Compare() {} //GCWhat?

	const Woot<0x00> PS;
	const Woot<0x66> PD;
	const Woot<0xf3> SS;
	const Woot<0xf2> SD;
};
//////////////////////////////////////////////////////////////////////////////////////////
// Packed integer compare emitter group (equality and signed greater-than).  Every
// member is a SimdImpl_DestRegEither with prefix 0x66.
//
class SimdImpl_PCompare
{
public:
SimdImpl_PCompare() {}
// Compare packed bytes for equality.
// If a data element in dest is equal to the corresponding data element src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x74> EQB;
// Compare packed words for equality.
// If a data element in dest is equal to the corresponding data element src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x75> EQW;
// Compare packed doublewords [32-bits] for equality.
// If a data element in dest is equal to the corresponding data element src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x76> EQD;
// Compare packed signed bytes for greater than.
// If a data element in dest is greater than the corresponding data element src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x64> GTB;
// Compare packed signed words for greater than.
// If a data element in dest is greater than the corresponding data element src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x65> GTW;
// Compare packed signed doublewords [32-bits] for greater than.
// If a data element in dest is greater than the corresponding data element src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const SimdImpl_DestRegEither<0x66,0x66> GTD;
};
//////////////////////////////////////////////////////////////////////////////////////////
// Packed integer min/max emitter group.
//   Opcode1 - opcode of the UB form; the SW form is Opcode1+0x10.
//   Opcode2 - base of the SSE-4.1 forms.  Each of those builds its 16-bit opcode with
//             (Opcode2+n) in the high byte and 0x38 in the low byte.
//
template< u8 Opcode1, u16 Opcode2 >
class SimdImpl_PMinMax
{
public:
SimdImpl_PMinMax() {}
// Compare packed unsigned byte integers in dest to src and store packed min/max
// values in dest.
// Operation can be performed on either MMX or SSE operands.
const SimdImpl_DestRegEither<0x66,Opcode1> UB;
// Compare packed signed word integers in dest to src and store packed min/max
// values in dest.
// Operation can be performed on either MMX or SSE operands.
const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW;
// [SSE-4.1] Compare packed signed byte integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB;
// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD;
// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW;
// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
};

View File

@ -0,0 +1,82 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////
// Moves to/from high/low portions of an xmm register.
// These instructions cannot be used in reg/reg form.
//
// Loads (register destination) use Opcode; stores (memory destination) use Opcode+1.
//
template< u16 Opcode >
class MovhlImplAll
{
protected:
	template< u8 Prefix >
	struct Woot
	{
		// Load from a direct memory address.
		__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
		{
			writeXMMop( Prefix, Opcode, to, from );
		}

		// Store to a direct memory address.
		__forceinline void operator()( const void* to, const xRegisterSSE& from ) const
		{
			writeXMMop( Prefix, Opcode+1, from, to );
		}

		// Load from an indirect (ModSib) memory operand.
		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
		{
			writeXMMop( Prefix, Opcode, to, from );
		}

		// Store to an indirect (ModSib) memory operand.
		__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
		{
			writeXMMop( Prefix, Opcode+1, from, to );
		}
	};

public:
	MovhlImplAll() {} //GCC.

	Woot<0x00> PS;
	Woot<0x66> PD;
};
// ------------------------------------------------------------------------
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
// do something kinda different! Fun!
//
template< u16 Opcode >
class MovhlImpl_RtoR
{
public:
	MovhlImpl_RtoR() {} //GCC.

	// Packed-single form: written through the unprefixed writeXMMop overload.
	__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		writeXMMop( Opcode, to, from );
	}

	// Packed-double form: written with the 0x66 prefix.
	__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		writeXMMop( 0x66, Opcode, to, from );
	}
};
// ------------------------------------------------------------------------
// Full-register SSE move emitter.  Loads (register destination) use Opcode; stores
// (memory destination) use OpcodeAlt with the operands swapped.
//
template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
class MovapsImplAll
{
public:
	MovapsImplAll() {} //GCC.

	// Register-to-register move; nothing is emitted when both operands are the same
	// register.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		if( !(to != from) ) return;
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Load from a direct memory address.
	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Store to a direct memory address.
	__forceinline void operator()( const void* to, const xRegisterSSE& from ) const
	{
		writeXMMop( Prefix, OpcodeAlt, from, to );
	}

	// Load from an indirect (ModSib) memory operand.
	__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Store to an indirect (ModSib) memory operand.
	__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
	{
		writeXMMop( Prefix, OpcodeAlt, from, to );
	}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Emitter group for instructions that come in SS and SD forms only.  Both forms share
// OpcodeSSE; SS uses prefix argument 0x00 and SD uses the caller-supplied AltPrefix.
//
template< u8 AltPrefix, u16 OpcodeSSE >
class SimdImpl_UcomI
{
public:
// Scalar single precision form.
const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS;
// Scalar double precision form.
const SimdImpl_DestRegSSE<AltPrefix,OpcodeSSE> SD;
SimdImpl_UcomI() {}
};

View File

@ -1,646 +0,0 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////
// MMX / SSE Helper Functions!
extern void SimdPrefix( u8 prefix, u16 opcode );
// ------------------------------------------------------------------------
// xmm emitter helpers for xmm instruction with prefixes.
// These functions also support deducing the use of the prefix from the template parameters,
// since most xmm instructions use a prefix and most mmx instructions do not. (some mov
// instructions violate this "guideline.")
//
// Register-to-register form.  'prefix' is forwarded to SimdPrefix only when forcePrefix
// is set or the destination operand type T is 16 bytes wide; otherwise 0 is passed.
template< typename T, typename T2 >
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
{
	const bool wantPrefix = forcePrefix || (sizeof( T ) == 16);
	SimdPrefix( wantPrefix ? prefix : 0, opcode );
	ModRM_Direct( to.Id, from.Id );
}
// Register / indirect-memory (ModSib) form.  'prefix' is forwarded to SimdPrefix only
// when forcePrefix is set or the operand type T is 16 bytes wide; otherwise 0 is passed.
template< typename T >
__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
{
	const bool wantPrefix = forcePrefix || (sizeof( T ) == 16);
	SimdPrefix( wantPrefix ? prefix : 0, opcode );
	EmitSibMagic( reg.Id, sib );
}
// Register / direct-address form.  'prefix' is forwarded to SimdPrefix only when
// forcePrefix is set or the operand type T is 16 bytes wide; otherwise 0 is passed.
template< typename T >
__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
{
	const bool wantPrefix = forcePrefix || (sizeof( T ) == 16);
	SimdPrefix( wantPrefix ? prefix : 0, opcode );
	xWriteDisp( reg.Id, data );
}
// ------------------------------------------------------------------------
// xmm emitter helpers for xmm instructions *without* prefixes.
// These are normally used for special instructions that have MMX forms only (non-SSE), however
// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
//
// Unprefixed register-to-register form: SimdPrefix always receives 0 as the prefix.
template< typename T, typename T2 >
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
{
	const u8 noPrefix = 0;
	SimdPrefix( noPrefix, opcode );
	ModRM_Direct( to.Id, from.Id );
}
// Unprefixed register / indirect-memory (ModSib) form: SimdPrefix always receives 0.
template< typename T >
__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
{
	const u8 noPrefix = 0;
	SimdPrefix( noPrefix, opcode );
	EmitSibMagic( reg.Id, sib );
}
// Unprefixed register / direct-address form: SimdPrefix always receives 0.
template< typename T >
__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
{
	const u8 noPrefix = 0;
	SimdPrefix( noPrefix, opcode );
	xWriteDisp( reg.Id, data );
}
//////////////////////////////////////////////////////////////////////////////////////////
// Moves to/from high/low portions of an xmm register.
// These instructions cannot be used in reg/reg form.
//
// Loads (register destination) use Opcode; stores (memory destination) use Opcode+1.
//
template< u16 Opcode >
class MovhlImplAll
{
protected:
	template< u8 Prefix >
	struct Woot
	{
		// Load from a direct memory address.
		__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
		{
			writeXMMop( Prefix, Opcode, to, from );
		}

		// Store to a direct memory address.
		__forceinline void operator()( const void* to, const xRegisterSSE& from ) const
		{
			writeXMMop( Prefix, Opcode+1, from, to );
		}

		// Load from an indirect (ModSib) memory operand.
		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
		{
			writeXMMop( Prefix, Opcode, to, from );
		}

		// Store to an indirect (ModSib) memory operand.
		__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
		{
			writeXMMop( Prefix, Opcode+1, from, to );
		}
	};

public:
	MovhlImplAll() {} //GCC.

	Woot<0x00> PS;
	Woot<0x66> PD;
};
// ------------------------------------------------------------------------
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
// do something kinda different! Fun!
//
template< u16 Opcode >
class MovhlImpl_RtoR
{
public:
	MovhlImpl_RtoR() {} //GCC.

	// Packed-single form: written through the unprefixed writeXMMop overload.
	__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		writeXMMop( Opcode, to, from );
	}

	// Packed-double form: written with the 0x66 prefix.
	__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		writeXMMop( 0x66, Opcode, to, from );
	}
};
// ------------------------------------------------------------------------
// Full-register SSE move emitter.  Loads (register destination) use Opcode; stores
// (memory destination) use OpcodeAlt with the operands swapped.
//
template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
class MovapsImplAll
{
public:
	MovapsImplAll() {} //GCC.

	// Register-to-register move; nothing is emitted when both operands are the same
	// register.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		if( !(to != from) ) return;
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Load from a direct memory address.
	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Store to a direct memory address.
	__forceinline void operator()( const void* to, const xRegisterSSE& from ) const
	{
		writeXMMop( Prefix, OpcodeAlt, from, to );
	}

	// Load from an indirect (ModSib) memory operand.
	__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Store to an indirect (ModSib) memory operand.
	__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
	{
		writeXMMop( Prefix, OpcodeAlt, from, to );
	}
};
//////////////////////////////////////////////////////////////////////////////////////////
// SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be
// used for a few other instructions too (anything which comes in simdreg,simdreg/ModRM
// forms).  Every overload hands prefix 0x66 to writeXMMop.
//
template< u16 Opcode >
class SimdImpl_PackedLogic
{
public:
	SimdImpl_PackedLogic() {} //GCWho?

	// Source is a register of the same SIMD type as the destination.
	template< typename T >
	__forceinline void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const
	{
		writeXMMop( 0x66, Opcode, to, from );
	}

	// Source is a direct memory address.
	template< typename T >
	__forceinline void operator()( const xRegisterSIMD<T>& to, const void* from ) const
	{
		writeXMMop( 0x66, Opcode, to, from );
	}

	// Source is an indirect (ModSib) memory operand.
	template< typename T >
	__forceinline void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const
	{
		writeXMMop( 0x66, Opcode, to, from );
	}
};
// ------------------------------------------------------------------------
// Emitter for SSE-only operations that exist in xmmreg,xmmreg/rm form only
// (ANDPS/ANDPD and the like).  Every overload forwards to writeXMMop with the
// class's Prefix and Opcode.
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegSSE
{
public:
	SimdImpl_DestRegSSE() {} //GCWho?

	// Source is another SSE register.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Source is a direct memory address.
	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Source is an indirect (ModSib) memory operand.
	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}
};
// ------------------------------------------------------------------------
// Emitter for SSE-only operations that exist in xmmreg,reg/rm,imm form only
// (PSHUFD / PSHUFHW / etc).  Each overload writes the instruction through
// writeXMMop and then appends the 8-bit immediate.
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmSSE
{
public:
	SimdImpl_DestRegImmSSE() {} //GCWho?

	// Source is another SSE register.
	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// Source is a direct memory address.
	__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// Source is an indirect (ModSib) memory operand.
	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}
};
// MMX counterpart of the reg,reg/rm,imm emitter: each overload writes the instruction
// through writeXMMop and then appends the 8-bit immediate.
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegImmMMX
{
public:
	SimdImpl_DestRegImmMMX() {} //GCWho?

	// Source is another MMX register.
	__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// Source is a direct memory address.
	__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}

	// Source is an indirect (ModSib) memory operand.
	__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
		xWrite<u8>( imm );
	}
};
// ------------------------------------------------------------------------
// Emitter for MMX/SSE operations that exist in reg,reg/rm form only but accept either
// MM or XMM destinations (most PADD/PSUB and other packed arithmetic ops).  The
// operand type is deduced from the destination register.
//
template< u8 Prefix, u16 Opcode >
class SimdImpl_DestRegEither
{
public:
	SimdImpl_DestRegEither() {} //GCWho?

	// Source is a register of the same SIMD type as the destination.
	template< typename DestOperandType >
	__forceinline void operator()( const xRegisterSIMD<DestOperandType>& to, const xRegisterSIMD<DestOperandType>& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Source is a direct memory address.
	template< typename DestOperandType >
	__forceinline void operator()( const xRegisterSIMD<DestOperandType>& to, const void* from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}

	// Source is an indirect (ModSib) memory operand.
	template< typename DestOperandType >
	__forceinline void operator()( const xRegisterSIMD<DestOperandType>& to, const ModSibBase& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from );
	}
};
// ------------------------------------------------------------------------
// For implementing MMX/SSE operations which the destination *must* be a register, but the source
// can be regDirect or ModRM (indirect).
//
template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
class SimdImpl_DestRegStrict
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_DestRegStrict() {}

	// All three forms pass 'true' as the trailing writeXMMop argument.
	// NOTE(review): presumably this forces the prefix byte to be emitted regardless of the
	// destination register type -- confirm against writeXMMop.

	// Register source.
	__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from, true );
	}

	// Direct memory address source, typed to the source operand size.
	__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const
	{
		writeXMMop( Prefix, Opcode, to, from, true );
	}

	// Strictly-typed ModSib (indirect) memory source.
	__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const
	{
		writeXMMop( Prefix, Opcode, to, from, true );
	}
};
// ------------------------------------------------------------------------
// Groups the four floating point forms (PS/PD/SS/SD) of a single SSE opcode; the forms
// differ only in the prefix passed to SimdImpl_DestRegSSE.
template< u16 OpcodeSSE >
class SimdImpl_PSPD_SSSD
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PSPD_SSSD() {}

	// packed single precision (no prefix)
	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;

	// packed double precision (0x66 prefix)
	const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD;

	// scalar single precision (0xf3 prefix)
	const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;

	// scalar double precision (0xf2 prefix)
	const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
};
// ------------------------------------------------------------------------
//
// Packed-only (PS/PD) pair for a single SSE opcode; no scalar forms are provided.
template< u16 OpcodeSSE >
class SimdImpl_AndNot
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_AndNot() {}

	// packed single precision form (no prefix)
	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;

	// packed double precision form (0x66 prefix)
	const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD;
};
// ------------------------------------------------------------------------
// For instructions that have SS/SD form only (UCOMI, etc)
// AltPrefix - prefix used for doubles (SD form).
template< u8 AltPrefix, u16 OpcodeSSE >
class SimdImpl_SS_SD
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_SS_SD() {}

	// single precision form; emitted without a prefix
	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS;

	// double precision form; emitted with the caller-supplied AltPrefix
	const SimdImpl_DestRegSSE<AltPrefix,OpcodeSSE> SD;
};
// ------------------------------------------------------------------------
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
template< u16 OpcodeSSE >
class SimdImpl_rSqrt
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_rSqrt() {}

	// packed single precision form (no prefix)
	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;

	// scalar single precision form (0xf3 prefix)
	const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;
};
// ------------------------------------------------------------------------
// For instructions that have PS/SS/SD form only (most commonly Sqrt functions)
// Inherits the PS and SS forms from SimdImpl_rSqrt and adds the SD form on top.
template< u16 OpcodeSSE >
class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_Sqrt() {}

	// scalar double precision form (0xf2 prefix)
	const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
};
// ------------------------------------------------------------------------
// Shuffle-style SSE ops: xmmreg,reg/rm forms followed by an 8 bit immediate, provided
// in packed-single (PS) and packed-double (PD) flavors.
template< u16 OpcodeSSE >
class SimdImpl_Shuffle
{
protected:
	// Per-prefix implementation; each overload emits the op and then the immediate byte.
	template< u8 Prefix > struct Woot
	{
		Woot() {}

		// Register source.
		__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const
		{
			writeXMMop( Prefix, OpcodeSSE, to, from );
			xWrite<u8>( cmptype );
		}

		// Direct memory address source.
		__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const
		{
			writeXMMop( Prefix, OpcodeSSE, to, from );
			xWrite<u8>( cmptype );
		}

		// ModSib (indirect) memory source.
		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const
		{
			writeXMMop( Prefix, OpcodeSSE, to, from );
			xWrite<u8>( cmptype );
		}
	};

public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_Shuffle() {}

	const Woot<0x00> PS;	// packed single precision (no prefix)
	const Woot<0x66> PD;	// packed double precision (0x66 prefix)
};
// ------------------------------------------------------------------------
// SSE compare ops: every form emits opcode 0xc2 followed by the comparison type (CType)
// as the trailing immediate byte. Forms differ only by prefix.
template< SSE2_ComparisonType CType >
class SimdImpl_Compare
{
protected:
	// Per-prefix implementation shared by the PS/PD/SS/SD members.
	template< u8 Prefix > struct Woot
	{
		Woot() {}

		// Register source.
		__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
		{
			writeXMMop( Prefix, 0xc2, to, from );
			xWrite<u8>( CType );
		}

		// Direct memory address source.
		__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
		{
			writeXMMop( Prefix, 0xc2, to, from );
			xWrite<u8>( CType );
		}

		// ModSib (indirect) memory source.
		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
		{
			writeXMMop( Prefix, 0xc2, to, from );
			xWrite<u8>( CType );
		}
	};

public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_Compare() {}

	const Woot<0x00> PS;	// packed single precision (no prefix)
	const Woot<0x66> PD;	// packed double precision (0x66 prefix)
	const Woot<0xf3> SS;	// scalar single precision (0xf3 prefix)
	const Woot<0xf2> SD;	// scalar double precision (0xf2 prefix)
};
//////////////////////////////////////////////////////////////////////////////////////////
//
//
// One packed-shift form.
//   Opcode1   - opcode used when the shift count comes from a register or memory operand.
//   OpcodeImm - opcode used when the shift count is an 8 bit immediate.
//   Modcode   - value placed in the ModRM reg field for the immediate form.
template< u16 Opcode1, u16 OpcodeImm, u8 Modcode >
class SimdImpl_Shift
{
public:
	SimdImpl_Shift() {}

	// Shift count taken from a SIMD register of the same type as the destination.
	template< typename OperandType >
	__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const xRegisterSIMD<OperandType>& from ) const
	{
		writeXMMop( 0x66, Opcode1, to, from );
	}

	// Shift count taken from a direct memory address.
	template< typename OperandType >
	__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const void* from ) const
	{
		writeXMMop( 0x66, Opcode1, to, from );
	}

	// Shift count taken from a ModSib (indirect) memory operand.
	template< typename OperandType >
	__noinline void operator()( const xRegisterSIMD<OperandType>& to, const ModSibBase& from ) const
	{
		writeXMMop( 0x66, Opcode1, to, from );
	}

	// Shift count given as an 8 bit immediate.
	template< typename OperandType >
	__emitinline void operator()( const xRegisterSIMD<OperandType>& to, u8 imm ) const
	{
		// 16-byte operands get the 0x66 prefix; any other operand size gets none.
		const u8 prefix = (sizeof( OperandType ) == 16) ? 0x66 : 0;

		SimdPrefix( prefix, OpcodeImm );
		ModRM( 3, (int)Modcode, to.Id );
		xWrite<u8>( imm );
	}
};
// ------------------------------------------------------------------------
// Used for PSRA
// Word and dword shift forms only (no Q form). The register/memory opcodes are derived
// from OpcodeBase1 (+1 for W, +2 for D); the immediate forms use 0x71 and 0x72.
template< u16 OpcodeBase1, u8 Modcode >
class SimdImpl_ShiftWithoutQ
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_ShiftWithoutQ() {}

	// shift packed words [16bit]
	const SimdImpl_Shift<OpcodeBase1+1,0x71,Modcode> W;

	// shift packed doublewords [32bit]
	const SimdImpl_Shift<OpcodeBase1+2,0x72,Modcode> D;
};
// ------------------------------------------------------------------------
// Adds the quadword (Q) and double-quadword (DQ) shift forms to the W/D forms inherited
// from SimdImpl_ShiftWithoutQ.
template< u16 OpcodeBase1, u8 Modcode >
class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_ShiftAll() {}

	// shift packed quadwords [64bit]; register/memory opcode is OpcodeBase1+3, immediate is 0x73.
	const SimdImpl_Shift<OpcodeBase1+3,0x73,Modcode> Q;

	// Whole-register shift by an immediate; SSE registers only.
	// Shares opcode 0x73 with the Q immediate form but uses Modcode+1 in the ModRM reg
	// field, and always emits the 0x66 prefix.
	void DQ( const xRegisterSSE& to, u8 imm ) const
	{
		SimdPrefix( 0x66, 0x73 );
		ModRM( 3, (int)Modcode+1, to.Id );
		xWrite<u8>( imm );
	}
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer add/sub family. All opcodes except Q are derived from OpcodeB:
// OpcodeB+0/+1 are the unsigned-saturating forms, +0x10/+0x11 the signed-saturating
// forms, and +0x20/+0x21/+0x22 the B/W/D forms. Q has its own opcode.
template< u16 OpcodeB, u16 OpcodeQ >
class SimdImpl_AddSub
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_AddSub() {}

	// Add/Sub packed byte [8bit] integers from src into dest.
	const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;

	// Add/Sub packed word [16bit] integers from src into dest.
	const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;

	// Add/Sub packed doubleword [32bit] integers from src into dest.
	const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;

	// Add/Sub packed quadword [64bit] integers from src into dest.
	const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;

	// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
	const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;

	// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
	const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;

	// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
	const SimdImpl_DestRegEither<0x66,OpcodeB> USB;

	// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
	const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer multiply family (PMUL*).
class SimdImpl_PMul
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PMul() {}

	// PMULLW (0F D5). MMX or SSE operands.
	const SimdImpl_DestRegEither<0x66,0xd5> LW;

	// PMULHW (0F E5). MMX or SSE operands.
	const SimdImpl_DestRegEither<0x66,0xe5> HW;

	// PMULHUW (0F E4). MMX or SSE operands.
	const SimdImpl_DestRegEither<0x66,0xe4> HUW;

	// PMULUDQ (0F F4). MMX or SSE operands.
	const SimdImpl_DestRegEither<0x66,0xf4> UDQ;

	// [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
	// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
	// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
	// bits. Rounding is always performed by adding 1 to the least significant bit of the
	// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
	// immediately to the right of the most significant bit of each 18-bit intermediate
	// result and packed to the destination operand.
	//
	// Both operands can be MMX or XMM registers. Source can be register or memory.
	//
	const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;

	// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
	// the low 32 bits of each product in dest. (SSE operands only)
	const SimdImpl_DestRegSSE<0x66,0x4038> LD;

	// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
	// (SSE operands only)
	const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer compare family (PCMPEQ* / PCMPGT*). For every form, each data element
// in dest is set to all 1s when the comparison against the corresponding data element
// in src holds, and to all 0s otherwise. Operands may be MMX or SSE.
class SimdImpl_PCompare
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PCompare() {}

	// Compare packed bytes for equality.
	const SimdImpl_DestRegEither<0x66,0x74> EQB;

	// Compare packed words for equality.
	const SimdImpl_DestRegEither<0x66,0x75> EQW;

	// Compare packed doublewords [32-bits] for equality.
	const SimdImpl_DestRegEither<0x66,0x76> EQD;

	// Compare packed signed bytes for greater than (dest > src).
	const SimdImpl_DestRegEither<0x66,0x64> GTB;

	// Compare packed signed words for greater than (dest > src).
	const SimdImpl_DestRegEither<0x66,0x65> GTW;

	// Compare packed signed doublewords [32-bits] for greater than (dest > src).
	const SimdImpl_DestRegEither<0x66,0x66> GTD;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer min/max family.
//   Opcode1 - single-byte opcode of the UB form; the SW form is Opcode1+0x10.
//   Opcode2 - second opcode byte of the first SSE-4.1 form (SB); SD/UW/UD follow at
//             +1/+2/+3. These are combined with the 0x38 escape byte into a 16 bit opcode.
template< u8 Opcode1, u16 Opcode2 >
class SimdImpl_PMinMax
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PMinMax() {}

	// Compare packed unsigned byte integers in dest to src and store packed min/max
	// values in dest. (MMX or SSE operands)
	const SimdImpl_DestRegEither<0x66,Opcode1> UB;

	// Compare packed signed word integers in dest to src and store packed min/max
	// values in dest. (MMX or SSE operands)
	const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW;

	// [SSE-4.1] Compare packed signed byte integers in dest to src and store
	// packed min/max values in dest. (SSE operands only)
	const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB;

	// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
	// packed min/max values in dest. (SSE operands only)
	const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD;

	// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
	// packed min/max values in dest. (SSE operands only)
	const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW;

	// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
	// packed min/max values in dest. (SSE operands only)
	const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer shuffle family (PSHUF*).
class SimdImpl_PShuffle
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PShuffle() {}

	// Copies words from src and inserts them into dest at word locations selected with
	// the order operand (8 bit immediate). (MMX operands)
	const SimdImpl_DestRegImmMMX<0x00,0x70> W;

	// Copies doublewords from src and inserts them into dest at dword locations selected
	// with the order operand (8 bit immediate). (SSE operands)
	const SimdImpl_DestRegImmSSE<0x66,0x70> D;

	// Copies words from the low quadword of src and inserts them into the low quadword
	// of dest at word locations selected with the order operand (8 bit immediate).
	// The high quadword of src is copied to the high quadword of dest.
	const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;

	// Copies words from the high quadword of src and inserts them into the high quadword
	// of dest at word locations selected with the order operand (8 bit immediate).
	// The low quadword of src is copied to the low quadword of dest.
	const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;

	// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
	// control mask in src. If the most significant bit (bit[7]) of each byte of the
	// shuffle control mask is set, then constant zero is written in the result byte.
	// Each byte in the shuffle control mask forms an index to permute the corresponding
	// byte in dest. The value of each index is the least significant 4 bits (128-bit
	// operation) or 3 bits (64-bit operation) of the shuffle control byte.
	//
	// Operands can be MMX or XMM registers.
	const SimdImpl_DestRegEither<0x66,0x0038> B;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer unpack-and-interleave family (PUNPCK*). The quadword forms accept SSE
// operands only; all other forms accept MMX or SSE operands.
class SimdImpl_PUnpack
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PUnpack() {}

	// ---- low-order forms ----

	// Unpack and interleave low-order bytes from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x60> LBW;

	// Unpack and interleave low-order words from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x61> LWD;

	// Unpack and interleave low-order doublewords from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x62> LDQ;

	// Unpack and interleave low-order quadwords from src and dest into dest.
	const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;

	// ---- high-order forms ----

	// Unpack and interleave high-order bytes from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x68> HBW;

	// Unpack and interleave high-order words from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x69> HWD;

	// Unpack and interleave high-order doublewords from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x6a> HDQ;

	// Unpack and interleave high-order quadwords from src and dest into dest.
	const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
};
//////////////////////////////////////////////////////////////////////////////////////////
// Pack with Signed or Unsigned Saturation
//
// PACKSS* / PACKUS* family: narrows the packed integers of dest and src into dest.
class SimdImpl_Pack
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_Pack() {}

	// Converts packed signed word integers from src and dest into packed signed
	// byte integers in dest, using signed saturation.
	const SimdImpl_DestRegEither<0x66,0x63> SSWB;

	// Converts packed signed dword integers from src and dest into packed signed
	// word integers in dest, using signed saturation.
	const SimdImpl_DestRegEither<0x66,0x6b> SSDW;

	// Converts packed signed word integers from src and dest into packed unsigned
	// byte integers in dest, using unsigned saturation.
	const SimdImpl_DestRegEither<0x66,0x67> USWB;

	// [SSE-4.1] Converts packed signed dword integers from src and dest into packed
	// unsigned word integers in dest, using unsigned saturation. (SSE operands only)
	const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Floating point unpack-and-interleave family (UNPCK[H/L][PS/PD]); SSE operands only.
class SimdImpl_Unpack
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_Unpack() {}

	// Unpacks the high doubleword [single-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest[0] <- dest[2]
	//    dest[1] <- src[2]
	//    dest[2] <- dest[3]
	//    dest[3] <- src[3]
	//
	const SimdImpl_DestRegSSE<0x00,0x15> HPS;

	// Unpacks the high quadword [double-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest.lo <- dest.hi
	//    dest.hi <- src.hi
	//
	const SimdImpl_DestRegSSE<0x66,0x15> HPD;

	// Unpacks the low doubleword [single-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest[3] <- src[1]
	//    dest[2] <- dest[1]
	//    dest[1] <- src[0]
	//    dest[0] <- dest[0]
	//
	const SimdImpl_DestRegSSE<0x00,0x14> LPS;

	// Unpacks the low quadword [double-precision] values from src and dest into
	// dest, effectively moving the low portion of src into the upper portion of dest.
	// The result of dest is loaded as such:
	//    dest.hi <- src.lo
	//    dest.lo <- dest.lo  [remains unchanged!]
	//
	const SimdImpl_DestRegSSE<0x66,0x14> LPD;
};

View File

@ -0,0 +1,306 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////
//
// Shuffle-style SSE ops: xmmreg,reg/rm forms followed by an 8 bit immediate, provided
// in packed-single (PS) and packed-double (PD) flavors.
template< u16 OpcodeSSE >
class SimdImpl_Shuffle
{
protected:
	// Per-prefix implementation; each overload emits the op and then the immediate byte.
	template< u8 Prefix > struct Woot
	{
		Woot() {}

		// Register source.
		__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const
		{
			writeXMMop( Prefix, OpcodeSSE, to, from );
			xWrite<u8>( cmptype );
		}

		// Direct memory address source.
		__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const
		{
			writeXMMop( Prefix, OpcodeSSE, to, from );
			xWrite<u8>( cmptype );
		}

		// ModSib (indirect) memory source.
		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const
		{
			writeXMMop( Prefix, OpcodeSSE, to, from );
			xWrite<u8>( cmptype );
		}
	};

public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_Shuffle() {}

	const Woot<0x00> PS;	// packed single precision (no prefix)
	const Woot<0x66> PD;	// packed double precision (0x66 prefix)
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer shuffle family (PSHUF*).
class SimdImpl_PShuffle
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PShuffle() {}

	// Copies words from src and inserts them into dest at word locations selected with
	// the order operand (8 bit immediate). (MMX operands)
	const SimdImpl_DestRegImmMMX<0x00,0x70> W;

	// Copies doublewords from src and inserts them into dest at dword locations selected
	// with the order operand (8 bit immediate). (SSE operands)
	const SimdImpl_DestRegImmSSE<0x66,0x70> D;

	// Copies words from the low quadword of src and inserts them into the low quadword
	// of dest at word locations selected with the order operand (8 bit immediate).
	// The high quadword of src is copied to the high quadword of dest.
	const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;

	// Copies words from the high quadword of src and inserts them into the high quadword
	// of dest at word locations selected with the order operand (8 bit immediate).
	// The low quadword of src is copied to the low quadword of dest.
	const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;

	// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
	// control mask in src. If the most significant bit (bit[7]) of each byte of the
	// shuffle control mask is set, then constant zero is written in the result byte.
	// Each byte in the shuffle control mask forms an index to permute the corresponding
	// byte in dest. The value of each index is the least significant 4 bits (128-bit
	// operation) or 3 bits (64-bit operation) of the shuffle control byte.
	//
	// Operands can be MMX or XMM registers.
	const SimdImpl_DestRegEither<0x66,0x0038> B;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Packed integer unpack-and-interleave family (PUNPCK*). The quadword forms accept SSE
// operands only; all other forms accept MMX or SSE operands.
class SimdImpl_PUnpack
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PUnpack() {}

	// ---- low-order forms ----

	// Unpack and interleave low-order bytes from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x60> LBW;

	// Unpack and interleave low-order words from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x61> LWD;

	// Unpack and interleave low-order doublewords from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x62> LDQ;

	// Unpack and interleave low-order quadwords from src and dest into dest.
	const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;

	// ---- high-order forms ----

	// Unpack and interleave high-order bytes from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x68> HBW;

	// Unpack and interleave high-order words from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x69> HWD;

	// Unpack and interleave high-order doublewords from src and dest into dest.
	const SimdImpl_DestRegEither<0x66,0x6a> HDQ;

	// Unpack and interleave high-order quadwords from src and dest into dest.
	const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
};
//////////////////////////////////////////////////////////////////////////////////////////
// Pack with Signed or Unsigned Saturation
//
// PACKSS* / PACKUS* family: narrows the packed integers of dest and src into dest.
class SimdImpl_Pack
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_Pack() {}

	// Converts packed signed word integers from src and dest into packed signed
	// byte integers in dest, using signed saturation.
	const SimdImpl_DestRegEither<0x66,0x63> SSWB;

	// Converts packed signed dword integers from src and dest into packed signed
	// word integers in dest, using signed saturation.
	const SimdImpl_DestRegEither<0x66,0x6b> SSDW;

	// Converts packed signed word integers from src and dest into packed unsigned
	// byte integers in dest, using unsigned saturation.
	const SimdImpl_DestRegEither<0x66,0x67> USWB;

	// [SSE-4.1] Converts packed signed dword integers from src and dest into packed
	// unsigned word integers in dest, using unsigned saturation. (SSE operands only)
	const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Floating point unpack-and-interleave family (UNPCK[H/L][PS/PD]); SSE operands only.
class SimdImpl_Unpack
{
public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_Unpack() {}

	// Unpacks the high doubleword [single-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest[0] <- dest[2]
	//    dest[1] <- src[2]
	//    dest[2] <- dest[3]
	//    dest[3] <- src[3]
	//
	const SimdImpl_DestRegSSE<0x00,0x15> HPS;

	// Unpacks the high quadword [double-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest.lo <- dest.hi
	//    dest.hi <- src.hi
	//
	const SimdImpl_DestRegSSE<0x66,0x15> HPD;

	// Unpacks the low doubleword [single-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest[3] <- src[1]
	//    dest[2] <- dest[1]
	//    dest[1] <- src[0]
	//    dest[0] <- dest[0]
	//
	const SimdImpl_DestRegSSE<0x00,0x14> LPS;

	// Unpacks the low quadword [double-precision] values from src and dest into
	// dest, effectively moving the low portion of src into the upper portion of dest.
	// The result of dest is loaded as such:
	//    dest.hi <- src.lo
	//    dest.lo <- dest.lo  [remains unchanged!]
	//
	const SimdImpl_DestRegSSE<0x66,0x14> LPD;
};
//////////////////////////////////////////////////////////////////////////////////////////
// PINSW/B/D [all but Word form are SSE4.1 only!]
//
class SimdImpl_PInsert
{
protected:
	// Shared implementation of the byte and dword insert forms [SSE-4.1].
	// Opcode is the final opcode byte; it is combined with the 0x3a escape byte into the
	// 16 bit opcode that writeXMMop/SimdPrefix expect (escape byte in the low 8 bits).
	template< u16 Opcode >
	class ByteDwordForms
	{
	public:
		ByteDwordForms() {}

		// Inserts from a 32 bit general purpose register into the element of 'to'
		// selected by imm8.
		__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
		{
			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
			xWrite<u8>( imm8 );		// fixed: previously referenced an undeclared 'imm'
		}

		// Inserts from a direct memory address.
		__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const
		{
			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
			xWrite<u8>( imm8 );
		}

		// Inserts from a ModSib (indirect) memory operand.
		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
		{
			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
			xWrite<u8>( imm8 );
		}
	};

public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PInsert() {}

	// Word insert (opcode 0xc4) from a 32 bit general purpose register; imm8 selects the
	// destination word. The destination can be either an MMX or SSE register.
	template< typename T >
	__forceinline void W( const xRegisterSIMD<T>& to, const xRegister32& from, u8 imm8 ) const
	{
		writeXMMop( 0x66, 0xc4, to, from );
		xWrite<u8>( imm8 );
	}

	// Word insert from a direct memory address.
	// The destination can be either an MMX or SSE register.
	template< typename T >
	__forceinline void W( const xRegisterSIMD<T>& to, const void* from, u8 imm8 ) const
	{
		writeXMMop( 0x66, 0xc4, to, from );
		xWrite<u8>( imm8 );
	}

	// Word insert from a ModSib (indirect) memory operand.
	// The destination can be either an MMX or SSE register.
	template< typename T >
	__noinline void W( const xRegisterSIMD<T>& to, const ModSibBase& from, u8 imm8 ) const
	{
		writeXMMop( 0x66, 0xc4, to, from );
		xWrite<u8>( imm8 );
	}

	// [SSE-4.1] Byte insert (66 0F 3A 20).
	const ByteDwordForms<0x20> B;

	// [SSE-4.1] Dword insert (66 0F 3A 22).
	const ByteDwordForms<0x22> D;
};
//////////////////////////////////////////////////////////////////////////////////////////
// PEXTRW/B/D [all but Word form are SSE4.1 only!]
//
// Note: Word form's indirect memory form is only available in SSE4.1.
//
class SimdImpl_PExtract
{
protected:
	// Shared implementation of the byte and dword extract forms [SSE-4.1].
	// Opcode is the final opcode byte; it is combined with the 0x3a escape byte into the
	// 16 bit opcode that writeXMMop/SimdPrefix expect (escape byte in the low 8 bits).
	//
	// Fixes over the previous revision:
	//  * the immediate was written from an undeclared 'imm' (the parameter is 'imm8');
	//  * the memory forms referenced an undeclared 'to' (the parameter is 'dest');
	//  * for the 0F 3A 14/16 encodings the xmm source is encoded in ModRM.reg and the
	//    destination in ModRM.r/m, so the xmm register is passed to writeXMMop first --
	//    the same order the 0x153a memory forms of W() below already use.
	template< u16 Opcode >
	class ByteDwordForms
	{
	public:
		ByteDwordForms() {}

		// Extracts the element of 'from' selected by imm8 into a 32 bit register.
		__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
		{
			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, to );
			xWrite<u8>( imm8 );
		}

		// Extracts the element of 'from' selected by imm8 to a direct memory address.
		__forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const
		{
			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
			xWrite<u8>( imm8 );
		}

		// Extracts the element of 'from' selected by imm8 to a ModSib (indirect) operand.
		__noinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
		{
			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
			xWrite<u8>( imm8 );
		}
	};

public:
	// Empty user-provided ctor (lets 'const' instances be declared without an initializer).
	SimdImpl_PExtract() {}

	// Copies the word element specified by imm8 from src to dest.  The upper bits
	// of dest are zero-extended (cleared).  This can be used to extract any single packed
	// word value from src into an x86 32 bit register.
	//
	// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
	//
	// NOTE(review): the trailing 'true' passed to writeXMMop presumably forces the 0x66
	// prefix; if so it is also emitted when T is an MMX register type -- confirm against
	// writeXMMop that this is intended.
	template< typename T >
	__forceinline void W( const xRegister32& to, const xRegisterSIMD<T>& from, u8 imm8 ) const
	{
		writeXMMop( 0x66, 0xc5, to, from, true );
		xWrite<u8>( imm8 );
	}

	// [SSE-4.1] Word extract to a direct memory address (opcode 0x153a; SSE source only).
	__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const
	{
		writeXMMop( 0x66, 0x153a, from, dest );
		xWrite<u8>( imm8 );
	}

	// [SSE-4.1] Word extract to a ModSib (indirect) memory operand (opcode 0x153a).
	__noinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
	{
		writeXMMop( 0x66, 0x153a, from, dest );
		xWrite<u8>( imm8 );
	}

	// [SSE-4.1] Copies the byte element specified by imm8 from src to dest.  The upper bits
	// of dest are zero-extended (cleared).  This can be used to extract any single packed
	// byte value from src into an x86 32 bit register.
	const ByteDwordForms<0x14> B;

	// [SSE-4.1] Copies the dword element specified by imm8 from src to dest.  This can be
	// used to extract any single packed dword value from src into an x86 32 bit register.
	const ByteDwordForms<0x16> D;
};

View File

@ -641,18 +641,25 @@ __emitinline void xBSWAP( const xRegister32& to )
// MMX / XMM Instructions
// (these will get put in their own file later)
// If the upper 8 bits of opcode are zero, the opcode is treated as a u8.
// The upper bits are non-zero, the opcode is assumed 16 bit (and the upper bits are checked aginst
// 0x38, which is the only valid high word for 16 bit opcodes as such)
// ------------------------------------------------------------------------
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
// treated as a 16 bit value (in SSE, 0x38 and 0x3a denote escape bytes for extended
// SSE3/4 instructions). For any other lower byte, the upper byte is assumed to be zero
// and is ignored. A non-zero upper byte, when the lower byte is not 0x38 or 0x3a, will
// generate an assertion.
//
__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
{
const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a);
// If the lower byte is not a valid prefix and the upper byte is non-zero, it
// means we made a mistake!
if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 );
if( prefix != 0 )
{
if( (opcode & 0xff00) != 0 )
{
jASSUME( (opcode & 0xff00) == 0x3800 );
xWrite<u32>( (opcode<<16) | (0x0f00 | prefix) );
}
if( is16BitOpcode )
xWrite<u32>( (opcode<<16) | 0x0f00 | prefix );
else
{
xWrite<u16>( 0x0f00 | prefix );
@ -661,9 +668,9 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
}
else
{
if( (opcode & 0xff00) != 0 )
if( is16BitOpcode )
{
jASSUME( (opcode & 0xff00) == 0x3800 );
xWrite<u8>( 0x0f );
xWrite<u16>( opcode );
}
else
@ -671,6 +678,11 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
}
}
// [SSE-3]
const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
// [SSE-3]
const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS;
const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD;
@ -689,20 +701,20 @@ const MovhlImplAll<0x12> xMOVL;
const MovhlImpl_RtoR<0x16> xMOVLH;
const MovhlImpl_RtoR<0x12> xMOVHL;
const SimdImpl_PackedLogic<0xdb> xPAND;
const SimdImpl_PackedLogic<0xdf> xPANDN;
const SimdImpl_PackedLogic<0xeb> xPOR;
const SimdImpl_PackedLogic<0xef> xPXOR;
const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
const SimdImpl_DestRegEither<0x66,0xef> xPXOR;
const SimdImpl_AndNot<0x55> xANDN;
const SimdImpl_AndNot xANDN;
const SimdImpl_SS_SD<0x66,0x2e> xUCOMI;
const SimdImpl_UcomI<0x66,0x2e> xUCOMI;
const SimdImpl_rSqrt<0x53> xRCP;
const SimdImpl_rSqrt<0x52> xRSQRT;
const SimdImpl_Sqrt<0x51> xSQRT;
const SimdImpl_PSPD_SSSD<0x5f> xMAX;
const SimdImpl_PSPD_SSSD<0x5d> xMIN;
const SimdImpl_MinMax<0x5f> xMAX;
const SimdImpl_MinMax<0x5d> xMIN;
const SimdImpl_Shuffle<0xc6> xSHUF;
// ------------------------------------------------------------------------
@ -754,8 +766,8 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S
// ------------------------------------------------------------------------
const SimdImpl_ShiftAll<0xd0, 2> xPSRL;
const SimdImpl_ShiftAll<0xf0, 6> xPSLL;
const SimdImpl_Shift<0xd0, 2> xPSRL;
const SimdImpl_Shift<0xf0, 6> xPSLL;
const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
@ -770,10 +782,29 @@ const SimdImpl_PUnpack xPUNPCK;
const SimdImpl_Unpack xUNPCK;
const SimdImpl_Pack xPACK;
const SimdImpl_PAbsolute xPABS;
const SimdImpl_PSign xPSIGN;
const SimdImpl_PInsert xPINS;
const SimdImpl_PExtract xPEXTR;
//////////////////////////////////////////////////////////////////////////////////////////
//
// Store Streaming SIMD Extension Control/Status to Mem32.
__emitinline void xSTMXCSR( u32* dest )
{
	// Opcode 0F AE; the store form is selected by the value 3 passed to xWriteDisp.
	const int storeForm = 3;

	SimdPrefix( 0, 0xae );
	xWriteDisp( storeForm, dest );
}
// Load Streaming SIMD Extension Control/Status from Mem32.
__emitinline void xLDMXCSR( const u32* src )
{
	// Opcode 0F AE; the load form is selected by the value 2 passed to xWriteDisp.
	const int loadForm = 2;

	SimdPrefix( 0, 0xae );
	xWriteDisp( loadForm, src );
}
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
// being cleared to zero.
@ -851,5 +882,8 @@ __noinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { wri
// MOVNTQ (opcode 0xe7): store an MMX register to a direct memory address.
__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from )
{
	writeXMMop( 0xe7, from, to );
}

// MOVNTQ (opcode 0xe7): store an MMX register to a ModSib (indirect) memory operand.
__noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from )
{
	writeXMMop( 0xe7, from, to );
}

// MOVMSKPS (opcode 0x50, emitted without a prefix).
__forceinline void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from)
{
	writeXMMop( 0x50, to, from );
}

// MOVMSKPD (opcode 0x50 with the 0x66 prefix; 'true' is passed as writeXMMop's final argument).
__forceinline void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from)
{
	writeXMMop( 0x66, 0x50, to, from, true );
}
}

View File

@ -370,8 +370,23 @@ namespace x86Emitter
// PMOVMSKB (opcode 0xd7): 32 bit register destination, MMX or SSE register source.
template< typename T >
static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD<T>& from )
{
	Internal::writeXMMop( 0x66, 0xd7, to, from );
}
// [sSSE-3] Concatenates dest and source operands into an intermediate composite,
// shifts the composite at byte granularity to the right by a constant immediate,
// and extracts the right-aligned result into the destination.
//
// Emits PALIGNR: 16 bit opcode 0x0f3a (0x3a escape byte in the low 8 bits), followed by
// imm8 as the trailing immediate. Both operands must be the same SIMD register type.
template< typename T >
static __forceinline void xPALIGNR( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from, u8 imm8 )
{
	Internal::writeXMMop( 0x66, 0x0f3a, to, from );

	// shift amount, in bytes
	xWrite<u8>( imm8 );
}
// ------------------------------------------------------------------------
extern void xSTMXCSR( u32* dest );
extern void xLDMXCSR( const u32* src );
extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from );
extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from );
extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from );
@ -411,8 +426,14 @@ namespace x86Emitter
// MOVNTQ store overloads (raw address / ModSib destination) and the sign-mask
// extraction emitters.
// NOTE(review): xMOVMSKPS/xMOVMSKPD take `from` by non-const reference; any change
// to const must be mirrored in their definitions.
extern void xMOVNTQ( void* to, const xRegisterMMX& from );
extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from );
extern void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from );
extern void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from );
// ------------------------------------------------------------------------
// Instruction objects.  The template arguments are the prefix byte (0 = none)
// followed by the opcode byte(s); MovapsImplAll takes two opcode bytes.
// NOTE(review): the two MovapsImplAll opcodes are presumably the load and store
// forms respectively -- confirm against the MovapsImplAll definition.
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS;
extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS;
@ -435,29 +456,29 @@ namespace x86Emitter
// ------------------------------------------------------------------------
// Packed logic, reciprocal/square-root, min/max, shuffle and compare instruction
// objects.  Numeric template arguments are prefix and/or opcode bytes.
//
// NOTE(review): several objects in this group are declared twice, some with
// different types: xPAND/xPANDN/xPOR/xPXOR (SimdImpl_PackedLogic vs
// SimdImpl_DestRegEither), xANDN (SimdImpl_AndNot<0x55> vs SimdImpl_AndNot),
// xUCOMI (SimdImpl_SS_SD vs SimdImpl_UcomI) and xMAX/xMIN (SimdImpl_PSPD_SSSD vs
// SimdImpl_MinMax).  Conflicting redeclarations do not compile; this looks like
// superseded and replacement lines sitting side by side -- confirm which set is
// current and drop the other.
extern const Internal::SimdImpl_PackedLogic<0xdb> xPAND;
extern const Internal::SimdImpl_PackedLogic<0xdf> xPANDN;
extern const Internal::SimdImpl_PackedLogic<0xeb> xPOR;
extern const Internal::SimdImpl_PackedLogic<0xef> xPXOR;
extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND;
extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR;
extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR;
extern const Internal::SimdImpl_AndNot<0x55> xANDN;
extern const Internal::SimdImpl_AndNot xANDN;
extern const Internal::SimdImpl_SS_SD<0x66,0x2e> xUCOMI;
extern const Internal::SimdImpl_UcomI<0x66,0x2e> xUCOMI;
extern const Internal::SimdImpl_rSqrt<0x53> xRCP;
extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT;
extern const Internal::SimdImpl_Sqrt<0x51> xSQRT;
extern const Internal::SimdImpl_PSPD_SSSD<0x5f> xMAX;
extern const Internal::SimdImpl_PSPD_SSSD<0x5d> xMIN;
extern const Internal::SimdImpl_MinMax<0x5f> xMAX;
extern const Internal::SimdImpl_MinMax<0x5d> xMIN;
extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF;
// ------------------------------------------------------------------------
// Compare objects, one per SSE2 comparison predicate.
// NOTE(review): xCMPEQ, xCMPLT, xCMPUNORD and xCMPNE each appear twice with
// identical types (harmless to the compiler, but redundant).
extern const Internal::SimdImpl_Compare<SSE2_Equal> xCMPEQ;
extern const Internal::SimdImpl_Compare<SSE2_Less> xCMPLT;
extern const Internal::SimdImpl_Compare<SSE2_Equal> xCMPEQ;
extern const Internal::SimdImpl_Compare<SSE2_Less> xCMPLT;
extern const Internal::SimdImpl_Compare<SSE2_LessOrEqual> xCMPLE;
extern const Internal::SimdImpl_Compare<SSE2_Unordered> xCMPUNORD;
extern const Internal::SimdImpl_Compare<SSE2_NotEqual> xCMPNE;
extern const Internal::SimdImpl_Compare<SSE2_Unordered> xCMPUNORD;
extern const Internal::SimdImpl_Compare<SSE2_NotEqual> xCMPNE;
extern const Internal::SimdImpl_Compare<SSE2_NotLess> xCMPNLT;
extern const Internal::SimdImpl_Compare<SSE2_NotLessOrEqual> xCMPNLE;
extern const Internal::SimdImpl_Compare<SSE2_Ordered> xCMPORD;
@ -497,8 +518,8 @@ namespace x86Emitter
// ------------------------------------------------------------------------
// Packed shift and add instruction objects.  For the shift objects the template
// arguments are an opcode base and a second small integer.
// NOTE(review): the second shift argument (2 / 6 / 4) is presumably the ModRM
// extension used by the immediate forms -- confirm in the shift helper classes.
// NOTE(review): xPSRL and xPSLL are each declared twice with different types
// (SimdImpl_ShiftAll vs SimdImpl_Shift); conflicting redeclarations do not compile
// -- confirm which pair is current and drop the other.
extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL;
extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL;
extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL;
extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;
extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
@ -512,5 +533,11 @@ namespace x86Emitter
// Unpack / pack instruction groups, followed by the packed absolute-value,
// packed sign, packed insert and packed extract groups.  Individual instructions
// are reached through members of these objects (e.g. xPABS.B, xPSIGN.W,
// xPINS.W, xPEXTR.W).
extern const Internal::SimdImpl_PUnpack xPUNPCK;
extern const Internal::SimdImpl_Unpack xUNPCK;
extern const Internal::SimdImpl_Pack xPACK;
extern const Internal::SimdImpl_PAbsolute xPABS;
extern const Internal::SimdImpl_PSign xPSIGN;
extern const Internal::SimdImpl_PInsert xPINS;
extern const Internal::SimdImpl_PExtract xPEXTR;
}

View File

@ -1351,7 +1351,6 @@ extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from);
// Legacy-style SSSE3 emitter prototypes.  Registers are identified by index
// (x86SSERegType); PALIGNR additionally takes the byte shift count as imm8.
extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8);
// SSE4.1

View File

@ -95,9 +95,13 @@ using namespace x86Emitter;
emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \
emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
// Two-parameter form: generates SSE2_<mod><sub>_XMM_to_XMM and
// SSE2_<mod><sub>_M128_to_XMM, each forwarding to the member call x<mod>.<sub>(...).
// NOTE(review): this macro name is defined again immediately below with a
// different parameter list; redefining a macro with a different signature is an
// error/warning -- confirm that only one of the two definitions is meant to remain.
#define DEFINE_LEGACY_OP128( mod, sub ) \
emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); }
// Three-parameter form: `ssenum` is pasted after "SSE" to build the function name
// prefix (e.g. 2 -> SSE2_, 4 -> SSE4_).  The generated functions forward to the
// member call x<mod>.<sub>(...) with either a register or a raw-address source.
#define DEFINE_LEGACY_OP128( ssenum, mod, sub ) \
emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); }
// Same naming scheme as the three-parameter DEFINE_LEGACY_OP128, but the target is
// the single pasted identifier x<mod><sub> invoked directly (e.g. xMOVSLDUP) rather
// than a member of x<mod>.
#define DEFINE_LEGACY_MOV128( ssenum, mod, sub ) \
emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod##sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod##sub( xRegisterSSE(to), (void*)from ); }
#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \
@ -136,23 +140,31 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP )
// Instantiations of the legacy wrapper macros.  Each DEFINE_LEGACY_OP128 /
// DEFINE_LEGACY_MOV128 line generates an _XMM_to_XMM and an _M128_to_XMM function.
//
// NOTE(review): both two-argument (mod, sub) and three-argument (ssenum, mod, sub)
// invocations of DEFINE_LEGACY_OP128 appear below for the same instructions.  Only
// one form can match the macro definition in effect, and keeping both would
// generate duplicate functions -- confirm which set is current.
DEFINE_LEGACY_RSQRT_OPCODE( RSQRT )
DEFINE_LEGACY_SQRT_OPCODE( SQRT )
DEFINE_LEGACY_OP128( PMUL, LW )
DEFINE_LEGACY_OP128( PMUL, HW )
DEFINE_LEGACY_OP128( PMUL, UDQ )
DEFINE_LEGACY_OP128( 2, PMUL, LW )
DEFINE_LEGACY_OP128( 2, PMUL, HW )
DEFINE_LEGACY_OP128( 2, PMUL, UDQ )
DEFINE_LEGACY_OP128( PMAX, SW )
DEFINE_LEGACY_OP128( PMAX, UB )
DEFINE_LEGACY_OP128( PMIN, SW )
DEFINE_LEGACY_OP128( PMIN, UB )
DEFINE_LEGACY_OP128( 2, PMAX, SW )
DEFINE_LEGACY_OP128( 2, PMAX, UB )
DEFINE_LEGACY_OP128( 2, PMIN, SW )
DEFINE_LEGACY_OP128( 2, PMIN, UB )
DEFINE_LEGACY_OP128( UNPCK, LPS )
DEFINE_LEGACY_OP128( UNPCK, HPS )
DEFINE_LEGACY_OP128( PUNPCK, LQDQ )
DEFINE_LEGACY_OP128( PUNPCK, HQDQ )
// NOTE(review): these generate SSE2_UNPCKLPS_* / SSE2_UNPCKHPS_* names, while
// hand-written SSE_UNPCKLPS_* / SSE_UNPCKHPS_* wrappers also exist in this file --
// confirm the SSE2_-prefixed variants are actually wanted.
DEFINE_LEGACY_OP128( 2, UNPCK, LPS )
DEFINE_LEGACY_OP128( 2, UNPCK, HPS )
DEFINE_LEGACY_OP128( 2, PUNPCK, LQDQ )
DEFINE_LEGACY_OP128( 2, PUNPCK, HQDQ )
DEFINE_LEGACY_OP128( PACK, SSWB )
DEFINE_LEGACY_OP128( PACK, SSDW )
DEFINE_LEGACY_OP128( PACK, USWB )
DEFINE_LEGACY_OP128( 2, PACK, SSWB )
DEFINE_LEGACY_OP128( 2, PACK, SSDW )
DEFINE_LEGACY_OP128( 2, PACK, USWB )
// Generates SSE3_MOVSLDUP_* and SSE3_MOVSHDUP_* forwarding to xMOVSLDUP / xMOVSHDUP.
DEFINE_LEGACY_MOV128( 3, MOV, SLDUP )
DEFINE_LEGACY_MOV128( 3, MOV, SHDUP )
// Generates SSE4_PMAXSD_*, SSE4_PMINSD_*, SSE4_PMAXUD_* and SSE4_PMINUD_*.
DEFINE_LEGACY_OP128( 4, PMAX, SD )
DEFINE_LEGACY_OP128( 4, PMIN, SD )
DEFINE_LEGACY_OP128( 4, PMAX, UD )
DEFINE_LEGACY_OP128( 4, PMIN, UD )
// Legacy wrapper: register-to-register MOVAPS, forwarded to xMOVAPS with both
// register indices wrapped as xRegisterSSE.
emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); }
@ -201,11 +213,11 @@ emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.P
// Legacy wrappers forwarding to the new emitter objects.  The _Rm_ variants build a
// memory operand from an address register plus a constant offset via ptr[...].
//
// NOTE(review): SSE_MOVLHPS_XMM_to_XMM, SSE_MOVHLPS_XMM_to_XMM,
// SSE_MASKMOVDQU_XMM_to_XMM and SSE2_PMOVMSKB_XMM_to_R32 each appear twice below
// with identical bodies; duplicate definitions would not compile -- confirm that
// only one copy of each is meant to remain.
emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); }
emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); }
emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); }
emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); }
// SHUFPS: imm8 is the shuffle selector, passed through unchanged.
emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); }
@ -247,16 +259,6 @@ emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )
// Legacy wrappers forwarding to xPSHUF, xPMAX, xPMIN, xPMUL and xUNPCK members.
// _M128_ variants reinterpret the uptr argument as a raw memory address.
emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); }
// NOTE(review): the eight SSE4_PMAX*/PMIN* functions below have the same names as
// the ones generated by the DEFINE_LEGACY_OP128( 4, PMAX/PMIN, SD/UD ) invocations
// -- confirm that only one set is compiled.
emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.SD( xRegisterSSE(to), (void*)from ); }
emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.SD( xRegisterSSE(to), (void*)from ); }
emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.UD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.UD( xRegisterSSE(to), (void*)from ); }
emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.UD( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.UD( xRegisterSSE(to), (void*)from ); }
emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.LPS( xRegisterSSE(to), (void*)from ); }
@ -264,113 +266,35 @@ emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) {
// Legacy wrappers forwarding to the new emitter: UNPCKHPS, the sign-mask moves,
// the SSSE3 packed absolute-value / sign groups, word extract/insert, and LDMXCSR.
emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.HPS( xRegisterSSE(to), (void*)from ); }
emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.HPS( xRegisterSSE(to), xRegisterSSE(from) ); }
// NOTE(review): these two pass a temporary xRegisterSSE to a parameter declared as
// a non-const reference (xMOVMSKPS / xMOVMSKPD); that only compiles where the
// compiler permits binding temporaries to non-const references.
emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xMOVMSKPS( xRegister32(to), xRegisterSSE(from) ); }
emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xMOVMSKPD( xRegister32(to), xRegisterSSE(from) ); }
emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.B( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.W( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.D( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.B( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.W( xRegisterSSE(to), xRegisterSSE(from) ); }
emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.D( xRegisterSSE(to), xRegisterSSE(from) ); }
// imm8 selects the word lane to extract from / insert into.
emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ) { xPEXTR.W( xRegister32(to), xRegisterSSE(from), imm8 ); }
emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ) { xPINS.W( xRegisterSSE(to), xRegister32(from), imm8 ); }
// `from` is an address held in a uptr; it is cast to u32* for xLDMXCSR.
emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); }
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//STMXCSR : Store Streaming SIMD Extension Control/Status *
//**********************************************************************************
// Raw encoder for STMXCSR with an absolute 32-bit address operand.
// Writes the 16-bit value 0xAE0F, a ModRM byte built from (0, 0x3, DISP32), and
// then the 32-bit address produced by MEMADDR.
// NOTE(review): despite its name, `from` is the address that receives the stored
// MXCSR value.
// NOTE(review): write16 presumably emits little-endian, giving bytes 0F AE --
// confirm.
emitterT void SSE_STMXCSR( uptr from ) {
write16( 0xAE0F );
ModRM( 0, 0x3, DISP32 );
write32( MEMADDR(from, 4) );
}
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//LDMXCSR : Load Streaming SIMD Extension Control/Status *
//**********************************************************************************
// Raw encoder for LDMXCSR with an absolute 32-bit address operand.
// Writes the 16-bit value 0xAE0F, a ModRM byte built from (0, 0x2, DISP32), and
// then the 32-bit address produced by MEMADDR.
// NOTE(review): a one-line SSE_LDMXCSR wrapper that forwards to xLDMXCSR is also
// defined in this file; two definitions of the same function would not compile --
// confirm this raw version is meant to be removed.
emitterT void SSE_LDMXCSR( uptr from ) {
write16( 0xAE0F );
ModRM( 0, 0x2, DISP32 );
write32( MEMADDR(from, 4) );
}
////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PEXTRW,PINSRW: Packed Extract/Insert Word *
//**********************************************************************************
// Raw encoders built on the SSERtoR / SSERtoR66 helpers.  The `to` and `from`
// parameters are not referenced explicitly in these bodies.
// NOTE(review): SSERtoR / SSERtoR66 are presumably macros that pick up `to` and
// `from` from the enclosing scope, with the 66 variant adding a 0x66 prefix --
// confirm against their definitions.
// NOTE(review): wrappers with these same four names that forward to xPEXTR.W,
// xPINS.W, xMOVMSKPS and xMOVMSKPD also exist in this file -- confirm that only one
// set is compiled.
// PEXTRW / PINSRW append the lane selector imm8 as a trailing byte.
emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); }
emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); }
emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); }
emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); }
//**********************************************************************************}
// Raw encoders for PMADDWD (opcode word 0xF50F via SSERtoR66) and HADDPS.
// HADDPS writes an explicit 0xf2 prefix byte before the 0x7c0f opcode word; the
// memory form goes through SSEMtoR with a second argument of 0.
// NOTE(review): the meaning of SSEMtoR's second argument is not visible here --
// confirm against its definition.
emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); }
emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); }
emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); }
// Raw encoders for MOVSLDUP (opcode word 0x120f) and MOVSHDUP (opcode word 0x160f).
// Every form writes an 0xf3 prefix byte first.  The register form of MOVSLDUP is
// spelled out by hand so that RexRB() is called between the prefix and the opcode;
// the remaining forms go through SSERtoR / SSEMtoR after the prefix.
// NOTE(review): functions with these same names are also generated by the
// DEFINE_LEGACY_MOV128( 3, MOV, SLDUP/SHDUP ) invocations -- confirm that only one
// set is compiled.
emitterT void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
write8(0xf3);
RexRB(0, to, from);
write16( 0x120f);
ModRM( 3, to, from );
}
emitterT void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); }
emitterT void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); }
emitterT void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); }
// SSSE3
// Raw encoders for the SSSE3 packed absolute-value instructions (byte, word and
// dword element sizes).  All three share one emission sequence: 0x66 prefix byte,
// RexRB(0, to, from), a 24-bit opcode value, then ModRM(3, to, from).  Only the top
// byte of the 24-bit value differs: 0x1C (B), 0x1D (W), 0x1E (D).
// NOTE(review): write24 presumably emits little-endian, i.e. bytes 0F 38 xx --
// confirm.
// NOTE(review): one-line wrappers with these same names forwarding to xPABS.B/W/D
// also exist in this file -- confirm that only one set is compiled.
emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x1C380F);
ModRM(3, to, from);
}
emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x1D380F);
ModRM(3, to, from);
}
emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x1E380F);
ModRM(3, to, from);
}
// Raw encoder for the register form of PALIGNR.
// Emission order: 0x66 prefix byte, RexRB(0, to, from), the 24-bit opcode value
// 0x0F3A0F, ModRM(3, to, from), and finally the byte shift count imm8.
// NOTE(review): write24 presumably emits little-endian, i.e. bytes 0F 3A 0F --
// confirm.
emitterT void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
write8(0x66);
RexRB(0, to, from);
write24(0x0F3A0F);
ModRM(3, to, from);
write8(imm8);
}
// Raw encoders for the SSSE3 packed sign instructions (byte, word and dword element
// sizes).  All three share one emission sequence: 0x66 prefix byte,
// RexRB(0, to, from), a 24-bit opcode value, then ModRM(3, to, from).  Only the top
// byte of the 24-bit value differs: 0x08 (B), 0x09 (W), 0x0A (D).
// NOTE(review): write24 presumably emits little-endian, i.e. bytes 0F 38 xx --
// confirm.
// NOTE(review): one-line wrappers with these same names forwarding to xPSIGN.B/W/D
// also exist in this file -- confirm that only one set is compiled.
emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x08380F);
ModRM(3, to, from);
}
emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x09380F);
ModRM(3, to, from);
}
emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
write8(0x66);
RexRB(0, to, from);
write24(0x0A380F);
ModRM(3, to, from);
}
// SSE4.1

View File

@ -697,7 +697,11 @@ namespace x86Emitter
// Reports whether the operand type T occupies exactly one byte.
// The answer depends only on sizeof(T); no code is emitted.
template< typename T >
bool Is8BitOp()
{
	const bool isByteWide = ( sizeof(T) == 1 );
	return isByteWide;
}
// Writes the single byte 0x66 to the output stream when the operand type T is
// exactly two bytes wide; writes nothing for any other operand size.
template< typename T >
void prefix16()
{
	// Nothing to emit unless the operand is 16 bits wide.
	if( sizeof(T) != 2 ) return;

	xWrite<u8>( 0x66 );
}
#include "implement/xmm/movqss.h"
#include "implement/xmm/basehelpers.h"
#include "implement/xmm/moremovs.h"
#include "implement/xmm/arithmetic.h"
#include "implement/xmm/comparisons.h"
#include "implement/xmm/shufflepack.h"
#include "implement/group1.h"
#include "implement/group2.h"
#include "implement/group3.h"