diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h new file mode 100644 index 0000000000..6ac3f91877 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -0,0 +1,230 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// Used for PSRA, which lacks the Q form. 
+// Provides the W and D forms; each accepts a register, direct-memory, ModSib, or u8 immediate operand. +template< u16 OpcodeBase1, u8 Modcode > +class SimdImpl_ShiftWithoutQ +{ +protected: + template< u16 Opcode1, u16 OpcodeImm, u8 Modcode > // NOTE(review): this Modcode re-declares the enclosing template's Modcode parameter -- confirm every target compiler accepts it + class ShiftHelper + { + public: + ShiftHelper() {} + + template< typename OperandType > + __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __emitinline void operator()( const xRegisterSIMD& to, u8 imm ) const + { + SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm ); + ModRM( 3, (int)Modcode, to.Id ); + xWrite( imm ); + } + }; + +public: + const ShiftHelper W; + const ShiftHelper D; + + SimdImpl_ShiftWithoutQ() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Implements PSRL and PSLL +// Adds the Q form and the immediate-only DQ form (opcode 0x73, Modcode+1) to the inherited W/D forms. +template< u16 OpcodeBase1, u8 Modcode > +class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ +{ +public: + const ShiftHelper Q; + + void DQ( const xRegisterSSE& to, u8 imm ) const + { + SimdPrefix( 0x66, 0x73 ); + ModRM( 3, (int)Modcode+1, to.Id ); + xWrite( imm ); + } + + SimdImpl_Shift() {} +}; + + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed integer Add/Sub forms: B/W/D/Q, plus the saturating SB/SW (signed) and USB/USW (unsigned) forms. +template< u16 OpcodeB, u16 OpcodeQ > +class SimdImpl_AddSub +{ +public: + const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D; + const SimdImpl_DestRegEither<0x66,OpcodeQ> Q; + + // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results. 
+ const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB; + + // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW; + + // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeB> USB; + + // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW; + + SimdImpl_AddSub() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed integer multiply forms: LW/HW/HUW/UDQ and HRSW (MMX or XMM operands), plus LD and DQ (XMM operands only). +class SimdImpl_PMul +{ +public: + const SimdImpl_DestRegEither<0x66,0xd5> LW; + const SimdImpl_DestRegEither<0x66,0xe5> HW; + const SimdImpl_DestRegEither<0x66,0xe4> HUW; + const SimdImpl_DestRegEither<0x66,0xf4> UDQ; + + // [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the + // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit + // integers. Each intermediate 32-bit integer is truncated to the 18 most significant + // bits. Rounding is always performed by adding 1 to the least significant bit of the + // 18-bit intermediate result. The final result is obtained by selecting the 16 bits + // immediately to the right of the most significant bit of each 18-bit intermediate + // result and packed to the destination operand. + // + // Both operands can be MMX or XMM registers. Source can be register or memory. + // + const SimdImpl_DestRegEither<0x66,0x0b38> HRSW; + + // [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store + // the low 32 bits of each product in dest. + const SimdImpl_DestRegSSE<0x66,0x4038> LD; + + // [SSE-4.1] Multiply the packed signed dword integers in dest with src. 
+ const SimdImpl_DestRegSSE<0x66,0x2838> DQ; + + SimdImpl_PMul() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) +// +template< u16 OpcodeSSE > +class SimdImpl_rSqrt +{ +public: + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; + const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; + SimdImpl_rSqrt() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// SQRT: exposes the inherited PS/SS forms plus the SD form; no PD form is provided by this class. +// +template< u16 OpcodeSSE > +class SimdImpl_Sqrt : public SimdImpl_rSqrt +{ +public: + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; + SimdImpl_Sqrt() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// AndNot in PS (no prefix) and PD (0x66 prefix) forms, both using opcode 0x55. +class SimdImpl_AndNot +{ +public: + const SimdImpl_DestRegSSE<0x00,0x55> PS; + const SimdImpl_DestRegSSE<0x66,0x55> PD; + SimdImpl_AndNot() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed absolute value. [sSSE3 only] +// +class SimdImpl_PAbsolute +{ +public: + SimdImpl_PAbsolute() {} + + // [sSSE-3] Computes the absolute value of bytes in the src, and stores the result + // in dest, as UNSIGNED. + const SimdImpl_DestRegEither<0x66, 0x1c38> B; + + // [sSSE-3] Computes the absolute value of words in the src, and stores the result + // in dest, as UNSIGNED. + const SimdImpl_DestRegEither<0x66, 0x1d38> W; + + // [sSSE-3] Computes the absolute value of doublewords in the src, and stores the + // result in dest, as UNSIGNED. + const SimdImpl_DestRegEither<0x66, 0x1e38> D; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the +// corresponding sign in src. 
+// Provides B (byte), W (word) and D (doubleword) forms. +class SimdImpl_PSign +{ +public: + SimdImpl_PSign() {} + + // [sSSE-3] Negates each byte element of dest if the signed integer value of the + // corresponding data element in src is less than zero. If the signed integer value + // of a data element in src is positive, the corresponding data element in dest is + // unchanged. If a data element in src is zero, the corresponding data element in + // dest is set to zero. + const SimdImpl_DestRegEither<0x66, 0x0838> B; + + // [sSSE-3] Negates each word element of dest if the signed integer value of the + // corresponding data element in src is less than zero. If the signed integer value + // of a data element in src is positive, the corresponding data element in dest is + // unchanged. If a data element in src is zero, the corresponding data element in + // dest is set to zero. + const SimdImpl_DestRegEither<0x66, 0x0938> W; + + // [sSSE-3] Negates each doubleword element of dest if the signed integer value + // of the corresponding data element in src is less than zero. If the signed integer + // value of a data element in src is positive, the corresponding data element in dest + // is unchanged. If a data element in src is zero, the corresponding data element in + // dest is set to zero. + const SimdImpl_DestRegEither<0x66, 0x0a38> D; + +}; diff --git a/pcsx2/x86/ix86/implement/xmm/basehelpers.h b/pcsx2/x86/ix86/implement/xmm/basehelpers.h new file mode 100644 index 0000000000..7094322b3d --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/basehelpers.h @@ -0,0 +1,152 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// MMX / SSE Helper Functions! + +extern void SimdPrefix( u8 prefix, u16 opcode ); + +// ------------------------------------------------------------------------ +// xmm emitter helpers for xmm instructions with prefixes. +// These functions also support deducing the use of the prefix from the template parameters, +// since most xmm instructions use a prefix and most mmx instructions do not. (some mov +// instructions violate this "guideline.") +// The prefix is handed to SimdPrefix only when forcePrefix is set or sizeof( T ) is 16; otherwise 0 is passed. +template< typename T, typename T2 > +__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) +{ + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); + ModRM_Direct( to.Id, from.Id ); +} + +template< typename T > +__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) +{ + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); + EmitSibMagic( reg.Id, sib ); +} + +template< typename T > +__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) +{ + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); + xWriteDisp( reg.Id, data ); +} + +// ------------------------------------------------------------------------ +// xmm emitter helpers for xmm instructions *without* prefixes. 
+// These are normally used for special instructions that have MMX forms only (non-SSE), however +// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. +// These overloads always pass a prefix of 0 to SimdPrefix. +template< typename T, typename T2 > +__emitinline void writeXMMop( u16 opcode, const xRegister& to, const xRegister& from ) +{ + SimdPrefix( 0, opcode ); + ModRM_Direct( to.Id, from.Id ); +} + +template< typename T > +__noinline void writeXMMop( u16 opcode, const xRegister& reg, const ModSibBase& sib ) +{ + SimdPrefix( 0, opcode ); + EmitSibMagic( reg.Id, sib ); +} + +template< typename T > +__emitinline void writeXMMop( u16 opcode, const xRegister& reg, const void* data ) +{ + SimdPrefix( 0, opcode ); + xWriteDisp( reg.Id, data ); +} + +// ------------------------------------------------------------------------ +// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only, +// like ANDPS/ANDPD +// +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegSSE +{ +public: + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + + SimdImpl_DestRegSSE() {} //GCWho? +}; + +// ------------------------------------------------------------------------ +// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only +// (PSHUFD / PSHUFHW / etc). 
+// The imm operand is written after the opcode and operand bytes. +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegImmSSE +{ +public: + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + + SimdImpl_DestRegImmSSE() {} //GCWho? +}; + +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegImmMMX // MMX-register counterpart of SimdImpl_DestRegImmSSE (same forms, xRegisterMMX operands) +{ +public: + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + + SimdImpl_DestRegImmMMX() {} //GCWho? +}; + +// ------------------------------------------------------------------------ +// For implementing MMX/SSE operations that have reg,reg/rm forms only, +// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops). 
+// +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegEither +{ +public: + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + + SimdImpl_DestRegEither() {} //GCWho? +}; + +// ------------------------------------------------------------------------ +// For implementing MMX/SSE operations for which the destination *must* be a register, but the source +// can be regDirect or ModRM (indirect). +// Every form passes forcePrefix=true to writeXMMop, so Prefix is always handed to SimdPrefix. +template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > +class SimdImpl_DestRegStrict +{ +public: + __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + + SimdImpl_DestRegStrict() {} //GCWho? +}; + diff --git a/pcsx2/x86/ix86/implement/xmm/comparisons.h b/pcsx2/x86/ix86/implement/xmm/comparisons.h new file mode 100644 index 0000000000..469a808524 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/comparisons.h @@ -0,0 +1,131 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + + +////////////////////////////////////////////////////////////////////////////////////////// +// Min/Max in all four forms (PS/PD/SS/SD); the form is selected by the prefix paired with OpcodeSSE. +template< u16 OpcodeSSE > +class SimdImpl_MinMax +{ +public: + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision + const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision + const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision + + SimdImpl_MinMax() {} //GChow? +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Compare forms (PS/PD/SS/SD): each emits opcode 0xc2 and then writes CType after the operand bytes. +template< SSE2_ComparisonType CType > +class SimdImpl_Compare +{ +protected: + template< u8 Prefix > struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + Woot() {} + }; + +public: + const Woot<0x00> PS; + const Woot<0x66> PD; + const Woot<0xf3> SS; + const Woot<0xf2> SD; + SimdImpl_Compare() {} //GCWhat? 
+}; + + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed integer compares: EQ (equality) and GT (signed greater-than) for byte/word/doubleword elements. +class SimdImpl_PCompare +{ +public: + SimdImpl_PCompare() {} + + // Compare packed bytes for equality. + // If a data element in dest is equal to the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x74> EQB; + + // Compare packed words for equality. + // If a data element in dest is equal to the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x75> EQW; + + // Compare packed doublewords [32-bits] for equality. + // If a data element in dest is equal to the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x76> EQD; + + // Compare packed signed bytes for greater than. + // If a data element in dest is greater than the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x64> GTB; + + // Compare packed signed words for greater than. + // If a data element in dest is greater than the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x65> GTW; + + // Compare packed signed doublewords [32-bits] for greater than. + // If a data element in dest is greater than the corresponding data element in src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. 
+ const SimdImpl_DestRegEither<0x66,0x66> GTD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Opcode1 is the UB opcode (SW uses Opcode1+0x10); Opcode2 is the SB opcode byte, with SD/UW/UD at +1/+2/+3, each combined as (op<<8)|0x38. +template< u8 Opcode1, u16 Opcode2 > +class SimdImpl_PMinMax +{ +public: + SimdImpl_PMinMax() {} + + // Compare packed unsigned byte integers in dest to src and store packed min/max + // values in dest. + // Operation can be performed on either MMX or SSE operands. + const SimdImpl_DestRegEither<0x66,Opcode1> UB; + + // Compare packed signed word integers in dest to src and store packed min/max + // values in dest. + // Operation can be performed on either MMX or SSE operands. + const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW; + + // [SSE-4.1] Compare packed signed byte integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB; + + // [SSE-4.1] Compare packed signed doubleword integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD; + + // [SSE-4.1] Compare packed unsigned word integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW; + + // [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store + // packed min/max values in dest. 
(SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD; +}; + diff --git a/pcsx2/x86/ix86/implement/xmm/moremovs.h b/pcsx2/x86/ix86/implement/xmm/moremovs.h new file mode 100644 index 0000000000..44da893436 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h @@ -0,0 +1,82 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// Moves to/from high/low portions of an xmm register. +// These instructions cannot be used in reg/reg form. 
+// Forms with a register destination use Opcode; forms with a memory destination use Opcode+1. +template< u16 Opcode > +class MovhlImplAll +{ +protected: + template< u8 Prefix > + struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + }; + +public: + Woot<0x00> PS; + Woot<0x66> PD; + + MovhlImplAll() {} //GCC. +}; + +// ------------------------------------------------------------------------ +// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but +// do something kinda different! Fun! +// PS is emitted through the prefix-less writeXMMop; PD passes the 0x66 prefix. +template< u16 Opcode > +class MovhlImpl_RtoR +{ +public: + __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); } + __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + + MovhlImpl_RtoR() {} //GCC. 
+}; + +// ------------------------------------------------------------------------ +template< u8 Prefix, u16 Opcode, u16 OpcodeAlt > +class MovapsImplAll // register destinations use Opcode; memory destinations use OpcodeAlt +{ +public: + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } // nothing is emitted when to == from + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } + + MovapsImplAll() {} //GCC. +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// For instructions that have SS/SD form only (UCOMI, etc); AltPrefix is the prefix used for doubles (SD form). +template< u8 AltPrefix, u16 OpcodeSSE > +class SimdImpl_UcomI +{ +public: + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS; + const SimdImpl_DestRegSSE SD; + SimdImpl_UcomI() {} +}; diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h deleted file mode 100644 index 3d47f0b6e9..0000000000 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ /dev/null @@ -1,646 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once - -////////////////////////////////////////////////////////////////////////////////////////// -// MMX / SSE Helper Functions! - -extern void SimdPrefix( u8 prefix, u16 opcode ); - -// ------------------------------------------------------------------------ -// xmm emitter helpers for xmm instruction with prefixes. -// These functions also support deducing the use of the prefix from the template parameters, -// since most xmm instructions use a prefix and most mmx instructions do not. (some mov -// instructions violate this "guideline.") -// -template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) -{ - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - ModRM_Direct( to.Id, from.Id ); -} - -template< typename T > -__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) -{ - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - EmitSibMagic( reg.Id, sib ); -} - -template< typename T > -__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) -{ - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - xWriteDisp( reg.Id, data ); -} - -// ------------------------------------------------------------------------ -// xmm emitter helpers for xmm instructions *without* prefixes. -// These are normally used for special instructions that have MMX forms only (non-SSE), however -// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. 
-// -template< typename T, typename T2 > -__emitinline void writeXMMop( u16 opcode, const xRegister& to, const xRegister& from ) -{ - SimdPrefix( 0, opcode ); - ModRM_Direct( to.Id, from.Id ); -} - -template< typename T > -__noinline void writeXMMop( u16 opcode, const xRegister& reg, const ModSibBase& sib ) -{ - SimdPrefix( 0, opcode ); - EmitSibMagic( reg.Id, sib ); -} - -template< typename T > -__emitinline void writeXMMop( u16 opcode, const xRegister& reg, const void* data ) -{ - SimdPrefix( 0, opcode ); - xWriteDisp( reg.Id, data ); -} - -////////////////////////////////////////////////////////////////////////////////////////// -// Moves to/from high/low portions of an xmm register. -// These instructions cannot be used in reg/reg form. -// -template< u16 Opcode > -class MovhlImplAll -{ -protected: - template< u8 Prefix > - struct Woot - { - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } - }; - -public: - Woot<0x00> PS; - Woot<0x66> PD; - - MovhlImplAll() {} //GCC. -}; - -// ------------------------------------------------------------------------ -// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but -// do something kinda different! Fun! -// -template< u16 Opcode > -class MovhlImpl_RtoR -{ -public: - __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); } - __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - - MovhlImpl_RtoR() {} //GCC. 
-}; - -// ------------------------------------------------------------------------ -template< u8 Prefix, u16 Opcode, u16 OpcodeAlt > -class MovapsImplAll -{ -public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - - MovapsImplAll() {} //GCC. -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for -// a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms). -// -template< u16 Opcode > -class SimdImpl_PackedLogic -{ -public: - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - - SimdImpl_PackedLogic() {} //GCWho? 
-}; - -// ------------------------------------------------------------------------ -// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only, -// like ANDPS/ANDPD -// -template< u8 Prefix, u16 Opcode > -class SimdImpl_DestRegSSE -{ -public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - - SimdImpl_DestRegSSE() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only -// (PSHUFD / PSHUFHW / etc). -// -template< u8 Prefix, u16 Opcode > -class SimdImpl_DestRegImmSSE -{ -public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - - SimdImpl_DestRegImmSSE() {} //GCWho? 
-}; - -template< u8 Prefix, u16 Opcode > -class SimdImpl_DestRegImmMMX -{ -public: - __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - - SimdImpl_DestRegImmMMX() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -// For implementing MMX/SSE operations that have reg,reg/rm forms only, -// but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops). -// -template< u8 Prefix, u16 Opcode > -class SimdImpl_DestRegEither -{ -public: - template< typename DestOperandType > __forceinline - void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename DestOperandType > __forceinline - void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename DestOperandType > __forceinline - void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - - SimdImpl_DestRegEither() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -// For implementing MMX/SSE operations which the destination *must* be a register, but the source -// can be regDirect or ModRM (indirect). 
-// -template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > -class SimdImpl_DestRegStrict -{ -public: - __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - - SimdImpl_DestRegStrict() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -template< u16 OpcodeSSE > -class SimdImpl_PSPD_SSSD -{ -public: - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision - const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision - const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision - const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision - - SimdImpl_PSPD_SSSD() {} //GChow? -}; - -// ------------------------------------------------------------------------ -// -template< u16 OpcodeSSE > -class SimdImpl_AndNot -{ -public: - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; - const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; - SimdImpl_AndNot() {} -}; - -// ------------------------------------------------------------------------ -// For instructions that have SS/SD form only (UCOMI, etc) -// AltPrefix - prefixed used for doubles (SD form). 
-template< u8 AltPrefix, u16 OpcodeSSE > -class SimdImpl_SS_SD -{ -public: - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS; - const SimdImpl_DestRegSSE SD; - SimdImpl_SS_SD() {} -}; - -// ------------------------------------------------------------------------ -// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) -template< u16 OpcodeSSE > -class SimdImpl_rSqrt -{ -public: - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; - const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; - SimdImpl_rSqrt() {} -}; - -// ------------------------------------------------------------------------ -// For instructions that have PS/SS/SD form only (most commonly Sqrt functions) -template< u16 OpcodeSSE > -class SimdImpl_Sqrt : public SimdImpl_rSqrt -{ -public: - const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; - SimdImpl_Sqrt() {} -}; - -// ------------------------------------------------------------------------ -template< u16 OpcodeSSE > -class SimdImpl_Shuffle -{ -protected: - template< u8 Prefix > struct Woot - { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - Woot() {} - }; - -public: - const Woot<0x00> PS; - const Woot<0x66> PD; - - SimdImpl_Shuffle() {} //GCWhat? 
-}; - -// ------------------------------------------------------------------------ -template< SSE2_ComparisonType CType > -class SimdImpl_Compare -{ -protected: - template< u8 Prefix > struct Woot - { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - Woot() {} - }; - -public: - const Woot<0x00> PS; - const Woot<0x66> PD; - const Woot<0xf3> SS; - const Woot<0xf2> SD; - SimdImpl_Compare() {} //GCWhat? -}; - - -////////////////////////////////////////////////////////////////////////////////////////// -// -// -template< u16 Opcode1, u16 OpcodeImm, u8 Modcode > -class SimdImpl_Shift -{ -public: - SimdImpl_Shift() {} - - template< typename OperandType > - __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __emitinline void operator()( const xRegisterSIMD& to, u8 imm ) const - { - SimdPrefix( (sizeof( OperandType ) == 16) ? 
0x66 : 0, OpcodeImm ); - ModRM( 3, (int)Modcode, to.Id ); - xWrite( imm ); - } -}; - -// ------------------------------------------------------------------------ -// Used for PSRA -template< u16 OpcodeBase1, u8 Modcode > -class SimdImpl_ShiftWithoutQ -{ -public: - const SimdImpl_Shift W; - const SimdImpl_Shift D; - - SimdImpl_ShiftWithoutQ() {} -}; - -// ------------------------------------------------------------------------ -template< u16 OpcodeBase1, u8 Modcode > -class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ -{ -public: - const SimdImpl_Shift Q; - - void DQ( const xRegisterSSE& to, u8 imm ) const - { - SimdPrefix( 0x66, 0x73 ); - ModRM( 3, (int)Modcode+1, to.Id ); - xWrite( imm ); - } - - SimdImpl_ShiftAll() {} -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -template< u16 OpcodeB, u16 OpcodeQ > -class SimdImpl_AddSub -{ -public: - const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B; - const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W; - const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D; - const SimdImpl_DestRegEither<0x66,OpcodeQ> Q; - - // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB; - - // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW; - - // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeB> USB; - - // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results. 
- const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW; - - SimdImpl_AddSub() {} -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_PMul -{ -public: - const SimdImpl_DestRegEither<0x66,0xd5> LW; - const SimdImpl_DestRegEither<0x66,0xe5> HW; - const SimdImpl_DestRegEither<0x66,0xe4> HUW; - const SimdImpl_DestRegEither<0x66,0xf4> UDQ; - - // [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the - // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit - // integers. Each intermediate 32-bit integer is truncated to the 18 most significant - // bits. Rounding is always performed by adding 1 to the least significant bit of the - // 18-bit intermediate result. The final result is obtained by selecting the 16 bits - // immediately to the right of the most significant bit of each 18-bit intermediate - // result and packed to the destination operand. - // - // Both operands can be MMX or XMM registers. Source can be register or memory. - // - const SimdImpl_DestRegEither<0x66,0x0b38> HRSW; - - // [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store - // the low 32 bits of each product in xmm1. - const SimdImpl_DestRegSSE<0x66,0x4038> LD; - - // [SSE-4.1] Multiply the packed signed dword integers in dest with src. - const SimdImpl_DestRegSSE<0x66,0x2838> DQ; - - SimdImpl_PMul() {} -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_PCompare -{ -public: - SimdImpl_PCompare() {} - - // Compare packed bytes for equality. - // If a data element in dest is equal to the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x74> EQB; - - // Compare packed words for equality. 
- // If a data element in dest is equal to the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x75> EQW; - - // Compare packed doublewords [32-bits] for equality. - // If a data element in dest is equal to the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x76> EQD; - - // Compare packed signed bytes for greater than. - // If a data element in dest is greater than the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x64> GTB; - - // Compare packed signed words for greater than. - // If a data element in dest is greater than the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x65> GTW; - - // Compare packed signed doublewords [32-bits] for greater than. - // If a data element in dest is greater than the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x66> GTD; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -template< u8 Opcode1, u16 Opcode2 > -class SimdImpl_PMinMax -{ -public: - SimdImpl_PMinMax() {} - - // Compare packed unsigned byte integers in dest to src and store packed min/max - // values in dest. - // Operation can be performed on either MMX or SSE operands. - const SimdImpl_DestRegEither<0x66,Opcode1> UB; - - // Compare packed signed word integers in dest to src and store packed min/max - // values in dest. - // Operation can be performed on either MMX or SSE operands. 
- const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW; - - // [SSE-4.1] Compare packed signed byte integers in dest to src and store - // packed min/max values in dest. (SSE operands only) - const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB; - - // [SSE-4.1] Compare packed signed doubleword integers in dest to src and store - // packed min/max values in dest. (SSE operands only) - const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD; - - // [SSE-4.1] Compare packed unsigned word integers in dest to src and store - // packed min/max values in dest. (SSE operands only) - const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW; - - // [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store - // packed min/max values in dest. (SSE operands only) - const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_PShuffle -{ -public: - SimdImpl_PShuffle() {} - - // Copies words from src and inserts them into dest at word locations selected with - // the order operand (8 bit immediate). - const SimdImpl_DestRegImmMMX<0x00,0x70> W; - - // Copies doublewords from src and inserts them into dest at dword locations selected - // with the order operand (8 bit immediate). - const SimdImpl_DestRegImmSSE<0x66,0x70> D; - - // Copies words from the low quadword of src and inserts them into the low quadword - // of dest at word locations selected with the order operand (8 bit immediate). - // The high quadword of src is copied to the high quadword of dest. - const SimdImpl_DestRegImmSSE<0xf2,0x70> LW; - - // Copies words from the high quadword of src and inserts them into the high quadword - // of dest at word locations selected with the order operand (8 bit immediate). - // The low quadword of src is copied to the low quadword of dest. 
- const SimdImpl_DestRegImmSSE<0xf3,0x70> HW; - - // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle - // control mask in src. If the most significant bit (bit[7]) of each byte of the - // shuffle control mask is set, then constant zero is written in the result byte. - // Each byte in the shuffle control mask forms an index to permute the corresponding - // byte in dest. The value of each index is the least significant 4 bits (128-bit - // operation) or 3 bits (64-bit operation) of the shuffle control byte. - // - // Operands can be MMX or XMM registers. - const SimdImpl_DestRegEither<0x66,0x0038> B; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_PUnpack -{ -public: - SimdImpl_PUnpack() {} - - // Unpack and interleave low-order bytes from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x60> LBW; - // Unpack and interleave low-order words from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x61> LWD; - // Unpack and interleave low-order doublewords from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x62> LDQ; - // Unpack and interleave low-order quadwords from src and dest into dest. - const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ; - - // Unpack and interleave high-order bytes from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x68> HBW; - // Unpack and interleave high-order words from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x69> HWD; - // Unpack and interleave high-order doublewords from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x6a> HDQ; - // Unpack and interleave high-order quadwords from src and dest into dest. 
- const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// Pack with Signed or Unsigned Saturation -// -class SimdImpl_Pack -{ -public: - SimdImpl_Pack() {} - - // Converts packed signed word integers from src and dest into packed signed - // byte integers in dest, using signed saturation. - const SimdImpl_DestRegEither<0x66,0x63> SSWB; - - // Converts packed signed dword integers from src and dest into packed signed - // word integers in dest, using signed saturation. - const SimdImpl_DestRegEither<0x66,0x6b> SSDW; - - // Converts packed unsigned word integers from src and dest into packed unsigned - // byte integers in dest, using unsigned saturation. - const SimdImpl_DestRegEither<0x66,0x67> USWB; - - // [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed - // unsigned word integers in dest, using signed saturation. - const SimdImpl_DestRegSSE<0x66,0x2b38> USDW; -}; - - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_Unpack -{ -public: - SimdImpl_Unpack() {} - - // Unpacks the high doubleword [single-precision] values from src and dest into - // dest, such that the result of dest looks like this: - // dest[0] <- dest[2] - // dest[1] <- src[2] - // dest[2] <- dest[3] - // dest[3] <- src[3] - // - const SimdImpl_DestRegSSE<0x00,0x15> HPS; - - // Unpacks the high quadword [double-precision] values from src and dest into - // dest, such that the result of dest looks like this: - // dest.lo <- dest.hi - // dest.hi <- src.hi - // - const SimdImpl_DestRegSSE<0x66,0x15> HPD; - - // Unpacks the low doubleword [single-precision] values from src and dest into - // dest, such that the result of dest looks like this: - // dest[3] <- src[1] - // dest[2] <- dest[1] - // dest[1] <- src[0] - // dest[0] <- dest[0] - // - const SimdImpl_DestRegSSE<0x00,0x14> LPS; - - // Unpacks the low quadword 
[double-precision] values from src and dest into - // dest, effectively moving the low portion of src into the upper portion of dest. - // The result of dest is loaded as such: - // dest.hi <- src.lo - // dest.lo <- dest.lo [remains unchanged!] - // - const SimdImpl_DestRegSSE<0x66,0x14> LPD; -}; - diff --git a/pcsx2/x86/ix86/implement/xmm/shufflepack.h b/pcsx2/x86/ix86/implement/xmm/shufflepack.h new file mode 100644 index 0000000000..93a96569c6 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h @@ -0,0 +1,306 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u16 OpcodeSSE > +class SimdImpl_Shuffle +{ +protected: + template< u8 Prefix > struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + Woot() {} + }; + +public: + const Woot<0x00> PS; + const Woot<0x66> PD; + + SimdImpl_Shuffle() {} //GCWhat? +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PShuffle +{ +public: + SimdImpl_PShuffle() {} + + // Copies words from src and inserts them into dest at word locations selected with + // the order operand (8 bit immediate). + const SimdImpl_DestRegImmMMX<0x00,0x70> W; + + // Copies doublewords from src and inserts them into dest at dword locations selected + // with the order operand (8 bit immediate). + const SimdImpl_DestRegImmSSE<0x66,0x70> D; + + // Copies words from the low quadword of src and inserts them into the low quadword + // of dest at word locations selected with the order operand (8 bit immediate). + // The high quadword of src is copied to the high quadword of dest. 
+ const SimdImpl_DestRegImmSSE<0xf2,0x70> LW; + + // Copies words from the high quadword of src and inserts them into the high quadword + // of dest at word locations selected with the order operand (8 bit immediate). + // The low quadword of src is copied to the low quadword of dest. + const SimdImpl_DestRegImmSSE<0xf3,0x70> HW; + + // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle + // control mask in src. If the most significant bit (bit[7]) of each byte of the + // shuffle control mask is set, then constant zero is written in the result byte. + // Each byte in the shuffle control mask forms an index to permute the corresponding + // byte in dest. The value of each index is the least significant 4 bits (128-bit + // operation) or 3 bits (64-bit operation) of the shuffle control byte. + // + // Operands can be MMX or XMM registers. + const SimdImpl_DestRegEither<0x66,0x0038> B; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PUnpack +{ +public: + SimdImpl_PUnpack() {} + + // Unpack and interleave low-order bytes from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x60> LBW; + // Unpack and interleave low-order words from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x61> LWD; + // Unpack and interleave low-order doublewords from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x62> LDQ; + // Unpack and interleave low-order quadwords from src and dest into dest. + const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ; + + // Unpack and interleave high-order bytes from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x68> HBW; + // Unpack and interleave high-order words from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x69> HWD; + // Unpack and interleave high-order doublewords from src and dest into dest. 
+ const SimdImpl_DestRegEither<0x66,0x6a> HDQ; + // Unpack and interleave high-order quadwords from src and dest into dest. + const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Pack with Signed or Unsigned Saturation +// +class SimdImpl_Pack +{ +public: + SimdImpl_Pack() {} + + // Converts packed signed word integers from src and dest into packed signed + // byte integers in dest, using signed saturation. + const SimdImpl_DestRegEither<0x66,0x63> SSWB; + + // Converts packed signed dword integers from src and dest into packed signed + // word integers in dest, using signed saturation. + const SimdImpl_DestRegEither<0x66,0x6b> SSDW; + + // Converts packed signed word integers from src and dest into packed unsigned + // byte integers in dest, using unsigned saturation. + const SimdImpl_DestRegEither<0x66,0x67> USWB; + + // [SSE-4.1] Converts packed signed dword integers from src and dest into packed + // unsigned word integers in dest, using unsigned saturation.
+ const SimdImpl_DestRegSSE<0x66,0x2b38> USDW; +}; + + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_Unpack +{ +public: + SimdImpl_Unpack() {} + + // Unpacks the high doubleword [single-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest[0] <- dest[2] + // dest[1] <- src[2] + // dest[2] <- dest[3] + // dest[3] <- src[3] + // + const SimdImpl_DestRegSSE<0x00,0x15> HPS; + + // Unpacks the high quadword [double-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest.lo <- dest.hi + // dest.hi <- src.hi + // + const SimdImpl_DestRegSSE<0x66,0x15> HPD; + + // Unpacks the low doubleword [single-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest[3] <- src[1] + // dest[2] <- dest[1] + // dest[1] <- src[0] + // dest[0] <- dest[0] + // + const SimdImpl_DestRegSSE<0x00,0x14> LPS; + + // Unpacks the low quadword [double-precision] values from src and dest into + // dest, effectively moving the low portion of src into the upper portion of dest. + // The result of dest is loaded as such: + // dest.hi <- src.lo + // dest.lo <- dest.lo [remains unchanged!] + // + const SimdImpl_DestRegSSE<0x66,0x14> LPD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// PINSW/B/D [all but Word form are SSE4.1 only!] 
+// W forms emit opcode 0xc4; the B and D forms share ByteDwordForms (opcodes behind the 0x3a escape byte).
+class SimdImpl_PInsert
+{
+protected:
+	template< u16 Opcode >
+	class ByteDwordForms		// emits prefix 0x66, opcode (Opcode<<8)|0x3a with the given operands, then imm8
+	{
+	public:
+		ByteDwordForms() {}
+
+		__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite( imm8 );		// immediate byte is written after the opcode/operand bytes
+		}
+
+		__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite( imm8 );		// immediate byte is written after the opcode/operand bytes
+		}
+
+		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite( imm8 );		// immediate byte is written after the opcode/operand bytes
+		}
+	};
+
+public:
+	SimdImpl_PInsert() {}
+
+	// Word form, 32 bit register source. Operation can be performed on either MMX or SSE operands.
+	template< typename T >
+	__forceinline void W( const xRegisterSIMD& to, const xRegister32& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite( imm8 );
+	}
+
+	// Word form, direct memory source. Operation can be performed on either MMX or SSE operands.
+	template< typename T >
+	__forceinline void W( const xRegisterSIMD& to, const void* from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite( imm8 );
+	}
+
+	// Word form, ModSib (indirect) memory source. Operation can be performed on either MMX or SSE operands.
+	template< typename T >
+	__noinline void W( const xRegisterSIMD& to, const ModSibBase& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite( imm8 );
+	}
+
+	// [SSE-4.1] Byte form: ByteDwordForms with opcode byte 0x20.
+	const ByteDwordForms<0x20> B;
+
+	// [SSE-4.1] Dword form: ByteDwordForms with opcode byte 0x22.
+	const ByteDwordForms<0x22> D;
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// PEXTRW/B/D [all but Word form are SSE4.1 only!]
+//
+// Note: Word form's indirect memory form is only available in SSE4.1.
+// W register form emits opcode 0xc5, W memory forms emit 0x153a; the B and D forms share ByteDwordForms.
+class SimdImpl_PExtract
+{
+protected:
+	template< u16 Opcode >
+	class ByteDwordForms		// emits prefix 0x66, opcode (Opcode<<8)|0x3a with the given operands, then imm8
+	{
+	public:
+		ByteDwordForms() {}
+
+		__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );	// NOTE(review): passes ( to, from ) while the memory forms pass ( from, dest ) -- confirm the order writeXMMop expects
+			xWrite( imm8 );		// immediate byte is written after the opcode/operand bytes
+		}
+
+		__forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );	// same operand order as the W memory forms below
+			xWrite( imm8 );		// immediate byte is written after the opcode/operand bytes
+		}
+
+		__noinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );	// same operand order as the W memory forms below
+			xWrite( imm8 );		// immediate byte is written after the opcode/operand bytes
+		}
+	};
+
+public:
+	SimdImpl_PExtract() {}
+
+	// Copies the word element specified by imm8 from src to dest.  The upper bits
+	// of dest are zero-extended (cleared).  This can be used to extract any single packed
+	// word value from src into an x86 32 bit register.
+	//
+	// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
+	//
+	template< typename T >
+	__forceinline void W( const xRegister32& to, const xRegisterSIMD& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc5, to, from, true );
+		xWrite( imm8 );
+	}
+
+	__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0x153a, from, dest );
+		xWrite( imm8 );
+	}
+
+	__noinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0x153a, from, dest );
+		xWrite( imm8 );
+	}
+
+	// [SSE-4.1] Copies the byte element specified by imm8 from src to dest.  The upper bits
+	// of dest are zero-extended (cleared).  This can be used to extract any single packed
+	// byte value from src into an x86 32 bit register.
+	const ByteDwordForms<0x14> B;
+
+	// [SSE-4.1] Copies the dword element specified by imm8 from src to dest.  This can be
+	// used to extract any single packed dword value from src into an x86 32 bit register.
+	const ByteDwordForms<0x16> D;
+};
diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 9125feaa3e..e6bc9f34a7 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -641,18 +641,25 @@ __emitinline void xBSWAP( const xRegister32& to ) // MMX / XMM Instructions // (these will get put in their own file later) -// If the upper 8 bits of opcode are zero, the opcode is treated as a u8. -// The upper bits are non-zero, the opcode is assumed 16 bit (and the upper bits are checked aginst -// 0x38, which is the only valid high word for 16 bit opcodes as such) +// ------------------------------------------------------------------------ +// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is +// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4 +// instructions). Any other lower value assumes the upper value is 0 and ignored. +// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will +// generate an assertion. +// __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) { + const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); + + // If the lower byte is not a valid prefix and the upper byte is non-zero it + // means we made a mistake!
+ if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 ); + if( prefix != 0 ) { - if( (opcode & 0xff00) != 0 ) - { - jASSUME( (opcode & 0xff00) == 0x3800 ); - xWrite( (opcode<<16) | (0x0f00 | prefix) ); - } + if( is16BitOpcode ) + xWrite( (opcode<<16) | 0x0f00 | prefix ); else { xWrite( 0x0f00 | prefix ); @@ -661,9 +668,9 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) } else { - if( (opcode & 0xff00) != 0 ) + if( is16BitOpcode ) { - jASSUME( (opcode & 0xff00) == 0x3800 ); + xWrite( 0x0f ); xWrite( opcode ); } else @@ -671,6 +678,11 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) } } +// [SSE-3] +const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; +// [SSE-3] +const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; + const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS; const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD; @@ -689,20 +701,20 @@ const MovhlImplAll<0x12> xMOVL; const MovhlImpl_RtoR<0x16> xMOVLH; const MovhlImpl_RtoR<0x12> xMOVHL; -const SimdImpl_PackedLogic<0xdb> xPAND; -const SimdImpl_PackedLogic<0xdf> xPANDN; -const SimdImpl_PackedLogic<0xeb> xPOR; -const SimdImpl_PackedLogic<0xef> xPXOR; +const SimdImpl_DestRegEither<0x66,0xdb> xPAND; +const SimdImpl_DestRegEither<0x66,0xdf> xPANDN; +const SimdImpl_DestRegEither<0x66,0xeb> xPOR; +const SimdImpl_DestRegEither<0x66,0xef> xPXOR; -const SimdImpl_AndNot<0x55> xANDN; +const SimdImpl_AndNot xANDN; -const SimdImpl_SS_SD<0x66,0x2e> xUCOMI; +const SimdImpl_UcomI<0x66,0x2e> xUCOMI; const SimdImpl_rSqrt<0x53> xRCP; const SimdImpl_rSqrt<0x52> xRSQRT; const SimdImpl_Sqrt<0x51> xSQRT; -const SimdImpl_PSPD_SSSD<0x5f> xMAX; -const SimdImpl_PSPD_SSSD<0x5d> xMIN; +const SimdImpl_MinMax<0x5f> xMAX; +const SimdImpl_MinMax<0x5d> xMIN; const SimdImpl_Shuffle<0xc6> xSHUF; // ------------------------------------------------------------------------ @@ -754,8 +766,8 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S // 
------------------------------------------------------------------------ -const SimdImpl_ShiftAll<0xd0, 2> xPSRL; -const SimdImpl_ShiftAll<0xf0, 6> xPSLL; +const SimdImpl_Shift<0xd0, 2> xPSRL; +const SimdImpl_Shift<0xf0, 6> xPSLL; const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; const SimdImpl_AddSub<0xdc, 0xd4> xPADD; @@ -770,10 +782,29 @@ const SimdImpl_PUnpack xPUNPCK; const SimdImpl_Unpack xUNPCK; const SimdImpl_Pack xPACK; +const SimdImpl_PAbsolute xPABS; +const SimdImpl_PSign xPSIGN; +const SimdImpl_PInsert xPINS; +const SimdImpl_PExtract xPEXTR; + ////////////////////////////////////////////////////////////////////////////////////////// // +// Store Streaming SIMD Extension Control/Status to Mem32. +__emitinline void xSTMXCSR( u32* dest ) +{ + SimdPrefix( 0, 0xae ); + xWriteDisp( 3, dest ); +} + +// Load Streaming SIMD Extension Control/Status from Mem32. +__emitinline void xLDMXCSR( const u32* src ) +{ + SimdPrefix( 0, 0xae ); + xWriteDisp( 2, src ); +} + // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. 
@@ -851,5 +882,8 @@ __noinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { wri __forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } __noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } +__forceinline void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from) { writeXMMop( 0x50, to, from ); } +__forceinline void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); } + } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index b0c4bfca9d..62f434ccf3 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -370,8 +370,23 @@ namespace x86Emitter template< typename T > static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } + // [sSSE-3] Concatenates dest and source operands into an intermediate composite, + // shifts the composite at byte granularity to the right by a constant immediate, + // and extracts the right-aligned result into the destination. 
+ // + template< typename T > + static __forceinline void xPALIGNR( const xRegisterSIMD& to, const xRegisterSIMD& from, u8 imm8 ) + { + Internal::writeXMMop( 0x66, 0x0f3a, to, from ); + xWrite( imm8 ); + } + + // ------------------------------------------------------------------------ - + + extern void xSTMXCSR( u32* dest ); + extern void xLDMXCSR( const u32* src ); + extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ); extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ); extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ); @@ -411,8 +426,14 @@ namespace x86Emitter extern void xMOVNTQ( void* to, const xRegisterMMX& from ); extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ); + extern void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from ); + extern void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from ); + // ------------------------------------------------------------------------ + extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; + extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; + extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS; extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS; @@ -435,29 +456,29 @@ namespace x86Emitter // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_PackedLogic<0xdb> xPAND; - extern const Internal::SimdImpl_PackedLogic<0xdf> xPANDN; - extern const Internal::SimdImpl_PackedLogic<0xeb> xPOR; - extern const Internal::SimdImpl_PackedLogic<0xef> xPXOR; + extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND; + extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN; + extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR; + extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR; - extern const Internal::SimdImpl_AndNot<0x55> xANDN; + extern const Internal::SimdImpl_AndNot xANDN; - extern const 
Internal::SimdImpl_SS_SD<0x66,0x2e> xUCOMI; + extern const Internal::SimdImpl_UcomI<0x66,0x2e> xUCOMI; extern const Internal::SimdImpl_rSqrt<0x53> xRCP; extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT; extern const Internal::SimdImpl_Sqrt<0x51> xSQRT; - extern const Internal::SimdImpl_PSPD_SSSD<0x5f> xMAX; - extern const Internal::SimdImpl_PSPD_SSSD<0x5d> xMIN; + extern const Internal::SimdImpl_MinMax<0x5f> xMAX; + extern const Internal::SimdImpl_MinMax<0x5d> xMIN; extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF; // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_Compare xCMPEQ; - extern const Internal::SimdImpl_Compare xCMPLT; + extern const Internal::SimdImpl_Compare xCMPEQ; + extern const Internal::SimdImpl_Compare xCMPLT; extern const Internal::SimdImpl_Compare xCMPLE; - extern const Internal::SimdImpl_Compare xCMPUNORD; - extern const Internal::SimdImpl_Compare xCMPNE; + extern const Internal::SimdImpl_Compare xCMPUNORD; + extern const Internal::SimdImpl_Compare xCMPNE; extern const Internal::SimdImpl_Compare xCMPNLT; extern const Internal::SimdImpl_Compare xCMPNLE; extern const Internal::SimdImpl_Compare xCMPORD; @@ -497,8 +518,8 @@ namespace x86Emitter // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL; - extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL; + extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL; + extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL; extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD; @@ -512,5 +533,11 @@ namespace x86Emitter extern const Internal::SimdImpl_PUnpack xPUNPCK; extern const Internal::SimdImpl_Unpack xUNPCK; extern const Internal::SimdImpl_Pack xPACK; + + extern const Internal::SimdImpl_PAbsolute xPABS; + extern const Internal::SimdImpl_PSign xPSIGN; + extern const Internal::SimdImpl_PInsert 
xPINS; + extern const Internal::SimdImpl_PExtract xPEXTR; + } diff --git a/pcsx2/x86/ix86/ix86_legacy_instructions.h b/pcsx2/x86/ix86/ix86_legacy_instructions.h index f903e120cb..7f3e2ea740 100644 --- a/pcsx2/x86/ix86/ix86_legacy_instructions.h +++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h @@ -1351,7 +1351,6 @@ extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); // SSE4.1 diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 9a97441b87..2a164b4d99 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -95,9 +95,13 @@ using namespace x86Emitter; emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } -#define DEFINE_LEGACY_OP128( mod, sub ) \ - emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); } +#define DEFINE_LEGACY_OP128( ssenum, mod, sub ) \ + emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); } + +#define DEFINE_LEGACY_MOV128( ssenum, mod, sub ) \ + emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType 
from ) { x##mod##sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod##sub( xRegisterSSE(to), (void*)from ); } #define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ @@ -136,23 +140,31 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP ) DEFINE_LEGACY_RSQRT_OPCODE( RSQRT ) DEFINE_LEGACY_SQRT_OPCODE( SQRT ) -DEFINE_LEGACY_OP128( PMUL, LW ) -DEFINE_LEGACY_OP128( PMUL, HW ) -DEFINE_LEGACY_OP128( PMUL, UDQ ) +DEFINE_LEGACY_OP128( 2, PMUL, LW ) +DEFINE_LEGACY_OP128( 2, PMUL, HW ) +DEFINE_LEGACY_OP128( 2, PMUL, UDQ ) -DEFINE_LEGACY_OP128( PMAX, SW ) -DEFINE_LEGACY_OP128( PMAX, UB ) -DEFINE_LEGACY_OP128( PMIN, SW ) -DEFINE_LEGACY_OP128( PMIN, UB ) +DEFINE_LEGACY_OP128( 2, PMAX, SW ) +DEFINE_LEGACY_OP128( 2, PMAX, UB ) +DEFINE_LEGACY_OP128( 2, PMIN, SW ) +DEFINE_LEGACY_OP128( 2, PMIN, UB ) -DEFINE_LEGACY_OP128( UNPCK, LPS ) -DEFINE_LEGACY_OP128( UNPCK, HPS ) -DEFINE_LEGACY_OP128( PUNPCK, LQDQ ) -DEFINE_LEGACY_OP128( PUNPCK, HQDQ ) +DEFINE_LEGACY_OP128( 2, UNPCK, LPS ) +DEFINE_LEGACY_OP128( 2, UNPCK, HPS ) +DEFINE_LEGACY_OP128( 2, PUNPCK, LQDQ ) +DEFINE_LEGACY_OP128( 2, PUNPCK, HQDQ ) -DEFINE_LEGACY_OP128( PACK, SSWB ) -DEFINE_LEGACY_OP128( PACK, SSDW ) -DEFINE_LEGACY_OP128( PACK, USWB ) +DEFINE_LEGACY_OP128( 2, PACK, SSWB ) +DEFINE_LEGACY_OP128( 2, PACK, SSDW ) +DEFINE_LEGACY_OP128( 2, PACK, USWB ) + +DEFINE_LEGACY_MOV128( 3, MOV, SLDUP ) +DEFINE_LEGACY_MOV128( 3, MOV, SHDUP ) + +DEFINE_LEGACY_OP128( 4, PMAX, SD ) +DEFINE_LEGACY_OP128( 4, PMIN, SD ) +DEFINE_LEGACY_OP128( 4, PMAX, UD ) +DEFINE_LEGACY_OP128( 4, PMIN, UD ) emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); } @@ -201,11 +213,11 @@ emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.P emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } emitterT void 
SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } -emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); } +emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); } emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); } @@ -247,16 +259,6 @@ emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); } -emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.SD( xRegisterSSE(to), xRegisterSSE(from) ); } 
-emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.SD( xRegisterSSE(to), (void*)from ); } -emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.SD( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.SD( xRegisterSSE(to), (void*)from ); } - -emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.UD( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.UD( xRegisterSSE(to), (void*)from ); } -emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.UD( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.UD( xRegisterSSE(to), (void*)from ); } - emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.LPS( xRegisterSSE(to), (void*)from ); } @@ -264,113 +266,35 @@ emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.HPS( xRegisterSSE(to), (void*)from ); } emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.HPS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xMOVMSKPS( xRegister32(to), xRegisterSSE(from) ); } +emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xMOVMSKPD( xRegister32(to), xRegisterSSE(from) ); } + +emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.B( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.W( xRegisterSSE(to), xRegisterSSE(from) ); } 
+emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.D( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.B( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.W( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.D( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ) { xPEXTR.W( xRegister32(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ) { xPINS.W( xRegisterSSE(to), xRegister32(from), imm8 ); } + +emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } + ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//STMXCSR : Store Streaming SIMD Extension Control/Status * -//********************************************************************************** -emitterT void SSE_STMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//LDMXCSR : Load Streaming SIMD Extension Control/Status * -//********************************************************************************** -emitterT void SSE_LDMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); -} - 
//////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PEXTRW,PINSRW: Packed Extract/Insert Word * -//********************************************************************************** -emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } -emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } - -emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } -emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } +//**********************************************************************************} emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } -emitterT void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0xf3); - RexRB(0, to, from); - write16( 0x120f); - ModRM( 3, to, from ); -} - -emitterT void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); } -emitterT void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } -emitterT void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } - -// SSSE3 - -emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1C380F); - ModRM(3, to, from); -} - -emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1D380F); - ModRM(3, to, from); 
-} - -emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1E380F); - ModRM(3, to, from); -} - -emitterT void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0F3A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x08380F); - ModRM(3, to, from); -} - -emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x09380F); - ModRM(3, to, from); -} - -emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0A380F); - ModRM(3, to, from); -} // SSE4.1 diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index f788085771..647c812657 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -697,7 +697,11 @@ namespace x86Emitter template< typename T > bool Is8BitOp() { return sizeof(T) == 1; } template< typename T > void prefix16() { if( sizeof(T) == 2 ) xWrite( 0x66 ); } - #include "implement/xmm/movqss.h" + #include "implement/xmm/basehelpers.h" + #include "implement/xmm/moremovs.h" + #include "implement/xmm/arithmetic.h" + #include "implement/xmm/comparisons.h" + #include "implement/xmm/shufflepack.h" #include "implement/group1.h" #include "implement/group2.h" #include "implement/group3.h"