Fixed a small bug from my last commit (mostly only affected debug builds), and implemented PALIGNR/MOVSLDUP/PABS/PSIGN/PEXTR/PINS.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1036 96395faa-99c1-11dd-bbfe-3dabce05a288
2009-04-21 05:29:14 +00:00 · 2009-04-21 05:29:14 +00:00 · deb642af43
parent 5c312c36c7
commit deb642af43
11 changed files with 1052 additions and 809 deletions
--- a/pcsx2/x86/ix86/implement/xmm/arithmetic.h
+++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h
@ -0,0 +1,230 @@
+/*  Pcsx2 - Pc Ps2 Emulator
+ *  Copyright (C) 2002-2009  Pcsx2 Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Used for PSRA, which lacks the Q form.
+//
+template< u16 OpcodeBase1, u8 Modcode >
+class SimdImpl_ShiftWithoutQ	// W/D shift emitters; each offers reg, memory and imm8 shift-count forms
+{
+protected:
+	template< u16 Opcode1, u16 OpcodeImm, u8 ImmModcode >	// must not reuse the enclosing name 'Modcode': redeclaring a template parameter is ill-formed
+	class ShiftHelper
+	{
+	public:
+		ShiftHelper() {}
+
+		template< typename OperandType >	// shift count taken from another SIMD register of the same operand type
+		__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const xRegisterSIMD<OperandType>& from ) const
+		{
+			writeXMMop( 0x66, Opcode1, to, from );
+		}
+
+		template< typename OperandType >	// shift count read from a direct memory address
+		__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const void* from ) const
+		{
+			writeXMMop( 0x66, Opcode1, to, from );
+		}
+
+		template< typename OperandType >	// shift count read through a ModSib (indirect) operand
+		__noinline void operator()( const xRegisterSIMD<OperandType>& to, const ModSibBase& from ) const
+		{
+			writeXMMop( 0x66, Opcode1, to, from );
+		}
+
+		template< typename OperandType >	// immediate shift count: OpcodeImm, then ModRM with ImmModcode in the reg field, then imm8
+		__emitinline void operator()( const xRegisterSIMD<OperandType>& to, u8 imm ) const
+		{
+			SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm );	// 0x66 prefix only for 16-byte operands
+			ModRM( 3, (int)ImmModcode, to.Id );
+			xWrite<u8>( imm );
+		}
+	};
+
+public:
+	const ShiftHelper<OpcodeBase1+1,0x71,Modcode> W;	// word form: reg/mem opcode OpcodeBase1+1, immediate opcode 0x71
+	const ShiftHelper<OpcodeBase1+2,0x72,Modcode> D;	// dword form: reg/mem opcode OpcodeBase1+2, immediate opcode 0x72
+
+	SimdImpl_ShiftWithoutQ() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Implements PSRL and PSLL
+//
+template< u16 OpcodeBase1, u8 Modcode >
+class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>	// adds the Q and DQ forms to the inherited W/D forms
+{
+public:
+	const typename SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>::template ShiftHelper<OpcodeBase1+3,0x73,Modcode> Q;	// ShiftHelper lives in a dependent base, so it must be qualified for two-phase lookup
+	
+	void DQ( const xRegisterSSE& to, u8 imm ) const	// SSE-register-only immediate shift; always emits the 0x66 prefix
+	{
+		SimdPrefix( 0x66, 0x73 );
+		ModRM( 3, (int)Modcode+1, to.Id );	// same opcode (0x73) as Q's immediate form, but Modcode+1 in the ModRM reg field
+		xWrite<u8>( imm );
+	}
+	
+	SimdImpl_Shift() {}
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+template< u16 OpcodeB, u16 OpcodeQ >	// OpcodeB: base opcode (used as-is by USB); OpcodeQ: separate opcode for the Q form
+class SimdImpl_AddSub
+{
+public:
+	const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;	// byte form (no saturation noted in source; presumably wrap-around)
+	const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;	// word form
+	const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;	// dword form
+	const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;	// qword form; its opcode is not an offset from OpcodeB
+
+	// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
+	const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;
+
+	// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
+	const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;
+
+	// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
+	const SimdImpl_DestRegEither<0x66,OpcodeB> USB;
+
+	// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
+	const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
+
+	SimdImpl_AddSub() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+class SimdImpl_PMul	// packed integer multiply forms; two-byte opcodes below carry 0x38 in the low byte
+{
+public:
+	const SimdImpl_DestRegEither<0x66,0xd5> LW;	// PMULLW: low 16 bits of each signed word product
+	const SimdImpl_DestRegEither<0x66,0xe5> HW;	// PMULHW: high 16 bits of each signed word product
+	const SimdImpl_DestRegEither<0x66,0xe4> HUW;	// PMULHUW: high 16 bits of each unsigned word product
+	const SimdImpl_DestRegEither<0x66,0xf4> UDQ;	// PMULUDQ: unsigned dword multiply producing qword results
+
+	// [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
+	// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
+	// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
+	// bits. Rounding is always performed by adding 1 to the least significant bit of the
+	// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
+	// immediately to the right of the most significant bit of each 18-bit intermediate
+	// result and packed to the destination operand.
+	//
+	// Both operands can be MMX or XMM registers.  Source can be register or memory.
+	//
+	const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;
+	
+	// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
+	// the low 32 bits of each product in dest.
+	const SimdImpl_DestRegSSE<0x66,0x4038> LD;
+	
+	// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
+	const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
+	
+	SimdImpl_PMul() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
+//
+template< u16 OpcodeSSE >	// OpcodeSSE: opcode shared by both forms; only the prefix differs
+class SimdImpl_rSqrt
+{
+public:
+	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;	// packed single precision (no prefix)
+	const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;	// scalar single precision (0xf3 prefix)
+	SimdImpl_rSqrt() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// SQRT is exposed here in PS/SS/SD forms only; the PD form (SQRTPD) exists in SSE2 but is not implemented by this class.
+//
+template< u16 OpcodeSSE >	// OpcodeSSE: opcode shared with the inherited PS/SS forms
+class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
+{
+public:
+	const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;	// scalar double precision (0xf2 prefix); PS and SS come from the base class
+	SimdImpl_Sqrt() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+class SimdImpl_AndNot	// AND-NOT emitters; both forms use opcode 0x55 and differ only by prefix
+{
+public:
+	const SimdImpl_DestRegSSE<0x00,0x55> PS;	// packed single precision (no prefix)
+	const SimdImpl_DestRegSSE<0x66,0x55> PD;	// packed double precision (0x66 prefix)
+	SimdImpl_AndNot() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Packed absolute value. [sSSE3 only]
+//
+class SimdImpl_PAbsolute	// B/W/D forms accept either MMX or XMM operands (SimdImpl_DestRegEither)
+{
+public:
+	SimdImpl_PAbsolute() {}
+	
+	// [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
+	// in dest, as UNSIGNED.
+	const SimdImpl_DestRegEither<0x66, 0x1c38> B;
+
+	// [sSSE-3] Computes the absolute value of words in the src, and stores the result
+	// in dest, as UNSIGNED.
+	const SimdImpl_DestRegEither<0x66, 0x1d38> W;
+
+	// [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
+	// result in dest, as UNSIGNED.
+	const SimdImpl_DestRegEither<0x66, 0x1e38> D;
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the
+// corresponding sign in src.
+//
+class SimdImpl_PSign	// B/W/D forms accept either MMX or XMM operands (SimdImpl_DestRegEither)
+{
+public:
+	SimdImpl_PSign() {}
+
+	// [sSSE-3] Negates each byte element of dest if the signed integer value of the
+	// corresponding data element in src is less than zero. If the signed integer value
+	// of a data element in src is positive, the corresponding data element in dest is
+	// unchanged. If a data element in src is zero, the corresponding data element in
+	// dest is set to zero.
+	const SimdImpl_DestRegEither<0x66, 0x0838> B;
+
+	// [sSSE-3] Negates each word element of dest if the signed integer value of the
+	// corresponding data element in src is less than zero. If the signed integer value
+	// of a data element in src is positive, the corresponding data element in dest is
+	// unchanged. If a data element in src is zero, the corresponding data element in
+	// dest is set to zero.
+	const SimdImpl_DestRegEither<0x66, 0x0938> W;
+
+	// [sSSE-3] Negates each doubleword element of dest if the signed integer value
+	// of the corresponding data element in src is less than zero. If the signed integer
+	// value of a data element in src is positive, the corresponding data element in dest
+	// is unchanged. If a data element in src is zero, the corresponding data element in
+	// dest is set to zero.
+	const SimdImpl_DestRegEither<0x66, 0x0a38> D;
+
+};
--- a/pcsx2/x86/ix86/implement/xmm/basehelpers.h
+++ b/pcsx2/x86/ix86/implement/xmm/basehelpers.h
@ -0,0 +1,152 @@
+/*  Pcsx2 - Pc Ps2 Emulator
+ *  Copyright (C) 2002-2009  Pcsx2 Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// MMX / SSE Helper Functions!
+
+extern void SimdPrefix( u8 prefix, u16 opcode );
+
+// ------------------------------------------------------------------------
+// xmm emitter helpers for xmm instruction with prefixes.
+// These functions also support deducing the use of the prefix from the template parameters,
+// since most xmm instructions use a prefix and most mmx instructions do not.  (some mov
+// instructions violate this "guideline.")
+//
+template< typename T, typename T2 >	// reg,reg form; T and T2 may differ, only sizeof(T) decides the prefix
+__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
+{
+	SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );	// prefix replaced by 0 unless forced or T is 16 bytes wide
+	ModRM_Direct( to.Id, from.Id );
+}
+
+template< typename T >	// reg,ModSib (indirect memory) form
+__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
+{
+	SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );	// prefix replaced by 0 unless forced or T is 16 bytes wide
+	EmitSibMagic( reg.Id, sib );
+}
+
+template< typename T >	// reg,direct-address form
+__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
+{
+	SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );	// prefix replaced by 0 unless forced or T is 16 bytes wide
+	xWriteDisp( reg.Id, data );
+}
+
+// ------------------------------------------------------------------------
+// xmm emitter helpers for xmm instructions *without* prefixes.
+// These are normally used for special instructions that have MMX forms only (non-SSE), however
+// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
+//
+template< typename T, typename T2 >	// prefix-less reg,reg form
+__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
+{
+	SimdPrefix( 0, opcode );	// always passes 0 as the prefix, whatever the register size
+	ModRM_Direct( to.Id, from.Id );
+}
+
+template< typename T >	// prefix-less reg,ModSib (indirect memory) form
+__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
+{
+	SimdPrefix( 0, opcode );	// always passes 0 as the prefix, whatever the register size
+	EmitSibMagic( reg.Id, sib );
+}
+
+template< typename T >	// prefix-less reg,direct-address form
+__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
+{
+	SimdPrefix( 0, opcode );	// always passes 0 as the prefix, whatever the register size
+	xWriteDisp( reg.Id, data );
+}
+
+// ------------------------------------------------------------------------
+// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
+// like ANDPS/ANDPD
+//
+template< u8 Prefix, u16 Opcode >	// Prefix/Opcode are forwarded unchanged to writeXMMop by every overload
+class SimdImpl_DestRegSSE
+{
+public:
+	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const	{ writeXMMop( Prefix, Opcode, to, from ); }
+	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const			{ writeXMMop( Prefix, Opcode, to, from ); }
+	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const	{ writeXMMop( Prefix, Opcode, to, from ); }
+
+	SimdImpl_DestRegSSE() {} // empty user-provided ctor. NOTE(review): presumably required by GCC for const instances -- confirm
+};
+
+// ------------------------------------------------------------------------
+// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
+// (PSHUFD / PSHUFHW / etc).
+//
+template< u8 Prefix, u16 Opcode >	// every overload emits the instruction via writeXMMop, then appends the imm8 byte
+class SimdImpl_DestRegImmSSE
+{
+public:
+	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const	{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
+	__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const			{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
+	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const	{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
+
+	SimdImpl_DestRegImmSSE() {} // empty user-provided ctor. NOTE(review): presumably required by GCC for const instances -- confirm
+};
+
+template< u8 Prefix, u16 Opcode >	// MMX-register counterpart of SimdImpl_DestRegImmSSE: reg,reg/rm,imm8 forms
+class SimdImpl_DestRegImmMMX
+{
+public:
+	__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const	{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
+	__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const			{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
+	__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const	{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
+
+	SimdImpl_DestRegImmMMX() {} // empty user-provided ctor. NOTE(review): presumably required by GCC for const instances -- confirm
+};
+
+// ------------------------------------------------------------------------
+// For implementing MMX/SSE operations that have reg,reg/rm forms only,
+// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops).
+//
+template< u8 Prefix, u16 Opcode >	// writeXMMop passes 0 instead of Prefix when the operand type T is not 16 bytes wide
+class SimdImpl_DestRegEither
+{
+public:
+	template< typename T > __forceinline	// reg,reg: both registers must share the same operand type T
+	void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const	{ writeXMMop( Prefix, Opcode, to, from ); }
+	template< typename T > __forceinline	// reg,direct address
+	void operator()( const xRegisterSIMD<T>& to, const void* from ) const				{ writeXMMop( Prefix, Opcode, to, from ); }
+	template< typename T > __forceinline	// reg,ModSib (indirect memory)
+	void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const			{ writeXMMop( Prefix, Opcode, to, from ); }
+
+	SimdImpl_DestRegEither() {} // empty user-provided ctor. NOTE(review): presumably required by GCC for const instances -- confirm
+};
+
+// ------------------------------------------------------------------------
+// For implementing MMX/SSE operations which the destination *must* be a register, but the source
+// can be regDirect or ModRM (indirect).
+//
+template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >	// all overloads pass forcePrefix=true, so Prefix is used regardless of register size
+class SimdImpl_DestRegStrict
+{
+public:
+	__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const					{ writeXMMop( Prefix, Opcode, to, from, true ); }
+	__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const				{ writeXMMop( Prefix, Opcode, to, from, true ); }
+	__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const	{ writeXMMop( Prefix, Opcode, to, from, true ); }
+
+	SimdImpl_DestRegStrict() {} // empty user-provided ctor. NOTE(review): presumably required by GCC for const instances -- confirm
+};
+
--- a/pcsx2/x86/ix86/implement/xmm/comparisons.h
+++ b/pcsx2/x86/ix86/implement/xmm/comparisons.h
@ -0,0 +1,131 @@
+/*  Pcsx2 - Pc Ps2 Emulator
+ *  Copyright (C) 2002-2009  Pcsx2 Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+template< u16 OpcodeSSE >	// OpcodeSSE: opcode shared by all four forms; only the prefix differs
+class SimdImpl_MinMax
+{
+public:
+	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;		// packed single precision
+	const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD;		// packed double precision
+	const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;		// scalar single precision
+	const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;		// scalar double precision
+
+	SimdImpl_MinMax() {}  // empty user-provided ctor. NOTE(review): presumably required by GCC for const instances -- confirm
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+template< SSE2_ComparisonType CType >	// CType is written as the trailing immediate byte of every emitted compare
+class SimdImpl_Compare
+{
+protected:
+	template< u8 Prefix > struct Woot	// emitter for one prefix variant: opcode 0xc2 followed by the CType byte
+	{
+		__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const	{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
+		__forceinline void operator()( const xRegisterSSE& to, const void* from ) const			{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
+		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const		{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
+		Woot() {}
+	};
+
+public:
+	const Woot<0x00> PS;	// packed single precision (no prefix)
+	const Woot<0x66> PD;	// packed double precision
+	const Woot<0xf3> SS;	// scalar single precision
+	const Woot<0xf2> SD;	// scalar double precision
+	SimdImpl_Compare() {} // empty user-provided ctor. NOTE(review): presumably required by GCC for const instances -- confirm
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+class SimdImpl_PCompare	// packed integer compares; every form accepts either MMX or XMM operands
+{
+public:
+	SimdImpl_PCompare() {}
+	
+	// Compare packed bytes for equality.
+	// If a data element in dest is equal to the corresponding data element in src, the
+	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
+	const SimdImpl_DestRegEither<0x66,0x74> EQB;
+
+	// Compare packed words for equality.
+	// If a data element in dest is equal to the corresponding data element in src, the
+	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
+	const SimdImpl_DestRegEither<0x66,0x75> EQW;
+
+	// Compare packed doublewords [32-bits] for equality.
+	// If a data element in dest is equal to the corresponding data element in src, the
+	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
+	const SimdImpl_DestRegEither<0x66,0x76> EQD;
+
+	// Compare packed signed bytes for greater than.
+	// If a data element in dest is greater than the corresponding data element in src, the
+	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
+	const SimdImpl_DestRegEither<0x66,0x64> GTB;
+
+	// Compare packed signed words for greater than.
+	// If a data element in dest is greater than the corresponding data element in src, the
+	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
+	const SimdImpl_DestRegEither<0x66,0x65> GTW;
+
+	// Compare packed signed doublewords [32-bits] for greater than.
+	// If a data element in dest is greater than the corresponding data element in src, the
+	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
+	const SimdImpl_DestRegEither<0x66,0x66> GTD;
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// 
+template< u8 Opcode1, u16 Opcode2 >	// Opcode1: base for the UB/SW forms; Opcode2: base byte for the SSE-4.1 forms
+class SimdImpl_PMinMax
+{
+public:
+	SimdImpl_PMinMax() {}
+	
+	// Compare packed unsigned byte integers in dest to src and store packed min/max
+	// values in dest.
+	// Operation can be performed on either MMX or SSE operands.
+	const SimdImpl_DestRegEither<0x66,Opcode1> UB;
+
+	// Compare packed signed word integers in dest to src and store packed min/max
+	// values in dest.
+	// Operation can be performed on either MMX or SSE operands.
+	const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW;
+
+	// [SSE-4.1] Compare packed signed byte integers in dest to src and store 
+	// packed min/max values in dest. (SSE operands only)
+	const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB;	// opcode value packs 0x38 in the low byte and Opcode2 in the high byte
+
+	// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store 
+	// packed min/max values in dest. (SSE operands only)
+	const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD;
+
+	// [SSE-4.1] Compare packed unsigned word integers in dest to src and store 
+	// packed min/max values in dest. (SSE operands only)
+	const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW;
+
+	// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store 
+	// packed min/max values in dest. (SSE operands only)
+	const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
+};
+
--- a/pcsx2/x86/ix86/implement/xmm/moremovs.h
+++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h
@ -0,0 +1,82 @@
+/*  Pcsx2 - Pc Ps2 Emulator
+ *  Copyright (C) 2002-2009  Pcsx2 Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Moves to/from high/low portions of an xmm register.
+// These instructions cannot be used in reg/reg form.
+//
+template< u16 Opcode >	// Opcode: load form (register <- memory); the store form uses Opcode+1
+class MovhlImplAll
+{
+protected:
+	template< u8 Prefix >
+	struct Woot	// store overloads pass (from, to) to writeXMMop so the register is always the reg operand
+	{
+		__forceinline void operator()( const xRegisterSSE& to, const void* from ) const			{ writeXMMop( Prefix, Opcode, to, from ); }
+		__forceinline void operator()( const void* to, const xRegisterSSE& from ) const			{ writeXMMop( Prefix, Opcode+1, from, to ); }
+		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const		{ writeXMMop( Prefix, Opcode, to, from ); }
+		__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const		{ writeXMMop( Prefix, Opcode+1, from, to ); }
+	};
+
+public:
+	Woot<0x00> PS;	// no-prefix variant
+	Woot<0x66> PD;	// 0x66-prefixed variant
+
+	MovhlImplAll() {} // empty user-provided ctor, kept for GCC.
+};
+
+// ------------------------------------------------------------------------
+// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
+// do something kinda different! Fun!
+//
+template< u16 Opcode >	// Opcode: shared by the PS and PD register-to-register forms
+class MovhlImpl_RtoR
+{
+public:
+	__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const			{ writeXMMop( Opcode, to, from ); }	// prefix-less writeXMMop overload
+	__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const			{ writeXMMop( 0x66, Opcode, to, from ); }	// 0x66-prefixed
+
+	MovhlImpl_RtoR() {} // empty user-provided ctor, kept for GCC.
+};
+
+// ------------------------------------------------------------------------
+template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >	// Opcode: forms with a register destination; OpcodeAlt: forms with a memory destination
+class MovapsImplAll
+{
+public:
+	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const	{ if( to != from ) writeXMMop( Prefix, Opcode, to, from ); }	// emits nothing when to == from
+	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const			{ writeXMMop( Prefix, Opcode, to, from ); }
+	__forceinline void operator()( const void* to, const xRegisterSSE& from ) const			{ writeXMMop( Prefix, OpcodeAlt, from, to ); }
+	__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const		{ writeXMMop( Prefix, Opcode, to, from ); }
+	__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const		{ writeXMMop( Prefix, OpcodeAlt, from, to ); }
+	
+	MovapsImplAll() {} // empty user-provided ctor, kept for GCC.
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+template< u8 AltPrefix, u16 OpcodeSSE >	// AltPrefix: prefix used by the SD form; the SS form uses none
+class SimdImpl_UcomI
+{
+public:
+	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS;	// no prefix
+	const SimdImpl_DestRegSSE<AltPrefix,OpcodeSSE> SD;	// same opcode with AltPrefix
+	SimdImpl_UcomI() {}
+};
--- a/pcsx2/x86/ix86/implement/xmm/movqss.h
+++ b/pcsx2/x86/ix86/implement/xmm/movqss.h
@ -1,646 +0,0 @@
-/*  Pcsx2 - Pc Ps2 Emulator
- *  Copyright (C) 2002-2009  Pcsx2 Team
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *  
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *  
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
- */
-
-#pragma once
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// MMX / SSE Helper Functions!
-
-extern void SimdPrefix( u8 prefix, u16 opcode );
-
-// ------------------------------------------------------------------------
-// xmm emitter helpers for xmm instruction with prefixes.
-// These functions also support deducing the use of the prefix from the template parameters,
-// since most xmm instructions use a prefix and most mmx instructions do not.  (some mov
-// instructions violate this "guideline.")
-//
-template< typename T, typename T2 >
-__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& to, const xRegister<T2>& from, bool forcePrefix=false )
-{
-	SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
-	ModRM_Direct( to.Id, from.Id );
-}
-
-template< typename T >
-__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const ModSibBase& sib, bool forcePrefix=false )
-{
-	SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
-	EmitSibMagic( reg.Id, sib );
-}
-
-template< typename T >
-__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister<T>& reg, const void* data, bool forcePrefix=false )
-{
-	SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode );
-	xWriteDisp( reg.Id, data );
-}
-
-// ------------------------------------------------------------------------
-// xmm emitter helpers for xmm instructions *without* prefixes.
-// These are normally used for special instructions that have MMX forms only (non-SSE), however
-// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies.
-//
-template< typename T, typename T2 >
-__emitinline void writeXMMop( u16 opcode, const xRegister<T>& to, const xRegister<T2>& from )
-{
-	SimdPrefix( 0, opcode );
-	ModRM_Direct( to.Id, from.Id );
-}
-
-template< typename T >
-__noinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const ModSibBase& sib )
-{
-	SimdPrefix( 0, opcode );
-	EmitSibMagic( reg.Id, sib );
-}
-
-template< typename T >
-__emitinline void writeXMMop( u16 opcode, const xRegister<T>& reg, const void* data )
-{
-	SimdPrefix( 0, opcode );
-	xWriteDisp( reg.Id, data );
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// Moves to/from high/low portions of an xmm register.
-// These instructions cannot be used in reg/reg form.
-//
-template< u16 Opcode >
-class MovhlImplAll
-{
-protected:
-	template< u8 Prefix >
-	struct Woot
-	{
-		__forceinline void operator()( const xRegisterSSE& to, const void* from ) const			{ writeXMMop( Prefix, Opcode, to, from ); }
-		__forceinline void operator()( const void* to, const xRegisterSSE& from ) const			{ writeXMMop( Prefix, Opcode+1, from, to ); }
-		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const		{ writeXMMop( Prefix, Opcode, to, from ); }
-		__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const		{ writeXMMop( Prefix, Opcode+1, from, to ); }
-	};
-
-public:
-	Woot<0x00> PS;
-	Woot<0x66> PD;
-
-	MovhlImplAll() {} //GCC.
-};
-
-// ------------------------------------------------------------------------
-// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
-// do something kinda different! Fun!
-//
-template< u16 Opcode >
-class MovhlImpl_RtoR
-{
-public:
-	__forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const			{ writeXMMop( Opcode, to, from ); }
-	__forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const			{ writeXMMop( 0x66, Opcode, to, from ); }
-
-	MovhlImpl_RtoR() {} //GCC.
-};
-
-// ------------------------------------------------------------------------
-template< u8 Prefix, u16 Opcode, u16 OpcodeAlt >
-class MovapsImplAll
-{
-public:
-	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const	{ if( to != from ) writeXMMop( Prefix, Opcode, to, from ); }
-	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const			{ writeXMMop( Prefix, Opcode, to, from ); }
-	__forceinline void operator()( const void* to, const xRegisterSSE& from ) const			{ writeXMMop( Prefix, OpcodeAlt, from, to ); }
-	__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const		{ writeXMMop( Prefix, Opcode, to, from ); }
-	__noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const		{ writeXMMop( Prefix, OpcodeAlt, from, to ); }
-	
-	MovapsImplAll() {} //GCC.
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for
-// a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms).
-//
-template< u16 Opcode >
-class SimdImpl_PackedLogic
-{
-public:
-	template< typename T > __forceinline
-	void operator()( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from ) const	{ writeXMMop( 0x66, Opcode, to, from ); }
-	template< typename T > __forceinline
-	void operator()( const xRegisterSIMD<T>& to, const void* from ) const				{ writeXMMop( 0x66, Opcode, to, from ); }
-	template< typename T > __forceinline
-	void operator()( const xRegisterSIMD<T>& to, const ModSibBase& from ) const		{ writeXMMop( 0x66, Opcode, to, from ); }
-
-	SimdImpl_PackedLogic() {} //GCWho?
-};
-
-// ------------------------------------------------------------------------
-// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
-// like ANDPS/ANDPD
-//
-template< u8 Prefix, u16 Opcode >
-class SimdImpl_DestRegSSE
-{
-public:
-	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const	{ writeXMMop( Prefix, Opcode, to, from ); }
-	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const			{ writeXMMop( Prefix, Opcode, to, from ); }
-	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const	{ writeXMMop( Prefix, Opcode, to, from ); }
-
-	SimdImpl_DestRegSSE() {} //GCWho?
-};
-
-// ------------------------------------------------------------------------
-// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
-// (PSHUFD / PSHUFHW / etc).
-//
-template< u8 Prefix, u16 Opcode >
-class SimdImpl_DestRegImmSSE
-{
-public:
-	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const	{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
-	__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const			{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
-	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const	{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
-
-	SimdImpl_DestRegImmSSE() {} //GCWho?
-};
-
-template< u8 Prefix, u16 Opcode >
-class SimdImpl_DestRegImmMMX
-{
-public:
-	__forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const	{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
-	__forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const			{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
-	__forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const	{ writeXMMop( Prefix, Opcode, to, from ); xWrite<u8>( imm ); }
-
-	SimdImpl_DestRegImmMMX() {} //GCWho?
-};
-
-// ------------------------------------------------------------------------
-// For implementing MMX/SSE operations that have reg,reg/rm forms only,
-// but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops).
-//
-template< u8 Prefix, u16 Opcode >
-class SimdImpl_DestRegEither
-{
-public:
-	template< typename DestOperandType > __forceinline
-	void operator()( const xRegisterSIMD<DestOperandType>& to, const xRegisterSIMD<DestOperandType>& from ) const	{ writeXMMop( Prefix, Opcode, to, from ); }
-	template< typename DestOperandType > __forceinline
-	void operator()( const xRegisterSIMD<DestOperandType>& to, const void* from ) const			{ writeXMMop( Prefix, Opcode, to, from ); }
-	template< typename DestOperandType > __forceinline
-	void operator()( const xRegisterSIMD<DestOperandType>& to, const ModSibBase& from ) const		{ writeXMMop( Prefix, Opcode, to, from ); }
-
-	SimdImpl_DestRegEither() {} //GCWho?
-};
-
-// ------------------------------------------------------------------------
-// For implementing MMX/SSE operations which the destination *must* be a register, but the source
-// can be regDirect or ModRM (indirect).
-//
-template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType >
-class SimdImpl_DestRegStrict
-{
-public:
-	__forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const					{ writeXMMop( Prefix, Opcode, to, from, true ); }
-	__forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const				{ writeXMMop( Prefix, Opcode, to, from, true ); }
-	__forceinline void operator()( const DestRegType& to, const ModSibStrict<SrcOperandType>& from ) const	{ writeXMMop( Prefix, Opcode, to, from, true ); }
-
-	SimdImpl_DestRegStrict() {} //GCWho?
-};
-
-// ------------------------------------------------------------------------
-template< u16 OpcodeSSE >
-class SimdImpl_PSPD_SSSD
-{
-public:
-	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;		// packed single precision
-	const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD;		// packed double precision
-	const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;		// scalar single precision
-	const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;		// scalar double precision
-	
-	SimdImpl_PSPD_SSSD() {}  //GChow?
-};
-
-// ------------------------------------------------------------------------
-//
-template< u16 OpcodeSSE >
-class SimdImpl_AndNot
-{
-public:
-	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;
-	const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD;
-	SimdImpl_AndNot() {}
-};
-
-// ------------------------------------------------------------------------
-// For instructions that have SS/SD form only (UCOMI, etc)
-// AltPrefix - prefixed used for doubles (SD form).
-template< u8 AltPrefix, u16 OpcodeSSE >
-class SimdImpl_SS_SD
-{
-public:
-	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS;
-	const SimdImpl_DestRegSSE<AltPrefix,OpcodeSSE> SD;
-	SimdImpl_SS_SD() {}
-};
-
-// ------------------------------------------------------------------------
-// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
-template< u16 OpcodeSSE >
-class SimdImpl_rSqrt
-{
-public:
-	const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS;
-	const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS;
-	SimdImpl_rSqrt() {}
-};
-
-// ------------------------------------------------------------------------
-// For instructions that have PS/SS/SD form only (most commonly Sqrt functions)
-template< u16 OpcodeSSE >
-class SimdImpl_Sqrt : public SimdImpl_rSqrt<OpcodeSSE>
-{
-public:
-	const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD;
-	SimdImpl_Sqrt() {}
-};
-
-// ------------------------------------------------------------------------
-template< u16 OpcodeSSE >
-class SimdImpl_Shuffle
-{
-protected:
-	template< u8 Prefix > struct Woot
-	{
-		__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const	{ writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
-		__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const			{ writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
-		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const		{ writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite<u8>( cmptype ); }
-		Woot() {}
-	};
-
-public:
-	const Woot<0x00> PS;
-	const Woot<0x66> PD;
-
-	SimdImpl_Shuffle() {} //GCWhat?
-};
-
-// ------------------------------------------------------------------------
-template< SSE2_ComparisonType CType >
-class SimdImpl_Compare
-{
-protected:
-	template< u8 Prefix > struct Woot
-	{
-		__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const	{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
-		__forceinline void operator()( const xRegisterSSE& to, const void* from ) const			{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
-		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const		{ writeXMMop( Prefix, 0xc2, to, from ); xWrite<u8>( CType ); }
-		Woot() {}
-	};
-
-public:
-	const Woot<0x00> PS;
-	const Woot<0x66> PD;
-	const Woot<0xf3> SS;
-	const Woot<0xf2> SD;
-	SimdImpl_Compare() {} //GCWhat?
-};
-
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-//
-template< u16 Opcode1, u16 OpcodeImm, u8 Modcode >
-class SimdImpl_Shift
-{
-public:
-	SimdImpl_Shift() {}
-
-	template< typename OperandType >
-	__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const xRegisterSIMD<OperandType>& from ) const
-	{
-		writeXMMop( 0x66, Opcode1, to, from );
-	}
-
-	template< typename OperandType >
-	__forceinline void operator()( const xRegisterSIMD<OperandType>& to, const void* from ) const
-	{
-		writeXMMop( 0x66, Opcode1, to, from );
-	}
-
-	template< typename OperandType >
-	__noinline void operator()( const xRegisterSIMD<OperandType>& to, const ModSibBase& from ) const
-	{
-		writeXMMop( 0x66, Opcode1, to, from );
-	}
-
-	template< typename OperandType >
-	__emitinline void operator()( const xRegisterSIMD<OperandType>& to, u8 imm ) const
-	{
-		SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm );
-		ModRM( 3, (int)Modcode, to.Id );
-		xWrite<u8>( imm );
-	}
-};
-
-// ------------------------------------------------------------------------
-// Used for PSRA
-template< u16 OpcodeBase1, u8 Modcode >
-class SimdImpl_ShiftWithoutQ
-{
-public:
-	const SimdImpl_Shift<OpcodeBase1+1,0x71,Modcode> W;
-	const SimdImpl_Shift<OpcodeBase1+2,0x72,Modcode> D;
-
-	SimdImpl_ShiftWithoutQ() {}
-};
-
-// ------------------------------------------------------------------------
-template< u16 OpcodeBase1, u8 Modcode >
-class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ<OpcodeBase1, Modcode>
-{
-public:
-	const SimdImpl_Shift<OpcodeBase1+3,0x73,Modcode> Q;
-	
-	void DQ( const xRegisterSSE& to, u8 imm ) const
-	{
-		SimdPrefix( 0x66, 0x73 );
-		ModRM( 3, (int)Modcode+1, to.Id );
-		xWrite<u8>( imm );
-	}
-	
-	SimdImpl_ShiftAll() {}
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-template< u16 OpcodeB, u16 OpcodeQ >
-class SimdImpl_AddSub
-{
-public:
-	const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B;
-	const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W;
-	const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D;
-	const SimdImpl_DestRegEither<0x66,OpcodeQ> Q;
-
-	// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
-	const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB;
-
-	// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
-	const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW;
-
-	// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
-	const SimdImpl_DestRegEither<0x66,OpcodeB> USB;
-
-	// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
-	const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW;
-
-	SimdImpl_AddSub() {}
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-class SimdImpl_PMul
-{
-public:
-	const SimdImpl_DestRegEither<0x66,0xd5> LW;
-	const SimdImpl_DestRegEither<0x66,0xe5> HW;
-	const SimdImpl_DestRegEither<0x66,0xe4> HUW;
-	const SimdImpl_DestRegEither<0x66,0xf4> UDQ;
-
-	// [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
-	// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
-	// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
-	// bits. Rounding is always performed by adding 1 to the least significant bit of the
-	// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
-	// immediately to the right of the most significant bit of each 18-bit intermediate
-	// result and packed to the destination operand.
-	//
-	// Both operands can be MMX or XMM registers.  Source can be register or memory.
-	//
-	const SimdImpl_DestRegEither<0x66,0x0b38> HRSW;
-	
-	// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
-	// the low 32 bits of each product in xmm1.
-	const SimdImpl_DestRegSSE<0x66,0x4038> LD;
-	
-	// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
-	const SimdImpl_DestRegSSE<0x66,0x2838> DQ;
-	
-	SimdImpl_PMul() {}
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-class SimdImpl_PCompare
-{
-public:
-	SimdImpl_PCompare() {}
-	
-	// Compare packed bytes for equality.
-	// If a data element in dest is equal to the corresponding date element src, the
-	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
-	const SimdImpl_DestRegEither<0x66,0x74> EQB;
-
-	// Compare packed words for equality.
-	// If a data element in dest is equal to the corresponding date element src, the
-	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
-	const SimdImpl_DestRegEither<0x66,0x75> EQW;
-
-	// Compare packed doublewords [32-bits] for equality.
-	// If a data element in dest is equal to the corresponding date element src, the
-	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
-	const SimdImpl_DestRegEither<0x66,0x76> EQD;
-
-	// Compare packed signed bytes for greater than.
-	// If a data element in dest is greater than the corresponding date element src, the
-	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
-	const SimdImpl_DestRegEither<0x66,0x64> GTB;
-
-	// Compare packed signed words for greater than.
-	// If a data element in dest is greater than the corresponding date element src, the
-	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
-	const SimdImpl_DestRegEither<0x66,0x65> GTW;
-
-	// Compare packed signed doublewords [32-bits] for greater than.
-	// If a data element in dest is greater than the corresponding date element src, the
-	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
-	const SimdImpl_DestRegEither<0x66,0x66> GTD;
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// 
-template< u8 Opcode1, u16 Opcode2 >
-class SimdImpl_PMinMax
-{
-public:
-	SimdImpl_PMinMax() {}
-	
-	// Compare packed unsigned byte integers in dest to src and store packed min/max
-	// values in dest.
-	// Operation can be performed on either MMX or SSE operands.
-	const SimdImpl_DestRegEither<0x66,Opcode1> UB;
-
-	// Compare packed signed word integers in dest to src and store packed min/max
-	// values in dest.
-	// Operation can be performed on either MMX or SSE operands.
-	const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW;
-
-	// [SSE-4.1] Compare packed signed byte integers in dest to src and store 
-	// packed min/max values in dest. (SSE operands only)
-	const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB;
-
-	// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store 
-	// packed min/max values in dest. (SSE operands only)
-	const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD;
-
-	// [SSE-4.1] Compare packed unsigned word integers in dest to src and store 
-	// packed min/max values in dest. (SSE operands only)
-	const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW;
-
-	// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store 
-	// packed min/max values in dest. (SSE operands only)
-	const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD;
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-class SimdImpl_PShuffle
-{
-public:
-	SimdImpl_PShuffle() {}
-	
-	// Copies words from src and inserts them into dest at word locations selected with
-	// the order operand (8 bit immediate).
-	const SimdImpl_DestRegImmMMX<0x00,0x70> W;
-
-	// Copies doublewords from src and inserts them into dest at dword locations selected
-	// with the order operand (8 bit immediate).
-	const SimdImpl_DestRegImmSSE<0x66,0x70> D;
-	
-	// Copies words from the low quadword of src and inserts them into the low quadword
-	// of dest at word locations selected with the order operand (8 bit immediate).
-	// The high quadword of src is copied to the high quadword of dest.
-	const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;
-
-	// Copies words from the high quadword of src and inserts them into the high quadword
-	// of dest at word locations selected with the order operand (8 bit immediate).
-	// The low quadword of src is copied to the low quadword of dest.
-	const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;
-
-	// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
-	// control mask in src.  If the most significant bit (bit[7]) of each byte of the
-	// shuffle control mask is set, then constant zero is written in the result byte.
-	// Each byte in the shuffle control mask forms an index to permute the corresponding
-	// byte in dest. The value of each index is the least significant 4 bits (128-bit
-	// operation) or 3 bits (64-bit operation) of the shuffle control byte.
-	//
-	// Operands can be MMX or XMM registers.
-	const SimdImpl_DestRegEither<0x66,0x0038> B;
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-class SimdImpl_PUnpack
-{
-public:
-	SimdImpl_PUnpack() {}
-	
-	// Unpack and interleave low-order bytes from src and dest into dest.
-	const SimdImpl_DestRegEither<0x66,0x60> LBW;
-	// Unpack and interleave low-order words from src and dest into dest.
-	const SimdImpl_DestRegEither<0x66,0x61> LWD;
-	// Unpack and interleave low-order doublewords from src and dest into dest.
-	const SimdImpl_DestRegEither<0x66,0x62> LDQ;
-	// Unpack and interleave low-order quadwords from src and dest into dest.
-	const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;
-
-	// Unpack and interleave high-order bytes from src and dest into dest.
-	const SimdImpl_DestRegEither<0x66,0x68> HBW;
-	// Unpack and interleave high-order words from src and dest into dest.
-	const SimdImpl_DestRegEither<0x66,0x69> HWD;
-	// Unpack and interleave high-order doublewords from src and dest into dest.
-	const SimdImpl_DestRegEither<0x66,0x6a> HDQ;
-	// Unpack and interleave high-order quadwords from src and dest into dest.
-	const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
-};
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// Pack with Signed or Unsigned Saturation
-//
-class SimdImpl_Pack
-{
-public:
-	SimdImpl_Pack() {}
-
-	// Converts packed signed word integers from src and dest into packed signed 
-	// byte integers in dest, using signed saturation.
-	const SimdImpl_DestRegEither<0x66,0x63> SSWB;
-
-	// Converts packed signed dword integers from src and dest into packed signed 
-	// word integers in dest, using signed saturation.
-	const SimdImpl_DestRegEither<0x66,0x6b> SSDW;
-
-	// Converts packed unsigned word integers from src and dest into packed unsigned 
-	// byte integers in dest, using unsigned saturation.
-	const SimdImpl_DestRegEither<0x66,0x67> USWB;
-
-	// [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed
-	// unsigned word integers in dest, using signed saturation.
-	const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
-};
-
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-class SimdImpl_Unpack
-{
-public:
-	SimdImpl_Unpack() {}
-
-	// Unpacks the high doubleword [single-precision] values from src and dest into
-	// dest, such that the result of dest looks like this:
-	//    dest[0] <- dest[2]
-	//    dest[1] <- src[2]
-	//    dest[2] <- dest[3]
-	//    dest[3] <- src[3]
-	//
-	const SimdImpl_DestRegSSE<0x00,0x15> HPS;
-
-	// Unpacks the high quadword [double-precision] values from src and dest into
-	// dest, such that the result of dest looks like this:
-	//    dest.lo <- dest.hi
-	//    dest.hi <- src.hi
-	//
-	const SimdImpl_DestRegSSE<0x66,0x15> HPD;
-
-	// Unpacks the low doubleword [single-precision] values from src and dest into
-	// dest, such that the result of dest looks like this:
-	//    dest[3] <- src[1]
-	//    dest[2] <- dest[1]
-	//    dest[1] <- src[0]
-	//    dest[0] <- dest[0]
-	//
-	const SimdImpl_DestRegSSE<0x00,0x14> LPS;
-
-	// Unpacks the low quadword [double-precision] values from src and dest into
-	// dest, effectively moving the low portion of src into the upper portion of dest.
-	// The result of dest is loaded as such:
-	//    dest.hi <- src.lo
-	//    dest.lo <- dest.lo  [remains unchanged!]
-	//
-	const SimdImpl_DestRegSSE<0x66,0x14> LPD;
-};
-
--- a/pcsx2/x86/ix86/implement/xmm/shufflepack.h
+++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h
@ -0,0 +1,306 @@
+/*  Pcsx2 - Pc Ps2 Emulator
+ *  Copyright (C) 2002-2009  Pcsx2 Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// SimdImpl_Shuffle - emitter for SSE instructions of the form "xmmreg, xmmreg/mem, imm8",
+// exposed as a packed-single (PS) and a packed-double (PD) variant of OpcodeSSE.
+//
+template< u16 OpcodeSSE >
+class SimdImpl_Shuffle
+{
+protected:
+	// One callable per prefix.  Every overload writes the instruction via writeXMMop and
+	// then appends the 8 bit immediate (cmptype) as the final byte.
+	template< u8 Prefix > struct Woot
+	{
+		Woot() {}
+
+		// Source is another SSE register.
+		__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const
+		{
+			writeXMMop( Prefix, OpcodeSSE, to, from );
+			xWrite<u8>( cmptype );
+		}
+
+		// Source is given as a raw pointer.
+		__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const
+		{
+			writeXMMop( Prefix, OpcodeSSE, to, from );
+			xWrite<u8>( cmptype );
+		}
+
+		// Source is given as a ModSibBase operand.
+		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const
+		{
+			writeXMMop( Prefix, OpcodeSSE, to, from );
+			xWrite<u8>( cmptype );
+		}
+	};
+
+public:
+	// User-provided default constructor, so that const instances of this class can be
+	// declared without an initializer.
+	SimdImpl_Shuffle() {}
+
+	const Woot<0x00> PS;		// emitted with Prefix 0x00
+	const Woot<0x66> PD;		// emitted with Prefix 0x66
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// SimdImpl_PShuffle - packed integer shuffle emitters.  The W/D/LW/HW forms take an
+// 8 bit immediate "order" operand; the B form takes its control mask from src.
+//
+class SimdImpl_PShuffle
+{
+public:
+	// Word shuffle (MMX registers): every word of dest is loaded from the word of src
+	// selected by the order operand (8 bit immediate).
+	const SimdImpl_DestRegImmMMX<0x00,0x70> W;
+
+	// Doubleword shuffle: every dword of dest is loaded from the dword of src selected
+	// by the order operand (8 bit immediate).
+	const SimdImpl_DestRegImmSSE<0x66,0x70> D;
+
+	// Low word shuffle: the words of the low quadword of dest are loaded from the words
+	// of the low quadword of src selected by the order operand (8 bit immediate).
+	// The high quadword of src is copied to the high quadword of dest unmodified.
+	const SimdImpl_DestRegImmSSE<0xf2,0x70> LW;
+
+	// High word shuffle: the words of the high quadword of dest are loaded from the words
+	// of the high quadword of src selected by the order operand (8 bit immediate).
+	// The low quadword of src is copied to the low quadword of dest unmodified.
+	const SimdImpl_DestRegImmSSE<0xf3,0x70> HW;
+
+	// [sSSE-3] Byte shuffle: permutes the bytes of dest in place, using src as the
+	// shuffle control mask.  For each control byte:
+	//   * if bit[7] is set, the corresponding result byte is written as zero;
+	//   * otherwise the low 4 bits (128-bit operation) or low 3 bits (64-bit operation)
+	//     form the index of the dest byte to copy into the result.
+	//
+	// Operands can be MMX or XMM registers.
+	const SimdImpl_DestRegEither<0x66,0x0038> B;
+
+	// User-provided default constructor, so that const instances of this class can be
+	// declared without an initializer.
+	SimdImpl_PShuffle() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// SimdImpl_PUnpack - packed integer unpack-and-interleave emitters.  Each form
+// interleaves elements of src and dest and stores the result in dest.
+//
+class SimdImpl_PUnpack
+{
+public:
+	// ---- low-order forms ----
+
+	// Interleaves the low-order bytes of src and dest.
+	const SimdImpl_DestRegEither<0x66,0x60> LBW;
+	// Interleaves the low-order words of src and dest.
+	const SimdImpl_DestRegEither<0x66,0x61> LWD;
+	// Interleaves the low-order doublewords of src and dest.
+	const SimdImpl_DestRegEither<0x66,0x62> LDQ;
+	// Interleaves the low-order quadwords of src and dest.
+	const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ;
+
+	// ---- high-order forms ----
+
+	// Interleaves the high-order bytes of src and dest.
+	const SimdImpl_DestRegEither<0x66,0x68> HBW;
+	// Interleaves the high-order words of src and dest.
+	const SimdImpl_DestRegEither<0x66,0x69> HWD;
+	// Interleaves the high-order doublewords of src and dest.
+	const SimdImpl_DestRegEither<0x66,0x6a> HDQ;
+	// Interleaves the high-order quadwords of src and dest.
+	const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ;
+
+	// User-provided default constructor, so that const instances of this class can be
+	// declared without an initializer.
+	SimdImpl_PUnpack() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Pack with Signed or Unsigned Saturation
+//
+// Naming: SS = signed saturation, US = unsigned saturation; WB = words to bytes,
+// DW = dwords to words.  In every form the *source* elements are signed integers; only
+// the saturation range (and therefore the type of the result elements) differs.
+//
+class SimdImpl_Pack
+{
+public:
+	// User-provided default constructor, so that const instances of this class can be
+	// declared without an initializer.
+	SimdImpl_Pack() {}
+
+	// Converts packed signed word integers from src and dest into packed signed 
+	// byte integers in dest, using signed saturation.
+	const SimdImpl_DestRegEither<0x66,0x63> SSWB;
+
+	// Converts packed signed dword integers from src and dest into packed signed 
+	// word integers in dest, using signed saturation.
+	const SimdImpl_DestRegEither<0x66,0x6b> SSDW;
+
+	// Converts packed signed word integers from src and dest into packed unsigned 
+	// byte integers in dest, using unsigned saturation.
+	const SimdImpl_DestRegEither<0x66,0x67> USWB;
+
+	// [SSE-4.1] Converts packed signed dword integers from src and dest into packed
+	// unsigned word integers in dest, using unsigned saturation.
+	const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// SimdImpl_Unpack - floating point unpack-and-interleave emitters.
+// H* forms interleave the high halves of dest and src; L* forms interleave the low halves.
+//
+class SimdImpl_Unpack
+{
+public:
+	// High, single-precision: interleaves the two high doublewords of dest and src.
+	// Resulting layout of dest:
+	//    dest[0] <- dest[2]
+	//    dest[1] <- src[2]
+	//    dest[2] <- dest[3]
+	//    dest[3] <- src[3]
+	//
+	const SimdImpl_DestRegSSE<0x00,0x15> HPS;
+
+	// High, double-precision: interleaves the high quadwords of dest and src.
+	// Resulting layout of dest:
+	//    dest.lo <- dest.hi
+	//    dest.hi <- src.hi
+	//
+	const SimdImpl_DestRegSSE<0x66,0x15> HPD;
+
+	// Low, single-precision: interleaves the two low doublewords of dest and src.
+	// Resulting layout of dest:
+	//    dest[0] <- dest[0]
+	//    dest[1] <- src[0]
+	//    dest[2] <- dest[1]
+	//    dest[3] <- src[1]
+	//
+	const SimdImpl_DestRegSSE<0x00,0x14> LPS;
+
+	// Low, double-precision: interleaves the low quadwords of dest and src, which
+	// effectively moves the low portion of src into the upper portion of dest.
+	// Resulting layout of dest:
+	//    dest.lo <- dest.lo  [remains unchanged!]
+	//    dest.hi <- src.lo
+	//
+	const SimdImpl_DestRegSSE<0x66,0x14> LPD;
+
+	// User-provided default constructor, so that const instances of this class can be
+	// declared without an initializer.
+	SimdImpl_Unpack() {}
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// PINSRW/B/D [all but Word form are SSE4.1 only!]
+//
+// Every form writes the instruction through writeXMMop and then appends the 8 bit
+// immediate (imm8) as the final byte.
+//
+class SimdImpl_PInsert
+{
+protected:
+	// Byte and Dword forms.  Opcode is placed in the high byte of a 16 bit opcode value
+	// whose low byte is 0x3a, and the instruction is always written with the 0x66 prefix.
+	template< u16 Opcode >
+	class ByteDwordForms
+	{
+	public:
+		// User-provided default constructor, so that const instances can be declared
+		// without an initializer.
+		ByteDwordForms() {}
+
+		// Source is a 32 bit general purpose register.
+		__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite<u8>( imm8 );
+		}
+
+		// Source is given as a raw pointer.
+		__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite<u8>( imm8 );
+		}
+
+		// Source is given as a ModSibBase operand.
+		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite<u8>( imm8 );
+		}
+	};
+
+public:
+	SimdImpl_PInsert() {}
+
+	// Word form, 32 bit register source.
+	// The destination can be either an MMX or an SSE register.
+	template< typename T >
+	__forceinline void W( const xRegisterSIMD<T>& to, const xRegister32& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite<u8>( imm8 );
+	}
+
+	// Word form, raw pointer source.
+	// The destination can be either an MMX or an SSE register.
+	template< typename T >
+	__forceinline void W( const xRegisterSIMD<T>& to, const void* from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite<u8>( imm8 );
+	}
+
+	// Word form, ModSibBase source.
+	// The destination can be either an MMX or an SSE register.
+	template< typename T >
+	__noinline void W( const xRegisterSIMD<T>& to, const ModSibBase& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite<u8>( imm8 );
+	}
+
+	// [SSE-4.1] Byte form (opcode value 0x203a).
+	const ByteDwordForms<0x20> B;
+
+	// [SSE-4.1] Dword form (opcode value 0x223a).
+	const ByteDwordForms<0x22> D;
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// PEXTRW/B/D [all but Word form are SSE4.1 only!]
+//
+// Note: Word form's indirect memory form is only available in SSE4.1.
+//
+// Every form writes the instruction through writeXMMop and then appends the 8 bit
+// immediate (imm8) as the final byte.
+//
+class SimdImpl_PExtract
+{
+protected:
+	// Byte and Dword forms.  Opcode is placed in the high byte of a 16 bit opcode value
+	// whose low byte is 0x3a, and the instruction is always written with the 0x66 prefix.
+	template< u16 Opcode >
+	class ByteDwordForms
+	{
+	public:
+		// User-provided default constructor, so that const instances can be declared
+		// without an initializer.
+		ByteDwordForms() {}
+
+		// Destination is a 32 bit general purpose register.
+		// NOTE(review): in the Intel encoding of these opcodes the XMM source occupies
+		// ModRM.reg and the destination occupies ModRM.r/m -- confirm that writeXMMop
+		// produces that layout for this (to, from) argument order.
+		__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite<u8>( imm8 );
+		}
+
+		// Destination is given as a raw pointer.  The register operand is passed first
+		// and the memory operand second, same as the memory forms of W below.
+		__forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
+			xWrite<u8>( imm8 );
+		}
+
+		// Destination is given as a ModSibBase operand.  The register operand is passed
+		// first and the memory operand second, same as the memory forms of W below.
+		__noinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
+			xWrite<u8>( imm8 );
+		}
+	};
+
+public:
+	SimdImpl_PExtract() {}
+
+	// Copies the word element specified by imm8 from src to dest.  The upper bits
+	// of dest are zero-extended (cleared).  This can be used to extract any single packed
+	// word value from src into an x86 32 bit register.
+	//
+	// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
+	//
+	template< typename T >
+	__forceinline void W( const xRegister32& to, const xRegisterSIMD<T>& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc5, to, from, true );
+		xWrite<u8>( imm8 );
+	}
+
+	// [SSE-4.1] Word form with a raw pointer destination (opcode value 0x153a).
+	__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0x153a, from, dest );
+		xWrite<u8>( imm8 );
+	}
+
+	// [SSE-4.1] Word form with a ModSibBase destination (opcode value 0x153a).
+	__noinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0x153a, from, dest );
+		xWrite<u8>( imm8 );
+	}
+
+	// [SSE-4.1] Copies the byte element specified by imm8 from src to dest.  The upper bits
+	// of dest are zero-extended (cleared).  This can be used to extract any single packed
+	// byte value from src into an x86 32 bit register.
+	const ByteDwordForms<0x14> B;
+
+	// [SSE-4.1] Copies the dword element specified by imm8 from src to dest.  This can be
+	// used to extract any single packed dword value from src into an x86 32 bit register.
+	const ByteDwordForms<0x16> D;
+};
--- a/pcsx2/x86/ix86/ix86.cpp
+++ b/pcsx2/x86/ix86/ix86.cpp
@ -641,18 +641,25 @@ __emitinline void xBSWAP( const xRegister32& to )
 // MMX / XMM Instructions
 // (these will get put in their own file later)

-// If the upper 8 bits of opcode are zero, the opcode is treated as a u8.
-// The upper bits are non-zero, the opcode is assumed 16 bit (and the upper bits are checked aginst
-// 0x38, which is the only valid high word for 16 bit opcodes as such)
+// ------------------------------------------------------------------------
+// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
+// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4
+// instructions).  For any other lower byte, the upper byte is assumed to be 0 and is
+// ignored.  A non-zero upper byte, when the lower byte is not the 0x38 or 0x3a prefix,
+// will generate an assertion.
+//
 __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
 {
+	const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a);
+
+	// If the lower byte is not a valid prefix and the upper byte is non-zero, it
+	// means we made a mistake!
+	if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 );
+
 	if( prefix != 0 )
 	{
-		if( (opcode & 0xff00) != 0 )
-		{
-			jASSUME( (opcode & 0xff00) == 0x3800 );
-			xWrite<u32>( (opcode<<16) | (0x0f00 | prefix) );
-		}
+		if( is16BitOpcode )
+			xWrite<u32>( (opcode<<16) | 0x0f00 | prefix );
 		else
 		{
 			xWrite<u16>( 0x0f00 | prefix );
@ -661,9 +668,9 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
 	}
 	else
 	{
-		if( (opcode & 0xff00) != 0 )
+		if( is16BitOpcode )
 		{
-			jASSUME( (opcode & 0xff00) == 0x3800 );
+			xWrite<u8>( 0x0f );
 			xWrite<u16>( opcode );
 		}
 		else
@ -671,6 +678,11 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode )
 	}
 }

+// [SSE-3] MOVSLDUP emitter, instantiated with prefix 0xf3 and opcode 0x12.
+const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
+// [SSE-3] MOVSHDUP emitter, instantiated with prefix 0xf3 and opcode 0x16.
+const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
+
 const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; 
 const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS;
 const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD;
@ -689,20 +701,20 @@ const MovhlImplAll<0x12> xMOVL;
 const MovhlImpl_RtoR<0x16> xMOVLH;
 const MovhlImpl_RtoR<0x12> xMOVHL;

-const SimdImpl_PackedLogic<0xdb> xPAND;
-const SimdImpl_PackedLogic<0xdf> xPANDN;
-const SimdImpl_PackedLogic<0xeb> xPOR;
-const SimdImpl_PackedLogic<0xef> xPXOR;
+const SimdImpl_DestRegEither<0x66,0xdb> xPAND;
+const SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
+const SimdImpl_DestRegEither<0x66,0xeb> xPOR;
+const SimdImpl_DestRegEither<0x66,0xef> xPXOR;

-const SimdImpl_AndNot<0x55> xANDN;
+const SimdImpl_AndNot xANDN;

-const SimdImpl_SS_SD<0x66,0x2e> xUCOMI;
+const SimdImpl_UcomI<0x66,0x2e> xUCOMI;
 const SimdImpl_rSqrt<0x53> xRCP;
 const SimdImpl_rSqrt<0x52> xRSQRT;
 const SimdImpl_Sqrt<0x51> xSQRT;

-const SimdImpl_PSPD_SSSD<0x5f> xMAX;
-const SimdImpl_PSPD_SSSD<0x5d> xMIN;
+const SimdImpl_MinMax<0x5f> xMAX;
+const SimdImpl_MinMax<0x5d> xMIN;
 const SimdImpl_Shuffle<0xc6> xSHUF;

 // ------------------------------------------------------------------------
@ -754,8 +766,8 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32>		xCVTTSS2S

 // ------------------------------------------------------------------------

-const SimdImpl_ShiftAll<0xd0, 2> xPSRL;
-const SimdImpl_ShiftAll<0xf0, 6> xPSLL;
+const SimdImpl_Shift<0xd0, 2> xPSRL;
+const SimdImpl_Shift<0xf0, 6> xPSLL;
 const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;

 const SimdImpl_AddSub<0xdc, 0xd4> xPADD;
@ -770,10 +782,29 @@ const SimdImpl_PUnpack xPUNPCK;
 const SimdImpl_Unpack xUNPCK;
 const SimdImpl_Pack xPACK;

+const SimdImpl_PAbsolute xPABS;
+const SimdImpl_PSign xPSIGN;
+const SimdImpl_PInsert xPINS;
+const SimdImpl_PExtract xPEXTR;
+

 //////////////////////////////////////////////////////////////////////////////////////////
 //

+// STMXCSR: Store Streaming SIMD Extension Control/Status (MXCSR) to the Mem32 at 'dest'.
+__emitinline void xSTMXCSR( u32* dest )
+{
+	SimdPrefix( 0, 0xae );		// prefix 0 = no SIMD prefix; 0xae is the same opcode byte xLDMXCSR emits
+	xWriteDisp( 3, dest );		// 3 selects the store form (xLDMXCSR passes 2) -- presumably the ModRM reg field, TODO confirm
+}
+
+// LDMXCSR: Load Streaming SIMD Extension Control/Status (MXCSR) from the Mem32 at 'src'.
+__emitinline void xLDMXCSR( const u32* src )
+{
+	SimdPrefix( 0, 0xae );		// prefix 0 = no SIMD prefix; 0xae is the same opcode byte xSTMXCSR emits
+	xWriteDisp( 2, src );		// 2 selects the load form (xSTMXCSR passes 3) -- presumably the ModRM reg field, TODO confirm
+}
+

 // Moves from XMM to XMM, with the *upper 64 bits* of the destination register
 // being cleared to zero.
@ -851,5 +882,8 @@ __noinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from )	{ wri
 __forceinline void xMOVNTQ( void* to, const xRegisterMMX& from )			{ writeXMMop( 0xe7, from, to ); }
 __noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from )	{ writeXMMop( 0xe7, from, to ); }

+__forceinline void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from)	{ writeXMMop( 0x50, to, from ); }	// NOTE(review): 'from' is a non-const ref, but the legacy wrapper passes a temporary xRegisterSSE(...); confirm this (and its extern declaration) should take const&
+__forceinline void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from)	{ writeXMMop( 0x66, 0x50, to, from, true ); }	// NOTE(review): 'from' is a non-const ref, but the legacy wrapper passes a temporary xRegisterSSE(...); confirm this (and its extern declaration) should take const&
+

 }
--- a/pcsx2/x86/ix86/ix86_instructions.h
+++ b/pcsx2/x86/ix86/ix86_instructions.h
@ -370,8 +370,23 @@ namespace x86Emitter
 	template< typename T >
 	static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD<T>& from )	{ Internal::writeXMMop( 0x66, 0xd7, to, from ); }
 	
+	// [sSSE-3] Concatenates dest and source operands into an intermediate composite,
+	// shifts the composite at byte granularity to the right by a constant immediate,
+	// and extracts the right-aligned result into the destination.
+	// 'to' is the destination, 'from' the source, and 'imm8' the constant byte shift.
+	template< typename T >
+	static __forceinline void xPALIGNR( const xRegisterSIMD<T>& to, const xRegisterSIMD<T>& from, u8 imm8 )
+	{
+		Internal::writeXMMop( 0x66, 0x0f3a, to, from );		// 0x66 prefix; low byte 0x3a flags a 16 bit opcode whose high byte (0x0f) is the opcode proper -- presumably routed through SimdPrefix
+		xWrite<u8>( imm8 );		// the immediate byte is written last, after the operand encoding
+	}
+
+
 	// ------------------------------------------------------------------------
-	
+
+	extern void xSTMXCSR( u32* dest );
+	extern void xLDMXCSR( const u32* src );
+
 	extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from );
 	extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from );
 	extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from );
@ -411,8 +426,14 @@ namespace x86Emitter
 	extern void xMOVNTQ( void* to, const xRegisterMMX& from );
 	extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from );

+	extern void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from );
+	extern void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from );
+
 	// ------------------------------------------------------------------------

+	extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP;
+	extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP;
+
 	extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS;
 	extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS;

@ -435,29 +456,29 @@ namespace x86Emitter

 	// ------------------------------------------------------------------------
 	
-	extern const Internal::SimdImpl_PackedLogic<0xdb> xPAND;
-	extern const Internal::SimdImpl_PackedLogic<0xdf> xPANDN;
-	extern const Internal::SimdImpl_PackedLogic<0xeb> xPOR;
-	extern const Internal::SimdImpl_PackedLogic<0xef> xPXOR;
+	extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND;
+	extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN;
+	extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR;
+	extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR;

-	extern const Internal::SimdImpl_AndNot<0x55> xANDN;
+	extern const Internal::SimdImpl_AndNot xANDN;

-	extern const Internal::SimdImpl_SS_SD<0x66,0x2e> xUCOMI;
+	extern const Internal::SimdImpl_UcomI<0x66,0x2e> xUCOMI;
 	extern const Internal::SimdImpl_rSqrt<0x53> xRCP;
 	extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT;
 	extern const Internal::SimdImpl_Sqrt<0x51> xSQRT;
 	
-	extern const Internal::SimdImpl_PSPD_SSSD<0x5f> xMAX;
-	extern const Internal::SimdImpl_PSPD_SSSD<0x5d> xMIN;
+	extern const Internal::SimdImpl_MinMax<0x5f> xMAX;
+	extern const Internal::SimdImpl_MinMax<0x5d> xMIN;
 	extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF;

 	// ------------------------------------------------------------------------
 	
-	extern const Internal::SimdImpl_Compare<SSE2_Equal>		xCMPEQ;
-	extern const Internal::SimdImpl_Compare<SSE2_Less>		xCMPLT;
+	extern const Internal::SimdImpl_Compare<SSE2_Equal>			xCMPEQ;
+	extern const Internal::SimdImpl_Compare<SSE2_Less>			xCMPLT;
 	extern const Internal::SimdImpl_Compare<SSE2_LessOrEqual>	xCMPLE;
-	extern const Internal::SimdImpl_Compare<SSE2_Unordered>	xCMPUNORD;
-	extern const Internal::SimdImpl_Compare<SSE2_NotEqual>	xCMPNE;
+	extern const Internal::SimdImpl_Compare<SSE2_Unordered>		xCMPUNORD;
+	extern const Internal::SimdImpl_Compare<SSE2_NotEqual>		xCMPNE;
 	extern const Internal::SimdImpl_Compare<SSE2_NotLess>		xCMPNLT;
 	extern const Internal::SimdImpl_Compare<SSE2_NotLessOrEqual> xCMPNLE;
 	extern const Internal::SimdImpl_Compare<SSE2_Ordered>		xCMPORD;
@ -497,8 +518,8 @@ namespace x86Emitter
 	
 	// ------------------------------------------------------------------------
 	
-	extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL;
-	extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL;
+	extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL;
+	extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL;
 	extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA;

 	extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD;
@ -512,5 +533,11 @@ namespace x86Emitter
 	extern const Internal::SimdImpl_PUnpack xPUNPCK;
 	extern const Internal::SimdImpl_Unpack xUNPCK;
 	extern const Internal::SimdImpl_Pack xPACK;
+	
+	extern const Internal::SimdImpl_PAbsolute xPABS;
+	extern const Internal::SimdImpl_PSign xPSIGN;
+	extern const Internal::SimdImpl_PInsert xPINS;
+	extern const Internal::SimdImpl_PExtract xPEXTR;
+
 }

--- a/pcsx2/x86/ix86/ix86_legacy_instructions.h
+++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h
@ -1351,7 +1351,6 @@ extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from);
 extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
 extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
 extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
-extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8);

 // SSE4.1

--- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp
+++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp
@ -95,9 +95,13 @@ using namespace x86Emitter;
 	emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from )			{ x##mod.SD( xRegisterSSE(to), (void*)from ); } \
 	emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); }

-#define DEFINE_LEGACY_OP128( mod, sub ) \
-	emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
-	emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from )			{ x##mod.sub( xRegisterSSE(to), (void*)from ); }
+#define DEFINE_LEGACY_OP128( ssenum, mod, sub ) \
+	emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
+	emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from )			{ x##mod.sub( xRegisterSSE(to), (void*)from ); }
+
+#define DEFINE_LEGACY_MOV128( ssenum, mod, sub ) \
+	emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ x##mod##sub( xRegisterSSE(to), xRegisterSSE(from) ); } \
+	emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from )			{ x##mod##sub( xRegisterSSE(to), (void*)from ); }


 #define DEFINE_LEGACY_PSSD_OPCODE( mod ) \
@ -136,23 +140,31 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP )
 DEFINE_LEGACY_RSQRT_OPCODE( RSQRT )
 DEFINE_LEGACY_SQRT_OPCODE( SQRT )

-DEFINE_LEGACY_OP128( PMUL, LW )
-DEFINE_LEGACY_OP128( PMUL, HW )
-DEFINE_LEGACY_OP128( PMUL, UDQ )
+DEFINE_LEGACY_OP128( 2, PMUL, LW )
+DEFINE_LEGACY_OP128( 2, PMUL, HW )
+DEFINE_LEGACY_OP128( 2, PMUL, UDQ )

-DEFINE_LEGACY_OP128( PMAX, SW )
-DEFINE_LEGACY_OP128( PMAX, UB )
-DEFINE_LEGACY_OP128( PMIN, SW )
-DEFINE_LEGACY_OP128( PMIN, UB )
+DEFINE_LEGACY_OP128( 2, PMAX, SW )
+DEFINE_LEGACY_OP128( 2, PMAX, UB )
+DEFINE_LEGACY_OP128( 2, PMIN, SW )
+DEFINE_LEGACY_OP128( 2, PMIN, UB )

-DEFINE_LEGACY_OP128( UNPCK, LPS )
-DEFINE_LEGACY_OP128( UNPCK, HPS )
-DEFINE_LEGACY_OP128( PUNPCK, LQDQ )
-DEFINE_LEGACY_OP128( PUNPCK, HQDQ )
+DEFINE_LEGACY_OP128( 2, UNPCK, LPS )
+DEFINE_LEGACY_OP128( 2, UNPCK, HPS )
+DEFINE_LEGACY_OP128( 2, PUNPCK, LQDQ )
+DEFINE_LEGACY_OP128( 2, PUNPCK, HQDQ )

-DEFINE_LEGACY_OP128( PACK, SSWB )
-DEFINE_LEGACY_OP128( PACK, SSDW )
-DEFINE_LEGACY_OP128( PACK, USWB )
+DEFINE_LEGACY_OP128( 2, PACK, SSWB )
+DEFINE_LEGACY_OP128( 2, PACK, SSDW )
+DEFINE_LEGACY_OP128( 2, PACK, USWB )
+
+DEFINE_LEGACY_MOV128( 3, MOV, SLDUP )
+DEFINE_LEGACY_MOV128( 3, MOV, SHDUP )
+
+DEFINE_LEGACY_OP128( 4, PMAX, SD )
+DEFINE_LEGACY_OP128( 4, PMIN, SD )
+DEFINE_LEGACY_OP128( 4, PMAX, UD )
+DEFINE_LEGACY_OP128( 4, PMIN, UD )


 emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); }
@ -201,11 +213,11 @@ emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from )						{ xMOVH.P
 emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset )	{ xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); }
 emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset )	{ xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); }

-emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
-emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
+emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )			{ xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); }
+emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )			{ xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); }

-emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); }
-emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from)	{ xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); }
+emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )			{ xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); }
+emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from)			{ xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); }

 emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )	{ xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
 emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )			{ xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); }
@ -247,16 +259,6 @@ emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )
 emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )	{ xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); }
 emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )			{ xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); }

-emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xPMAX.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
-emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from )			{ xPMAX.SD( xRegisterSSE(to), (void*)from ); }
-emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xPMIN.SD( xRegisterSSE(to), xRegisterSSE(from) ); }
-emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from )			{ xPMIN.SD( xRegisterSSE(to), (void*)from ); }
-
-emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xPMAX.UD( xRegisterSSE(to), xRegisterSSE(from) ); }
-emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from )			{ xPMAX.UD( xRegisterSSE(to), (void*)from ); }
-emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xPMIN.UD( xRegisterSSE(to), xRegisterSSE(from) ); }
-emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from )			{ xPMIN.UD( xRegisterSSE(to), (void*)from ); }
-
 emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)		{ xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); }

 emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from )			{ xUNPCK.LPS( xRegisterSSE(to), (void*)from ); }
@ -264,113 +266,35 @@ emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{
 emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from )			{ xUNPCK.HPS( xRegisterSSE(to), (void*)from ); }
 emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )	{ xUNPCK.HPS( xRegisterSSE(to), xRegisterSSE(from) ); }

+emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from)		{ xMOVMSKPS( xRegister32(to), xRegisterSSE(from) ); }
+emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from)	{ xMOVMSKPD( xRegister32(to), xRegisterSSE(from) ); }
+
+emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)		{ xPABS.B( xRegisterSSE(to), xRegisterSSE(from) ); }
+emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)		{ xPABS.W( xRegisterSSE(to), xRegisterSSE(from) ); }
+emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)		{ xPABS.D( xRegisterSSE(to), xRegisterSSE(from) ); }
+
+emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)		{ xPSIGN.B( xRegisterSSE(to), xRegisterSSE(from) ); }
+emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)		{ xPSIGN.W( xRegisterSSE(to), xRegisterSSE(from) ); }
+emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)		{ xPSIGN.D( xRegisterSSE(to), xRegisterSSE(from) ); }
+
+emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 )	{ xPEXTR.W( xRegister32(to), xRegisterSSE(from), imm8 ); }
+emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 )	{ xPINS.W( xRegisterSSE(to), xRegister32(from), imm8 ); }
+
+emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); }
+
 //////////////////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////////////////

-/////////////////////////////////////////////////////////////////////////////////////////
-//**********************************************************************************/
-//STMXCSR : Store Streaming SIMD Extension Control/Status                         *
-//**********************************************************************************
-emitterT void SSE_STMXCSR( uptr from ) {
-	write16( 0xAE0F );
-	ModRM( 0, 0x3, DISP32 );
-	write32( MEMADDR(from, 4) );
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-//**********************************************************************************/
-//LDMXCSR : Load Streaming SIMD Extension Control/Status                         *
-//**********************************************************************************
-emitterT void SSE_LDMXCSR( uptr from ) {
-	write16( 0xAE0F );
-	ModRM( 0, 0x2, DISP32 );
-	write32( MEMADDR(from, 4) );
-}
-
 ////////////////////////////////////////////////////////////////////////////////////////////
 //**********************************************************************************/
 //PEXTRW,PINSRW: Packed Extract/Insert Word                                        *
-//**********************************************************************************
-emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); }
-emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); }
-
-emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from)	{ SSERtoR(0x500F); }
-emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from)	{ SSERtoR66(0x500F); }
+//**********************************************************************************

 emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)	{ SSERtoR66(0xF50F); }

 emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)		{ write8(0xf2); SSERtoR( 0x7c0f ); }
 emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from)				{ write8(0xf2); SSEMtoR( 0x7c0f, 0 ); }

-emitterT void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
-	write8(0xf3);
-    RexRB(0, to, from);
-	write16( 0x120f);
-	ModRM( 3, to, from );
-}
-
-emitterT void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from)			{ write8(0xf3); SSEMtoR(0x120f, 0); }
-emitterT void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from)	{ write8(0xf3); SSERtoR(0x160f); }
-emitterT void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from)			{ write8(0xf3); SSEMtoR(0x160f, 0); }
-
-// SSSE3
-
-emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
-{
-	write8(0x66);
-	RexRB(0, to, from);
-	write24(0x1C380F);
-	ModRM(3, to, from);
-}
-
-emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
-{
-	write8(0x66);
-	RexRB(0, to, from);
-	write24(0x1D380F);
-	ModRM(3, to, from);
-}
-
-emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
-{
-	write8(0x66);
-	RexRB(0, to, from);
-	write24(0x1E380F);
-	ModRM(3, to, from);
-}
-
-emitterT void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
-{
-	write8(0x66);
-	RexRB(0, to, from);
-	write24(0x0F3A0F);
-	ModRM(3, to, from);
-	write8(imm8);
-}
-
-emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
-{
-	write8(0x66);
-	RexRB(0, to, from);
-	write24(0x08380F);
-	ModRM(3, to, from);
-}
-
-emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
-{
-	write8(0x66);
-	RexRB(0, to, from);
-	write24(0x09380F);
-	ModRM(3, to, from);
-}
-
-emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
-{
-	write8(0x66);
-	RexRB(0, to, from);
-	write24(0x0A380F);
-	ModRM(3, to, from);
-}

 // SSE4.1

--- a/pcsx2/x86/ix86/ix86_types.h
+++ b/pcsx2/x86/ix86/ix86_types.h
@ -697,7 +697,11 @@ namespace x86Emitter
 		template< typename T > bool Is8BitOp() { return sizeof(T) == 1; }
 		template< typename T > void prefix16() { if( sizeof(T) == 2 ) xWrite<u8>( 0x66 ); }

-		#include "implement/xmm/movqss.h"
+		#include "implement/xmm/basehelpers.h"
+		#include "implement/xmm/moremovs.h"
+		#include "implement/xmm/arithmetic.h"
+		#include "implement/xmm/comparisons.h"
+		#include "implement/xmm/shufflepack.h"
 		#include "implement/group1.h"
 		#include "implement/group2.h"
 		#include "implement/group3.h"