From 2b5964199c7ec4d31a9d9a7123fe14da1a3643aa Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Fri, 6 Nov 2009 21:45:30 +0000 Subject: [PATCH] Emitter rewrite, part 4 of 5: De-templificated all x86 base integer operations. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2142 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/build/x86emitter/x86emitter.vcproj | 12 +- common/include/x86emitter/implement/dwshift.h | 40 +- common/include/x86emitter/implement/group1.h | 171 +-- common/include/x86emitter/implement/group2.h | 79 +- common/include/x86emitter/implement/group3.h | 130 +- common/include/x86emitter/implement/helpers.h | 80 +- common/include/x86emitter/implement/incdec.h | 38 +- common/include/x86emitter/implement/jmpcall.h | 21 +- common/include/x86emitter/implement/movs.h | 223 +--- .../x86emitter/implement/simd_helpers.h | 9 + .../x86emitter/implement/simd_moremovs.h | 12 +- common/include/x86emitter/implement/test.h | 102 +- common/include/x86emitter/implement/xchg.h | 5 + common/include/x86emitter/inlines.inl | 228 +--- common/include/x86emitter/instructions.h | 146 +-- common/include/x86emitter/internal.h | 49 + common/include/x86emitter/x86emitter.h | 4 +- common/include/x86emitter/x86types.h | 1151 ++++++++++------- common/src/x86emitter/groups.cpp | 287 ++++ common/src/x86emitter/jmp.cpp | 68 +- common/src/x86emitter/legacy.cpp | 66 +- common/src/x86emitter/movs.cpp | 268 ++++ common/src/x86emitter/simd.cpp | 37 +- common/src/x86emitter/x86emitter.cpp | 661 ++++++---- 24 files changed, 2124 insertions(+), 1763 deletions(-) create mode 100644 common/src/x86emitter/groups.cpp create mode 100644 common/src/x86emitter/movs.cpp diff --git a/common/build/x86emitter/x86emitter.vcproj b/common/build/x86emitter/x86emitter.vcproj index 86d6cf74a6..5fd01c446b 100644 --- a/common/build/x86emitter/x86emitter.vcproj +++ b/common/build/x86emitter/x86emitter.vcproj @@ -203,6 +203,10 @@ RelativePath="..\..\src\x86emitter\fpu.cpp" > + + @@ -215,6 +219,10 @@ RelativePath="..\..\src\x86emitter\legacy_sse.cpp" > + + @@ -352,10 +360,6 @@ RelativePath="..\..\include\x86emitter\implement\simd_shufflepack.h" > - - -class DwordShiftImplAll +struct xImpl_DwordShift { - static const u8 m_shiftop = isShiftRight ? 
0x8 : 0; + u16 OpcodeBase; -public: - // ---------- 32 Bit Interface ----------- - __forceinline void operator()( const xRegister32& to, const xRegister32& from, const xRegisterCL& /* clreg */ ) const { xOpWrite0F( 0xa5 | m_shiftop, to, from ); } - __forceinline void operator()( void* dest, const xRegister32& from, const xRegisterCL& /* clreg */ ) const { xOpWrite0F( 0xa5 | m_shiftop, from, dest ); } - __forceinline void operator()( const ModSibBase& dest, const xRegister32& from, const xRegisterCL& /* clreg */ ) const { xOpWrite0F( 0xa5 | m_shiftop, from, dest ); } - __forceinline void operator()( const xRegister32& to, const xRegister32& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0xa4 | m_shiftop, to, from ); } - __forceinline void operator()( void* dest, const xRegister32& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0xa4 | m_shiftop, from, dest, shiftcnt ); } - __forceinline void operator()( const ModSibBase& dest, const xRegister32& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0xa4 | m_shiftop, from, dest, shiftcnt ); } + void operator()( const xRegister32& to, const xRegister32& from, const xRegisterCL& clreg ) const; + void operator()( const xRegister16& to, const xRegister16& from, const xRegisterCL& clreg ) const; + void operator()( const xRegister32& to, const xRegister32& from, u8 shiftcnt ) const; + void operator()( const xRegister16& to, const xRegister16& from, u8 shiftcnt ) const; - // ---------- 16 Bit Interface ----------- - __forceinline void operator()( const xRegister16& to, const xRegister16& from, const xRegisterCL& /* clreg */ ) const { xOpWrite0F( 0x66, 0xa5 | m_shiftop, to, from ); } - __forceinline void operator()( void* dest, const xRegister16& from, const xRegisterCL& /* clreg */ ) const { xOpWrite0F( 0x66, 0xa5 | m_shiftop, from, dest ); } - __forceinline void operator()( const ModSibBase& dest, const xRegister16& from, const xRegisterCL& /* clreg */ ) const { xOpWrite0F( 0x66, 0xa5 | m_shiftop, from, dest ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0x66, 0xa4 | m_shiftop, to, from ); } - __forceinline void operator()( void* dest, const xRegister16& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0x66, 0xa4 | m_shiftop, from, dest, shiftcnt ); } - __forceinline void operator()( const ModSibBase& dest, const xRegister16& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0x66, 0xa4 | m_shiftop, from, dest, shiftcnt ); } - - DwordShiftImplAll() {} // Why does GCC need these? + void operator()( const ModSibBase& dest,const xRegister16or32& from, const xRegisterCL& clreg ) const; + void operator()( const ModSibBase& dest,const xRegister16or32& from, u8 shiftcnt ) const; }; +} // End namespace x86Emitter diff --git a/common/include/x86emitter/implement/group1.h b/common/include/x86emitter/implement/group1.h index 28ddf1eed0..f52066f879 100644 --- a/common/include/x86emitter/implement/group1.h +++ b/common/include/x86emitter/implement/group1.h @@ -15,8 +15,7 @@ #pragma once -// Note: This header is meant to be included from within the x86Emitter::Internal namespace. 
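[Editor's note] As a reference for the double-precision shift forms above: SHLD/SHRD share one opcode base, with bit 3 selecting the right-shift variant and the low bit selecting the CL form over the imm8 form. A minimal standalone sketch of that selection (the function name is illustrative, not the emitter's API):

#include <cstdint>
#include <cstdio>

// Second opcode byte (after the mandatory 0x0F escape) for SHLD/SHRD,
// mirroring the "0xA4 | shiftop" scheme above:
//   0F A4 = SHLD r/m,r,imm8   0F A5 = SHLD r/m,r,CL
//   0F AC = SHRD r/m,r,imm8   0F AD = SHRD r/m,r,CL
static uint8_t DwordShiftOpcode(bool isShiftRight, bool useCL)
{
    return (useCL ? 0xA5 : 0xA4) | (isShiftRight ? 0x08 : 0x00);
}

int main()
{
    std::printf("SHLD r/m32,r32,imm8 -> 0F %02X\n", DwordShiftOpcode(false, false));
    std::printf("SHRD r/m32,r32,CL   -> 0F %02X\n", DwordShiftOpcode(true,  true));
    // 16-bit operands additionally take a 0x66 operand-size prefix, and the
    // imm8 form is skipped entirely when the shift count is zero.
    return 0;
}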
-// Instructions implemented in this header are as follows -->> +namespace x86Emitter { enum G1Type { @@ -30,84 +29,23 @@ enum G1Type G1Type_CMP }; -// ------------------------------------------------------------------- -// -template< G1Type InstType > -class xImpl_Group1 +// -------------------------------------------------------------------------------------- +// xImpl_Group1 +// -------------------------------------------------------------------------------------- +struct xImpl_Group1 { -public: - // ------------------------------------------------------------------------ - template< typename T > __forceinline void operator()( const xRegister& to, const xRegister& from ) const - { - prefix16(); - xWrite8( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); - EmitSibMagic( from, to ); - } + G1Type InstType; - // ------------------------------------------------------------------------ - template< typename T > __noinline void operator()( const ModSibBase& sibdest, const xRegister& from ) const - { - prefix16(); - xWrite8( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); - EmitSibMagic( from, sibdest ); - } + void operator()( const xRegister8& to, const xRegister8& from ) const; + void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister32& to, const xRegister32& from ) const; - // ------------------------------------------------------------------------ - template< typename T > __noinline void operator()( const xRegister& to, const ModSibBase& sibsrc ) const - { - prefix16(); - xWrite8( (Is8BitOp() ? 2 : 3) | (InstType<<3) ); - EmitSibMagic( to, sibsrc ); - } + void operator()( const ModSibBase& to, const xRegisterInt& from ) const; + void operator()( const xRegisterInt& to, const ModSibBase& from ) const; + void operator()( const xRegisterInt& to, int imm ) const; + void operator()( const ModSib32orLess& to, int imm ) const; - // ------------------------------------------------------------------------ - // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler - // perspective. (using uint tends to make the compiler try and fail to match signed immediates with - // one of the other overloads). - - template< typename T > __noinline void operator()( const ModSibStrict& sibdest, int imm ) const - { - if( Is8BitOp() ) - { - xWrite8( 0x80 ); - EmitSibMagic( InstType, sibdest ); - xWrite( imm ); - } - else - { - prefix16(); - xWrite8( is_s8( imm ) ? 0x83 : 0x81 ); - EmitSibMagic( InstType, sibdest ); - if( is_s8( imm ) ) - xWrite( imm ); - else - xWrite( imm ); - } - } - - // ------------------------------------------------------------------------ - template< typename T > __forceinline void operator()( const xRegister& to, int imm ) const - { - prefix16(); - if( !Is8BitOp() && is_s8( imm ) ) - { - xWrite8( 0x83 ); - EmitSibMagic( InstType, to ); - xWrite( imm ); - } - else - { - if( to.IsAccumulator() ) - xWrite8( (Is8BitOp() ? 4 : 5) | (InstType<<3) ); - else - { - xWrite8( Is8BitOp() ? 
0x80 : 0x81 ); - EmitSibMagic( InstType, to ); - } - xWrite( imm ); - } - } - +#if 0 // ------------------------------------------------------------------------ template< typename T > __noinline void operator()( const ModSibBase& to, const xImmReg& immOrReg ) const { @@ -129,68 +67,83 @@ public: _DoI_helpermess( *this, to, from ); } - template< typename T > __noinline void operator()( const xRegister& to, const xDirectOrIndirect& from ) const + // FIXME : Make this struct to 8, 16, and 32 bit registers + template< typename T > __noinline void operator()( const xRegisterBase& to, const xDirectOrIndirect& from ) const { _DoI_helpermess( *this, xDirectOrIndirect( to ), from ); } - template< typename T > __noinline void operator()( const xDirectOrIndirect& to, const xRegister& from ) const + // FIXME : Make this struct to 8, 16, and 32 bit registers + template< typename T > __noinline void operator()( const xDirectOrIndirect& to, const xRegisterBase& from ) const { _DoI_helpermess( *this, to, xDirectOrIndirect( from ) ); } - - xImpl_Group1() {} // Why does GCC need these? +#endif }; // ------------------------------------------------------------------------ // This class combines x86 with SSE/SSE2 logic operations (ADD, OR, and NOT). // Note: ANDN [AndNot] is handled below separately. // -template< G1Type InstType, u16 OpcodeSSE > -class xImpl_G1Logic : public xImpl_Group1 +struct xImpl_G1Logic { -public: - using xImpl_Group1::operator(); + G1Type InstType; - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision - const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision + void operator()( const xRegister8& to, const xRegister8& from ) const; + void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister32& to, const xRegister32& from ) const; - xImpl_G1Logic() {} + void operator()( const ModSibBase& to, const xRegisterInt& from ) const; + void operator()( const xRegisterInt& to, const ModSibBase& from ) const; + void operator()( const xRegisterInt& to, int imm ) const; + + void operator()( const ModSib32orLess& to, int imm ) const; + + xImplSimd_DestRegSSE PS; // packed single precision + xImplSimd_DestRegSSE PD; // packed double precision }; // ------------------------------------------------------------------------ // This class combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB). 
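[Editor's note] To make the Group 1 immediate encodings above easier to follow: the ALU operation index (G1Type) lands in the ModRM reg field, and the primary opcode is 0x80 for 8-bit operands, 0x83 when a 16/32-bit operand takes a sign-extended imm8, and 0x81 otherwise, with a shorter accumulator form that skips the ModRM byte. A standalone sketch of just that choice (helper names are mine):

#include <cstdint>

enum G1Type { G1Type_ADD, G1Type_OR, G1Type_ADC, G1Type_SBB,
              G1Type_AND, G1Type_SUB, G1Type_XOR, G1Type_CMP };

static bool is_s8(int imm) { return imm >= -128 && imm <= 127; }

// Primary opcode for "op r/m, imm"; the G1Type value goes into the ModRM
// reg field (not shown here).
static uint8_t Group1ImmOpcode(bool is8BitOp, int imm)
{
    if (is8BitOp)   return 0x80;   // 80 /op, imm8
    if (is_s8(imm)) return 0x83;   // 83 /op, sign-extended imm8
    return 0x81;                   // 81 /op, imm32 (imm16 with a 0x66 prefix)
}

// Accumulator shortcut, e.g. ADD EAX,imm32 == 0x05: no ModRM byte at all.
static uint8_t Group1AccumOpcode(G1Type op, bool is8BitOp)
{
    return (is8BitOp ? 0x04 : 0x05) | (op << 3);
}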
// -template< G1Type InstType, u16 OpcodeSSE > -class xImpl_G1Arith : public xImpl_G1Logic +struct xImpl_G1Arith { -public: - using xImpl_Group1::operator(); + G1Type InstType; - const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision - const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision + void operator()( const xRegister8& to, const xRegister8& from ) const; + void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister32& to, const xRegister32& from ) const; - xImpl_G1Arith() {} + void operator()( const ModSibBase& to, const xRegisterInt& from ) const; + void operator()( const xRegisterInt& to, const ModSibBase& from ) const; + void operator()( const xRegisterInt& to, int imm ) const; + + void operator()( const ModSib32orLess& to, int imm ) const; + + xImplSimd_DestRegSSE PS; // packed single precision + xImplSimd_DestRegSSE PD; // packed double precision + xImplSimd_DestRegSSE SS; // scalar single precision + xImplSimd_DestRegSSE SD; // scalar double precision }; // ------------------------------------------------------------------------ -class xImpl_G1Compare : xImpl_Group1< G1Type_CMP > +struct xImpl_G1Compare { -protected: - template< u8 Prefix > struct Woot - { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ xOpWrite0F( Prefix, 0xc2, to, from, (u8)cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from, (u8)cmptype ); } - Woot() {} - }; + void operator()( const xRegister8& to, const xRegister8& from ) const; + void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister32& to, const xRegister32& from ) const; -public: - using xImpl_Group1::operator(); + void operator()( const ModSibBase& to, const xRegisterInt& from ) const; + void operator()( const xRegisterInt& to, const ModSibBase& from ) const; + void operator()( const xRegisterInt& to, int imm ) const; - const Woot<0x00> PS; - const Woot<0x66> PD; - const Woot<0xf3> SS; - const Woot<0xf2> SD; + void operator()( const ModSib32orLess& to, int imm ) const; - xImpl_G1Compare() {} //GCWhat? + xImplSimd_DestSSE_CmpImm PS; + xImplSimd_DestSSE_CmpImm PD; + xImplSimd_DestSSE_CmpImm SS; + xImplSimd_DestSSE_CmpImm SD; }; + +} // End namespace x86Emitter + diff --git a/common/include/x86emitter/implement/group2.h b/common/include/x86emitter/implement/group2.h index 91e8208418..45286c3d1b 100644 --- a/common/include/x86emitter/implement/group2.h +++ b/common/include/x86emitter/implement/group2.h @@ -15,8 +15,7 @@ #pragma once -// Note: This header is meant to be included from within the x86Emitter::Internal namespace. -// Instructions implemented in this header are as follows -->> +namespace x86Emitter { enum G2Type { @@ -30,73 +29,23 @@ enum G2Type G2Type_SAR }; -// ------------------------------------------------------------------- +// -------------------------------------------------------------------------------------- +// xImpl_Group2 +// -------------------------------------------------------------------------------------- // Group 2 (shift) instructions have no Sib/ModRM forms. // Optimization Note: For Imm forms, we ignore the instruction if the shift count is zero. // This is a safe optimization since any zero-value shift does not affect any flags. 
// -template< G2Type InstType > -class Group2ImplAll +struct xImpl_Group2 { -public: - template< typename T > __forceinline void operator()( const xRegister& to, const xRegisterCL& /* from */ ) const - { - //if( !Is8BitOp() ) - // pxAssert( to != xRegister( ebp.Id ) ); + G2Type InstType; - prefix16(); - xWrite8( Is8BitOp() ? 0xd2 : 0xd3 ); - EmitSibMagic( InstType, to ); - } - - template< typename T > __noinline void operator()( const ModSibStrict& sibdest, const xRegisterCL& /* from */ ) const - { - prefix16(); - xWrite8( Is8BitOp() ? 0xd2 : 0xd3 ); - EmitSibMagic( InstType, sibdest ); - } - - template< typename T > __noinline void operator()( const ModSibStrict& sibdest, u8 imm ) const - { - if( imm == 0 ) return; - - prefix16(); - if( imm == 1 ) - { - // special encoding of 1's - xWrite8( Is8BitOp() ? 0xd0 : 0xd1 ); - EmitSibMagic( InstType, sibdest ); - } - else - { - xWrite8( Is8BitOp() ? 0xc0 : 0xc1 ); - EmitSibMagic( InstType, sibdest ); - xWrite8( imm ); - } - } - - template< typename T > __forceinline void operator()( const xRegister& to, u8 imm ) const - { - if( imm == 0 ) return; - - //if( !Is8BitOp() ) - // pxAssert( to != xRegister( ebp.Id ) ); - - prefix16(); - if( imm == 1 ) - { - // special encoding of 1's - xWrite8( Is8BitOp() ? 0xd0 : 0xd1 ); - EmitSibMagic( InstType, to ); - } - else - { - xWrite8( Is8BitOp() ? 0xc0 : 0xc1 ); - EmitSibMagic( InstType, to ); - xWrite8( imm ); - } - } + void operator()( const xRegisterInt& to, const xRegisterCL& from ) const; + void operator()( const ModSib32orLess& to, const xRegisterCL& from ) const; + void operator()( const xRegisterInt& to, u8 imm ) const; + void operator()( const ModSib32orLess& to, u8 imm ) const; +#if 0 // ------------------------------------------------------------------------ template< typename T > __noinline void operator()( const xDirectOrIndirect& to, u8 imm ) const { @@ -107,7 +56,7 @@ public: { _DoI_helpermess( *this, to, from ); } - - - Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. +#endif }; + +} // End namespace x86Emitter diff --git a/common/include/x86emitter/implement/group3.h b/common/include/x86emitter/implement/group3.h index a474433e63..bc21276bc5 100644 --- a/common/include/x86emitter/implement/group3.h +++ b/common/include/x86emitter/implement/group3.h @@ -15,8 +15,7 @@ #pragma once -// Note: This header is meant to be included from within the x86Emitter::Internal namespace. -// Instructions implemented in this header are as follows -->> +namespace x86Emitter { enum G3Type { @@ -28,84 +27,83 @@ enum G3Type G3Type_iDIV = 7 }; -////////////////////////////////////////////////////////////////////////////////////////// -// -template< G3Type InstType > -class xImpl_Group3 +// -------------------------------------------------------------------------------------- +// xImpl_Group3 +// -------------------------------------------------------------------------------------- +struct xImpl_Group3 { -public: - // ------------------------------------------------------------------------ - template< typename T > __emitinline void operator()( const xRegister& from ) const - { - prefix16(); - xWrite8(Is8BitOp() ? 0xf6 : 0xf7 ); - EmitSibMagic( InstType, from ); - } + G3Type InstType; - // ------------------------------------------------------------------------ - template< typename T > __emitinline void operator()( const ModSibStrict& from ) const - { - prefix16(); - xWrite8( Is8BitOp() ? 
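[Editor's note] For reference, the Group 2 opcode selection spelled out by the old template code above (and presumably reproduced by the new xImpl_Group2 in groups.cpp) is: D2/D3 for shift-by-CL, D0/D1 for the special shift-by-1 encoding, C0/C1 for shift-by-imm8, with the G2Type index in the ModRM reg field and the whole instruction dropped for a zero count. A small illustrative sketch:

#include <cstdint>
#include <utility>

// Returns {opcode, emit_imm_byte} for a shift/rotate by immediate count.
// Opcode 0 signals "emit nothing": a zero-count shift changes neither the
// destination nor the flags, so it can be skipped safely.
static std::pair<uint8_t, bool> Group2ImmOpcode(bool is8BitOp, uint8_t count)
{
    if (count == 0) return { uint8_t(0x00), false };
    if (count == 1) return { uint8_t(is8BitOp ? 0xD0 : 0xD1), false }; // short "by 1" form
    return { uint8_t(is8BitOp ? 0xC0 : 0xC1), true };                  // C0/C1 /op, imm8
}

// Shift by CL uses D2 (8-bit) / D3 (16/32-bit); no immediate follows.
static uint8_t Group2ClOpcode(bool is8BitOp) { return is8BitOp ? 0xD2 : 0xD3; }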
0xf6 : 0xf7 ); - EmitSibMagic( InstType, from ); - } + void operator()( const xRegisterInt& from ) const; + void operator()( const ModSib32orLess& from ) const; - template< typename T > __emitinline void operator()( const xDirectOrIndirect& from ) const +#if 0 + template< typename T > + void operator()( const xDirectOrIndirect& from ) const { _DoI_helpermess( *this, from ); } - xImpl_Group3() {} +#endif }; -// ------------------------------------------------------------------------ +// -------------------------------------------------------------------------------------- +// xImpl_MulDivBase +// -------------------------------------------------------------------------------------- // This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV. // -template< G3Type InstType, u16 OpcodeSSE > -class ImplMulDivBase : public xImpl_Group3 +struct xImpl_MulDivBase { -public: - ImplMulDivBase() {} - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; - const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; - const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; - const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; + G3Type InstType; + u16 OpcodeSSE; + + void operator()( const xRegisterInt& from ) const; + void operator()( const ModSib32orLess& from ) const; + + const xImplSimd_DestRegSSE PS; + const xImplSimd_DestRegSSE PD; + const xImplSimd_DestRegSSE SS; + const xImplSimd_DestRegSSE SD; }; -////////////////////////////////////////////////////////////////////////////////////////// +// -------------------------------------------------------------------------------------- +// xImpl_iDiv +// -------------------------------------------------------------------------------------- +struct xImpl_iDiv +{ + void operator()( const xRegisterInt& from ) const; + void operator()( const ModSib32orLess& from ) const; + + const xImplSimd_DestRegSSE PS; + const xImplSimd_DestRegSSE PD; + const xImplSimd_DestRegSSE SS; + const xImplSimd_DestRegSSE SD; +}; + +// -------------------------------------------------------------------------------------- +// xImpl_iMul +// -------------------------------------------------------------------------------------- // -class xImpl_iDiv : public ImplMulDivBase +struct xImpl_iMul { -public: - using ImplMulDivBase::operator(); + void operator()( const xRegisterInt& from ) const; + void operator()( const ModSib32orLess& from ) const; + + // The following iMul-specific forms are valid for 16 and 32 bit register operands only! + + void operator()( const xRegister32& to, const xRegister32& from ) const; + void operator()( const xRegister32& to, const ModSibBase& src ) const; + void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister16& to, const ModSibBase& src ) const; + + void operator()( const xRegister32& to, const xRegister32& from, s32 imm ) const; + void operator()( const xRegister32& to, const ModSibBase& from, s32 imm ) const; + void operator()( const xRegister16& to, const xRegister16& from, s16 imm ) const; + void operator()( const xRegister16& to, const ModSibBase& from, s16 imm ) const; + + const xImplSimd_DestRegSSE PS; + const xImplSimd_DestRegSSE PD; + const xImplSimd_DestRegSSE SS; + const xImplSimd_DestRegSSE SD; }; -////////////////////////////////////////////////////////////////////////////////////////// -// The following iMul-specific forms are valid for 16 and 32 bit register operands only! 
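[Editor's note] A quick reference for the iMUL-specific forms declared above (the encodings are spelled out by the old ImmStyle helper just below): the two-operand form is 0F AF /r, and the immediate forms are 6B /r ib when the immediate fits in a signed byte, else 69 /r id; 16-bit variants take a 0x66 prefix. A standalone sketch, with an illustrative helper name:

#include <cstdint>
#include <vector>

static bool is_s8(int32_t imm) { return imm >= -128 && imm <= 127; }

// Appends prefix + opcode bytes only (ModRM and immediate omitted) for
// "imul reg, reg/mem [, imm]". Not the emitter's API; illustration only.
static void iMulOpcode(std::vector<uint8_t>& out, bool is16Bit,
                       bool hasImm, int32_t imm = 0)
{
    if (is16Bit) out.push_back(0x66);                 // operand-size prefix
    if (!hasImm) { out.push_back(0x0F); out.push_back(0xAF); return; } // imul r, r/m
    out.push_back(is_s8(imm) ? 0x6B : 0x69);          // imul r, r/m, imm8 / imm32
}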
-// -class xImpl_iMul : public ImplMulDivBase -{ - template< typename T1, typename T2, typename ImmType > - static __forceinline void ImmStyle( const T1& param1, const T2& param2, ImmType imm8 ) - { - xOpWrite0F( (sizeof(ImmType) == 2) ? 0x66 : 0, is_s8( imm8 ) ? 0x6b : 0x69, param1, param2 ); - if( is_s8( imm8 ) ) - xWrite8( (u8)imm8 ); - else - xWrite( imm8 ); - } - -public: - using ImplMulDivBase::operator(); - - __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( 0xaf, to, from ); } - __forceinline void operator()( const xRegister32& to, const ModSibBase& src ) const { xOpWrite0F( 0xaf, to, src ); } - __forceinline void operator()( const xRegister32& to, const xRegister32& from, s32 imm ) const{ ImmStyle( to, from, imm ); } - __forceinline void operator()( const xRegister32& to, const ModSibBase& from, s32 imm ) const { ImmStyle( to, from, imm ); } - - __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, 0xaf, to, from ); } - __forceinline void operator()( const xRegister16& to, const ModSibBase& src ) const { xOpWrite0F( 0x66, 0xaf, to, src ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from, s16 imm ) const{ ImmStyle( to, from, imm ); } - __forceinline void operator()( const xRegister16& to, const ModSibBase& from, s16 imm ) const { ImmStyle( to, from, imm ); } - - xImpl_iMul() {} -}; +} \ No newline at end of file diff --git a/common/include/x86emitter/implement/helpers.h b/common/include/x86emitter/implement/helpers.h index 581c9eb41d..e74c948363 100644 --- a/common/include/x86emitter/implement/helpers.h +++ b/common/include/x86emitter/implement/helpers.h @@ -13,83 +13,16 @@ * If not, see . */ -// ------------------------------------------------------------------------ -// helpers.h -- Various universally helpful functions for emitter convenience! -// -// Note: Header file should be included from the x86Emitter::Internal namespace, such -// that all members contained within are in said namespace. -// ------------------------------------------------------------------------ - #pragma once -#define OpWriteSSE( pre, op ) xOpWrite0F( pre, op, to, from ) +namespace x86Emitter { -extern void SimdPrefix( u8 prefix, u16 opcode ); -extern void EmitSibMagic( uint regfield, const void* address ); -extern void EmitSibMagic( uint regfield, const ModSibBase& info ); -extern void xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward ); +// helpermess is currently broken >_< -template< typename T > bool Is8BitOp() { return sizeof(T) == 1; } -template< typename T > void prefix16() { if( sizeof(T) == 2 ) xWrite8( 0x66 ); } - - -// Writes a ModRM byte for "Direct" register access forms, which is used for all -// instructions taking a form of [reg,reg]. 
-template< typename T > __emitinline -void EmitSibMagic( uint reg1, const xRegisterBase& reg2 ) -{ - xWrite8( (Mod_Direct << 6) | (reg1 << 3) | reg2.Id ); -} - -template< typename T1, typename T2 > __emitinline -void EmitSibMagic( const xRegisterBase reg1, const xRegisterBase& reg2 ) -{ - xWrite8( (Mod_Direct << 6) | (reg1.Id << 3) | reg2.Id ); -} - -template< typename T1 > __emitinline -void EmitSibMagic( const xRegisterBase reg1, const void* src ) { EmitSibMagic( reg1.Id, src ); } - -template< typename T1 > __emitinline -void EmitSibMagic( const xRegisterBase reg1, const ModSibBase& sib ) { EmitSibMagic( reg1.Id, sib ); } - -// ------------------------------------------------------------------------ -template< typename T1, typename T2 > __emitinline -void xOpWrite( u8 prefix, u8 opcode, const T1& param1, const T2& param2 ) -{ - if( prefix != 0 ) - xWrite16( (opcode<<8) | prefix ); - else - xWrite8( opcode ); - - EmitSibMagic( param1, param2 ); -} - -// ------------------------------------------------------------------------ -template< typename T1, typename T2 > __emitinline -void xOpWrite0F( u8 prefix, u16 opcode, const T1& param1, const T2& param2 ) -{ - SimdPrefix( prefix, opcode ); - EmitSibMagic( param1, param2 ); -} - -template< typename T1, typename T2 > __emitinline -void xOpWrite0F( u8 prefix, u16 opcode, const T1& param1, const T2& param2, u8 imm8 ) -{ - xOpWrite0F( prefix, opcode, param1, param2 ); - xWrite8( imm8 ); -} - -template< typename T1, typename T2 > __emitinline -void xOpWrite0F( u16 opcode, const T1& param1, const T2& param2 ) { xOpWrite0F( 0, opcode, param1, param2 ); } - -template< typename T1, typename T2 > __emitinline -void xOpWrite0F( u16 opcode, const T1& param1, const T2& param2, u8 imm8 ) { xOpWrite0F( 0, opcode, param1, param2, imm8 ); } - -// ------------------------------------------------------------------------ +#if 0 template< typename xImpl, typename T > -void _DoI_helpermess( const xImpl& helpme, const xDirectOrIndirect& to, const xImmReg& immOrReg ) +void _DoI_helpermess( const xImpl& helpme, const xDirectOrIndirect& to, const xImmReg& immOrReg ) { if( to.IsDirect() ) { @@ -113,7 +46,7 @@ void _DoI_helpermess( const xImpl& helpme, const ModSibBase& to, const xImmReg(to), immOrReg.GetImm() ); + helpme( (ModSibStrict)to, immOrReg.GetImm() ); } template< typename xImpl, typename T > @@ -153,3 +86,6 @@ void _DoI_helpermess( const xImpl& helpme, const xDirectOrIndirect& to, const pxFailDev( "Invalid asm instruction: Both operands are indirect memory addresses." ); } +#endif + +} // End namespace x86Emitter diff --git a/common/include/x86emitter/implement/incdec.h b/common/include/x86emitter/implement/incdec.h index 03c5908257..156d5a5752 100644 --- a/common/include/x86emitter/implement/incdec.h +++ b/common/include/x86emitter/implement/incdec.h @@ -16,35 +16,19 @@ #pragma once // Implementations found here: Increment and Decrement Instructions! -// Note: This header is meant to be included from within the x86Emitter::Internal namespace. +// (They're soooo lonely... but I dunno where else to stick this class!) 
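[Editor's note] The register-direct ModRM form that the EmitSibMagic overloads above (now relocated to internal.h) keep writing is simply mod=11 plus the two register ids, and prefix16() is nothing more than an optional 0x66 operand-size prefix. A tiny self-contained sketch of the byte stream the xOpWrite path produces for a reg,reg instruction (names are mine):

#include <cstdint>
#include <vector>

enum { Mod_Direct = 3 };   // ModRM mod field value for a register-direct operand

static uint8_t ModRM_Direct(uint8_t regField, uint8_t rm)
{
    return uint8_t((Mod_Direct << 6) | (regField << 3) | rm);
}

// Roughly what xOpWrite(prefix, opcode, reg, reg) boils down to for the
// register-direct case: optional prefix, one opcode byte, one ModRM byte.
static void WriteOp(std::vector<uint8_t>& out, uint8_t prefix,
                    uint8_t opcode, uint8_t regField, uint8_t rm)
{
    if (prefix) out.push_back(prefix);   // e.g. 0x66 from prefix16()
    out.push_back(opcode);
    out.push_back(ModRM_Direct(regField, rm));
}

// Example: "mov cx, ax" -> 66 89 C1 (prefix, opcode 0x89, ModRM reg=eax(0), rm=ecx(1)).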
+namespace x86Emitter { -template< bool isDec > -class xImpl_IncDec +// -------------------------------------------------------------------------------------- +// xImpl_IncDec +// -------------------------------------------------------------------------------------- +struct xImpl_IncDec { -public: - template< typename T > - __forceinline void operator()( const xRegister& to ) const - { - if( Is8BitOp() ) - { - xWrite8( 0xfe ); - EmitSibMagic( isDec ? 1 : 0, to ); - } - else - { - prefix16(); - xWrite8( (isDec ? 0x48 : 0x40) | to.Id ); - } - } + bool isDec; - template< typename T > - __forceinline void operator()( const ModSibStrict& sibdest ) const - { - prefix16(); - xWrite8( Is8BitOp() ? 0xfe : 0xff ); - EmitSibMagic( isDec ? 1 : 0, sibdest ); - } - - xImpl_IncDec() {} // don't ask. + void operator()( const xRegisterInt& to ) const; + void operator()( const ModSib32orLess& to ) const; }; + +} // End namespace x86Emitter diff --git a/common/include/x86emitter/implement/jmpcall.h b/common/include/x86emitter/implement/jmpcall.h index 5c2f9dec53..d4ef841d09 100644 --- a/common/include/x86emitter/implement/jmpcall.h +++ b/common/include/x86emitter/implement/jmpcall.h @@ -16,7 +16,8 @@ #pragma once // Implementations found here: CALL and JMP! (unconditional only) -// Note: This header is meant to be included from within the x86Emitter::Internal namespace. + +namespace x86Emitter { #ifdef __GNUG__ // GCC has a bug that causes the templated function handler for Jmp/Call emitters to generate @@ -29,18 +30,18 @@ # define __always_inline_tmpl_fail #endif +extern void xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward ); + // ------------------------------------------------------------------------ -template< bool isJmp > -class xImpl_JmpCall +struct xImpl_JmpCall { -public: - xImpl_JmpCall() {} + bool isJmp; - __forceinline void operator()( const xRegister32& absreg ) const { xOpWrite( 0x00, 0xff, isJmp ? 4 : 2, absreg ); } - __forceinline void operator()( const ModSibStrict& src ) const { xOpWrite( 0x00, 0xff, isJmp ? 4 : 2, src ); } + void operator()( const xRegister32& absreg ) const; + void operator()( const ModSib32& src ) const; - __forceinline void operator()( const xRegister16& absreg ) const { xOpWrite( 0x66, 0xff, isJmp ? 4 : 2, absreg ); } - __forceinline void operator()( const ModSibStrict& src ) const { xOpWrite( 0x66, 0xff, isJmp ? 4 : 2, src ); } + void operator()( const xRegister16& absreg ) const; + void operator()( const ModSib16& src ) const; // Special form for calling functions. This form automatically resolves the // correct displacement based on the size of the instruction being generated. @@ -61,3 +62,5 @@ public: } }; +} // End namespace x86Emitter + diff --git a/common/include/x86emitter/implement/movs.h b/common/include/x86emitter/implement/movs.h index 9479d967c1..dd898514af 100644 --- a/common/include/x86emitter/implement/movs.h +++ b/common/include/x86emitter/implement/movs.h @@ -18,92 +18,27 @@ // Header: ix86_impl_movs.h -- covers mov, cmov, movsx/movzx, and SETcc (which shares // with cmov many similarities). +namespace x86Emitter { // -------------------------------------------------------------------------------------- // MovImplAll // -------------------------------------------------------------------------------------- // MOV instruction Implementation, plus many SIMD sub-mov variants. 
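[Editor's note] For the increment/decrement forms above: 16/32-bit registers get the one-byte short encodings 40+r (INC) / 48+r (DEC), while 8-bit registers and memory operands fall back to the FE/FF group form with 0 or 1 in the ModRM reg field. A sketch (illustrative names only):

#include <cstdint>
#include <vector>

// regId: 0..7 (eax..edi). Emits only the register forms of INC/DEC.
static void EmitIncDecReg(std::vector<uint8_t>& out, bool isDec,
                          bool is8BitOp, uint8_t regId)
{
    if (is8BitOp)
    {
        out.push_back(0xFE);    // FE /0 = inc r/m8, FE /1 = dec r/m8
        out.push_back(uint8_t(0xC0 | ((isDec ? 1 : 0) << 3) | regId));
    }
    else
    {
        out.push_back(uint8_t((isDec ? 0x48 : 0x40) | regId));  // one-byte short form
    }
}
// Memory operands use FF /0 (inc) and FF /1 (dec); indirect JMP and CALL share
// the same group opcode as FF /4 and FF /2 (see xImpl_JmpCall above).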
- -class MovImplAll +// +struct xImpl_Mov { -public: - // ------------------------------------------------------------------------ - template< typename T > __forceinline void operator()( const xRegister& to, const xRegister& from ) const - { - if( to == from ) return; // ignore redundant MOVs. + xImpl_Mov() {} // Satisfy GCC's whims. - prefix16(); - xWrite8( Is8BitOp() ? 0x88 : 0x89 ); - EmitSibMagic( from, to ); - } + void operator()( const xRegister8& to, const xRegister8& from ) const; + void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister32& to, const xRegister32& from ) const; - // ------------------------------------------------------------------------ - template< typename T > __noinline void operator()( const ModSibBase& dest, const xRegister& from ) const - { - prefix16(); + void operator()( const ModSibBase& dest, const xRegisterInt& from ) const; + void operator()( const xRegisterInt& to, const ModSibBase& src ) const; + void operator()( const ModSib32orLess& dest, int imm ) const; + void operator()( const xRegisterInt& to, int imm, bool preserve_flags=false ) const; - // mov eax has a special from when writing directly to a DISP32 address - // (sans any register index/base registers). - - if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) - { - xWrite8( Is8BitOp() ? 0xa2 : 0xa3 ); - xWrite32( dest.Displacement ); - } - else - { - xWrite8( Is8BitOp() ? 0x88 : 0x89 ); - EmitSibMagic( from.Id, dest ); - } - } - - // ------------------------------------------------------------------------ - template< typename T > __noinline void operator()( const xRegister& to, const ModSibBase& src ) const - { - prefix16(); - - // mov eax has a special from when reading directly from a DISP32 address - // (sans any register index/base registers). - - if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) - { - xWrite8( Is8BitOp() ? 0xa0 : 0xa1 ); - xWrite32( src.Displacement ); - } - else - { - xWrite8( Is8BitOp() ? 0x8a : 0x8b ); - EmitSibMagic( to, src ); - } - } - - // ------------------------------------------------------------------------ - template< typename T > __noinline void operator()( const ModSibStrict& dest, int imm ) const - { - prefix16(); - xWrite8( Is8BitOp() ? 0xc6 : 0xc7 ); - EmitSibMagic( 0, dest ); - xWrite( imm ); - } - - // ------------------------------------------------------------------------ - // preserve_flags - set to true to disable optimizations which could alter the state of - // the flags (namely replacing mov reg,0 with xor). - template< typename T > __emitinline void operator()( const xRegister& to, int imm, bool preserve_flags=false ) const - { - if( !preserve_flags && (imm == 0) ) - xXOR( to, to ); - else - { - // Note: MOV does not have (reg16/32,imm8) forms. - - prefix16(); - xWrite8( (Is8BitOp() ? 
0xb0 : 0xb8) | to.Id ); - xWrite( imm ); - } - } - - // ------------------------------------------------------------------------ +#if 0 template< typename T > __noinline void operator()( const ModSibBase& to, const xImmReg& immOrReg ) const { _DoI_helpermess( *this, to, immOrReg ); @@ -125,7 +60,7 @@ public: _DoI_helpermess( *this, to, from ); } - template< typename T > __noinline void operator()( const xRegister& to, const xDirectOrIndirect& from ) const + /*template< typename T > __noinline void operator()( const xRegister& to, const xDirectOrIndirect& from ) const { _DoI_helpermess( *this, xDirectOrIndirect( to ), from ); } @@ -133,16 +68,15 @@ public: template< typename T > __noinline void operator()( const xDirectOrIndirect& to, const xRegister& from ) const { _DoI_helpermess( *this, to, xDirectOrIndirect( from ) ); - } - - MovImplAll() {} // Satisfy GCC's whims. + }*/ +#endif }; -#define ccSane() pxAssertDev( ccType >= 0 && ccType <= 0x0f, "Invalid comparison type specifier." ) - -////////////////////////////////////////////////////////////////////////////////////////// -// CMOV !! [in all of it's disappointing lack-of glory] .. and .. -// SETcc!! [more glory, less lack!] +// -------------------------------------------------------------------------------------- +// xImpl_CMov +// -------------------------------------------------------------------------------------- +// CMOVcc !! [in all of it's disappointing lack-of glory] .. and .. +// SETcc !! [more glory, less lack!] // // CMOV Disclaimer: Caution! This instruction can look exciting and cool, until you // realize that it cannot load immediate values into registers. -_- @@ -150,92 +84,67 @@ public: // I use explicit method declarations here instead of templates, in order to provide // *only* 32 and 16 bit register operand forms (8 bit registers are not valid in CMOV). // -class CMovImplGeneric + +struct xImpl_CMov { -public: - __forceinline void operator()( JccComparisonType ccType, const xRegister32& to, const xRegister32& from ) const { ccSane(); xOpWrite0F( 0x40 | ccType, to, from ); } - __noinline void operator()( JccComparisonType ccType, const xRegister32& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( 0x40 | ccType, to, sibsrc ); } - //__noinline void operator()( JccComparisonType ccType, const xDirectOrIndirect32& to, const xDirectOrIndirect32& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); } // too.. lazy.. to fix. + JccComparisonType ccType; - __forceinline void operator()( JccComparisonType ccType, const xRegister16& to, const xRegister16& from ) const { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, from ); } - __noinline void operator()( JccComparisonType ccType, const xRegister16& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, sibsrc ); } - //__noinline void operator()( JccComparisonType ccType, const xDirectOrIndirect16& to, const xDirectOrIndirect16& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); } + void operator()( const xRegister32& to, const xRegister32& from ) const; + void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const; - CMovImplGeneric() {} // don't ask. 
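[Editor's note] Summarizing the MOV special cases handled above: plain reg,reg uses 88/89, loads/stores through a bare displacement get the shorter accumulator "moffs" forms A0-A3 when the register is AL/AX/EAX, immediates to a register use B0+r / B8+r, and mov reg,0 is replaced by xor reg,reg unless the caller asks for the flag-preserving form. A compact sketch of that last decision only (hypothetical helper, not the emitter's code):

#include <cstdint>
#include <vector>

// Emits "mov reg32, imm32" the way the logic above describes: xor-zeroing when
// allowed (2 bytes, clobbers flags), otherwise the B8+r immediate form (5 bytes).
static void EmitMovRegImm32(std::vector<uint8_t>& out, uint8_t regId,
                            uint32_t imm, bool preserve_flags)
{
    if (imm == 0 && !preserve_flags)
    {
        out.push_back(0x31);                                  // xor r/m32, r32
        out.push_back(uint8_t(0xC0 | (regId << 3) | regId));  // register with itself
        return;
    }
    out.push_back(uint8_t(0xB8 | regId));                     // mov r32, imm32
    for (int i = 0; i < 4; ++i)
        out.push_back(uint8_t(imm >> (i * 8)));               // little-endian immediate
}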
+ void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const; + + //void operator()( const xDirectOrIndirect32& to, const xDirectOrIndirect32& from ); + //void operator()( const xDirectOrIndirect16& to, const xDirectOrIndirect16& from ) const; }; -// ------------------------------------------------------------------------ -template< JccComparisonType ccType > -class CMovImplAll +struct xImpl_Set { - static const u16 Opcode = 0x40 | ccType; + JccComparisonType ccType; -public: - __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { ccSane(); xOpWrite0F( Opcode, to, from ); } - __noinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( Opcode, to, sibsrc ); } - __noinline void operator()( const xDirectOrIndirect32& to, const xDirectOrIndirect32& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); } + void operator()( const xRegister8& to ) const; + void operator()( const ModSib8& dest ) const; - __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { ccSane(); xOpWrite0F( 0x66, Opcode, to, from ); } - __noinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( 0x66, Opcode, to, sibsrc ); } - __noinline void operator()( const xDirectOrIndirect16& to, const xDirectOrIndirect16& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); } - - CMovImplAll() {} // don't ask. + //void operator()( const xDirectOrIndirect8& dest ) const; }; -// ------------------------------------------------------------------------ -class SetImplGeneric -{ - // note: SETcc are 0x90, with 0 in the Reg field of ModRM. -public: - __forceinline void operator()( JccComparisonType ccType, const xRegister8& to ) const { ccSane(); xOpWrite0F( 0x90 | ccType, 0, to ); } - __noinline void operator()( JccComparisonType ccType, const ModSibStrict& dest ) const { ccSane(); xOpWrite0F( 0x90 | ccType, 0, dest ); } - - SetImplGeneric() {} // if you do, ask GCC. -}; - -// ------------------------------------------------------------------------ -template< JccComparisonType ccType > -class SetImplAll -{ - static const u16 Opcode = 0x90 | ccType; // SETcc are 0x90 base opcode, with 0 in the Reg field of ModRM. - -public: - __forceinline void operator()( const xRegister8& to ) const { ccSane(); xOpWrite0F( Opcode, 0, to ); } - __noinline void operator()( const ModSibStrict& dest ) const { ccSane(); xOpWrite0F( Opcode, 0, dest ); } - __noinline void operator()( const xDirectOrIndirect8& dest ) const { ccSane(); _DoI_helpermess( *this, dest ); } - - SetImplAll() {} // if you do, ask GCC. -}; - - -////////////////////////////////////////////////////////////////////////////////////////// -// Mov with sign/zero extension implementations (movsx / movzx) -// - -// ------------------------------------------------------------------------ -template< bool SignExtend > -class MovExtendImplAll +class xRegister16or32 { protected: - static const u16 Opcode = 0xb6 | (SignExtend ? 8 : 0 ); - - // Macro useful for trapping unwanted use of EBP. 
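[Editor's note] Both families above are driven by the same 4-bit condition code: CMOVcc is 0F 40+cc (16/32-bit register destinations only, never an immediate source), and SETcc is 0F 90+cc on an 8-bit destination with the ModRM reg field left at zero. Sketch:

#include <cstdint>

// cc is the 4-bit x86 condition code (0x0..0xF), e.g. 0x4 = Z/E, 0x5 = NZ/NE.
// Both helpers pack the 0F escape and the opcode into one u16 for illustration.
static uint16_t CMovOpcode(uint8_t cc)  { return uint16_t(0x0F40 | (cc & 0x0F)); } // 0F 4x /r
static uint16_t SetccOpcode(uint8_t cc) { return uint16_t(0x0F90 | (cc & 0x0F)); } // 0F 9x /0

// Example: SETE al == 0F 94 C0 (cc = 4; ModRM mod=11, reg=0, rm=eax).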
- //#define EbpAssert() pxAssert( to != ebp ) - #define EbpAssert() + const xRegisterInt& m_convtype; public: - __forceinline void operator()( const xRegister32& to, const xRegister16& from ) const { EbpAssert(); xOpWrite0F( Opcode+1, to, from ); } - __noinline void operator()( const xRegister32& to, const ModSibStrict& sibsrc ) const { EbpAssert(); xOpWrite0F( Opcode+1, to, sibsrc ); } - __noinline void operator()( const xRegister32& to, const xDirectOrIndirect16& src ) const { EbpAssert(); _DoI_helpermess( *this, to, src ); } - - __forceinline void operator()( const xRegister32& to, const xRegister8& from ) const { EbpAssert(); xOpWrite0F( Opcode, to, from ); } - __noinline void operator()( const xRegister32& to, const ModSibStrict& sibsrc ) const { EbpAssert(); xOpWrite0F( Opcode, to, sibsrc ); } - __noinline void operator()( const xRegister32& to, const xDirectOrIndirect8& src ) const { EbpAssert(); _DoI_helpermess( *this, to, src ); } + xRegister16or32( const xRegister32& src ) : m_convtype( src ) {} + xRegister16or32( const xRegister16& src ) : m_convtype( src ) {} - __forceinline void operator()( const xRegister16& to, const xRegister8& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } - __noinline void operator()( const xRegister16& to, const ModSibStrict& sibsrc ) const { xOpWrite0F( 0x66, Opcode, to, sibsrc ); } - __noinline void operator()( const xRegister16& to, const xDirectOrIndirect8& src ) const { _DoI_helpermess( *this, to, src ); } + //operator const xRegisterInt&() const { return m_convtype; } + operator const xRegisterBase&() const { return m_convtype; } - MovExtendImplAll() {} // don't ask. + const xRegisterInt* operator->() const + { + return &m_convtype; + } }; + +// -------------------------------------------------------------------------------------- +// xImpl_MovExtend +// -------------------------------------------------------------------------------------- +// Mov with sign/zero extension implementations (movsx / movzx) +// +struct xImpl_MovExtend +{ + bool SignExtend; + + void operator()( const xRegister16or32& to, const xRegister8& from ) const; + void operator()( const xRegister16or32& to, const ModSib8& sibsrc ) const; + void operator()( const xRegister32& to, const xRegister16& from ) const; + void operator()( const xRegister32& to, const ModSib16& sibsrc ) const; + + //void operator()( const xRegister32& to, const xDirectOrIndirect16& src ) const; + //void operator()( const xRegister16or32& to, const xDirectOrIndirect8& src ) const; + //void operator()( const xRegister16& to, const xDirectOrIndirect8& src ) const; +}; + +} // End namespace x86Emitter diff --git a/common/include/x86emitter/implement/simd_helpers.h b/common/include/x86emitter/implement/simd_helpers.h index c33827ee26..60ed8d2faf 100644 --- a/common/include/x86emitter/implement/simd_helpers.h +++ b/common/include/x86emitter/implement/simd_helpers.h @@ -47,6 +47,15 @@ struct xImplSimd_DestRegImmSSE void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const; }; +struct xImplSimd_DestSSE_CmpImm +{ + u8 Prefix; + u16 Opcode; + + void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType imm ) const; + void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType imm ) const; +}; + struct xImplSimd_DestRegImmMMX { u8 Prefix; diff --git a/common/include/x86emitter/implement/simd_moremovs.h b/common/include/x86emitter/implement/simd_moremovs.h index 02c66d05e0..48f8f27efa 100644 --- 
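[Editor's note] The MovExtend opcode math above reduces to: base 0F B6 for zero-extension, 0F BE for sign-extension (the |8 bit), plus 1 when the source is 16-bit instead of 8-bit; a 0x66 prefix applies when the destination is 16-bit. Sketch of that selection:

#include <cstdint>

// Returns the second opcode byte (after 0x0F) for movzx/movsx.
static uint8_t MovExtendOpcode(bool signExtend, bool srcIs16Bit)
{
    uint8_t op = 0xB6 | (signExtend ? 0x08 : 0x00);   // B6/BE: 8-bit source
    return uint8_t(op + (srcIs16Bit ? 1 : 0));        // B7/BF: 16-bit source
}
// movzx eax, cl -> 0F B6 C1;  movsx eax, cx -> 0F BF C1.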
a/common/include/x86emitter/implement/simd_moremovs.h +++ b/common/include/x86emitter/implement/simd_moremovs.h @@ -142,32 +142,32 @@ struct xImplSimd_PMove // [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers // and store them in dest. void BW( const xRegisterSSE& to, const xRegisterSSE& from ) const; - void BW( const xRegisterSSE& to, const ModSibStrict& from ) const; + void BW( const xRegisterSSE& to, const ModSib64& from ) const; // [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers // and store them in dest. void BD( const xRegisterSSE& to, const xRegisterSSE& from ) const; - void BD( const xRegisterSSE& to, const ModSibStrict& from ) const; + void BD( const xRegisterSSE& to, const ModSib32& from ) const; // [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers // and store them in dest. void BQ( const xRegisterSSE& to, const xRegisterSSE& from ) const; - void BQ( const xRegisterSSE& to, const ModSibStrict& from ) const; + void BQ( const xRegisterSSE& to, const ModSib16& from ) const; // [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers // and store them in dest. void WD( const xRegisterSSE& to, const xRegisterSSE& from ) const; - void WD( const xRegisterSSE& to, const ModSibStrict& from ) const; + void WD( const xRegisterSSE& to, const ModSib64& from ) const; // [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers // and store them in dest. void WQ( const xRegisterSSE& to, const xRegisterSSE& from ) const; - void WQ( const xRegisterSSE& to, const ModSibStrict& from ) const; + void WQ( const xRegisterSSE& to, const ModSib32& from ) const; // [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers // and store them in dest. void DQ( const xRegisterSSE& to, const xRegisterSSE& from ) const; - void DQ( const xRegisterSSE& to, const ModSibStrict& from ) const; + void DQ( const xRegisterSSE& to, const ModSib64& from ) const; }; } diff --git a/common/include/x86emitter/implement/test.h b/common/include/x86emitter/implement/test.h index 7dfa8e2bc7..d31b0e0d29 100644 --- a/common/include/x86emitter/implement/test.h +++ b/common/include/x86emitter/implement/test.h @@ -16,50 +16,20 @@ #pragma once // Implementations found here: TEST + BTS/BT/BTC/BTR + BSF/BSR! (for lack of better location) -// Note: This header is meant to be included from within the x86Emitter::Internal namespace. -////////////////////////////////////////////////////////////////////////////////////////// -// TEST instruction Implementation +namespace x86Emitter { + +// -------------------------------------------------------------------------------------- +// xImpl_Test +// -------------------------------------------------------------------------------------- // -class xImpl_Test +struct xImpl_Test { -public: - // ------------------------------------------------------------------------ - template< typename T > __forceinline - void operator()( const xRegister& to, const xRegister& from ) const - { - prefix16(); - xWrite8( Is8BitOp() ? 0x84 : 0x85 ); - EmitSibMagic( from, to ); - } - - // ------------------------------------------------------------------------ - template< typename T > __forceinline - void operator()( const ModSibStrict& dest, int imm ) const - { - prefix16(); - xWrite8( Is8BitOp() ? 
0xf6 : 0xf7 ); - EmitSibMagic( 0, dest ); - xWrite( imm ); - } - - // ------------------------------------------------------------------------ - template< typename T > __forceinline - void operator()( const xRegister& to, int imm ) const - { - prefix16(); - - if( to.IsAccumulator() ) - xWrite8( Is8BitOp() ? 0xa8 : 0xa9 ); - else - { - xWrite8( Is8BitOp() ? 0xf6 : 0xf7 ); - EmitSibMagic( 0, to ); - } - xWrite( imm ); - } - - xImpl_Test() {} // Why does GCC need these? + void operator()( const xRegister8& to, const xRegister8& from ) const; + void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister32& to, const xRegister32& from ) const; + void operator()( const ModSib32orLess& dest, int imm ) const; + void operator()( const xRegisterInt& to, int imm ) const; }; enum G8Type @@ -70,40 +40,38 @@ enum G8Type G8Type_BTC, }; -////////////////////////////////////////////////////////////////////////////////////////// -// BSF / BSR -- 16/32 operands supported only. +// -------------------------------------------------------------------------------------- +// BSF / BSR +// -------------------------------------------------------------------------------------- +// 16/32 operands are available. No 8 bit ones, not that any of you cared, I bet. // -// 0xbc [fwd] / 0xbd [rev] -// -template< u16 Opcode > -class xImpl_BitScan +struct xImpl_BitScan { -public: - xImpl_BitScan() {} + // 0xbc [fwd] / 0xbd [rev] + u16 Opcode; - __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( Opcode, to, from ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } - __forceinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { xOpWrite0F( Opcode, to, sibsrc ); } - __forceinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { xOpWrite0F( 0x66, Opcode, to, sibsrc ); } + void operator()( const xRegister32& to, const xRegister32& from ) const; + void operator()( const xRegister16& to, const xRegister16& from ) const; + void operator()( const xRegister16or32& to, const ModSibBase& sibsrc ) const; }; -////////////////////////////////////////////////////////////////////////////////////////// +// -------------------------------------------------------------------------------------- +// xImpl_Group8 +// -------------------------------------------------------------------------------------- // Bit Test Instructions - Valid on 16/32 bit instructions only. 
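[Editor's note] For the TEST and bit-scan forms above: TEST reg,reg is 84/85, TEST r/m,imm is F6/F7 with a zero ModRM reg field (or the short A8/A9 form when the destination is the accumulator), and BSF/BSR are simply 0F BC / 0F BD with an optional 0x66 prefix for 16-bit operands. A sketch of the TEST-immediate opcode choice:

#include <cstdint>

// Primary opcode for "test dest, imm". The accumulator (AL/AX/EAX) gets the
// short A8/A9 form with no ModRM byte; everything else is F6/F7 /0.
static uint8_t TestImmOpcode(bool is8BitOp, bool destIsAccumulator)
{
    if (destIsAccumulator) return is8BitOp ? 0xA8 : 0xA9;
    return is8BitOp ? 0xF6 : 0xF7;     // ModRM reg field = 0
}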
// -template< G8Type InstType > -class xImpl_Group8 +struct xImpl_Group8 { - static const uint RegFormOp = 0xa3 | (InstType << 3); -public: - __forceinline void operator()( const xRegister32& bitbase, const xRegister32& bitoffset ) const { xOpWrite0F( RegFormOp, bitbase, bitoffset ); } - __forceinline void operator()( const xRegister16& bitbase, const xRegister16& bitoffset ) const { xOpWrite0F( 0x66, RegFormOp, bitbase, bitoffset ); } - __forceinline void operator()( const ModSibBase& bitbase, const xRegister32& bitoffset ) const { xOpWrite0F( RegFormOp, bitoffset, bitbase ); } - __forceinline void operator()( const ModSibBase& bitbase, const xRegister16& bitoffset ) const { xOpWrite0F( 0x66, RegFormOp, bitoffset, bitbase ); } + G8Type InstType; - __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase, bitoffset ); } - __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase, bitoffset ); } - __forceinline void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase, bitoffset ); } - __forceinline void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase, bitoffset ); } + void operator()( const xRegister32& bitbase, const xRegister32& bitoffset ) const; + void operator()( const xRegister16& bitbase, const xRegister16& bitoffset ) const; + void operator()( const xRegister16or32& bitbase, u8 bitoffset ) const; - xImpl_Group8() {} + void operator()( const ModSibBase& bitbase, const xRegister16or32& bitoffset ) const; + void operator()( const ModSib32& bitbase, u8 bitoffset ) const; + void operator()( const ModSib16& bitbase, u8 bitoffset ) const; }; + +} // End namespace x86Emitter + diff --git a/common/include/x86emitter/implement/xchg.h b/common/include/x86emitter/implement/xchg.h index 3a09b889fb..56069e7ebd 100644 --- a/common/include/x86emitter/implement/xchg.h +++ b/common/include/x86emitter/implement/xchg.h @@ -17,3 +17,8 @@ // This header file is intended to be the future home of xchg, cmpxchg, xadd, and // other threading-related exchange instructions. + +namespace x86Emitter { + + +} // End namespace x86Emitter diff --git a/common/include/x86emitter/inlines.inl b/common/include/x86emitter/inlines.inl index 8d6e9f1854..a1f5c179e8 100644 --- a/common/include/x86emitter/inlines.inl +++ b/common/include/x86emitter/inlines.inl @@ -14,7 +14,7 @@ */ /* - * ix86 core v0.9.0 + * ix86 core v0.9.1 * * Original Authors (v0.6.2 and prior): * linuzappz @@ -22,7 +22,7 @@ * goldfinger * zerofrog(@gmail.com) * - * Authors of v0.9.0: + * Authors of v0.9.1: * Jake.Stine(@gmail.com) * cottonvibes(@gmail.com) * sudonim(1@gmail.com) @@ -45,52 +45,10 @@ namespace x86Emitter { - extern const char *const x86_regnames_gpr8[8]; - extern const char *const x86_regnames_gpr16[8]; - extern const char *const x86_regnames_gpr32[8]; + // -------------------------------------------------------------------------------------- + // x86Register Method Implementations (inlined!) + // -------------------------------------------------------------------------------------- - extern const char *const x86_regnames_sse[8]; - extern const char *const x86_regnames_mmx[8]; - - ////////////////////////////////////////////////////////////////////////////////////////// - // Diagnostic -- returns a string representation of this register. 
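[Editor's note] For the bit-test group above: with G8Type_BT starting at 4, the register-operand opcode computed as 0xA3 | (InstType << 3) works out to 0F A3 (BT), 0F AB (BTS), 0F B3 (BTR) and 0F BB (BTC), while the immediate-offset forms all share 0F BA with the G8Type value itself as the ModRM reg field (/4../7). Sketch:

#include <cstdint>

enum G8Type { G8Type_BT = 4, G8Type_BTS, G8Type_BTR, G8Type_BTC };

// Second opcode byte (after 0x0F) for the register-offset form,
// e.g. "bts r/m32, r32" -> 0F AB /r.
static uint8_t Group8RegFormOp(G8Type op) { return uint8_t(0xA3 | (op << 3)); }

// The immediate-offset form is always 0F BA, with the operation selected by
// the ModRM reg field: /4 = BT, /5 = BTS, /6 = BTR, /7 = BTC.
static uint8_t Group8ImmRegField(G8Type op) { return uint8_t(op); }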
- // - template< typename T > - const char* xGetRegName( const xRegister& src ) - { - if( src.IsEmpty() ) return "empty"; - - switch( sizeof(T) ) - { - case 1: return x86_regnames_gpr8[ src.Id ]; - case 2: return x86_regnames_gpr16[ src.Id ]; - case 4: return x86_regnames_gpr32[ src.Id ]; - - jNO_DEFAULT - } - - return "oops?"; - } - - template< typename T > - const char* xGetRegName( const xRegisterSIMD& src ) - { - if( src.IsEmpty() ) return "empty"; - - switch( sizeof(T) ) - { - case 8: return x86_regnames_mmx[ src.Id ]; - case 16: return x86_regnames_sse[ src.Id ]; - - jNO_DEFAULT - } - - return "oops?"; - } - - ////////////////////////////////////////////////////////////////////////////////////////// - // x86Register Method Implementations - // __forceinline xAddressInfo xAddressReg::operator+( const xAddressReg& right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); @@ -132,184 +90,12 @@ namespace x86Emitter __forceinline xAddressInfo xAddressReg::operator*( u32 right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); - return xAddressInfo( Empty, *this, right ); + return xAddressInfo( xEmptyReg, *this, right ); } __forceinline xAddressInfo xAddressReg::operator<<( u32 shift ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); - return xAddressInfo( Empty, *this, 1< - xForwardJump::xForwardJump( JccComparisonType cctype ) : - BasePtr( (s8*)xGetPtr() + - ((OperandSize == 1) ? 2 : // j8's are always 2 bytes. - ((cctype==Jcc_Unconditional) ? 5 : 6 )) // j32's are either 5 or 6 bytes - ) - { - pxAssert( cctype != Jcc_Unknown ); - pxAssert( OperandSize == 1 || OperandSize == 4 ); - - if( OperandSize == 1 ) - xWrite8( (cctype == Jcc_Unconditional) ? 0xeb : (0x70 | cctype) ); - else - { - if( cctype == Jcc_Unconditional ) - xWrite8( 0xe9 ); - else - { - xWrite8( 0x0f ); - xWrite8( 0x80 | cctype ); - } - } - - xAdvancePtr( OperandSize ); - } - - // ------------------------------------------------------------------------ - template< typename OperandType > - void xForwardJump::SetTarget() const - { - pxAssert( BasePtr != NULL ); - - sptr displacement = (sptr)xGetPtr() - (sptr)BasePtr; - if( OperandSize == 1 ) - { - if( !is_s8( displacement ) ) - { - pxAssert( false ); -// Don't ask. --arcum42 -#if !defined(__LINUX__) || !defined(DEBUG) - - Console.Error( "Emitter Error: Invalid short jump displacement = 0x%x", (int)displacement ); -#endif - } - BasePtr[-1] = (s8)displacement; - } - else - { - // full displacement, no sanity checks needed :D - ((s32*)BasePtr)[-1] = displacement; - } - } - - // ------------------------------------------------------------------------ - // returns the inverted conditional type for this Jcc condition. Ie, JNS will become JS. - // - static __forceinline JccComparisonType xInvertCond( JccComparisonType src ) - { - pxAssert( src != Jcc_Unknown ); - if( Jcc_Unconditional == src ) return Jcc_Unconditional; - - // x86 conditionals are clever! 
To invert conditional types, just invert the lower bit: - return (JccComparisonType)((int)src ^ 1); + return xAddressInfo( xEmptyReg, *this, 1< @@ -22,7 +22,7 @@ * goldfinger * zerofrog(@gmail.com) * - * Authors of v0.9.0: + * Authors of v0.9.1: * Jake.Stine(@gmail.com) * cottonvibes(@gmail.com) * sudonim(1@gmail.com) @@ -38,16 +38,16 @@ namespace x86Emitter // ------------------------------------------------------------------------ // Group 1 Instruction Class - extern const Internal::xImpl_G1Logic xAND; - extern const Internal::xImpl_G1Logic xOR; - extern const Internal::xImpl_G1Logic xXOR; + extern const xImpl_Group1 xADC; + extern const xImpl_Group1 xSBB; - extern const Internal::xImpl_G1Arith xADD; - extern const Internal::xImpl_G1Arith xSUB; - extern const Internal::xImpl_G1Compare xCMP; + extern const xImpl_G1Logic xAND; + extern const xImpl_G1Logic xOR; + extern const xImpl_G1Logic xXOR; - extern const Internal::xImpl_Group1 xADC; - extern const Internal::xImpl_Group1 xSBB; + extern const xImpl_G1Arith xADD; + extern const xImpl_G1Arith xSUB; + extern const xImpl_G1Compare xCMP; // ------------------------------------------------------------------------ // Group 2 Instruction Class @@ -56,102 +56,66 @@ namespace x86Emitter // zero. This is a safe optimization since any zero-value shift does not affect any // flags. - extern const Internal::MovImplAll xMOV; - extern const Internal::xImpl_Test xTEST; + extern const xImpl_Mov xMOV; + extern const xImpl_Test xTEST; - extern const Internal::Group2ImplAll xROL; - extern const Internal::Group2ImplAll xROR; - extern const Internal::Group2ImplAll xRCL; - extern const Internal::Group2ImplAll xRCR; - extern const Internal::Group2ImplAll xSHL; - extern const Internal::Group2ImplAll xSHR; - extern const Internal::Group2ImplAll xSAR; + extern const xImpl_Group2 xROL, xROR, + xRCL, xRCR, + xSHL, xSHR, + xSAR; // ------------------------------------------------------------------------ // Group 3 Instruction Class - extern const Internal::xImpl_Group3 xNOT; - extern const Internal::xImpl_Group3 xNEG; - extern const Internal::xImpl_Group3 xUMUL; - extern const Internal::xImpl_Group3 xUDIV; - extern const Internal::xImpl_iDiv xDIV; - extern const Internal::xImpl_iMul xMUL; + extern const xImpl_Group3 xNOT, xNEG; + extern const xImpl_Group3 xUMUL, xUDIV; + extern const xImpl_iDiv xDIV; + extern const xImpl_iMul xMUL; - extern const Internal::xImpl_IncDec xINC; - extern const Internal::xImpl_IncDec xDEC; + extern const xImpl_IncDec xINC, xDEC; - extern const Internal::MovExtendImplAll xMOVZX; - extern const Internal::MovExtendImplAll xMOVSX; + extern const xImpl_MovExtend xMOVZX, xMOVSX; - extern const Internal::DwordShiftImplAll xSHLD; - extern const Internal::DwordShiftImplAll xSHRD; + extern const xImpl_DwordShift xSHLD, xSHRD; - extern const Internal::xImpl_Group8 xBT; - extern const Internal::xImpl_Group8 xBTR; - extern const Internal::xImpl_Group8 xBTS; - extern const Internal::xImpl_Group8 xBTC; + extern const xImpl_Group8 xBT; + extern const xImpl_Group8 xBTR; + extern const xImpl_Group8 xBTS; + extern const xImpl_Group8 xBTC; - extern const Internal::xImpl_JmpCall xJMP; - extern const Internal::xImpl_JmpCall xCALL; + extern const xImpl_BitScan xBSF, xBSR; - extern const Internal::xImpl_BitScan<0xbc> xBSF; - extern const Internal::xImpl_BitScan<0xbd> xBSR; + extern const xImpl_JmpCall xJMP, xCALL; // ------------------------------------------------------------------------ - extern const Internal::CMovImplGeneric xCMOV; + extern const xImpl_CMov 
+ xCMOVA, xCMOVAE, + xCMOVB, xCMOVBE, + xCMOVG, xCMOVGE, + xCMOVL, xCMOVLE, - extern const Internal::CMovImplAll xCMOVA; - extern const Internal::CMovImplAll xCMOVAE; - extern const Internal::CMovImplAll xCMOVB; - extern const Internal::CMovImplAll xCMOVBE; - - extern const Internal::CMovImplAll xCMOVG; - extern const Internal::CMovImplAll xCMOVGE; - extern const Internal::CMovImplAll xCMOVL; - extern const Internal::CMovImplAll xCMOVLE; - - extern const Internal::CMovImplAll xCMOVZ; - extern const Internal::CMovImplAll xCMOVE; - extern const Internal::CMovImplAll xCMOVNZ; - extern const Internal::CMovImplAll xCMOVNE; - - extern const Internal::CMovImplAll xCMOVO; - extern const Internal::CMovImplAll xCMOVNO; - extern const Internal::CMovImplAll xCMOVC; - extern const Internal::CMovImplAll xCMOVNC; - - extern const Internal::CMovImplAll xCMOVS; - extern const Internal::CMovImplAll xCMOVNS; - extern const Internal::CMovImplAll xCMOVPE; - extern const Internal::CMovImplAll xCMOVPO; + xCMOVZ, xCMOVE, + xCMOVNZ, xCMOVNE, + xCMOVO, xCMOVNO, + xCMOVC, xCMOVNC, + + xCMOVS, xCMOVNS, + xCMOVPE, xCMOVPO; // ------------------------------------------------------------------------ - extern const Internal::SetImplGeneric xSET; + extern const xImpl_Set + xSETA, xSETAE, + xSETB, xSETBE, + xSETG, xSETGE, + xSETL, xSETLE, - extern const Internal::SetImplAll xSETA; - extern const Internal::SetImplAll xSETAE; - extern const Internal::SetImplAll xSETB; - extern const Internal::SetImplAll xSETBE; + xSETZ, xSETE, + xSETNZ, xSETNE, + xSETO, xSETNO, + xSETC, xSETNC, - extern const Internal::SetImplAll xSETG; - extern const Internal::SetImplAll xSETGE; - extern const Internal::SetImplAll xSETL; - extern const Internal::SetImplAll xSETLE; - - extern const Internal::SetImplAll xSETZ; - extern const Internal::SetImplAll xSETE; - extern const Internal::SetImplAll xSETNZ; - extern const Internal::SetImplAll xSETNE; - - extern const Internal::SetImplAll xSETO; - extern const Internal::SetImplAll xSETNO; - extern const Internal::SetImplAll xSETC; - extern const Internal::SetImplAll xSETNC; - - extern const Internal::SetImplAll xSETS; - extern const Internal::SetImplAll xSETNS; - extern const Internal::SetImplAll xSETPE; - extern const Internal::SetImplAll xSETPO; + xSETS, xSETNS, + xSETPE, xSETPO; ////////////////////////////////////////////////////////////////////////////////////////// // Miscellaneous Instructions @@ -419,10 +383,10 @@ namespace x86Emitter extern const xImplSimd_DestRegSSE xMOVSHDUP; extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ); - extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ); + extern void xINSERTPS( const xRegisterSSE& to, const ModSib32& from, u8 imm8 ); extern void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ); - extern void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ); + extern void xEXTRACTPS( const ModSib32& dest, const xRegisterSSE& from, u8 imm8 ); // ------------------------------------------------------------------------ @@ -431,7 +395,7 @@ namespace x86Emitter extern const xImplSimd_DestRegEither xPOR; extern const xImplSimd_DestRegEither xPXOR; - extern /*const*/ xImplSimd_Shuffle xSHUF; + extern const xImplSimd_Shuffle xSHUF; // ------------------------------------------------------------------------ diff --git a/common/include/x86emitter/internal.h b/common/include/x86emitter/internal.h index 4ea60eafc5..82364ccc50 100644 --- 
a/common/include/x86emitter/internal.h +++ b/common/include/x86emitter/internal.h @@ -17,3 +17,52 @@ #include "x86types.h" #include "instructions.h" + +#define OpWriteSSE( pre, op ) xOpWrite0F( pre, op, to, from ) + +namespace x86Emitter { + +extern void SimdPrefix( u8 prefix, u16 opcode ); +extern void EmitSibMagic( uint regfield, const void* address ); +extern void EmitSibMagic( uint regfield, const ModSibBase& info ); +extern void EmitSibMagic( uint reg1, const xRegisterBase& reg2 ); +extern void EmitSibMagic( const xRegisterBase& reg1, const xRegisterBase& reg2 ); +extern void EmitSibMagic( const xRegisterBase& reg1, const void* src ); +extern void EmitSibMagic( const xRegisterBase& reg1, const ModSibBase& sib ); + +extern void _xMovRtoR( const xRegisterInt& to, const xRegisterInt& from ); +extern void _g1_EmitOp( G1Type InstType, const xRegisterInt& to, const xRegisterInt& from ); + +template< typename T1, typename T2 > __emitinline +void xOpWrite( u8 prefix, u8 opcode, const T1& param1, const T2& param2 ) +{ + if( prefix != 0 ) + xWrite16( (opcode<<8) | prefix ); + else + xWrite8( opcode ); + + EmitSibMagic( param1, param2 ); +} + +template< typename T1, typename T2 > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const T1& param1, const T2& param2 ) +{ + SimdPrefix( prefix, opcode ); + EmitSibMagic( param1, param2 ); +} + +template< typename T1, typename T2 > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const T1& param1, const T2& param2, u8 imm8 ) +{ + xOpWrite0F( prefix, opcode, param1, param2 ); + xWrite8( imm8 ); +} + +template< typename T1, typename T2 > __emitinline +void xOpWrite0F( u16 opcode, const T1& param1, const T2& param2 ) { xOpWrite0F( 0, opcode, param1, param2 ); } + +template< typename T1, typename T2 > __emitinline +void xOpWrite0F( u16 opcode, const T1& param1, const T2& param2, u8 imm8 ) { xOpWrite0F( 0, opcode, param1, param2, imm8 ); } + +} + diff --git a/common/include/x86emitter/x86emitter.h b/common/include/x86emitter/x86emitter.h index fb72cd05aa..eb5002d9f0 100644 --- a/common/include/x86emitter/x86emitter.h +++ b/common/include/x86emitter/x86emitter.h @@ -14,7 +14,7 @@ */ /* - * ix86 public header v0.9.0 + * ix86 public header v0.9.1 * * Original Authors (v0.6.2 and prior): * linuzappz @@ -22,7 +22,7 @@ * goldfinger * zerofrog(@gmail.com) * - * Authors of v0.9.0: + * Authors of v0.9.1: * Jake.Stine(@gmail.com) * cottonvibes(@gmail.com) * sudonim(1@gmail.com) diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h index ecfc7cb9d3..c373eb7f0b 100644 --- a/common/include/x86emitter/x86types.h +++ b/common/include/x86emitter/x86types.h @@ -15,8 +15,6 @@ #pragma once -#include "Utilities/Dependencies.h" - // Register counts for x86/32 mode: static const uint iREGCNT_XMM = 8; static const uint iREGCNT_GPR = 8; @@ -30,7 +28,6 @@ enum XMMSSEType }; extern __threadlocal u8 *x86Ptr; - extern __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM]; namespace x86Emitter @@ -41,27 +38,31 @@ extern void xWrite16( u16 val ); extern void xWrite32( u32 val ); extern void xWrite64( u64 val ); +extern const char *const x86_regnames_gpr8[8]; +extern const char *const x86_regnames_gpr16[8]; +extern const char *const x86_regnames_gpr32[8]; + +extern const char *const x86_regnames_sse[8]; +extern const char *const x86_regnames_mmx[8]; + +extern const char* xGetRegName( int regid, int operandSize ); + //------------------------------------------------------------------ // templated version of is_s8 is required, so that u16's get 
correct sign extension treatment. template< typename T > static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } -template< typename T > -__forceinline void xWrite( T val ) -{ - *(T*)x86Ptr = val; - x86Ptr += sizeof(T); -} +template< typename T > __forceinline void xWrite( T val ); - -////////////////////////////////////////////////////////////////////////////////////////// -// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] -// +// -------------------------------------------------------------------------------------- +// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] +// -------------------------------------------------------------------------------------- // This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions // do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache // and some marginal speed gains as a result. (it's possible someday in the future the per- // formance of the two instructions could change, so this constant is provided to restore MOVDQA // use easily at a later time, if needed). +// #define ALWAYS_USE_MOVAPS #ifdef ALWAYS_USE_MOVAPS @@ -70,9 +71,9 @@ __forceinline void xWrite( T val ) static const bool AlwaysUseMovaps = false; #endif -///////////////////////////////////////////////////////////////////////////////////////////// -// __emitline - preprocessors definition -// +// -------------------------------------------------------------------------------------- +// __emitline - preprocessors definition +// -------------------------------------------------------------------------------------- // This is configured to inline emitter functions appropriately for release builds, and // disables some of the more aggressive inlines for dev builds (which can be helpful when // debugging). Additionally, I've set up the inlining to be as practical and intelligent @@ -102,465 +103,7 @@ __forceinline void xWrite( T val ) Mod_Direct, // direct reg/reg operation }; - static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) - static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) - - class xAddressInfo; - class ModSibBase; - - extern void xSetPtr( void* ptr ); - extern u8* xGetPtr(); - extern void xAlignPtr( uint bytes ); - extern void xAdvancePtr( uint bytes ); - - ////////////////////////////////////////////////////////////////////////////////////////// - // xRegisterBase - // Unless templating some fancy stuff, use the friendly xRegister32/16/8 typedefs instead. - // - template< typename OperandType > - class xRegisterBase - { - public: - static const uint OperandSize = sizeof( OperandType ); - static const xRegisterBase Empty; // defined as an empty/unused value (-1) - - int Id; - - xRegisterBase(): Id( -1 ) {} - explicit xRegisterBase( int regId ) : Id( regId ) { pxAssert( Id >= -2 && Id < 8 ); } // allow -2 for user-custom identifiers. - - bool IsEmpty() const { return Id < 0; } - - // Returns true if the register is a valid accumulator: Eax, Ax, Al. - bool IsAccumulator() const { return Id == 0; } - - // returns true if the register is a valid MMX or XMM register. 
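// Why is_s8 is templated rather than taking a fixed s32: the comparison happens in the
// immediate's own signedness before any widening, so an unsigned 16-bit value is not
// mistaken for a small negative byte.  A minimal sketch, assuming the emitter's s16/u16/u32
// typedefs, pxAssert, and the is_s8 template defined just above:

static void is_s8_examples()
{
    pxAssert(  is_s8( (s16)-2 ) );       // -2 fits in a signed byte
    pxAssert( !is_s8( (u16)0xfffe ) );   // 65534 does not, even though its low byte is 0xfe
    pxAssert(  is_s8( (u32)0x7f ) );     // 127 fits either way
}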
- bool IsSIMD() const { return OperandSize == 8 || OperandSize == 16; } - - bool operator==( const xRegisterBase& src ) const { return (Id == src.Id); } - bool operator!=( const xRegisterBase& src ) const { return (Id != src.Id); } - }; - - ////////////////////////////////////////////////////////////////////////////////////////// - // - template< typename OperandType > - class xRegister : public xRegisterBase - { - public: - xRegister(): xRegisterBase() {} - xRegister( const xRegisterBase& src ) : xRegisterBase( src ) {} - explicit xRegister( int regId ) : xRegisterBase( regId ) {} - - bool operator==( const xRegister& src ) const { return this->Id == src.Id; } - bool operator!=( const xRegister& src ) const { return this->Id != src.Id; } - - xRegister& operator=( const xRegisterBase& src ) - { - this->Id = src.Id; - return *this; - } - }; - - ////////////////////////////////////////////////////////////////////////////////////////// - // - template< typename OperandType > - class xRegisterSIMD : public xRegisterBase - { - public: - static const xRegisterSIMD Empty; // defined as an empty/unused value (-1) - - public: - xRegisterSIMD(): xRegisterBase() {} - explicit xRegisterSIMD( const xRegisterBase& src ) : xRegisterBase( src ) {} - explicit xRegisterSIMD( int regId ) : xRegisterBase( regId ) {} - - bool operator==( const xRegisterSIMD& src ) const { return this->Id == src.Id; } - bool operator!=( const xRegisterSIMD& src ) const { return this->Id != src.Id; } - - xRegisterSIMD& operator=( const xRegisterBase& src ) - { - this->Id = src.Id; - return *this; - } - }; - - // ------------------------------------------------------------------------ - // Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which - // means it finds undeclared variables when MSVC does not (Since MSVC compiles templates - // when they are actually used). In practice this sucks since it means we have to move all - // our variable and function prototypes from a nicely/neatly unified location to being strewn - // all about the the templated code in haphazard fashion. Yay.. >_< - // - - typedef xRegisterSIMD xRegisterSSE; - typedef xRegisterSIMD xRegisterMMX; - typedef xRegister xRegister32; - typedef xRegister xRegister16; - typedef xRegister xRegister8; - - class xRegisterCL : public xRegister8 - { - public: - xRegisterCL(): xRegister8( 1 ) {} - }; - - ////////////////////////////////////////////////////////////////////////////////////////// - // Use 32 bit registers as out index register (for ModSib memory address calculations) - // Only xAddressReg provides operators for constructing xAddressInfo types. - // - class xAddressReg : public xRegister32 - { - public: - static const xAddressReg Empty; // defined as an empty/unused value (-1) - - public: - inline xAddressReg(): xRegister32() {} - inline xAddressReg( const xAddressReg& src ) : xRegister32( src.Id ) {} - inline xAddressReg( const xRegister32& src ) : xRegister32( src ) {} - explicit inline xAddressReg( int regId ) : xRegister32( regId ) {} - - // Returns true if the register is the stack pointer: ESP. 
- bool IsStackPointer() const { return Id == 4; } - - inline xAddressInfo operator+( const xAddressReg& right ) const; - inline xAddressInfo operator+( const xAddressInfo& right ) const; - inline xAddressInfo operator+( s32 right ) const; - inline xAddressInfo operator+( const void* right ) const; - - inline xAddressInfo operator-( s32 right ) const; - inline xAddressInfo operator-( const void* right ) const; - - inline xAddressInfo operator*( u32 factor ) const; - inline xAddressInfo operator<<( u32 shift ) const; - - inline xAddressReg& operator=( const xRegister32& src ) - { - Id = src.Id; - return *this; - } - }; - - ////////////////////////////////////////////////////////////////////////////////////////// - // - class xAddressInfo - { - public: - xAddressReg Base; // base register (no scale) - xAddressReg Index; // index reg gets multiplied by the scale - int Factor; // scale applied to the index register, in factor form (not a shift!) - s32 Displacement; // address displacement - - public: - __forceinline xAddressInfo( const xAddressReg& base, const xAddressReg& index, int factor=1, s32 displacement=0 ) : - Base( base ), - Index( index ), - Factor( factor ), - Displacement( displacement ) - { - } - - __forceinline explicit xAddressInfo( const xAddressReg& index, int displacement=0 ) : - Base(), - Index( index ), - Factor(0), - Displacement( displacement ) - { - } - - __forceinline explicit xAddressInfo( s32 displacement ) : - Base(), - Index(), - Factor(0), - Displacement( displacement ) - { - } - - static xAddressInfo FromIndexReg( const xAddressReg& index, int scale=0, s32 displacement=0 ); - - public: - bool IsByteSizeDisp() const { return is_s8( Displacement ); } - - __forceinline xAddressInfo& Add( s32 imm ) - { - Displacement += imm; - return *this; - } - - inline xAddressInfo& Add( const xAddressReg& src ); - inline xAddressInfo& Add( const xAddressInfo& src ); - - __forceinline xAddressInfo operator+( const xAddressReg& right ) const { return xAddressInfo( *this ).Add( right ); } - __forceinline xAddressInfo operator+( const xAddressInfo& right ) const { return xAddressInfo( *this ).Add( right ); } - __forceinline xAddressInfo operator+( s32 imm ) const { return xAddressInfo( *this ).Add( imm ); } - __forceinline xAddressInfo operator-( s32 imm ) const { return xAddressInfo( *this ).Add( -imm ); } - __forceinline xAddressInfo operator+( const void* addr ) const { return xAddressInfo( *this ).Add( (uptr)addr ); } - }; - - extern const xRegisterSSE - xmm0, xmm1, xmm2, xmm3, - xmm4, xmm5, xmm6, xmm7; - - extern const xRegisterMMX - mm0, mm1, mm2, mm3, - mm4, mm5, mm6, mm7; - - extern const xAddressReg - eax, ebx, ecx, edx, - esi, edi, ebp, esp; - - extern const xRegister16 - ax, bx, cx, dx, - si, di, bp, sp; - - extern const xRegister8 - al, dl, bl, - ah, ch, dh, bh; - - extern const xRegisterCL cl; // I'm special! - - ////////////////////////////////////////////////////////////////////////////////////////// - // xImmReg - used to represent an immediate value which can also be optimized to a register. - // Note that the immediate value represented by this structure is *always* legal. The - // register assignment is an optional optimization which can be implemented in cases where - // an immediate is used enough times to merit allocating it to a register. - // - // Note: not all instructions support this operand type (yet). You can always implement it - // manually by checking the status of IsReg() and generating the xOP conditionally. 
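// A minimal sketch of the "manual" pattern described above: emit the register form when
// the immediate has been allocated to a register, otherwise emit the immediate form.
// xLoadConst is a hypothetical caller-side helper (not part of the emitter), written
// against the xRegType-templated form of xImmReg and assuming xMOV's usual reg,reg and
// reg,imm overloads:

static void xLoadConst( const xRegister32& dest, const xImmReg<xRegister32>& src )
{
    if( src.IsReg() )
        xMOV( dest, src.GetReg() );   // constant already lives in a register
    else
        xMOV( dest, src.GetImm() );   // fall back to an immediate mov
}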
- // - template< typename OperandType > - class xImmReg - { - xRegister m_reg; - int m_imm; - - public: - xImmReg() : - m_reg(), m_imm( 0 ) { } - - xImmReg( int imm, const xRegister& reg=xRegister() ) : - m_reg( reg ), m_imm( imm ) { } - - const xRegister& GetReg() const { return m_reg; } - const int GetImm() const { return m_imm; } - bool IsReg() const { return !m_reg.IsEmpty(); } - }; - - ////////////////////////////////////////////////////////////////////////////////////////// - // ModSib - Internal low-level representation of the ModRM/SIB information. - // - // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means - // that the Base, Index, Scale, and Displacement values are all in the correct arrange- - // ments, and it serves as a type-safe layer between the xRegister's operators (which - // generate xAddressInfo types) and the emitter's ModSib instruction forms. Without this, - // the xRegister would pass as a ModSib type implicitly, and that would cause ambiguity - // on a number of instructions. - // - // End users should always use xAddressInfo instead. - // - class ModSibBase - { - public: - xAddressReg Base; // base register (no scale) - xAddressReg Index; // index reg gets multiplied by the scale - uint Scale; // scale applied to the index register, in scale/shift form - s32 Displacement; // offset applied to the Base/Index registers. - - public: - explicit inline ModSibBase( const xAddressInfo& src ); - explicit inline ModSibBase( s32 disp ); - inline ModSibBase( xAddressReg base, xAddressReg index, int scale=0, s32 displacement=0 ); - inline ModSibBase( const void* target ); - - inline bool IsByteSizeDisp() const { return is_s8( Displacement ); } - - inline ModSibBase& Add( s32 imm ) - { - Displacement += imm; - return *this; - } - - __forceinline ModSibBase operator+( const s32 imm ) const { return ModSibBase( *this ).Add( imm ); } - __forceinline ModSibBase operator-( const s32 imm ) const { return ModSibBase( *this ).Add( -imm ); } - - protected: - void Reduce(); - }; - - ////////////////////////////////////////////////////////////////////////////////////////// - // Strictly-typed version of ModSibBase, which is used to apply operand size information - // to ImmToMem operations. 
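// Where the strict sizing matters in practice: an "[indirect],imm" operation has no
// register operand to infer a width from, so the pointer expression itself must carry it.
// A short sketch; g_flag is a hypothetical global used only for illustration:

extern u8 g_flag;

static void imm_to_mem_example()
{
    xADD( eax, ptr32[ebp + 8] );   // width comes from eax; plain ptr[] would work too
    xADD( ptr8[&g_flag], 1 );      // no register operand, so ptr8[] supplies the width
}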
- // - template< typename OperandType > - class ModSibStrict : public ModSibBase - { - public: - static const uint OperandSize = sizeof( OperandType ); - - __forceinline explicit ModSibStrict( const ModSibBase& src ) : ModSibBase( src ) {} - __forceinline explicit ModSibStrict( const xAddressInfo& src ) : ModSibBase( src ) {} - __forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {} - __forceinline ModSibStrict( const OperandType* target ) : ModSibBase( target ) {} - __forceinline ModSibStrict( xAddressReg base, xAddressReg index, int scale=0, s32 displacement=0 ) : - ModSibBase( base, index, scale, displacement ) {} - - __forceinline ModSibStrict& Add( s32 imm ) - { - Displacement += imm; - return *this; - } - - __forceinline ModSibStrict operator+( const s32 imm ) const { return ModSibStrict( *this ).Add( imm ); } - __forceinline ModSibStrict operator-( const s32 imm ) const { return ModSibStrict( *this ).Add( -imm ); } - - bool operator==( const ModSibStrict& src ) const - { - return - ( Base == src.Base ) && ( Index == src.Index ) && - ( Scale == src.Scale ) && ( Displacement == src.Displacement ); - } - - bool operator!=( const ModSibStrict& src ) const - { - return !operator==( src ); - } - }; - - typedef ModSibStrict ModSib8; - typedef ModSibStrict ModSib16; - typedef ModSibStrict ModSib32; - typedef ModSibStrict ModSib64; - typedef ModSibStrict ModSib128; - - ////////////////////////////////////////////////////////////////////////////////////////// - // xAddressIndexerBase - This is a static class which provisions our ptr[] syntax. - // - struct xAddressIndexerBase - { - // passthrough instruction, allows ModSib to pass silently through ptr translation - // without doing anything and without compiler error. - const ModSibBase& operator[]( const ModSibBase& src ) const { return src; } - - __forceinline ModSibBase operator[]( xAddressReg src ) const - { - return ModSibBase( src, xAddressReg::Empty ); - } - - __forceinline ModSibBase operator[]( const xAddressInfo& src ) const - { - return ModSibBase( src ); - } - - __forceinline ModSibBase operator[]( uptr src ) const - { - return ModSibBase( src ); - } - - __forceinline ModSibBase operator[]( const void* src ) const - { - return ModSibBase( (uptr)src ); - } - - xAddressIndexerBase() {} // appease the GCC gods - }; - - ////////////////////////////////////////////////////////////////////////////////////////// - // Explicit version of ptr[], in the form of ptr32[], ptr16[], etc. which allows - // specification of the operand size for ImmToMem operations. - // - template< typename OperandType > - struct xAddressIndexer - { - static const uint OperandSize = sizeof( OperandType ); - - // passthrough instruction, allows ModSib to pass silently through ptr translation - // without doing anything and without compiler error. 
- const ModSibStrict& operator[]( const ModSibStrict& src ) const { return src; } - - __forceinline ModSibStrict operator[]( xAddressReg src ) const - { - return ModSibStrict( src, xAddressReg::Empty ); - } - - __forceinline ModSibStrict operator[]( const xAddressInfo& src ) const - { - return ModSibStrict( src ); - } - - __forceinline ModSibStrict operator[]( uptr src ) const - { - return ModSibStrict( src ); - } - - __forceinline ModSibStrict operator[]( const void* src ) const - { - return ModSibStrict( (uptr)src ); - } - - xAddressIndexer() {} // GCC initialization dummy - }; - - // ptr[] - use this form for instructions which can resolve the address operand size from - // the other register operand sizes. - extern const xAddressIndexerBase ptr; - extern const xAddressIndexer ptr128; - extern const xAddressIndexer ptr64; - extern const xAddressIndexer ptr32; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms - extern const xAddressIndexer ptr16; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms - extern const xAddressIndexer ptr8; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms - - ////////////////////////////////////////////////////////////////////////////////////////// - // - // [TODO] - make SSE version of thise, perhaps? - // - template< typename OperandType > - class xDirectOrIndirect - { - xRegister m_RegDirect; - ModSibStrict m_MemIndirect; - - public: - xDirectOrIndirect() : - m_RegDirect(), m_MemIndirect( 0 ) {} - - xDirectOrIndirect( const xRegister& srcreg ) : - m_RegDirect( srcreg ), m_MemIndirect( 0 ) {} - - xDirectOrIndirect( const ModSibBase& srcmem ) : - m_RegDirect(), m_MemIndirect( srcmem ) {} - - xDirectOrIndirect( const ModSibStrict& srcmem ) : - m_RegDirect(), m_MemIndirect( srcmem ) {} - - const xRegister& GetReg() const { return m_RegDirect; } - const ModSibStrict& GetMem() const { return m_MemIndirect; } - bool IsDirect() const { return !m_RegDirect.IsEmpty(); } - bool IsIndirect() const { return m_RegDirect.IsEmpty(); } - - bool operator==( const xDirectOrIndirect& src ) const - { - return IsDirect() ? - (m_RegDirect == src.m_RegDirect) : - (m_MemIndirect == src.m_MemIndirect); - } - - bool operator!=( const xDirectOrIndirect& src ) const - { - return !operator==( src ); - } - - bool operator==( const xRegister& src ) const { return (m_RegDirect == src); } - bool operator!=( const xRegister& src ) const { return (m_RegDirect == src); } - }; - - typedef xImmReg xImmOrReg8; - typedef xImmReg xImmOrReg16; - typedef xImmReg xImmOrReg32; - - typedef xDirectOrIndirect xDirectOrIndirect8; - typedef xDirectOrIndirect xDirectOrIndirect16; - typedef xDirectOrIndirect xDirectOrIndirect32; - - ////////////////////////////////////////////////////////////////////////////////////////// + // ---------------------------------------------------------------------------- // JccComparisonType - enumerated possibilities for inspired code branching! // enum JccComparisonType @@ -592,7 +135,7 @@ __forceinline void xWrite( T val ) // Not supported yet: //E3 cb JECXZ rel8 Jump short if ECX register is 0. - ////////////////////////////////////////////////////////////////////////////////////////// + // ---------------------------------------------------------------------------- // SSE2_ComparisonType - enumerated possibilities for SIMD data comparison! 
// enum SSE2_ComparisonType @@ -607,9 +150,601 @@ __forceinline void xWrite( T val ) SSE2_Ordered }; + static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) + static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) - ////////////////////////////////////////////////////////////////////////////////////////// - // xSmartJump + class xAddressInfo; + class ModSibBase; + + extern void xSetPtr( void* ptr ); + extern u8* xGetPtr(); + extern void xAlignPtr( uint bytes ); + extern void xAdvancePtr( uint bytes ); + + extern JccComparisonType xInvertCond( JccComparisonType src ); + + // -------------------------------------------------------------------------------------- + // OperandSizedObject + // -------------------------------------------------------------------------------------- + class OperandSizedObject + { + public: + virtual uint GetOperandSize() const=0; + + bool Is8BitOp() const { return GetOperandSize() == 1; } + void prefix16() const { if( GetOperandSize() == 2 ) xWrite8( 0x66 ); } + + void xWriteImm( int imm ) const + { + switch( GetOperandSize() ) + { + case 1: xWrite8( imm ); break; + case 2: xWrite16( imm ); break; + case 4: xWrite32( imm ); break; + case 8: xWrite64( imm ); break; + + jNO_DEFAULT + } + } + }; + + // Represents an unused or "empty" register assignment. If encountered by the emitter, this + // will be ignored (in some cases it is disallowed and generates an assertion) + static const int xRegId_Empty = -1; + + // Represents an invalid or uninitialized register. If this is encountered by the emitter it + // will generate an assertion. + static const int xRegId_Invalid = -2; + + // -------------------------------------------------------------------------------------- + // xRegisterBase - type-unsafe x86 register representation. + // -------------------------------------------------------------------------------------- + // Unless doing some fundamental stuff, use the friendly xRegister32/16/8 and xRegisterSSE/MMX + // instead, which are built using this class and provide strict register type safety when + // passed into emitter instructions. + // + class xRegisterBase : public OperandSizedObject + { + public: + int Id; + + xRegisterBase(): Id( xRegId_Invalid ) {} + explicit xRegisterBase( int regId ) : Id( regId ) { pxAssert( (Id >= xRegId_Empty) && (Id < 8) ); } + + bool IsEmpty() const { return Id < 0 ; } + bool IsInvalid() const { return Id == xRegId_Invalid; } + + // Returns true if the register is a valid accumulator: Eax, Ax, Al, XMM0. + bool IsAccumulator() const { return Id == 0; } + + // returns true if the register is a valid MMX or XMM register. + bool IsSIMD() const { return GetOperandSize() == 8 || GetOperandSize() == 16; } + + bool operator==( const xRegisterBase& src ) const { return (Id == src.Id); } + bool operator!=( const xRegisterBase& src ) const { return (Id != src.Id); } + + // Diagnostics -- returns a string representation of this register. Return string + // is a valid non-null string for any Id, valid or invalid. No assertions are generated. 
+ const char* GetName(); + }; + + class xRegisterInt : public xRegisterBase + { + typedef xRegisterBase _parent; + + public: + xRegisterInt() {} + explicit xRegisterInt( const xRegisterBase& src ) : _parent( src ) {} + explicit xRegisterInt( int regId ) : _parent( regId ) { } + + bool IsSIMD() const { return false; } + + bool operator==( const xRegisterInt& src ) const { return Id == src.Id && (GetOperandSize() == src.GetOperandSize()); } + bool operator!=( const xRegisterInt& src ) const { return !operator==(src); } + }; + + // -------------------------------------------------------------------------------------- + // xRegister8/16/32 - Represents a basic 8/16/32 bit GPR on the x86 + // -------------------------------------------------------------------------------------- + class xRegister8 : public xRegisterInt + { + typedef xRegisterInt _parent; + + public: + xRegister8(): _parent() {} + //explicit xRegister8( const xRegisterBase& src ) : _parent( src ) {} + explicit xRegister8( int regId ) : _parent( regId ) {} + + virtual uint GetOperandSize() const { return 1; } + + bool operator==( const xRegister8& src ) const { return Id == src.Id; } + bool operator!=( const xRegister8& src ) const { return Id != src.Id; } + }; + + class xRegister16 : public xRegisterInt + { + typedef xRegisterInt _parent; + + public: + xRegister16(): _parent() {} + //explicit xRegister16( const xRegisterBase& src ) : _parent( src ) {} + explicit xRegister16( int regId ) : _parent( regId ) {} + + virtual uint GetOperandSize() const { return 2; } + + bool operator==( const xRegister16& src ) const { return this->Id == src.Id; } + bool operator!=( const xRegister16& src ) const { return this->Id != src.Id; } + }; + + class xRegister32 : public xRegisterInt + { + typedef xRegisterInt _parent; + + public: + xRegister32(): _parent() {} + //explicit xRegister32( const xRegisterBase& src ) : _parent( src ) {} + explicit xRegister32( int regId ) : _parent( regId ) {} + + virtual uint GetOperandSize() const { return 4; } + + bool operator==( const xRegister32& src ) const { return this->Id == src.Id; } + bool operator!=( const xRegister32& src ) const { return this->Id != src.Id; } + }; + + // -------------------------------------------------------------------------------------- + // xRegisterMMX/SSE - Represents either a 64 bit or 128 bit SIMD register + // -------------------------------------------------------------------------------------- + // This register type is provided to allow legal syntax for instructions that accept either + // an XMM or MMX register as a parameter, but do not allow for a GPR. 
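// How the virtual GetOperandSize() gets used downstream: a single non-templated emit
// routine can service 8, 16, and 32 bit registers by branching on the operand size at
// runtime.  A simplified sketch of that pattern (xEmitAddImm is hypothetical; the real
// Group 1 immediate logic, including the sign-extended 0x83 short form, lives in
// groups.cpp):

static void xEmitAddImm( const xRegisterInt& to, int imm )
{
    to.prefix16();                           // emits 0x66 only for 16-bit operands
    xWrite8( to.Is8BitOp() ? 0x80 : 0x81 );  // 8-bit vs 16/32-bit opcode form
    EmitSibMagic( 0, to );                   // /0 is the ADD regfield of Group 1
    to.xWriteImm( imm );                     // immediate width follows the operand size
}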
+ + class xRegisterMMX : public xRegisterBase + { + typedef xRegisterBase _parent; + + public: + xRegisterMMX(): _parent() {} + xRegisterMMX( const xRegisterBase& src ) : _parent( src ) {} + explicit xRegisterMMX( int regId ) : _parent( regId ) {} + + virtual uint GetOperandSize() const { return 8; } + + bool operator==( const xRegisterMMX& src ) const { return this->Id == src.Id; } + bool operator!=( const xRegisterMMX& src ) const { return this->Id != src.Id; } + }; + + class xRegisterSSE : public xRegisterBase + { + typedef xRegisterBase _parent; + + public: + xRegisterSSE(): _parent() {} + xRegisterSSE( const xRegisterBase& src ) : _parent( src ) {} + explicit xRegisterSSE( int regId ) : _parent( regId ) {} + + virtual uint GetOperandSize() const { return 16; } + + bool operator==( const xRegisterSSE& src ) const { return this->Id == src.Id; } + bool operator!=( const xRegisterSSE& src ) const { return this->Id != src.Id; } + }; + + class xRegisterCL : public xRegister8 + { + public: + xRegisterCL(): xRegister8( 1 ) {} + }; + + // -------------------------------------------------------------------------------------- + // xAddressReg + // -------------------------------------------------------------------------------------- + // Use 32 bit registers as our index registers (for ModSib-style memory address calculations). + // This type is implicitly exchangeable with xRegister32. + // + // Only xAddressReg provides operators for constructing xAddressInfo types. These operators + // could have been added to xRegister32 directly instead, however I think this design makes + // more sense and allows the programmer a little more type protection if needed. + // + class xAddressReg : public xRegister32 + { + public: + xAddressReg(): xRegister32() {} + xAddressReg( const xAddressReg& src ) : xRegister32( src.Id ) {} + xAddressReg( const xRegister32& src ) : xRegister32( src ) {} + explicit xAddressReg( int regId ) : xRegister32( regId ) {} + + // Returns true if the register is the stack pointer: ESP. 
+ bool IsStackPointer() const { return Id == 4; } + + inline xAddressInfo operator+( const xAddressReg& right ) const; + inline xAddressInfo operator+( const xAddressInfo& right ) const; + inline xAddressInfo operator+( s32 right ) const; + inline xAddressInfo operator+( const void* right ) const; + + inline xAddressInfo operator-( s32 right ) const; + inline xAddressInfo operator-( const void* right ) const; + + inline xAddressInfo operator*( u32 factor ) const; + inline xAddressInfo operator<<( u32 shift ) const; + + /*xAddressReg& operator=( const xRegister32& src ) + { + Id = src.Id; + return *this; + }*/ + }; + + class xRegisterEmpty + { + public: + xRegisterEmpty() {} + + operator xRegister8() const + { + return xRegister8( xRegId_Empty ); + } + + operator xRegister16() const + { + return xRegister16( xRegId_Empty ); + } + + operator xRegisterMMX() const + { + return xRegisterMMX( xRegId_Empty ); + } + + operator xRegisterSSE() const + { + return xRegisterSSE( xRegId_Empty ); + } + + operator xAddressReg() const + { + return xAddressReg( xRegId_Empty ); + } + }; + + extern const xRegisterEmpty xEmptyReg; + + // -------------------------------------------------------------------------------------- + // xAddressInfo + // -------------------------------------------------------------------------------------- + class xAddressInfo + { + public: + xAddressReg Base; // base register (no scale) + xAddressReg Index; // index reg gets multiplied by the scale + int Factor; // scale applied to the index register, in factor form (not a shift!) + s32 Displacement; // address displacement + + public: + __forceinline xAddressInfo( const xAddressReg& base, const xAddressReg& index, int factor=1, s32 displacement=0 ) : + Base( base ), + Index( index ), + Factor( factor ), + Displacement( displacement ) + { + pxAssertMsg( base.Id != xRegId_Invalid, "Uninitialized x86 register." ); + pxAssertMsg( index.Id != xRegId_Invalid, "Uninitialized x86 register." ); + } + + __forceinline explicit xAddressInfo( const xAddressReg& index, int displacement=0 ) : + Base( xEmptyReg ), + Index( index ), + Factor(0), + Displacement( displacement ) + { + pxAssertMsg( index.Id != xRegId_Invalid, "Uninitialized x86 register." 
); + } + + __forceinline explicit xAddressInfo( s32 displacement=0 ) : + Base( xEmptyReg ), + Index( xEmptyReg ), + Factor(0), + Displacement( displacement ) + { + } + + static xAddressInfo FromIndexReg( const xAddressReg& index, int scale=0, s32 displacement=0 ); + + public: + bool IsByteSizeDisp() const { return is_s8( Displacement ); } + + __forceinline xAddressInfo& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + xAddressInfo& Add( const xAddressReg& src ); + xAddressInfo& Add( const xAddressInfo& src ); + + __forceinline xAddressInfo operator+( const xAddressReg& right ) const { return xAddressInfo( *this ).Add( right ); } + __forceinline xAddressInfo operator+( const xAddressInfo& right ) const { return xAddressInfo( *this ).Add( right ); } + __forceinline xAddressInfo operator+( s32 imm ) const { return xAddressInfo( *this ).Add( imm ); } + __forceinline xAddressInfo operator-( s32 imm ) const { return xAddressInfo( *this ).Add( -imm ); } + __forceinline xAddressInfo operator+( const void* addr ) const { return xAddressInfo( *this ).Add( (uptr)addr ); } + }; + + extern const xRegisterSSE + xmm0, xmm1, xmm2, xmm3, + xmm4, xmm5, xmm6, xmm7; + + extern const xRegisterMMX + mm0, mm1, mm2, mm3, + mm4, mm5, mm6, mm7; + + extern const xAddressReg + eax, ebx, ecx, edx, + esi, edi, ebp, esp; + + extern const xRegister16 + ax, bx, cx, dx, + si, di, bp, sp; + + extern const xRegister8 + al, dl, bl, + ah, ch, dh, bh; + + extern const xRegisterCL cl; // I'm special! + + // -------------------------------------------------------------------------------------- + // xImmReg< typename xRegType > + // -------------------------------------------------------------------------------------- + // Used to represent an immediate value which can also be optimized to a register. Note + // that the immediate value represented by this structure is *always* legal. The register + // assignment is an optional optimization which can be implemented in cases where an + // immediate is used enough times to merit allocating it to a register. + // + // Note: not all instructions support this operand type (yet). You can always implement it + // manually by checking the status of IsReg() and generating the xOP conditionally. + // + template< typename xRegType > + class xImmReg + { + xRegType m_reg; + int m_imm; + + public: + xImmReg() : + m_reg(), m_imm( 0 ) { } + + xImmReg( int imm, const xRegType& reg = xEmptyReg ) : + m_reg( reg ), m_imm( imm ) { } + + const xRegType& GetReg() const { return m_reg; } + const int GetImm() const { return m_imm; } + bool IsReg() const { return !m_reg.IsEmpty(); } + }; + + // -------------------------------------------------------------------------------------- + // ModSib - Internal low-level representation of the ModRM/SIB information. + // -------------------------------------------------------------------------------------- + // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means + // that the Base, Index, Scale, and Displacement values are all in the correct arrange- + // ments, and it serves as a type-safe layer between the xRegister's operators (which + // generate xAddressInfo types) and the emitter's ModSib instruction forms. Without this, + // the xRegister would pass as a ModSib type implicitly, and that would cause ambiguity + // on a number of instructions. + // + // End users should always use xAddressInfo instead. 
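// A minimal sketch of the end-user view described above: the register operators build an
// xAddressInfo, and the ptr[] / ptr32[] indexers reduce it to a ModSib form accepted by
// the instruction overloads.  addressing_example is illustrative only, and it assumes
// xMOV's usual reg,[mem] overload:

static void addressing_example()
{
    // [ebx + ecx*4 + 0x10], with the operand width inferred from eax:
    xMOV( eax, ptr[ebx + ecx*4 + 0x10] );

    // The same address built explicitly.  Factor is a plain multiplier here;
    // ModSibBase::Reduce() converts it to the scale/shift form needed for the SIB byte.
    xAddressInfo addr( ebx, ecx, 4, 0x10 );
    xMOV( eax, ptr32[addr] );
}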
+ // + class ModSibBase : public OperandSizedObject + { + public: + xAddressReg Base; // base register (no scale) + xAddressReg Index; // index reg gets multiplied by the scale + uint Scale; // scale applied to the index register, in scale/shift form + s32 Displacement; // offset applied to the Base/Index registers. + + public: + explicit ModSibBase( const xAddressInfo& src ) : + Base( src.Base ), Index( src.Index ), Scale( src.Factor ), + Displacement( src.Displacement ) + { + Reduce(); + } + + ModSibBase( xAddressReg base, xAddressReg index, int scale=0, s32 displacement=0 ) : + Base( base ), Index( index ), Scale( scale ), + Displacement( displacement ) + { + Reduce(); + } + + explicit ModSibBase( s32 disp ) : + Base(), Index(), Scale(0), + Displacement( disp ) + { + // no reduction necessary :D + } + + ModSibBase( const void* target ) : + Base(), Index(), Scale(0), + Displacement( (s32)target ) + { + // no reduction necessary :D + } + + virtual uint GetOperandSize() const { pxFail( "Invalid operation on ModSibBase" ); return 0; } + bool IsByteSizeDisp() const { return is_s8( Displacement ); } + + ModSibBase& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + __forceinline ModSibBase operator+( const s32 imm ) const { return ModSibBase( *this ).Add( imm ); } + __forceinline ModSibBase operator-( const s32 imm ) const { return ModSibBase( *this ).Add( -imm ); } + + protected: + void Reduce(); + }; + + // -------------------------------------------------------------------------------------- + // ModSib32rrLass - base class 32, 16, and 8 bit operand types + // -------------------------------------------------------------------------------------- + class ModSib32orLess : public ModSibBase + { + typedef ModSibBase _parent; + + protected: + //explicit ModSib32orLess( const ModSibBase& src ) : _parent( src ) {} + explicit ModSib32orLess( const xAddressInfo& src ) : _parent( src ) {} + explicit ModSib32orLess( s32 disp ) : _parent( disp ) {} + ModSib32orLess( const void* target ) : _parent( target ) {} + ModSib32orLess( xAddressReg base, xAddressReg index, int scale=0, s32 displacement=0 ) : + _parent( base, index, scale, displacement ) {} + }; + + // -------------------------------------------------------------------------------------- + // ModSib8/16/32/64/128 + // -------------------------------------------------------------------------------------- + // Strictly-typed version of ModSibBase, which is used to apply operand size information + // to operations that do not involve an implicit operand size via register (such as + // imm,mem or mem,imm) + // +#define DECLARE_CLASS_ModSibBits( bits, baseClass ) \ + class ModSib##bits : public baseClass \ + { \ + typedef baseClass _parent; \ + public: \ + explicit ModSib##bits( const xAddressInfo& src ) : _parent( src ) {} \ + explicit ModSib##bits( s32 disp ) : _parent( disp ) {} \ + ModSib##bits( const u##bits* target ) : _parent( target ) {} \ + ModSib##bits( xAddressReg base, xAddressReg index, int scale=0, s32 displacement=0 ) : \ + _parent( base, index, scale, displacement ) {} \ + \ + virtual uint GetOperandSize() const { return bits / 8; } \ + \ + __forceinline ModSib##bits& Add( s32 imm ) \ + { \ + Displacement += imm; \ + return *this; \ + } \ + \ + __forceinline ModSib##bits operator+( const s32 imm ) const { return ModSib##bits( *this ).Add( imm ); } \ + __forceinline ModSib##bits operator-( const s32 imm ) const { return ModSib##bits( *this ).Add( -imm ); } \ + \ + bool operator==( const ModSib##bits& src ) const \ + { \ 
+ return \ + ( Base == src.Base ) && ( Index == src.Index ) && \ + ( Scale == src.Scale ) && ( Displacement == src.Displacement ); \ + } \ + \ + bool operator!=( const ModSib##bits& src ) const \ + { \ + return !operator==( src ); \ + } \ + } + + DECLARE_CLASS_ModSibBits( 8, ModSib32orLess ); + DECLARE_CLASS_ModSibBits( 16, ModSib32orLess ); + DECLARE_CLASS_ModSibBits( 32, ModSib32orLess ); + DECLARE_CLASS_ModSibBits( 64, ModSibBase ); + DECLARE_CLASS_ModSibBits( 128, ModSibBase ); + + // -------------------------------------------------------------------------------------- + // xAddressIndexer + // -------------------------------------------------------------------------------------- + // This is a type-translation "interface class" which provisions our ptr[] syntax. + // xAddressReg types go in, and ModSibBase derived types come out. + // + template< typename xModSibType > + struct xAddressIndexer + { + // passthrough instruction, allows ModSib to pass silently through ptr translation + // without doing anything and without compiler error. + const xModSibType& operator[]( const xModSibType& src ) const { return src; } + + xModSibType operator[]( xAddressReg src ) const + { + return xModSibType( src, xEmptyReg ); + } + + xModSibType operator[]( const xAddressInfo& src ) const + { + return xModSibType( src ); + } + + xModSibType operator[]( uptr src ) const + { + return xModSibType( src ); + } + + xModSibType operator[]( const void* src ) const + { + return xModSibType( (uptr)src ); + } + + xAddressIndexer() {} // GCC initialization dummy + }; + + // ptr[] - use this form for instructions which can resolve the address operand size from + // the other register operand sizes. + extern const xAddressIndexer ptr; + extern const xAddressIndexer ptr128; + extern const xAddressIndexer ptr64; + extern const xAddressIndexer ptr32; + extern const xAddressIndexer ptr16; + extern const xAddressIndexer ptr8; + + // -------------------------------------------------------------------------------------- + // xDirectOrIndirect + // -------------------------------------------------------------------------------------- + // This utility class can represent either a direct (register) or indirect (memory address) + // source or destination operand. When supplied to an emitted instruction, the direct form + // is favored *if* it is not Empty (xEmptyReg). Otherwise the indirect form is used. + // +#if 0 + template< typename xRegType, typename xSibType > + class xDirectOrIndirect + { + xRegType m_RegDirect; + xSibType m_MemIndirect; + + public: + xDirectOrIndirect() : + m_RegDirect(), m_MemIndirect( 0 ) {} + + xDirectOrIndirect( const xRegType& srcreg ) : + m_RegDirect( srcreg ), m_MemIndirect( 0 ) {} + + explicit xDirectOrIndirect( const xSibType& srcmem ) : + m_RegDirect(), m_MemIndirect( srcmem ) {} + + const xRegType& GetReg() const { return m_RegDirect; } + const xSibType& GetMem() const { return m_MemIndirect; } + bool IsDirect() const { return !m_RegDirect.IsEmpty(); } + bool IsIndirect() const { return m_RegDirect.IsEmpty(); } + + bool operator==( const xDirectOrIndirect& src ) const + { + return IsDirect() ? 
+ (m_RegDirect == src.m_RegDirect) : + (m_MemIndirect == src.m_MemIndirect); + } + + bool operator!=( const xDirectOrIndirect& src ) const + { + return !operator==( src ); + } + + bool operator==( const xRegType& src ) const { return (m_RegDirect == src); } + bool operator!=( const xRegType& src ) const { return (m_RegDirect != src); } + }; + + typedef xDirectOrIndirect xDirectOrIndirect8; + typedef xDirectOrIndirect xDirectOrIndirect16; + typedef xDirectOrIndirect xDirectOrIndirect32; + typedef xDirectOrIndirect xDirectOrIndirect64; + typedef xDirectOrIndirect xDirectOrIndirect128; +#endif + + // -------------------------------------------------------------------------------------- + // xSmartJump + // -------------------------------------------------------------------------------------- // This class provides an interface for generating forward-based j8's or j32's "smartly" // as per the measured displacement distance. If the displacement is a valid s8, then // a j8 is inserted, else a j32. @@ -672,47 +807,46 @@ __forceinline void xWrite( T val ) void SetTarget(); }; - ////////////////////////////////////////////////////////////////////////////////////////// - // xForwardJump + // -------------------------------------------------------------------------------------- + // xForwardJump + // -------------------------------------------------------------------------------------- // Primary use of this class is through the various xForwardJA8/xForwardJLE32/etc. helpers // defined later in this header. :) // + + class xForwardJumpBase + { + public: + // pointer to base of the instruction *Following* the jump. The jump address will be + // relative to this address. + s8* BasePtr; + + public: + xForwardJumpBase( uint opsize, JccComparisonType cctype ); + + protected: + void _setTarget( uint opsize ) const; + }; + template< typename OperandType > - class xForwardJump + class xForwardJump : public xForwardJumpBase { public: static const uint OperandSize = sizeof( OperandType ); - // pointer to base of the instruction *Following* the jump. The jump address will be - // relative to this address. - s8* const BasePtr; - // The jump instruction is emitted at the point of object construction. The conditional // type must be valid (Jcc_Unknown generates an assertion). - inline xForwardJump( JccComparisonType cctype = Jcc_Unconditional ); + xForwardJump( JccComparisonType cctype = Jcc_Unconditional ) : xForwardJumpBase( OperandSize, cctype ) { } // Sets the jump target by writing back the current x86Ptr to the jump instruction. // This method can be called multiple times, re-writing the jump instruction's target // in each case. (the the last call is the one that takes effect). - inline void SetTarget() const; + void SetTarget() const + { + _setTarget( OperandSize ); + } }; - ////////////////////////////////////////////////////////////////////////////////////////// - // - namespace Internal - { - #include "implement/helpers.h" - #include "implement/simd_templated_helpers.h" - #include "implement/group1.h" - #include "implement/group2.h" - #include "implement/group3.h" - #include "implement/movs.h" // cmov and movsx/zx - #include "implement/dwshift.h" // doubleword shifts! 
- #include "implement/incdec.h" - #include "implement/test.h" - #include "implement/jmpcall.h" - } - static __forceinline xAddressInfo operator+( const void* addr, const xAddressReg& reg ) { return xAddressInfo( reg, (sptr)addr ); @@ -734,10 +868,21 @@ __forceinline void xWrite( T val ) } } +#include "implement/helpers.h" + #include "implement/simd_helpers.h" #include "implement/simd_moremovs.h" #include "implement/simd_arithmetic.h" #include "implement/simd_comparisons.h" #include "implement/simd_shufflepack.h" +#include "implement/group1.h" +#include "implement/group2.h" +#include "implement/group3.h" +#include "implement/movs.h" // cmov and movsx/zx +#include "implement/dwshift.h" // doubleword shifts! +#include "implement/incdec.h" +#include "implement/test.h" +#include "implement/jmpcall.h" + #include "inlines.inl" diff --git a/common/src/x86emitter/groups.cpp b/common/src/x86emitter/groups.cpp new file mode 100644 index 0000000000..eab6cd9639 --- /dev/null +++ b/common/src/x86emitter/groups.cpp @@ -0,0 +1,287 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +/* + * ix86 core v0.9.1 + * + * Original Authors (v0.6.2 and prior): + * linuzappz + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.1: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) + */ + +#include "PrecompiledHeader.h" +#include "internal.h" +#include "implement/helpers.h" + +namespace x86Emitter { + +// ===================================================================================================== +// Group 1 Instructions - ADD, SUB, ADC, etc. +// ===================================================================================================== + +// Note on "[Indirect],Imm" forms : use int as the source operand since it's "reasonably inert" from a +// compiler perspective. (using uint tends to make the compiler try and fail to match signed immediates +// with one of the other overloads). +static void _g1_IndirectImm( G1Type InstType, const ModSib32orLess& sibdest, int imm ) +{ + if( sibdest.Is8BitOp() ) + { + xWrite8( 0x80 ); + EmitSibMagic( InstType, sibdest ); + xWrite( imm ); + } + else + { + sibdest.prefix16(); + xWrite8( is_s8( imm ) ? 0x83 : 0x81 ); + EmitSibMagic( InstType, sibdest ); + if( is_s8( imm ) ) + xWrite( imm ); + else + sibdest.xWriteImm( imm ); + } +} + +void _g1_EmitOp( G1Type InstType, const xRegisterInt& to, const xRegisterInt& from ) +{ + pxAssert( to.GetOperandSize() == from.GetOperandSize() ); + to.prefix16(); + xWrite8( (to.Is8BitOp() ? 0 : 1) | (InstType<<3) ); + EmitSibMagic( from, to ); +} + +static void _g1_EmitOp( G1Type InstType, const ModSibBase& sibdest, const xRegisterInt& from ) +{ + from.prefix16(); + xWrite8( (from.Is8BitOp() ? 
0 : 1) | (InstType<<3) ); + EmitSibMagic( from, sibdest ); +} + +static void _g1_EmitOp( G1Type InstType, const xRegisterInt& to, const ModSibBase& sibsrc ) +{ + to.prefix16(); + xWrite8( (to.Is8BitOp() ? 2 : 3) | (InstType<<3) ); + EmitSibMagic( to, sibsrc ); +} + +static void _g1_EmitOp( G1Type InstType, const xRegisterInt& to, int imm ) +{ + to.prefix16(); + if( !to.Is8BitOp() && is_s8( imm ) ) + { + xWrite8( 0x83 ); + EmitSibMagic( InstType, to ); + xWrite( imm ); + } + else + { + if( to.IsAccumulator() ) + xWrite8( (to.Is8BitOp() ? 4 : 5) | (InstType<<3) ); + else + { + xWrite8( to.Is8BitOp() ? 0x80 : 0x81 ); + EmitSibMagic( InstType, to ); + } + to.xWriteImm( imm ); + } +} + +#define ImplementGroup1( g1type, insttype ) \ + void g1type::operator()( const xRegister8& to, const xRegister8& from ) const { _g1_EmitOp( insttype, to, from ); } \ + void g1type::operator()( const xRegister16& to, const xRegister16& from ) const { _g1_EmitOp( insttype, to, from ); } \ + void g1type::operator()( const xRegister32& to, const xRegister32& from ) const { _g1_EmitOp( insttype, to, from ); } \ + void g1type::operator()( const ModSibBase& to, const xRegisterInt& from ) const { _g1_EmitOp( insttype, to, from ); } \ + void g1type::operator()( const xRegisterInt& to, const ModSibBase& from ) const { _g1_EmitOp( insttype, to, from ); } \ + void g1type::operator()( const xRegisterInt& to, int imm ) const { _g1_EmitOp( insttype, to, imm ); } \ + void g1type::operator()( const ModSib32orLess& sibdest, int imm ) const { _g1_IndirectImm( insttype, sibdest, imm ); } + +ImplementGroup1( xImpl_Group1, InstType ) +ImplementGroup1( xImpl_G1Logic, InstType ) +ImplementGroup1( xImpl_G1Arith, InstType ) +ImplementGroup1( xImpl_G1Compare, G1Type_CMP ) + +const xImpl_G1Logic xAND = { G1Type_AND, { 0x00, 0x54 }, { 0x66, 0x54 } }; +const xImpl_G1Logic xOR = { G1Type_OR, { 0x00, 0x56 }, { 0x66, 0x56 } }; +const xImpl_G1Logic xXOR = { G1Type_XOR, { 0x00, 0x57 }, { 0x66, 0x57 } }; + +const xImpl_G1Arith xADD = { G1Type_ADD, { 0x00, 0x58 }, { 0x66, 0x58 }, { 0xf3, 0x58 }, { 0xf2, 0x58 } }; +const xImpl_G1Arith xSUB = { G1Type_SUB, { 0x00, 0x5c }, { 0x66, 0x5c }, { 0xf3, 0x5c }, { 0xf2, 0x5c } }; +const xImpl_G1Compare xCMP = { { 0x00, 0xc2 }, { 0x66, 0xc2 }, { 0xf3, 0xc2 }, { 0xf2, 0xc2 } }; + +const xImpl_Group1 xADC = { G1Type_ADC }; +const xImpl_Group1 xSBB = { G1Type_SBB }; + +// ===================================================================================================== +// Group 2 Instructions - SHR, SHL, etc. +// ===================================================================================================== + +void xImpl_Group2::operator()( const xRegisterInt& to, const xRegisterCL& /* from */ ) const +{ + to.prefix16(); + xWrite8( to.Is8BitOp() ? 0xd2 : 0xd3 ); + EmitSibMagic( InstType, to ); +} + +void xImpl_Group2::operator()(const xRegisterInt& to, u8 imm ) const +{ + if( imm == 0 ) return; + + to.prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + xWrite8( to.Is8BitOp() ? 0xd0 : 0xd1 ); + EmitSibMagic( InstType, to ); + } + else + { + xWrite8( to.Is8BitOp() ? 0xc0 : 0xc1 ); + EmitSibMagic( InstType, to ); + xWrite8( imm ); + } +} + +void xImpl_Group2::operator()( const ModSib32orLess& sibdest, const xRegisterCL& /* from */ ) const +{ + sibdest.prefix16(); + xWrite8( sibdest.Is8BitOp() ? 
0xd2 : 0xd3 ); + EmitSibMagic( InstType, sibdest ); +} + +void xImpl_Group2::operator()( const ModSib32orLess& sibdest, u8 imm ) const +{ + if( imm == 0 ) return; + + sibdest.prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + xWrite8( sibdest.Is8BitOp() ? 0xd0 : 0xd1 ); + EmitSibMagic( InstType, sibdest ); + } + else + { + xWrite8( sibdest.Is8BitOp() ? 0xc0 : 0xc1 ); + EmitSibMagic( InstType, sibdest ); + xWrite8( imm ); + } +} + +const xImpl_Group2 xROL = { G2Type_ROL }; +const xImpl_Group2 xROR = { G2Type_ROR }; +const xImpl_Group2 xRCL = { G2Type_RCL }; +const xImpl_Group2 xRCR = { G2Type_RCR }; +const xImpl_Group2 xSHL = { G2Type_SHL }; +const xImpl_Group2 xSHR = { G2Type_SHR }; +const xImpl_Group2 xSAR = { G2Type_SAR }; + + +// ===================================================================================================== +// Group 3 Instructions - NOT, NEG, MUL, DIV +// ===================================================================================================== + +static void _g3_EmitOp( G3Type InstType, const xRegisterInt& from ) +{ + from.prefix16(); + xWrite8(from.Is8BitOp() ? 0xf6 : 0xf7 ); + EmitSibMagic( InstType, from ); +} + +static void _g3_EmitOp( G3Type InstType, const ModSib32orLess& from ) +{ + from.prefix16(); + xWrite8( from.Is8BitOp() ? 0xf6 : 0xf7 ); + EmitSibMagic( InstType, from ); +} + +void xImpl_Group3::operator()( const xRegisterInt& from ) const { _g3_EmitOp( InstType, from ); } +void xImpl_Group3::operator()( const ModSib32orLess& from ) const { _g3_EmitOp( InstType, from ); } + +void xImpl_iDiv::operator()( const xRegisterInt& from ) const { _g3_EmitOp( G3Type_iDIV, from ); } +void xImpl_iDiv::operator()( const ModSib32orLess& from ) const { _g3_EmitOp( G3Type_iDIV, from ); } + +template< typename SrcType > +static void _imul_ImmStyle( const xRegisterInt& param1, const SrcType& param2, int imm ) +{ + // for iMul OpSize is allowed to be 16 or 32 bit only. + const int OpSize = param1.GetOperandSize(); + + pxAssert( OpSize == param2.GetOperandSize() ); + pxAssert( OpSize > 1 ); + + xOpWrite0F( (OpSize == 2) ? 0x66 : 0, is_s8( imm ) ? 
0x6b : 0x69, param1, param2 ); + + if( is_s8( imm ) ) + xWrite8( (u8)imm ); + else + param1.xWriteImm( imm ); +} + +void xImpl_iMul::operator()( const xRegisterInt& from ) const { _g3_EmitOp( G3Type_iMUL, from ); } +void xImpl_iMul::operator()( const ModSib32orLess& from ) const { _g3_EmitOp( G3Type_iMUL, from ); } + +void xImpl_iMul::operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( 0xaf, to, from ); } +void xImpl_iMul::operator()( const xRegister32& to, const ModSibBase& src ) const { xOpWrite0F( 0xaf, to, src ); } +void xImpl_iMul::operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, 0xaf, to, from ); } +void xImpl_iMul::operator()( const xRegister16& to, const ModSibBase& src ) const { xOpWrite0F( 0x66, 0xaf, to, src ); } + +void xImpl_iMul::operator()( const xRegister32& to, const xRegister32& from, s32 imm ) const{ _imul_ImmStyle( to, from, imm ); } +void xImpl_iMul::operator()( const xRegister32& to, const ModSibBase& from, s32 imm ) const { _imul_ImmStyle( to, from, imm ); } +void xImpl_iMul::operator()( const xRegister16& to, const xRegister16& from, s16 imm ) const{ _imul_ImmStyle( to, from, imm ); } +void xImpl_iMul::operator()( const xRegister16& to, const ModSibBase& from, s16 imm ) const { _imul_ImmStyle( to, from, imm ); } + +const xImpl_Group3 xNOT = { G3Type_NOT }; +const xImpl_Group3 xNEG = { G3Type_NEG }; +const xImpl_Group3 xUMUL = { G3Type_MUL }; +const xImpl_Group3 xUDIV = { G3Type_DIV }; + +const xImpl_iDiv xDIV = { { 0x00, 0x5e }, { 0x66, 0x5e }, { 0xf3, 0x5e }, { 0xf2, 0x5e } }; +const xImpl_iMul xMUL = { { 0x00, 0x59 }, { 0x66, 0x59 }, { 0xf3, 0x59 }, { 0xf2, 0x59 } }; + +// ===================================================================================================== +// Group 8 Instructions +// ===================================================================================================== + +void xImpl_Group8::operator()( const xRegister32& bitbase, const xRegister32& bitoffset ) const { xOpWrite0F( 0xa3 | (InstType << 3), bitbase, bitoffset ); } +void xImpl_Group8::operator()( const xRegister16& bitbase, const xRegister16& bitoffset ) const { xOpWrite0F( 0x66, 0xa3 | (InstType << 3), bitbase, bitoffset ); } +void xImpl_Group8::operator()( const ModSib32& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase, bitoffset ); } +void xImpl_Group8::operator()( const ModSib16& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase, bitoffset ); } + +void xImpl_Group8::operator()( const xRegister16or32& bitbase, u8 bitoffset ) const +{ + xOpWrite0F( (bitbase->GetOperandSize() == 2) ? 0x66 : 0x00, 0xba, InstType, bitbase, bitoffset ); +} + +void xImpl_Group8::operator()( const ModSibBase& bitbase, const xRegister16or32& bitoffset ) const +{ + xOpWrite0F( (bitoffset->GetOperandSize() == 2) ? 
0x66 : 0x00, 0xa3 | (InstType << 3), bitoffset, bitbase ); +} + +const xImpl_Group8 xBT = { G8Type_BT }; +const xImpl_Group8 xBTR = { G8Type_BTR }; +const xImpl_Group8 xBTS = { G8Type_BTS }; +const xImpl_Group8 xBTC = { G8Type_BTC }; + + + +} // End namespace x86Emitter + diff --git a/common/src/x86emitter/jmp.cpp b/common/src/x86emitter/jmp.cpp index 62c32e9dbc..f4bd46622b 100644 --- a/common/src/x86emitter/jmp.cpp +++ b/common/src/x86emitter/jmp.cpp @@ -14,7 +14,7 @@ */ /* - * ix86 core v0.9.0 + * ix86 core v0.9.1 * * Original Authors (v0.6.2 and prior): * linuzappz @@ -22,7 +22,7 @@ * goldfinger * zerofrog(@gmail.com) * - * Authors of v0.9.0: + * Authors of v0.9.1: * Jake.Stine(@gmail.com) * cottonvibes(@gmail.com) * sudonim(1@gmail.com) @@ -33,12 +33,15 @@ namespace x86Emitter { -using namespace Internal; +void xImpl_JmpCall::operator()( const xRegister32& absreg ) const { xOpWrite( 0x00, 0xff, isJmp ? 4 : 2, absreg ); } +void xImpl_JmpCall::operator()( const ModSib32& src ) const { xOpWrite( 0x00, 0xff, isJmp ? 4 : 2, src ); } -const xImpl_JmpCall xJMP; -const xImpl_JmpCall xCALL; +void xImpl_JmpCall::operator()( const xRegister16& absreg ) const { xOpWrite( 0x66, 0xff, isJmp ? 4 : 2, absreg ); } +void xImpl_JmpCall::operator()( const ModSib16& src ) const { xOpWrite( 0x66, 0xff, isJmp ? 4 : 2, src ); } + +const xImpl_JmpCall xJMP = { true }; +const xImpl_JmpCall xCALL = { false }; -// ------------------------------------------------------------------------ void xSmartJump::SetTarget() { u8* target = xGetPtr(); @@ -104,7 +107,7 @@ __emitinline s8* xJcc8( JccComparisonType comparison, s8 displacement ) // slideForward - used internally by xSmartJump to indicate that the jump target is going // to slide forward in the event of an 8 bit displacement. // -__emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward ) +__emitinline void xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward ) { // Calculate the potential j8 displacement first, assuming an instruction length of 2: sptr displacement8 = (sptr)target - (sptr)(xGetPtr() + 2); @@ -134,5 +137,56 @@ __emitinline void xJcc( JccComparisonType comparison, const void* target ) xJccKnownTarget( comparison, target, false ); } +xForwardJumpBase::xForwardJumpBase( uint opsize, JccComparisonType cctype ) +{ + pxAssert( opsize == 1 || opsize == 4 ); + pxAssertDev( cctype != Jcc_Unknown, "Invalid ForwardJump conditional type." ); + + BasePtr = (s8*)xGetPtr() + + ((opsize == 1) ? 2 : // j8's are always 2 bytes. + ((cctype==Jcc_Unconditional) ? 5 : 6 )); // j32's are either 5 or 6 bytes + + if( opsize == 1 ) + xWrite8( (cctype == Jcc_Unconditional) ? 0xeb : (0x70 | cctype) ); + else + { + if( cctype == Jcc_Unconditional ) + xWrite8( 0xe9 ); + else + { + xWrite8( 0x0f ); + xWrite8( 0x80 | cctype ); + } + } + + xAdvancePtr( opsize ); } +void xForwardJumpBase::_setTarget( uint opsize ) const +{ + pxAssertDev( BasePtr != NULL, "" ); + + sptr displacement = (sptr)xGetPtr() - (sptr)BasePtr; + if( opsize == 1 ) + { + pxAssertDev( is_s8( displacement ), "Emitter Error: Invalid short jump displacement." ); + BasePtr[-1] = (s8)displacement; + } + else + { + // full displacement, no sanity checks needed :D + ((s32*)BasePtr)[-1] = displacement; + } +} + +// returns the inverted conditional type for this Jcc condition. Ie, JNS will become JS. 
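+// (Illustrative example: Jcc_Zero and Jcc_NotZero correspond to the hardware condition
+// codes 0x4 and 0x5, so xInvertCond( Jcc_Zero ) flips bit 0 and yields Jcc_NotZero;
+// every other Jcc_* pair inverts the same way.)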
+__forceinline JccComparisonType xInvertCond( JccComparisonType src ) +{ + pxAssert( src != Jcc_Unknown ); + if( Jcc_Unconditional == src ) return Jcc_Unconditional; + + // x86 conditionals are clever! To invert conditional types, just invert the lower bit: + return (JccComparisonType)((int)src ^ 1); +} + +} diff --git a/common/src/x86emitter/legacy.cpp b/common/src/x86emitter/legacy.cpp index 6ba6c856c9..546e0f4b23 100644 --- a/common/src/x86emitter/legacy.cpp +++ b/common/src/x86emitter/legacy.cpp @@ -43,7 +43,7 @@ emitterT void ModRM( uint mod, uint reg, uint rm ) emitterT void SibSB( uint ss, uint index, uint base ) { - // Note: Following ASSUMEs are for legacy support only. + // Note: Following asserts are for legacy support only. // The new emitter performs these sanity checks during operand construction, so these // assertions can probably be removed once all legacy emitter code has been removed. pxAssert( ss < 4 ); @@ -54,60 +54,64 @@ emitterT void SibSB( uint ss, uint index, uint base ) using namespace x86Emitter; -template< typename ImmType > -static __forceinline xRegister _reghlp( x86IntRegType src ) +static ModSib32 _mhlp32( x86IntRegType to ) { - return xRegister( src ); + return ptr32[xAddressReg( to )]; } -static __forceinline ModSibBase _mrmhlp( x86IntRegType src ) +static ModSib32 _mhlp32( x86IntRegType to1, x86IntRegType to2 ) { - return ptr[_reghlp(src)]; + return ptr32[xAddressReg( to1 ) + xAddressReg( to2 )]; } -template< typename ImmType > -static __forceinline ModSibStrict _mhlp( x86IntRegType src ) +static ModSib16 _mhlp16( x86IntRegType to ) { - return ModSibStrict( xAddressReg::Empty, xAddressReg(src) ); + return ptr16[xAddressReg( to )]; } -template< typename ImmType > -static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86IntRegType src2 ) +static ModSib16 _mhlp16( x86IntRegType to1, x86IntRegType to2 ) { - return ModSibStrict( xAddressReg(src2), xAddressReg(src1) ); + return ptr16[xAddressReg( to1 ) + xAddressReg( to2 )]; +} + +static ModSib8 _mhlp8( x86IntRegType to ) +{ + return ptr8[xAddressReg( to )]; +} + +static ModSib8 _mhlp8( x86IntRegType to1, x86IntRegType to2 ) +{ + return ptr8[xAddressReg( to1 ) + xAddressReg( to2 )]; } ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_LEGACY_HELPER( cod, bits ) \ - emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x##cod( _reghlp(to), _reghlp(from) ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x##cod( _reghlp(to), imm ); } \ - emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x##cod( _reghlp(to), (void*)from ); } \ - emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x##cod( (void*)to, _reghlp(from) ); } \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x##cod( xRegister##bits(to), xRegister##bits(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x##cod( xRegister##bits(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x##cod( xRegister##bits(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x##cod( (void*)to, xRegister##bits(from) ); } \ emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x##cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x##cod( _mhlp(to) + offset, imm ); } \ - emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x##cod( 
_reghlp(to), _mhlp(from) + offset ); } \ - emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x##cod( _mhlp(to) + offset, _reghlp(from) ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x##cod( _mhlp##bits(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x##cod( xRegister##bits(to), _mhlp##bits(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x##cod( _mhlp##bits(to) + offset, xRegister##bits(from) ); } \ emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ - { x##cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ + { x##cod( _mhlp##bits(to1,to2) + offset, xRegister##bits(from) ); } \ emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \ - { x##cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } + { x##cod( xRegister##bits(to), _mhlp##bits(from1,from2) + offset ); } #define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \ - emitterT void cod##bits##CLtoR( x86IntRegType to ) { x##cod( _reghlp(to), cl ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { x##cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##CLtoR( x86IntRegType to ) { x##cod( xRegister##bits(to), cl ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { x##cod( xRegister##bits(to), imm ); } \ emitterT void cod##bits##CLtoM( uptr to ) { x##cod( ptr##bits[to], cl ); } \ emitterT void cod##bits##ItoM( uptr to, u8 imm ) { x##cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { x##cod( _mhlp(to) + offset, imm ); } \ - emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { x##cod( _mhlp(to) + offset, cl ); } + emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { x##cod( _mhlp##bits(to) + offset, imm ); } \ + emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { x##cod( _mhlp##bits(to) + offset, cl ); } #define DEFINE_LEGACY_ONEREG_HELPER( cod, bits ) \ - emitterT void cod##bits##R( x86IntRegType to ) { x##cod( _reghlp(to) ); } \ + emitterT void cod##bits##R( x86IntRegType to ) { x##cod( xRegister##bits(to) ); } \ emitterT void cod##bits##M( uptr to ) { x##cod( ptr##bits[to] ); } \ - emitterT void cod##bits##Rm( x86IntRegType to, uptr offset ) { x##cod( _mhlp(to) + offset ); } - -//emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) -// { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } + emitterT void cod##bits##Rm( x86IntRegType to, uptr offset ) { x##cod( _mhlp##bits(to) + offset ); } #define DEFINE_OPCODE_LEGACY( cod ) \ DEFINE_LEGACY_HELPER( cod, 32 ) \ @@ -198,7 +202,7 @@ emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2 emitterT void AND32I8toR( x86IntRegType to, s8 from ) { - xAND( _reghlp(to), from ); + xAND( xRegister32(to), from ); } emitterT void AND32I8toM( uptr to, s8 from ) diff --git a/common/src/x86emitter/movs.cpp b/common/src/x86emitter/movs.cpp new file mode 100644 index 0000000000..66f95b00d6 --- /dev/null +++ b/common/src/x86emitter/movs.cpp @@ -0,0 +1,268 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * 
ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +/* + * ix86 core v0.9.1 + * + * Original Authors (v0.6.2 and prior): + * linuzappz + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.1: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) + */ + +#include "PrecompiledHeader.h" +#include "internal.h" +#include "implement/helpers.h" + +namespace x86Emitter { + +void _xMovRtoR( const xRegisterInt& to, const xRegisterInt& from ) +{ + pxAssert( to.GetOperandSize() == from.GetOperandSize() ); + + if( to == from ) return; // ignore redundant MOVs. + + from.prefix16(); + xWrite8( from.Is8BitOp() ? 0x88 : 0x89 ); + EmitSibMagic( from, to ); +} + +void xImpl_Mov::operator()( const xRegister8& to, const xRegister8& from ) const +{ + if( to == from ) return; // ignore redundant MOVs. + xWrite8( 0x88 ); + EmitSibMagic( from, to ); +} + +void xImpl_Mov::operator()( const xRegister16& to, const xRegister16& from ) const +{ + if( to == from ) return; // ignore redundant MOVs. + from.prefix16(); + xWrite8( 0x89 ); + EmitSibMagic( from, to ); +} + +void xImpl_Mov::operator()( const xRegister32& to, const xRegister32& from ) const +{ + if( to == from ) return; // ignore redundant MOVs. + xWrite8( 0x89 ); + EmitSibMagic( from, to ); +} + +void xImpl_Mov::operator()( const ModSibBase& dest, const xRegisterInt& from ) const +{ + from.prefix16(); + + // mov eax has a special from when writing directly to a DISP32 address + // (sans any register index/base registers). + + if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) + { + xWrite8( from.Is8BitOp() ? 0xa2 : 0xa3 ); + xWrite32( dest.Displacement ); + } + else + { + xWrite8( from.Is8BitOp() ? 0x88 : 0x89 ); + EmitSibMagic( from.Id, dest ); + } +} + +void xImpl_Mov::operator()( const xRegisterInt& to, const ModSibBase& src ) const +{ + to.prefix16(); + + // mov eax has a special from when reading directly from a DISP32 address + // (sans any register index/base registers). + + if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) + { + xWrite8( to.Is8BitOp() ? 0xa0 : 0xa1 ); + xWrite32( src.Displacement ); + } + else + { + xWrite8( to.Is8BitOp() ? 0x8a : 0x8b ); + EmitSibMagic( to, src ); + } +} + +void xImpl_Mov::operator()( const ModSib32orLess& dest, int imm ) const +{ + dest.prefix16(); + xWrite8( dest.Is8BitOp() ? 0xc6 : 0xc7 ); + EmitSibMagic( 0, dest ); + dest.xWriteImm( imm ); +} + +// preserve_flags - set to true to disable optimizations which could alter the state of +// the flags (namely replacing mov reg,0 with xor). +void xImpl_Mov::operator()( const xRegisterInt& to, int imm, bool preserve_flags ) const +{ + if( !preserve_flags && (imm == 0) ) + _g1_EmitOp( G1Type_XOR, to, to ); + else + { + // Note: MOV does not have (reg16/32,imm8) forms. + + to.prefix16(); + xWrite8( (to.Is8BitOp() ? 
0xb0 : 0xb8) | to.Id ); + to.xWriteImm( imm ); + } +} + +const xImpl_Mov xMOV; + +// -------------------------------------------------------------------------------------- +// CMOVcc +// -------------------------------------------------------------------------------------- + +#define ccSane() pxAssertDev( ccType >= 0 && ccType <= 0x0f, "Invalid comparison type specifier." ) + +// Macro useful for trapping unwanted use of EBP. +//#define EbpAssert() pxAssert( to != ebp ) +#define EbpAssert() + + +void xCMOV( JccComparisonType ccType, const xRegister32& to, const xRegister32& from ) { ccSane(); xOpWrite0F( 0x40 | ccType, to, from ); } +void xCMOV( JccComparisonType ccType, const xRegister32& to, const ModSibBase& sibsrc ) { ccSane(); xOpWrite0F( 0x40 | ccType, to, sibsrc ); } +//void xCMOV( JccComparisonType ccType, const xDirectOrIndirect32& to, const xDirectOrIndirect32& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); } // too.. lazy.. to fix. + +void xCMOV( JccComparisonType ccType, const xRegister16& to, const xRegister16& from ) { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, from ); } +void xCMOV( JccComparisonType ccType, const xRegister16& to, const ModSibBase& sibsrc ) { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, sibsrc ); } +//void xCMOV( JccComparisonType ccType, const xDirectOrIndirect16& to, const xDirectOrIndirect16& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); } + +void xSET( JccComparisonType ccType, const xRegister8& to ) { ccSane(); xOpWrite0F( 0x90 | ccType, 0, to ); } +void xSET( JccComparisonType ccType, const ModSib8& dest ) { ccSane(); xOpWrite0F( 0x90 | ccType, 0, dest ); } + +void xImpl_CMov::operator()( const xRegister32& to, const xRegister32& from ) const { ccSane(); xOpWrite0F( 0x40 | ccType, to, from ); } +void xImpl_CMov::operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( 0x40 | ccType, to, sibsrc ); } +void xImpl_CMov::operator()( const xRegister16& to, const xRegister16& from ) const { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, from ); } +void xImpl_CMov::operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, sibsrc ); } + +//void xImpl_CMov::operator()( const xDirectOrIndirect32& to, const xDirectOrIndirect32& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); } +//void xImpl_CMov::operator()( const xDirectOrIndirect16& to, const xDirectOrIndirect16& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); } + +void xImpl_Set::operator()( const xRegister8& to ) const { ccSane(); xOpWrite0F( 0x90 | ccType, 0, to ); } +void xImpl_Set::operator()( const ModSib8& dest ) const { ccSane(); xOpWrite0F( 0x90 | ccType, 0, dest ); } +//void xImpl_Set::operator()( const xDirectOrIndirect8& dest ) const { ccSane(); _DoI_helpermess( *this, dest ); } + +void xImpl_MovExtend::operator()( const xRegister16or32& to, const xRegister8& from ) const +{ + EbpAssert(); + xOpWrite0F( + ( to->GetOperandSize() == 2 ) ? 0x66 : 0, + SignExtend ? 0xbe : 0xb6, + to, from + ); +} + +void xImpl_MovExtend::operator()( const xRegister16or32& to, const ModSib8& sibsrc ) const +{ + EbpAssert(); + xOpWrite0F( + ( to->GetOperandSize() == 2 ) ? 0x66 : 0, + SignExtend ? 0xbe : 0xb6, + to, sibsrc + ); +} + +void xImpl_MovExtend::operator()( const xRegister32& to, const xRegister16& from ) const +{ + EbpAssert(); + xOpWrite0F( SignExtend ? 
0xbf : 0xb7, to, from ); +} + +void xImpl_MovExtend::operator()( const xRegister32& to, const ModSib16& sibsrc ) const +{ + EbpAssert(); + xOpWrite0F( SignExtend ? 0xbf : 0xb7, to, sibsrc ); +} + +#if 0 +void xImpl_MovExtend::operator()( const xRegister32& to, const xDirectOrIndirect16& src ) const +{ + EbpAssert(); + _DoI_helpermess( *this, to, src ); +} + +void xImpl_MovExtend::operator()( const xRegister16or32& to, const xDirectOrIndirect8& src ) const +{ + EbpAssert(); + _DoI_helpermess( *this, to, src ); +} +#endif + +const xImpl_MovExtend xMOVSX = { true }; +const xImpl_MovExtend xMOVZX = { false }; + +const xImpl_CMov xCMOVA = { Jcc_Above }; +const xImpl_CMov xCMOVAE = { Jcc_AboveOrEqual }; +const xImpl_CMov xCMOVB = { Jcc_Below }; +const xImpl_CMov xCMOVBE = { Jcc_BelowOrEqual }; + +const xImpl_CMov xCMOVG = { Jcc_Greater }; +const xImpl_CMov xCMOVGE = { Jcc_GreaterOrEqual }; +const xImpl_CMov xCMOVL = { Jcc_Less }; +const xImpl_CMov xCMOVLE = { Jcc_LessOrEqual }; + +const xImpl_CMov xCMOVZ = { Jcc_Zero }; +const xImpl_CMov xCMOVE = { Jcc_Equal }; +const xImpl_CMov xCMOVNZ = { Jcc_NotZero }; +const xImpl_CMov xCMOVNE = { Jcc_NotEqual }; + +const xImpl_CMov xCMOVO = { Jcc_Overflow }; +const xImpl_CMov xCMOVNO = { Jcc_NotOverflow }; +const xImpl_CMov xCMOVC = { Jcc_Carry }; +const xImpl_CMov xCMOVNC = { Jcc_NotCarry }; + +const xImpl_CMov xCMOVS = { Jcc_Signed }; +const xImpl_CMov xCMOVNS = { Jcc_Unsigned }; +const xImpl_CMov xCMOVPE = { Jcc_ParityEven }; +const xImpl_CMov xCMOVPO = { Jcc_ParityOdd }; + + +const xImpl_Set xSETA = { Jcc_Above }; +const xImpl_Set xSETAE = { Jcc_AboveOrEqual }; +const xImpl_Set xSETB = { Jcc_Below }; +const xImpl_Set xSETBE = { Jcc_BelowOrEqual }; + +const xImpl_Set xSETG = { Jcc_Greater }; +const xImpl_Set xSETGE = { Jcc_GreaterOrEqual }; +const xImpl_Set xSETL = { Jcc_Less }; +const xImpl_Set xSETLE = { Jcc_LessOrEqual }; + +const xImpl_Set xSETZ = { Jcc_Zero }; +const xImpl_Set xSETE = { Jcc_Equal }; +const xImpl_Set xSETNZ = { Jcc_NotZero }; +const xImpl_Set xSETNE = { Jcc_NotEqual }; + +const xImpl_Set xSETO = { Jcc_Overflow }; +const xImpl_Set xSETNO = { Jcc_NotOverflow }; +const xImpl_Set xSETC = { Jcc_Carry }; +const xImpl_Set xSETNC = { Jcc_NotCarry }; + +const xImpl_Set xSETS = { Jcc_Signed }; +const xImpl_Set xSETNS = { Jcc_Unsigned }; +const xImpl_Set xSETPE = { Jcc_ParityEven }; +const xImpl_Set xSETPO = { Jcc_ParityOdd }; + +} // end namespace x86Emitter diff --git a/common/src/x86emitter/simd.cpp b/common/src/x86emitter/simd.cpp index c2b282c027..f695952855 100644 --- a/common/src/x86emitter/simd.cpp +++ b/common/src/x86emitter/simd.cpp @@ -67,8 +67,6 @@ SSE_MXCSR::operator x86Emitter::ModSib32() const namespace x86Emitter { -using namespace Internal; - // ------------------------------------------------------------------------ // SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is // treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4 @@ -76,7 +74,7 @@ using namespace Internal; // Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will // generate an assertion. 
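 // (Example, taken from the xINSERTPS form further below: an opcode value of 0x213a
 // emits 0x0f, then the 0x3a escape byte, then 0x21 -- the actual SSE4.1 opcode.)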
// -__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) +__emitinline void SimdPrefix( u8 prefix, u16 opcode ) { const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); @@ -193,6 +191,9 @@ void xImplSimd_DestRegEither::operator()( const xRegisterSSE& to, const ModSibBa void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteSSE( 0x00, Opcode ); } void xImplSimd_DestRegEither::operator()( const xRegisterMMX& to, const ModSibBase& from ) const { OpWriteSSE( 0x00, Opcode ); } +void xImplSimd_DestSSE_CmpImm::operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); } +void xImplSimd_DestSSE_CmpImm::operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); } + // ===================================================================================================== // SIMD Arithmetic Instructions // ===================================================================================================== @@ -351,16 +352,16 @@ const xImplSimd_Round xROUND = // ===================================================================================================== void xImplSimd_Compare::PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x00, 0xc2, to, from, (u8)CType ); } -void xImplSimd_Compare::PS( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x00, 0xc2, to, from, (u8)CType ); } +void xImplSimd_Compare::PS( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x00, 0xc2, to, from, (u8)CType ); } void xImplSimd_Compare::PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, 0xc2, to, from, (u8)CType ); } -void xImplSimd_Compare::PD( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, 0xc2, to, from, (u8)CType ); } +void xImplSimd_Compare::PD( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, 0xc2, to, from, (u8)CType ); } void xImplSimd_Compare::SS( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0xf3, 0xc2, to, from, (u8)CType ); } -void xImplSimd_Compare::SS( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0xf3, 0xc2, to, from, (u8)CType ); } +void xImplSimd_Compare::SS( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0xf3, 0xc2, to, from, (u8)CType ); } void xImplSimd_Compare::SD( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0xf2, 0xc2, to, from, (u8)CType ); } -void xImplSimd_Compare::SD( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0xf2, 0xc2, to, from, (u8)CType ); } +void xImplSimd_Compare::SD( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0xf2, 0xc2, to, from, (u8)CType ); } const xImplSimd_MinMax xMIN = { @@ -486,7 +487,7 @@ void SimdImpl_PExtract::W( const xRegister32& to, const xRegisterSSE& from, u8 i void SimdImpl_PExtract::W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const { xOpWrite0F( 0xc5, to, from, imm8 ); } void SimdImpl_PExtract::W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); } -/*const*/ xImplSimd_Shuffle xSHUF; +const xImplSimd_Shuffle xSHUF = { }; const xImplSimd_PShuffle xPSHUF = { @@ -600,22 +601,22 @@ void xImplSimd_MoveDQ::operator()( const ModSibBase& to, const 
xRegisterSSE& fro } void xImplSimd_PMove::BW( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase ); } -void xImplSimd_PMove::BW( const xRegisterSSE& to, const ModSibStrict& from ) const { OpWriteSSE( 0x66, OpcodeBase ); } +void xImplSimd_PMove::BW( const xRegisterSSE& to, const ModSib64& from ) const { OpWriteSSE( 0x66, OpcodeBase ); } void xImplSimd_PMove::BD( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x100 ); } -void xImplSimd_PMove::BD( const xRegisterSSE& to, const ModSibStrict& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x100 ); } +void xImplSimd_PMove::BD( const xRegisterSSE& to, const ModSib32& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x100 ); } void xImplSimd_PMove::BQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x200 ); } -void xImplSimd_PMove::BQ( const xRegisterSSE& to, const ModSibStrict& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x200 ); } +void xImplSimd_PMove::BQ( const xRegisterSSE& to, const ModSib16& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x200 ); } void xImplSimd_PMove::WD( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x300 ); } -void xImplSimd_PMove::WD( const xRegisterSSE& to, const ModSibStrict& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x300 ); } +void xImplSimd_PMove::WD( const xRegisterSSE& to, const ModSib64& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x300 ); } void xImplSimd_PMove::WQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x400 ); } -void xImplSimd_PMove::WQ( const xRegisterSSE& to, const ModSibStrict& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x400 ); } +void xImplSimd_PMove::WQ( const xRegisterSSE& to, const ModSib32& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x400 ); } void xImplSimd_PMove::DQ( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x500 ); } -void xImplSimd_PMove::DQ( const xRegisterSSE& to, const ModSibStrict& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x500 ); } +void xImplSimd_PMove::DQ( const xRegisterSSE& to, const ModSib64& from ) const { OpWriteSSE( 0x66, OpcodeBase+0x500 ); } const xImplSimd_MoveSSE xMOVAPS = { 0x00, true }; @@ -794,15 +795,15 @@ __forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u // * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written // with 0.0 if set to 1. // -__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } -__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } +__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } +__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSib32& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } // [SSE-4.1] Extract a single-precision floating-point value from src at an offset // determined by imm8[1-0]*32. The extracted single precision floating-point value // is stored into the low 32-bits of dest (or at a 32-bit memory pointer). 
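 // e.g. xEXTRACTPS( eax, xmm1, 2 ) copies the third packed single (element index 2)
 // of xmm1 into eax.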
// -__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); } -__emitinline void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); } +__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); } +__emitinline void xEXTRACTPS( const ModSib32& dest, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); } // ===================================================================================================== diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp index d4b06ddee5..193230a1e3 100644 --- a/common/src/x86emitter/x86emitter.cpp +++ b/common/src/x86emitter/x86emitter.cpp @@ -14,7 +14,7 @@ */ /* - * ix86 core v0.9.0 + * ix86 core v0.9.1 * * Original Authors (v0.6.2 and prior): * linuzappz @@ -22,7 +22,7 @@ * goldfinger * zerofrog(@gmail.com) * - * Authors of v0.9.0: + * Authors of v0.9.1: * Jake.Stine(@gmail.com) * cottonvibes(@gmail.com) * sudonim(1@gmail.com) @@ -70,6 +70,18 @@ __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; namespace x86Emitter { +template< typename T > __forceinline void xWrite( T val ) +{ + *(T*)x86Ptr = val; + x86Ptr += sizeof(T); +} + +template void xWrite( u8 val ); +template void xWrite( u16 val ); +template void xWrite( u32 val ); +template void xWrite( u64 val ); +template void xWrite( u128 val ); + __forceinline void xWrite8( u8 val ) { xWrite( val ); @@ -90,18 +102,16 @@ __forceinline void xWrite64( u64 val ) xWrite( val ); } -const xAddressIndexerBase ptr; -const xAddressIndexer ptr128; -const xAddressIndexer ptr64; -const xAddressIndexer ptr32; -const xAddressIndexer ptr16; -const xAddressIndexer ptr8; +const xAddressIndexer ptr; +const xAddressIndexer ptr128; +const xAddressIndexer ptr64; +const xAddressIndexer ptr32; +const xAddressIndexer ptr16; +const xAddressIndexer ptr8; // ------------------------------------------------------------------------ -template< typename OperandType > const xRegisterBase xRegisterBase::Empty; - -const xAddressReg xAddressReg::Empty; +const xRegisterEmpty xEmptyReg; const xRegisterSSE xmm0( 0 ), xmm1( 1 ), @@ -165,284 +175,207 @@ const char *const x86_regnames_mmx[8] = "mm4", "mm5", "mm6", "mm7" }; -////////////////////////////////////////////////////////////////////////////////////////// - -namespace Internal +const char* xRegisterBase::GetName() { + if( Id == xRegId_Invalid ) return "invalid"; + if( Id == xRegId_Empty ) return "empty"; + + // bad error? Return a "big" error string. Might break formatting of register tables + // but that's the least of your worries if you see this baby. 
+ if( Id >= 8 || Id <= -3 ) return "!Register index out of range!"; - template< typename T > - const char* xGetRegName( const xRegister& src ) + switch( GetOperandSize() ) { - if( src.IsEmpty() ) return "empty"; - switch( sizeof(T) ) + case 1: return x86_regnames_gpr8[ Id ]; + case 2: return x86_regnames_gpr16[ Id ]; + case 4: return x86_regnames_gpr32[ Id ]; + case 8: return x86_regnames_mmx[ Id ]; + case 16: return x86_regnames_sse[ Id ]; + } + + return "oops?"; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// Performance note: VC++ wants to use byte/word register form for the following +// ModRM/SibSB constructors when we use xWrite, and furthermore unrolls the +// the shift using a series of ADDs for the following results: +// add cl,cl +// add cl,cl +// add cl,cl +// or cl,bl +// add cl,cl +// ... etc. +// +// This is unquestionably bad optimization by Core2 standard, an generates tons of +// register aliases and false dependencies. (although may have been ideal for early- +// brand P4s with a broken barrel shifter?). The workaround is to do our own manual +// x86Ptr access and update using a u32 instead of u8. Thanks to little endianness, +// the same end result is achieved and no false dependencies are generated. The draw- +// back is that it clobbers 3 bytes past the end of the write, which could cause a +// headache for someone who himself is doing some kind of headache-inducing amount of +// recompiler SMC. So we don't do a work-around, and just hope for the compiler to +// stop sucking someday instead. :) +// +// (btw, I know this isn't a critical performance item by any means, but it's +// annoying simply because it *should* be an easy thing to optimize) + +static __forceinline void ModRM( uint mod, uint reg, uint rm ) +{ + xWrite8( (mod << 6) | (reg << 3) | rm ); +} + +static __forceinline void SibSB( u32 ss, u32 index, u32 base ) +{ + xWrite8( (ss << 6) | (index << 3) | base ); +} + +void EmitSibMagic( uint regfield, const void* address ) +{ + ModRM( 0, regfield, ModRm_UseDisp32 ); + xWrite( (s32)address ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// emitter helpers for xmm instruction with prefixes, most of which are using +// the basic opcode format (items inside braces denote optional or conditional +// emission): +// +// [Prefix] / 0x0f / [OpcodePrefix] / Opcode / ModRM+[SibSB] +// +// Prefixes are typically 0x66, 0xf2, or 0xf3. OpcodePrefixes are either 0x38 or +// 0x3a [and other value will result in assertion failue]. +// +__emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib ) +{ + SimdPrefix( prefix, opcode ); + EmitSibMagic( instId, sib ); +} + +__emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data ) +{ + SimdPrefix( prefix, opcode ); + EmitSibMagic( instId, data ); +} + +__emitinline void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib ) +{ + xOpWrite0F( 0, opcode, instId, sib ); +} + + +////////////////////////////////////////////////////////////////////////////////////////// +// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the +// instruction ca be encoded as ModRm alone. +static __forceinline bool NeedsSibMagic( const ModSibBase& info ) +{ + // no registers? no sibs! 
+ // (ModSibBase::Reduce always places a register in Index, and optionally leaves + // Base empty if only register is specified) + if( info.Index.IsEmpty() ) return false; + + // A scaled register needs a SIB + if( info.Scale != 0 ) return true; + + // two registers needs a SIB + if( !info.Base.IsEmpty() ) return true; + + return false; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +void EmitSibMagic( uint regfield, const ModSibBase& info ) +{ + pxAssertDev( regfield < 8, "Invalid x86 register identifier." ); + + int displacement_size = (info.Displacement == 0) ? 0 : + ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); + + if( !NeedsSibMagic( info ) ) + { + // Use ModRm-only encoding, with the rm field holding an index/base register, if + // one has been specified. If neither register is specified then use Disp32 form, + // which is encoded as "EBP w/o displacement" (which is why EBP must always be + // encoded *with* a displacement of 0, if it would otherwise not have one). + + if( info.Index.IsEmpty() ) { - case 1: return x86_regnames_gpr8[ src.Id ]; - case 2: return x86_regnames_gpr16[ src.Id ]; - case 4: return x86_regnames_gpr32[ src.Id ]; - } - } - - ////////////////////////////////////////////////////////////////////////////////////////// - // Performance note: VC++ wants to use byte/word register form for the following - // ModRM/SibSB constructors when we use xWrite, and furthermore unrolls the - // the shift using a series of ADDs for the following results: - // add cl,cl - // add cl,cl - // add cl,cl - // or cl,bl - // add cl,cl - // ... etc. - // - // This is unquestionably bad optimization by Core2 standard, an generates tons of - // register aliases and false dependencies. (although may have been ideal for early- - // brand P4s with a broken barrel shifter?). The workaround is to do our own manual - // x86Ptr access and update using a u32 instead of u8. Thanks to little endianness, - // the same end result is achieved and no false dependencies are generated. The draw- - // back is that it clobbers 3 bytes past the end of the write, which could cause a - // headache for someone who himself is doing some kind of headache-inducing amount of - // recompiler SMC. So we don't do a work-around, and just hope for the compiler to - // stop sucking someday instead. :) - // - // (btw, I know this isn't a critical performance item by any means, but it's - // annoying simply because it *should* be an easy thing to optimize) - - static __forceinline void ModRM( uint mod, uint reg, uint rm ) - { - xWrite8( (mod << 6) | (reg << 3) | rm ); - } - - static __forceinline void SibSB( u32 ss, u32 index, u32 base ) - { - xWrite8( (ss << 6) | (index << 3) | base ); - } - - __forceinline void EmitSibMagic( uint regfield, const void* address ) - { - ModRM( 0, regfield, ModRm_UseDisp32 ); - xWrite( (s32)address ); - } - - ////////////////////////////////////////////////////////////////////////////////////////// - // emitter helpers for xmm instruction with prefixes, most of which are using - // the basic opcode format (items inside braces denote optional or conditional - // emission): - // - // [Prefix] / 0x0f / [OpcodePrefix] / Opcode / ModRM+[SibSB] - // - // Prefixes are typically 0x66, 0xf2, or 0xf3. 
OpcodePrefixes are either 0x38 or - // 0x3a [and other value will result in assertion failue]. - // - __emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib ) - { - SimdPrefix( prefix, opcode ); - EmitSibMagic( instId, sib ); - } - - __emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data ) - { - SimdPrefix( prefix, opcode ); - EmitSibMagic( instId, data ); - } - - __emitinline void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib ) - { - xOpWrite0F( 0, opcode, instId, sib ); - } - - - ////////////////////////////////////////////////////////////////////////////////////////// - // returns TRUE if this instruction requires SIB to be encoded, or FALSE if the - // instruction ca be encoded as ModRm alone. - static __forceinline bool NeedsSibMagic( const ModSibBase& info ) - { - // no registers? no sibs! - // (ModSibBase::Reduce always places a register in Index, and optionally leaves - // Base empty if only register is specified) - if( info.Index.IsEmpty() ) return false; - - // A scaled register needs a SIB - if( info.Scale != 0 ) return true; - - // two registers needs a SIB - if( !info.Base.IsEmpty() ) return true; - - return false; - } - - ////////////////////////////////////////////////////////////////////////////////////////// - // Conditionally generates Sib encoding information! - // - // regfield - register field to be written to the ModRm. This is either a register specifier - // or an opcode extension. In either case, the instruction determines the value for us. - // - __noinline void EmitSibMagic( uint regfield, const ModSibBase& info ) - { - pxAssertDev( regfield < 8, "Invalid x86 register identifier." ); - - int displacement_size = (info.Displacement == 0) ? 0 : - ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); - - if( !NeedsSibMagic( info ) ) - { - // Use ModRm-only encoding, with the rm field holding an index/base register, if - // one has been specified. If neither register is specified then use Disp32 form, - // which is encoded as "EBP w/o displacement" (which is why EBP must always be - // encoded *with* a displacement of 0, if it would otherwise not have one). - - if( info.Index.IsEmpty() ) - { - EmitSibMagic( regfield, (void*)info.Displacement ); - return; - } - else - { - if( info.Index == ebp && displacement_size == 0 ) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - - ModRM( displacement_size, regfield, info.Index.Id ); - } + EmitSibMagic( regfield, (void*)info.Displacement ); + return; } else { - // In order to encode "just" index*scale (and no base), we have to encode - // it as a special [index*scale + displacement] form, which is done by - // specifying EBP as the base register and setting the displacement field - // to zero. (same as ModRm w/o SIB form above, basically, except the - // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). + if( info.Index == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - if( info.Base.IsEmpty() ) - { - ModRM( 0, regfield, ModRm_UseSib ); - SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); - xWrite( info.Displacement ); - return; - } - else - { - if( info.Base == ebp && displacement_size == 0 ) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! 
- - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Scale, info.Index.Id, info.Base.Id ); - } + ModRM( displacement_size, regfield, info.Index.Id ); } + } + else + { + // In order to encode "just" index*scale (and no base), we have to encode + // it as a special [index*scale + displacement] form, which is done by + // specifying EBP as the base register and setting the displacement field + // to zero. (same as ModRm w/o SIB form above, basically, except the + // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). - if( displacement_size != 0 ) + if( info.Base.IsEmpty() ) { - if( displacement_size == 1 ) - xWrite( info.Displacement ); - else - xWrite( info.Displacement ); + ModRM( 0, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); + xWrite( info.Displacement ); + return; } + else + { + if( info.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, info.Base.Id ); + } + } + + if( displacement_size != 0 ) + { + if( displacement_size == 1 ) + xWrite( info.Displacement ); + else + xWrite( info.Displacement ); } } -using namespace Internal; +// Writes a ModRM byte for "Direct" register access forms, which is used for all +// instructions taking a form of [reg,reg]. +void EmitSibMagic( uint reg1, const xRegisterBase& reg2 ) +{ + xWrite8( (Mod_Direct << 6) | (reg1 << 3) | reg2.Id ); +} -const MovImplAll xMOV; -const xImpl_Test xTEST; +void EmitSibMagic( const xRegisterBase& reg1, const xRegisterBase& reg2 ) +{ + xWrite8( (Mod_Direct << 6) | (reg1.Id << 3) | reg2.Id ); +} -const xImpl_G1Logic xAND; -const xImpl_G1Logic xOR; -const xImpl_G1Logic xXOR; +void EmitSibMagic( const xRegisterBase& reg1, const void* src ) +{ + EmitSibMagic( reg1.Id, src ); +} -const xImpl_G1Arith xADD; -const xImpl_G1Arith xSUB; +void EmitSibMagic( const xRegisterBase& reg1, const ModSibBase& sib ) +{ + EmitSibMagic( reg1.Id, sib ); +} -const xImpl_Group1 xADC; -const xImpl_Group1 xSBB; -const xImpl_G1Compare xCMP; +// -------------------------------------------------------------------------------------- +// xSetPtr / xAlignPtr / xGetPtr / xAdvancePtr +// -------------------------------------------------------------------------------------- -const Group2ImplAll xROL; -const Group2ImplAll xROR; -const Group2ImplAll xRCL; -const Group2ImplAll xRCR; -const Group2ImplAll xSHL; -const Group2ImplAll xSHR; -const Group2ImplAll xSAR; - -const xImpl_Group3 xNOT; -const xImpl_Group3 xNEG; -const xImpl_Group3 xUMUL; -const xImpl_Group3 xUDIV; -const xImpl_iDiv xDIV; -const xImpl_iMul xMUL; - -const xImpl_IncDec xINC; -const xImpl_IncDec xDEC; - -const MovExtendImplAll xMOVZX; -const MovExtendImplAll xMOVSX; - -const DwordShiftImplAll xSHLD; -const DwordShiftImplAll xSHRD; - -const xImpl_Group8 xBT; -const xImpl_Group8 xBTR; -const xImpl_Group8 xBTS; -const xImpl_Group8 xBTC; - -const xImpl_BitScan<0xbc> xBSF; -const xImpl_BitScan<0xbd> xBSR; - -// ------------------------------------------------------------------------ -const CMovImplGeneric xCMOV; - -const CMovImplAll xCMOVA; -const CMovImplAll xCMOVAE; -const CMovImplAll xCMOVB; -const CMovImplAll xCMOVBE; - -const CMovImplAll xCMOVG; -const CMovImplAll xCMOVGE; -const CMovImplAll xCMOVL; -const CMovImplAll xCMOVLE; - -const CMovImplAll xCMOVZ; -const CMovImplAll xCMOVE; -const CMovImplAll xCMOVNZ; -const CMovImplAll xCMOVNE; - -const CMovImplAll xCMOVO; -const 
CMovImplAll xCMOVNO; -const CMovImplAll xCMOVC; -const CMovImplAll xCMOVNC; - -const CMovImplAll xCMOVS; -const CMovImplAll xCMOVNS; -const CMovImplAll xCMOVPE; -const CMovImplAll xCMOVPO; - -// ------------------------------------------------------------------------ -const SetImplGeneric xSET; - -const SetImplAll xSETA; -const SetImplAll xSETAE; -const SetImplAll xSETB; -const SetImplAll xSETBE; - -const SetImplAll xSETG; -const SetImplAll xSETGE; -const SetImplAll xSETL; -const SetImplAll xSETLE; - -const SetImplAll xSETZ; -const SetImplAll xSETE; -const SetImplAll xSETNZ; -const SetImplAll xSETNE; - -const SetImplAll xSETO; -const SetImplAll xSETNO; -const SetImplAll xSETC; -const SetImplAll xSETNC; - -const SetImplAll xSETS; -const SetImplAll xSETNS; -const SetImplAll xSETPE; -const SetImplAll xSETPO; - - -// ------------------------------------------------------------------------ // Assigns the current emitter buffer target address. // This is provided instead of using x86Ptr directly, since we may in the future find // a need to change the storage class system for the x86Ptr 'under the hood.' @@ -451,7 +384,6 @@ __emitinline void xSetPtr( void* ptr ) x86Ptr = (u8*)ptr; } -// ------------------------------------------------------------------------ // Retrieves the current emitter buffer target address. // This is provided instead of using x86Ptr directly, since we may in the future find // a need to change the storage class system for the x86Ptr 'under the hood.' @@ -460,14 +392,12 @@ __emitinline u8* xGetPtr() return x86Ptr; } -// ------------------------------------------------------------------------ __emitinline void xAlignPtr( uint bytes ) { // forward align x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~(bytes - 1) ); } -// ------------------------------------------------------------------------ __emitinline void xAdvancePtr( uint bytes ) { if( IsDevBuild ) @@ -480,6 +410,66 @@ __emitinline void xAdvancePtr( uint bytes ) x86Ptr += bytes; } +// -------------------------------------------------------------------------------------- +// xAddressInfo Method Implementations +// -------------------------------------------------------------------------------------- + +xAddressInfo& xAddressInfo::Add( const xAddressReg& src ) +{ + if( src == Index ) + { + Factor++; + } + else if( src == Base ) + { + // Compound the existing register reference into the Index/Scale pair. + Base = xEmptyReg; + + if( src == Index ) + Factor++; + else + { + pxAssertDev( Index.IsEmpty(), "x86Emitter: Only one scaled index register is allowed in an address modifier." ); + Index = src; + Factor = 2; + } + } + else if( Base.IsEmpty() ) + Base = src; + else if( Index.IsEmpty() ) + Index = src; + else + pxFailDev( L"x86Emitter: address modifiers cannot have more than two index registers." ); // oops, only 2 regs allowed per ModRm! + + return *this; +} + +xAddressInfo& xAddressInfo::Add( const xAddressInfo& src ) +{ + Add( src.Base ); + Add( src.Displacement ); + + // If the factor is 1, we can just treat index like a base register also. + if( src.Factor == 1 ) + { + Add( src.Index ); + } + else if( Index.IsEmpty() ) + { + Index = src.Index; + Factor = src.Factor; + } + else if( Index == src.Index ) + { + Factor += src.Factor; + } + else + pxFailDev( L"x86Emitter: address modifiers cannot have more than two index registers." ); // oops, only 2 regs allowed per ModRm! 
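+	// (Put another way: matching index registers simply sum their Factors, while a third
+	// distinct register can never fit a single ModRM/SIB encoding, hence the hard assert above.)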
+ + return *this; +} + + // ------------------------------------------------------------------------ // Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. // Necessary because by default ModSib compounds registers into Index when possible. @@ -515,7 +505,7 @@ void ModSibBase::Reduce() Index = Base; Scale = 0; if( !Base.IsStackPointer() ) // prevent ESP from being encoded 'alone' - Base = xAddressReg::Empty; + Base = xEmptyReg; return; } @@ -567,11 +557,8 @@ void ModSibBase::Reduce() // preserve_flags - set to ture to disable use of SHL on [Index*Base] forms // of LEA, which alters flags states. // -template< typename OperandType > -static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool preserve_flags ) +static void EmitLeaMagic( const xRegisterInt& to, const ModSibBase& src, bool preserve_flags ) { - typedef xRegister ToReg; - int displacement_size = (src.Displacement == 0) ? 0 : ( ( src.IsByteSizeDisp() ) ? 1 : 2 ); @@ -590,7 +577,7 @@ static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool } else if( displacement_size == 0 ) { - xMOV( to, ToReg( src.Index.Id ) ); + _xMovRtoR( to, src.Index ); return; } else @@ -600,7 +587,7 @@ static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool // encode as MOV and ADD combo. Make sure to use the immediate on the // ADD since it can encode as an 8-bit sign-extended value. - xMOV( to, ToReg( src.Index.Id ) ); + _xMovRtoR( to, src.Index ); xADD( to, src.Displacement ); return; } @@ -627,7 +614,7 @@ static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool // (this does not apply to older model P4s with the broken barrel shifter, // but we currently aren't optimizing for that target anyway). - xMOV( to, ToReg( src.Index.Id ) ); + _xMovRtoR( to, src.Index ); xSHL( to, src.Scale ); return; } @@ -646,14 +633,14 @@ static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool if( src.Index == esp ) { // ESP is not encodable as an index (ix86 ignores it), thus: - xMOV( to, ToReg( src.Base.Id ) ); // will do the trick! + _xMovRtoR( to, src.Base ); // will do the trick! if( src.Displacement ) xADD( to, src.Displacement ); return; } else if( src.Displacement == 0 ) { - xMOV( to, ToReg( src.Base.Id ) ); - xADD( to, ToReg( src.Index.Id ) ); + _xMovRtoR( to, src.Base ); + _g1_EmitOp( G1Type_ADD, to, src.Index ); return; } } @@ -662,7 +649,7 @@ static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool // special case handling of ESP as Index, which is replaceable with // a single MOV even when preserve_flags is set! 
:D - xMOV( to, ToReg( src.Base.Id ) ); + _xMovRtoR( to, src.Base ); return; } } @@ -697,6 +684,112 @@ __emitinline void xLEA( xRegister16 to, const ModSibBase& src, bool preserve_fla EmitLeaMagic( to, src, preserve_flags ); } +// ===================================================================================================== +// TEST / INC / DEC +// ===================================================================================================== +void xImpl_Test::operator()( const xRegister8& to, const xRegister8& from ) const +{ + xWrite8( 0x84 ); + EmitSibMagic( from, to ); +} + +void xImpl_Test::operator()( const xRegister16& to, const xRegister16& from ) const +{ + to.prefix16(); + xWrite8( 0x85 ); + EmitSibMagic( from, to ); +} + +void xImpl_Test::operator()( const xRegister32& to, const xRegister32& from ) const +{ + xWrite8( 0x85 ); + EmitSibMagic( from, to ); +} + +void xImpl_Test::operator()( const ModSib32orLess& dest, int imm ) const +{ + dest.prefix16(); + xWrite8( dest.Is8BitOp() ? 0xf6 : 0xf7 ); + EmitSibMagic( 0, dest ); + dest.xWriteImm( imm ); +} + +void xImpl_Test::operator()( const xRegisterInt& to, int imm ) const +{ + to.prefix16(); + + if( to.IsAccumulator() ) + xWrite8( to.Is8BitOp() ? 0xa8 : 0xa9 ); + else + { + xWrite8( to.Is8BitOp() ? 0xf6 : 0xf7 ); + EmitSibMagic( 0, to ); + } + to.xWriteImm( imm ); +} + +void xImpl_BitScan::operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( Opcode, to, from ); } +void xImpl_BitScan::operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } +void xImpl_BitScan::operator()( const xRegister16or32& to, const ModSibBase& sibsrc ) const +{ + xOpWrite0F( (to->GetOperandSize() == 2) ? 0x66 : 0x00, Opcode, to, sibsrc ); +} + +void xImpl_IncDec::operator()( const xRegisterInt& to ) const +{ + if( to.Is8BitOp() ) + { + xWrite8( 0xfe ); + EmitSibMagic( isDec ? 1 : 0, to ); + } + else + { + to.prefix16(); + xWrite8( (isDec ? 0x48 : 0x40) | to.Id ); + } +} + +void xImpl_IncDec::operator()( const ModSib32orLess& to ) const +{ + to.prefix16(); + xWrite8( to.Is8BitOp() ? 0xfe : 0xff ); + EmitSibMagic( isDec ? 1 : 0, to ); +} + +void xImpl_DwordShift::operator()( const xRegister32& to, const xRegister32& from, const xRegisterCL& /* clreg */ ) const { xOpWrite0F( OpcodeBase+1, to, from ); } +void xImpl_DwordShift::operator()( const xRegister16& to, const xRegister16& from, const xRegisterCL& /* clreg */ ) const { xOpWrite0F( 0x66, OpcodeBase+1, to, from ); } +void xImpl_DwordShift::operator()( const xRegister32& to, const xRegister32& from, u8 shiftcnt ) const +{ + if( shiftcnt != 0 ) + xOpWrite0F( OpcodeBase, to, from ); +} +void xImpl_DwordShift::operator()( const xRegister16& to, const xRegister16& from, u8 shiftcnt ) const +{ + if( shiftcnt != 0 ) + xOpWrite0F( 0x66, OpcodeBase, to, from ); +} + +void xImpl_DwordShift::operator()( const ModSibBase& dest, const xRegister16or32& from, const xRegisterCL& /* clreg */ ) const +{ + xOpWrite0F( (from->GetOperandSize() == 2) ? 0x66 : 0x00, OpcodeBase, from, dest ); +} + +void xImpl_DwordShift::operator()( const ModSibBase& dest, const xRegister16or32& from, u8 shiftcnt ) const +{ + if( shiftcnt != 0 ) + xOpWrite0F( (from->GetOperandSize() == 2) ? 
0x66 : 0x00, OpcodeBase, from, dest, shiftcnt ); +} + +const xImpl_Test xTEST; + +const xImpl_BitScan xBSF = { 0xbc }; +const xImpl_BitScan xBSR = { 0xbd }; + +const xImpl_IncDec xINC = { false }; +const xImpl_IncDec xDEC = { true }; + +const xImpl_DwordShift xSHLD = { 0xa4 }; +const xImpl_DwordShift xSHRD = { 0xac }; ////////////////////////////////////////////////////////////////////////////////////////// // Push / Pop Emitters