mirror of https://github.com/PCSX2/pcsx2.git
Implemented Jmp/Jcc and MOVSX/ZX instructions, and added 'i' prefix to most things (will add 'i' to a few more soon -- I think iRegister will be nicer than 'x86Register').
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@982 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
af792b7694
commit
74db1c8189
|
@ -804,10 +804,10 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
|
|||
using namespace x86Emitter;
|
||||
x86IndexReg thisreg( x86reg );
|
||||
|
||||
if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000);
|
||||
if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000);
|
||||
if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000);
|
||||
if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000);
|
||||
if ( _X ) iMOV(ptr32[thisreg+offset], 0x00000000);
|
||||
if ( _Y ) iMOV(ptr32[thisreg+offset+4], 0x00000000);
|
||||
if ( _Z ) iMOV(ptr32[thisreg+offset+8], 0x00000000);
|
||||
if ( _W ) iMOV(ptr32[thisreg+offset+12], 0x3f800000);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "iR5900.h"
|
||||
|
||||
using namespace vtlb_private;
|
||||
using namespace x86Emitter;
|
||||
|
||||
// NOTICE: This function *destroys* EAX!!
|
||||
// Moves 128 bits of memory from the source register ptr to the dest register ptr.
|
||||
|
@ -33,22 +34,20 @@ void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
|
|||
{
|
||||
// (this is one of my test cases for the new emitter --air)
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
x86IndexReg src( srcRm );
|
||||
x86IndexReg dest( destRm );
|
||||
|
||||
MOV( eax, ptr[src] );
|
||||
MOV( ptr[dest], eax );
|
||||
iMOV( eax, ptr[src] );
|
||||
iMOV( ptr[dest], eax );
|
||||
|
||||
MOV( eax, ptr[src+4] );
|
||||
MOV( ptr[dest+4], eax );
|
||||
iMOV( eax, ptr[src+4] );
|
||||
iMOV( ptr[dest+4], eax );
|
||||
|
||||
MOV( eax, ptr[src+8] );
|
||||
MOV( ptr[dest+8], eax );
|
||||
iMOV( eax, ptr[src+8] );
|
||||
iMOV( ptr[dest+8], eax );
|
||||
|
||||
MOV( eax, ptr[src+12] );
|
||||
MOV( ptr[dest+12], eax );
|
||||
iMOV( eax, ptr[src+12] );
|
||||
iMOV( ptr[dest+12], eax );
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -166,6 +165,7 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
|
|||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static void _vtlb_DynGen_IndirectRead( u32 bits )
|
||||
{
|
||||
int szidx;
|
||||
|
@ -188,6 +188,7 @@ static void _vtlb_DynGen_IndirectRead( u32 bits )
|
|||
CALL32R(EAX);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Recompiled input registers:
|
||||
// ecx = source addr to read from
|
||||
// edx = ptr to dest to write to
|
||||
|
@ -199,17 +200,18 @@ void vtlb_DynGenRead64(u32 bits)
|
|||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _fullread = JS8(0);
|
||||
iForwardJS8 _fullread;
|
||||
|
||||
_vtlb_DynGen_DirectRead( bits, false );
|
||||
u8* cont = JMP8(0);
|
||||
iForwardJump8 cont;
|
||||
|
||||
x86SetJ8(_fullread);
|
||||
_fullread.SetTarget();
|
||||
|
||||
_vtlb_DynGen_IndirectRead( bits );
|
||||
|
||||
x86SetJ8(cont);
|
||||
cont.SetTarget();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Recompiled input registers:
|
||||
// ecx - source address to read from
|
||||
// Returns read value in eax.
|
||||
|
@ -221,12 +223,12 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
|
|||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _fullread = JS8(0);
|
||||
iForwardJS8 _fullread;
|
||||
|
||||
_vtlb_DynGen_DirectRead( bits, sign );
|
||||
u8* cont = JMP8(0);
|
||||
iForwardJump8 cont;
|
||||
|
||||
x86SetJ8(_fullread);
|
||||
_fullread.SetTarget();
|
||||
_vtlb_DynGen_IndirectRead( bits );
|
||||
|
||||
// perform sign extension on the result:
|
||||
|
@ -245,11 +247,10 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
|
|||
else
|
||||
MOVZX32R16toR(EAX,EAX);
|
||||
}
|
||||
|
||||
x86SetJ8(cont);
|
||||
cont.SetTarget();
|
||||
}
|
||||
|
||||
//
|
||||
// ------------------------------------------------------------------------
|
||||
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
|
||||
// recompiler if the TLB is changed.
|
||||
void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
|
||||
|
@ -317,6 +318,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
|
|||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Recompiled input registers:
|
||||
// ecx - source address to read from
|
||||
// Returns read value in eax.
|
||||
|
@ -449,6 +451,7 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
|
|||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static void _vtlb_DynGen_IndirectWrite( u32 bits )
|
||||
{
|
||||
int szidx=0;
|
||||
|
@ -468,24 +471,26 @@ static void _vtlb_DynGen_IndirectWrite( u32 bits )
|
|||
CALL32R(EAX);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
void vtlb_DynGenWrite(u32 sz)
|
||||
{
|
||||
MOV32RtoR(EAX,ECX);
|
||||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _full=JS8(0);
|
||||
iForwardJS8 _full;
|
||||
|
||||
_vtlb_DynGen_DirectWrite( sz );
|
||||
u8* cont = JMP8(0);
|
||||
iForwardJump8 cont;
|
||||
|
||||
x86SetJ8(_full);
|
||||
_full.SetTarget();
|
||||
_vtlb_DynGen_IndirectWrite( sz );
|
||||
|
||||
x86SetJ8(cont);
|
||||
cont.SetTarget();
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Generates code for a store instruction, where the address is a known constant.
|
||||
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
|
||||
// recompiler if the TLB is changed.
|
||||
|
|
|
@ -96,22 +96,25 @@ const x86Register8
|
|||
|
||||
namespace Internal
|
||||
{
|
||||
const Group1ImplAll<G1Type_ADD> ADD;
|
||||
const Group1ImplAll<G1Type_OR> OR;
|
||||
const Group1ImplAll<G1Type_ADC> ADC;
|
||||
const Group1ImplAll<G1Type_SBB> SBB;
|
||||
const Group1ImplAll<G1Type_AND> AND;
|
||||
const Group1ImplAll<G1Type_SUB> SUB;
|
||||
const Group1ImplAll<G1Type_XOR> XOR;
|
||||
const Group1ImplAll<G1Type_CMP> CMP;
|
||||
const Group1ImplAll<G1Type_ADD> iADD;
|
||||
const Group1ImplAll<G1Type_OR> iOR;
|
||||
const Group1ImplAll<G1Type_ADC> iADC;
|
||||
const Group1ImplAll<G1Type_SBB> iSBB;
|
||||
const Group1ImplAll<G1Type_AND> iAND;
|
||||
const Group1ImplAll<G1Type_SUB> iSUB;
|
||||
const Group1ImplAll<G1Type_XOR> iXOR;
|
||||
const Group1ImplAll<G1Type_CMP> iCMP;
|
||||
|
||||
const Group2ImplAll<G2Type_ROL> ROL;
|
||||
const Group2ImplAll<G2Type_ROR> ROR;
|
||||
const Group2ImplAll<G2Type_RCL> RCL;
|
||||
const Group2ImplAll<G2Type_RCR> RCR;
|
||||
const Group2ImplAll<G2Type_SHL> SHL;
|
||||
const Group2ImplAll<G2Type_SHR> SHR;
|
||||
const Group2ImplAll<G2Type_SAR> SAR;
|
||||
const Group2ImplAll<G2Type_ROL> iROL;
|
||||
const Group2ImplAll<G2Type_ROR> iROR;
|
||||
const Group2ImplAll<G2Type_RCL> iRCL;
|
||||
const Group2ImplAll<G2Type_RCR> iRCR;
|
||||
const Group2ImplAll<G2Type_SHL> iSHL;
|
||||
const Group2ImplAll<G2Type_SHR> iSHR;
|
||||
const Group2ImplAll<G2Type_SAR> iSAR;
|
||||
|
||||
const MovExtendImplAll<true> iMOVSX;
|
||||
const MovExtendImplAll<false> iMOVZX;
|
||||
|
||||
// Performance note: VC++ wants to use byte/word register form for the following
|
||||
// ModRM/SibSB constructors if we use iWrite<u8>, and furthermore unrolls the
|
||||
|
@ -149,11 +152,9 @@ namespace Internal
|
|||
// instruction ca be encoded as ModRm alone.
|
||||
static __forceinline bool NeedsSibMagic( const ModSibBase& info )
|
||||
{
|
||||
// If base register is ESP, then we need a SIB:
|
||||
if( info.Base.IsStackPointer() ) return true;
|
||||
|
||||
// no registers? no sibs!
|
||||
// (ModSibBase::Reduce
|
||||
// (ModSibBase::Reduce always places a register in Index, and optionally leaves
|
||||
// Base empty if only register is specified)
|
||||
if( info.Index.IsEmpty() ) return false;
|
||||
|
||||
// A scaled register needs a SIB
|
||||
|
@ -188,7 +189,7 @@ namespace Internal
|
|||
if( info.Index.IsEmpty() )
|
||||
{
|
||||
ModRM( 0, regfield, ModRm_UseDisp32 );
|
||||
iWrite<u32>( info.Displacement );
|
||||
iWrite<s32>( info.Displacement );
|
||||
return;
|
||||
}
|
||||
else
|
||||
|
@ -211,7 +212,7 @@ namespace Internal
|
|||
{
|
||||
ModRM( 0, regfield, ModRm_UseSib );
|
||||
SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 );
|
||||
iWrite<u32>( info.Displacement );
|
||||
iWrite<s32>( info.Displacement );
|
||||
return;
|
||||
}
|
||||
else
|
||||
|
@ -226,7 +227,7 @@ namespace Internal
|
|||
|
||||
if( displacement_size != 0 )
|
||||
{
|
||||
*(u32*)x86Ptr = info.Displacement;
|
||||
*(s32*)x86Ptr = info.Displacement;
|
||||
x86Ptr += (displacement_size == 1) ? 1 : 4;
|
||||
}
|
||||
}
|
||||
|
@ -234,6 +235,44 @@ namespace Internal
|
|||
|
||||
using namespace Internal;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Assigns the current emitter buffer target address.
|
||||
// This is provided instead of using x86Ptr directly, since we may in the future find
|
||||
// a need to change the storage class system for the x86Ptr 'under the hood.'
|
||||
__emitinline void iSetPtr( void* ptr )
|
||||
{
|
||||
x86Ptr = (u8*)ptr;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Retrieves the current emitter buffer target address.
|
||||
// This is provided instead of using x86Ptr directly, since we may in the future find
|
||||
// a need to change the storage class system for the x86Ptr 'under the hood.'
|
||||
__emitinline u8* iGetPtr()
|
||||
{
|
||||
return x86Ptr;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
__emitinline void iAlignPtr( uint bytes )
|
||||
{
|
||||
// forward align
|
||||
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~(bytes - 1) );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
__emitinline void iAdvancePtr( uint bytes )
|
||||
{
|
||||
if( IsDevBuild )
|
||||
{
|
||||
// common debugger courtesy: advance with INT3 as filler.
|
||||
for( uint i=0; i<bytes; i++ )
|
||||
iWrite<u8>( 0xcc );
|
||||
}
|
||||
else
|
||||
x86Ptr += bytes;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Internal implementation of EmitSibMagic which has been custom tailored
|
||||
// to optimize special forms of the Lea instructions accordingly, such
|
||||
|
@ -258,21 +297,33 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
|
|||
|
||||
if( src.Index.IsEmpty() )
|
||||
{
|
||||
MOV( to, src.Displacement );
|
||||
iMOV( to, src.Displacement );
|
||||
return;
|
||||
}
|
||||
else if( displacement_size == 0 )
|
||||
{
|
||||
MOV( to, ToReg( src.Index.Id ) );
|
||||
iMOV( to, ToReg( src.Index.Id ) );
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// note: no need to do ebp+0 check since we encode all 0 displacements as
|
||||
// register assignments above (via MOV)
|
||||
if( !preserve_flags )
|
||||
{
|
||||
// encode as MOV and ADD combo. Make sure to use the immediate on the
|
||||
// ADD since it can encode as an 8-bit sign-extended value.
|
||||
|
||||
iMOV( to, ToReg( src.Index.Id ) );
|
||||
iADD( to, src.Displacement );
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// note: no need to do ebp+0 check since we encode all 0 displacements as
|
||||
// register assignments above (via MOV)
|
||||
|
||||
iWrite<u8>( 0x8d );
|
||||
ModRM( displacement_size, to.Id, src.Index.Id );
|
||||
iWrite<u8>( 0x8d );
|
||||
ModRM( displacement_size, to.Id, src.Index.Id );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -288,8 +339,8 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
|
|||
// (this does not apply to older model P4s with the broken barrel shifter,
|
||||
// but we currently aren't optimizing for that target anyway).
|
||||
|
||||
MOV( to, ToReg( src.Index.Id ) );
|
||||
SHL( to, src.Scale );
|
||||
iMOV( to, ToReg( src.Index.Id ) );
|
||||
iSHL( to, src.Scale );
|
||||
return;
|
||||
}
|
||||
iWrite<u8>( 0x8d );
|
||||
|
@ -300,26 +351,46 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
|
|||
}
|
||||
else
|
||||
{
|
||||
if( src.Scale == 0 )
|
||||
{
|
||||
if( !preserve_flags )
|
||||
{
|
||||
if( src.Index == esp )
|
||||
{
|
||||
// ESP is not encodable as an index (ix86 ignores it), thus:
|
||||
iMOV( to, ToReg( src.Base.Id ) ); // will do the trick!
|
||||
iADD( to, src.Displacement );
|
||||
return;
|
||||
}
|
||||
else if( src.Displacement == 0 )
|
||||
{
|
||||
iMOV( to, ToReg( src.Base.Id ) );
|
||||
iADD( to, ToReg( src.Index.Id ) );
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if( (src.Index == esp) && (src.Displacement == 0) )
|
||||
{
|
||||
// special case handling of ESP as Index, which is replaceable with
|
||||
// a single MOV even when preserve_flags is set! :D
|
||||
|
||||
iMOV( to, ToReg( src.Base.Id ) );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if( src.Base == ebp && displacement_size == 0 )
|
||||
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
|
||||
|
||||
iWrite<u8>( 0x8d );
|
||||
ModRM( displacement_size, to.Id, ModRm_UseSib );
|
||||
SibSB( src.Scale, src.Index.Id, src.Base.Id );
|
||||
|
||||
/*switch( displacement_size )
|
||||
{
|
||||
case 0: break;
|
||||
case 1: emit.write<u8>( src.Displacement ); break;
|
||||
case 2: emit.write<u32>( src.Displacement ); break;
|
||||
jNO_DEFAULT
|
||||
}*/
|
||||
}
|
||||
}
|
||||
|
||||
if( displacement_size != 0 )
|
||||
{
|
||||
*(u32*)x86Ptr = src.Displacement;
|
||||
*(s32*)x86Ptr = src.Displacement;
|
||||
x86Ptr += (displacement_size == 1) ? 1 : 4;
|
||||
}
|
||||
}
|
||||
|
@ -350,6 +421,7 @@ protected:
|
|||
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
|
||||
|
||||
public:
|
||||
// ------------------------------------------------------------------------
|
||||
static __forceinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from )
|
||||
{
|
||||
if( to == from ) return; // ignore redundant MOVs.
|
||||
|
@ -359,6 +431,7 @@ public:
|
|||
ModRM( 3, from.Id, to.Id );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __forceinline void Emit( const ModSibBase& dest, const x86Register<OperandSize>& from )
|
||||
{
|
||||
prefix16();
|
||||
|
@ -378,6 +451,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __forceinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& src )
|
||||
{
|
||||
prefix16();
|
||||
|
@ -397,6 +471,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __forceinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
|
||||
{
|
||||
// Note: MOV does not have (reg16/32,imm8) forms.
|
||||
|
@ -406,6 +481,7 @@ public:
|
|||
iWrite<ImmType>( imm );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
static __forceinline void Emit( ModSibStrict<OperandSize> dest, ImmType imm )
|
||||
{
|
||||
prefix16();
|
||||
|
@ -436,50 +512,50 @@ namespace Internal
|
|||
// TODO : Turn this into a macro after it's been debugged and accuracy-approved! :D
|
||||
|
||||
// ---------- 32 Bit Interface -----------
|
||||
__forceinline void MOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); }
|
||||
__forceinline void MOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); }
|
||||
__forceinline void MOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); }
|
||||
__noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); }
|
||||
__noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
|
||||
__noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }
|
||||
__forceinline void iMOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); }
|
||||
__forceinline void iMOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); }
|
||||
__forceinline void iMOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); }
|
||||
__noinline void iMOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); }
|
||||
__noinline void iMOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
|
||||
__noinline void iMOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }
|
||||
|
||||
void MOV( const x86Register32& to, u32 imm, bool preserve_flags )
|
||||
void iMOV( const x86Register32& to, u32 imm, bool preserve_flags )
|
||||
{
|
||||
if( !preserve_flags && (imm == 0) )
|
||||
XOR( to, to );
|
||||
iXOR( to, to );
|
||||
else
|
||||
MOV32i::Emit( to, imm );
|
||||
}
|
||||
|
||||
|
||||
// ---------- 16 Bit Interface -----------
|
||||
__forceinline void MOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); }
|
||||
__forceinline void MOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); }
|
||||
__forceinline void MOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); }
|
||||
__noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); }
|
||||
__noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
|
||||
__noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }
|
||||
__forceinline void iMOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); }
|
||||
__forceinline void iMOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); }
|
||||
__forceinline void iMOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); }
|
||||
__noinline void iMOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); }
|
||||
__noinline void iMOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
|
||||
__noinline void iMOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }
|
||||
|
||||
void MOV( const x86Register16& to, u16 imm, bool preserve_flags )
|
||||
void iMOV( const x86Register16& to, u16 imm, bool preserve_flags )
|
||||
{
|
||||
if( !preserve_flags && (imm == 0) )
|
||||
XOR( to, to );
|
||||
iXOR( to, to );
|
||||
else
|
||||
MOV16i::Emit( to, imm );
|
||||
}
|
||||
|
||||
// ---------- 8 Bit Interface -----------
|
||||
__forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
|
||||
__forceinline void MOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); }
|
||||
__forceinline void MOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); }
|
||||
__noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); }
|
||||
__noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
|
||||
__noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }
|
||||
__forceinline void iMOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
|
||||
__forceinline void iMOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); }
|
||||
__forceinline void iMOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); }
|
||||
__noinline void iMOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); }
|
||||
__noinline void iMOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
|
||||
__noinline void iMOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }
|
||||
|
||||
void MOV( const x86Register8& to, u8 imm, bool preserve_flags )
|
||||
void iMOV( const x86Register8& to, u8 imm, bool preserve_flags )
|
||||
{
|
||||
if( !preserve_flags && (imm == 0) )
|
||||
XOR( to, to );
|
||||
iXOR( to, to );
|
||||
else
|
||||
MOV8i::Emit( to, imm );
|
||||
}
|
||||
|
@ -510,7 +586,8 @@ __forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); }
|
|||
|
||||
__emitinline void POP( const ModSibBase& from )
|
||||
{
|
||||
iWrite<u8>( 0x8f ); Internal::EmitSibMagic( 0, from );
|
||||
iWrite<u8>( 0x8f );
|
||||
Internal::EmitSibMagic( 0, from );
|
||||
}
|
||||
|
||||
__forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); }
|
||||
|
@ -518,7 +595,8 @@ __forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); }
|
|||
|
||||
__emitinline void PUSH( const ModSibBase& from )
|
||||
{
|
||||
iWrite<u8>( 0xff ); Internal::EmitSibMagic( 6, from );
|
||||
iWrite<u8>( 0xff );
|
||||
Internal::EmitSibMagic( 6, from );
|
||||
}
|
||||
|
||||
// pushes the EFLAGS register onto the stack
|
||||
|
|
|
@ -1,225 +0,0 @@
|
|||
/* Pcsx2 - Pc Ps2 Emulator
|
||||
* Copyright (C) 2002-2009 Pcsx2 Team
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#include "PrecompiledHeader.h"
|
||||
#include "ix86_internal.h"
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// x86 Group 1 Instructions
|
||||
//------------------------------------------------------------------
|
||||
// Group 1 instructions all adhere to the same encoding scheme, and so they all
|
||||
// share the same emitter which has been coded here.
|
||||
//
|
||||
// Group 1 Table: [column value is the Reg field of the ModRM byte]
|
||||
//
|
||||
// 0 1 2 3 4 5 6 7
|
||||
// ADD OR ADC SBB AND SUB XOR CMP
|
||||
//
|
||||
|
||||
namespace x86Emitter {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// x86RegConverter - this class is used internally by the emitter as a helper for
|
||||
// converting 8 and 16 register forms into 32 bit forms. This way the end-user exposed API
|
||||
// can use type-safe 8/16/32 bit register types, and the underlying code can use a single
|
||||
// unified emitter to generate all function variations + prefixes and such. :)
|
||||
//
|
||||
class x86RegConverter : public x86Register32
|
||||
{
|
||||
public:
|
||||
x86RegConverter( x86Register32 src ) : x86Register32( src ) {}
|
||||
x86RegConverter( x86Register16 src ) : x86Register32( src.Id ) {}
|
||||
x86RegConverter( x86Register8 src ) : x86Register32( src.Id ) {}
|
||||
};
|
||||
|
||||
enum Group1InstructionType
|
||||
{
|
||||
G1Type_ADD=0,
|
||||
G1Type_OR,
|
||||
G1Type_ADC,
|
||||
G1Type_SBB,
|
||||
G1Type_AND,
|
||||
G1Type_SUB,
|
||||
G1Type_XOR,
|
||||
G1Type_CMP
|
||||
};
|
||||
|
||||
|
||||
static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, x86RegConverter from, bool bit8form=false )
|
||||
{
|
||||
write8( (bit8form ? 0 : 1) | (inst<<3) );
|
||||
ModRM( 3, from.Id, to.Id );
|
||||
}
|
||||
|
||||
static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86RegConverter from, bool bit8form=false )
|
||||
{
|
||||
write8( (bit8form ? 0 : 1) | (inst<<3) );
|
||||
EmitSibMagic( from, sibdest );
|
||||
}
|
||||
|
||||
static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, const ModSib& sibsrc, bool bit8form=false )
|
||||
{
|
||||
write8( (bit8form ? 2 : 3) | (inst<<3) );
|
||||
EmitSibMagic( to, sibsrc );
|
||||
}
|
||||
|
||||
// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit
|
||||
// instruction (AX,BX,etc).
|
||||
template< typename T >
|
||||
static emitterT void Group1_Imm( Group1InstructionType inst, x86RegConverter to, T imm )
|
||||
{
|
||||
bool bit8form = (sizeof(T) == 1);
|
||||
|
||||
if( !bit8form && is_s8( imm ) )
|
||||
{
|
||||
write8( 0x83 );
|
||||
ModRM( 3, inst, to.Id );
|
||||
write8( (s8)imm );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( to == eax )
|
||||
write8( (bit8form ? 4 : 5) | (inst<<3) );
|
||||
else
|
||||
{
|
||||
write8( bit8form ? 0x80 : 0x81 );
|
||||
ModRM( 3, inst, to.Id );
|
||||
}
|
||||
x86write<T>( imm );
|
||||
}
|
||||
}
|
||||
|
||||
// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit
|
||||
// instruction (AX,BX,etc).
|
||||
template< typename T >
|
||||
static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm )
|
||||
{
|
||||
bool bit8form = (sizeof(T) == 1);
|
||||
|
||||
write8( bit8form ? 0x80 : (is_s8( imm ) ? 0x83 : 0x81) );
|
||||
|
||||
EmitSibMagic( inst, sibdest );
|
||||
|
||||
if( !bit8form && is_s8( imm ) )
|
||||
write8( (s8)imm );
|
||||
else
|
||||
x86write<T>( imm );
|
||||
}
|
||||
|
||||
// 16 bit instruction prefix!
|
||||
static __forceinline void prefix16() { write8(0x66); }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
#define DEFINE_GROUP1_OPCODE( cod ) \
|
||||
emitterT void cod##32( x86Register32 to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \
|
||||
emitterT void cod##32( x86Register32 to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \
|
||||
emitterT void cod##32( x86Register32 to, const ModSib& from ) { Group1( G1Type_##cod, to, from ); } \
|
||||
emitterT void cod##32( x86Register32 to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
|
||||
emitterT void cod##32( const ModSib& to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \
|
||||
emitterT void cod##32( void* to, x86Register32 from ) { Group1( G1Type_##cod, ptr[to], from ); } \
|
||||
emitterT void cod##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
|
||||
emitterT void cod##32( const ModSib& to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
|
||||
\
|
||||
emitterT void cod##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
|
||||
emitterT void cod##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, to, ptr[from] ); } \
|
||||
emitterT void cod##16( x86Register16 to, const ModSib& from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
|
||||
emitterT void cod##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \
|
||||
emitterT void cod##16( const ModSib& to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
|
||||
emitterT void cod##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], from ); } \
|
||||
emitterT void cod##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
|
||||
emitterT void cod##16( const ModSib& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \
|
||||
\
|
||||
emitterT void cod##8( x86Register8 to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \
|
||||
emitterT void cod##8( x86Register8 to, void* from ) { Group1( G1Type_##cod, to, ptr[from], true ); } \
|
||||
emitterT void cod##8( x86Register8 to, const ModSib& from ) { Group1( G1Type_##cod, to, from , true ); } \
|
||||
emitterT void cod##8( x86Register8 to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
|
||||
emitterT void cod##8( const ModSib& to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \
|
||||
emitterT void cod##8( void* to, x86Register8 from ) { Group1( G1Type_##cod, ptr[to], from , true ); } \
|
||||
emitterT void cod##8( void* to, u8 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
|
||||
emitterT void cod##8( const ModSib& to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); }
|
||||
|
||||
DEFINE_GROUP1_OPCODE( ADD )
|
||||
DEFINE_GROUP1_OPCODE( CMP )
|
||||
DEFINE_GROUP1_OPCODE( OR )
|
||||
DEFINE_GROUP1_OPCODE( ADC )
|
||||
DEFINE_GROUP1_OPCODE( SBB )
|
||||
DEFINE_GROUP1_OPCODE( AND )
|
||||
DEFINE_GROUP1_OPCODE( SUB )
|
||||
DEFINE_GROUP1_OPCODE( XOR )
|
||||
|
||||
} // end namespace x86Emitter
|
||||
|
||||
|
||||
static __forceinline x86Emitter::x86Register32 _reghlp32( x86IntRegType src )
|
||||
{
|
||||
return x86Emitter::x86Register32( src );
|
||||
}
|
||||
|
||||
static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src )
|
||||
{
|
||||
return x86Emitter::x86Register16( src );
|
||||
}
|
||||
|
||||
static __forceinline x86Emitter::x86Register8 _reghlp8( x86IntRegType src )
|
||||
{
|
||||
return x86Emitter::x86Register8( src );
|
||||
}
|
||||
|
||||
static __forceinline x86Emitter::ModSib _mrmhlp( x86IntRegType src )
|
||||
{
|
||||
return x86Emitter::ModSib( x86Emitter::x86ModRm( _reghlp32(src) ) );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
#define DEFINE_LEGACY_HELPER( cod, bits ) \
|
||||
emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::cod##bits( _reghlp##bits(to), _reghlp##bits(from) ); } \
|
||||
emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x86Emitter::cod##bits( _reghlp##bits(to), imm ); } \
|
||||
emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x86Emitter::cod##bits( _reghlp##bits(to), (void*)from ); } \
|
||||
emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x86Emitter::cod##bits( (void*)to, _reghlp##bits(from) ); } \
|
||||
emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x86Emitter::cod##bits( (void*)to, imm ); } \
|
||||
emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, imm ); } \
|
||||
emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _reghlp##bits(to), _mrmhlp(from) + offset ); } \
|
||||
emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, _reghlp##bits(from) ); }
|
||||
|
||||
#define DEFINE_GROUP1_OPCODE_LEGACY( cod ) \
|
||||
DEFINE_LEGACY_HELPER( cod, 32 ) \
|
||||
DEFINE_LEGACY_HELPER( cod, 16 ) \
|
||||
DEFINE_LEGACY_HELPER( cod, 8 )
|
||||
|
||||
DEFINE_GROUP1_OPCODE_LEGACY( ADD )
|
||||
DEFINE_GROUP1_OPCODE_LEGACY( CMP )
|
||||
DEFINE_GROUP1_OPCODE_LEGACY( OR )
|
||||
DEFINE_GROUP1_OPCODE_LEGACY( ADC )
|
||||
DEFINE_GROUP1_OPCODE_LEGACY( SBB )
|
||||
DEFINE_GROUP1_OPCODE_LEGACY( AND )
|
||||
DEFINE_GROUP1_OPCODE_LEGACY( SUB )
|
||||
DEFINE_GROUP1_OPCODE_LEGACY( XOR )
|
||||
|
||||
// Special forms needed by the legacy emitter syntax:
|
||||
|
||||
emitterT void AND32I8toR( x86IntRegType to, s8 from )
|
||||
{
|
||||
x86Emitter::AND32( _reghlp32(to), from );
|
||||
}
|
||||
|
||||
emitterT void AND32I8toM( uptr to, s8 from )
|
||||
{
|
||||
x86Emitter::AND32( (void*)to, from );
|
||||
}
|
|
@ -234,4 +234,58 @@ namespace x86Emitter
|
|||
|
||||
return *this;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template< typename OperandType >
|
||||
iForwardJump<OperandType>::iForwardJump( JccComparisonType cctype ) :
|
||||
BasePtr( (s8*)iGetPtr() +
|
||||
((OperandSize == 1) ? 2 : // j8's are always 2 bytes.
|
||||
((cctype==Jcc_Unconditional) ? 5 : 6 )) // j32's are either 5 or 6 bytes
|
||||
)
|
||||
{
|
||||
jASSUME( cctype != Jcc_Unknown );
|
||||
jASSUME( OperandSize == 1 || OperandSize == 4 );
|
||||
|
||||
if( OperandSize == 1 )
|
||||
iWrite<u8>( (cctype == Jcc_Unconditional) ? 0xeb : (0x70 | cctype) );
|
||||
else
|
||||
{
|
||||
if( cctype == Jcc_Unconditional )
|
||||
iWrite<u8>( 0xe9 );
|
||||
else
|
||||
{
|
||||
iWrite<u8>( 0x0f );
|
||||
iWrite<u8>( 0x80 | cctype );
|
||||
}
|
||||
}
|
||||
|
||||
iAdvancePtr( OperandSize );
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
template< typename OperandType >
|
||||
void iForwardJump<OperandType>::SetTarget() const
|
||||
{
|
||||
jASSUME( BasePtr != NULL );
|
||||
|
||||
sptr displacement = (sptr)iGetPtr() - (sptr)BasePtr;
|
||||
if( OperandSize == 1 )
|
||||
{
|
||||
if( !is_s8( displacement ) )
|
||||
{
|
||||
assert( false );
|
||||
Console::Error( "Emitter Error: Invalid short jump displacement = 0x%x", params (int)displacement );
|
||||
}
|
||||
BasePtr[-1] = (s8)displacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// full displacement, no sanity checks needed :D
|
||||
((s32*)BasePtr)[-1] = displacement;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -35,6 +35,9 @@
|
|||
|
||||
namespace x86Emitter
|
||||
{
|
||||
extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false );
|
||||
|
||||
|
||||
// ----- Lea Instructions (Load Effective Address) -----
|
||||
// Note: alternate (void*) forms of these instructions are not provided since those
|
||||
// forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs
|
||||
|
@ -56,56 +59,153 @@ namespace x86Emitter
|
|||
static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); }
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
using Internal::ADD;
|
||||
using Internal::OR;
|
||||
using Internal::ADC;
|
||||
using Internal::SBB;
|
||||
using Internal::AND;
|
||||
using Internal::SUB;
|
||||
using Internal::XOR;
|
||||
using Internal::CMP;
|
||||
using Internal::iADD;
|
||||
using Internal::iOR;
|
||||
using Internal::iADC;
|
||||
using Internal::iSBB;
|
||||
using Internal::iAND;
|
||||
using Internal::iSUB;
|
||||
using Internal::iXOR;
|
||||
using Internal::iCMP;
|
||||
|
||||
using Internal::ROL;
|
||||
using Internal::ROR;
|
||||
using Internal::RCL;
|
||||
using Internal::RCR;
|
||||
using Internal::SHL;
|
||||
using Internal::SHR;
|
||||
using Internal::SAR;
|
||||
using Internal::iROL;
|
||||
using Internal::iROR;
|
||||
using Internal::iRCL;
|
||||
using Internal::iRCR;
|
||||
using Internal::iSHL;
|
||||
using Internal::iSHR;
|
||||
using Internal::iSAR;
|
||||
|
||||
using Internal::iMOVSX;
|
||||
using Internal::iMOVZX;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MOV instructions!
|
||||
// ---------- 32 Bit Interface -----------
|
||||
extern void MOV( const x86Register32& to, const x86Register32& from );
|
||||
extern void MOV( const ModSibBase& sibdest, const x86Register32& from );
|
||||
extern void MOV( const x86Register32& to, const ModSibBase& sibsrc );
|
||||
extern void MOV( const x86Register32& to, const void* src );
|
||||
extern void MOV( const void* dest, const x86Register32& from );
|
||||
extern void iMOV( const x86Register32& to, const x86Register32& from );
|
||||
extern void iMOV( const ModSibBase& sibdest, const x86Register32& from );
|
||||
extern void iMOV( const x86Register32& to, const ModSibBase& sibsrc );
|
||||
extern void iMOV( const x86Register32& to, const void* src );
|
||||
extern void iMOV( const void* dest, const x86Register32& from );
|
||||
|
||||
// preserve_flags - set to true to disable optimizations which could alter the state of
|
||||
// the flags (namely replacing mov reg,0 with xor).
|
||||
extern void MOV( const x86Register32& to, u32 imm, bool preserve_flags=false );
|
||||
extern void MOV( const ModSibStrict<4>& sibdest, u32 imm );
|
||||
extern void iMOV( const x86Register32& to, u32 imm, bool preserve_flags=false );
|
||||
extern void iMOV( const ModSibStrict<4>& sibdest, u32 imm );
|
||||
|
||||
// ---------- 16 Bit Interface -----------
|
||||
extern void MOV( const x86Register16& to, const x86Register16& from );
|
||||
extern void MOV( const ModSibBase& sibdest, const x86Register16& from );
|
||||
extern void MOV( const x86Register16& to, const ModSibBase& sibsrc );
|
||||
extern void MOV( const x86Register16& to, const void* src );
|
||||
extern void MOV( const void* dest, const x86Register16& from );
|
||||
extern void iMOV( const x86Register16& to, const x86Register16& from );
|
||||
extern void iMOV( const ModSibBase& sibdest, const x86Register16& from );
|
||||
extern void iMOV( const x86Register16& to, const ModSibBase& sibsrc );
|
||||
extern void iMOV( const x86Register16& to, const void* src );
|
||||
extern void iMOV( const void* dest, const x86Register16& from );
|
||||
|
||||
// preserve_flags - set to true to disable optimizations which could alter the state of
|
||||
// the flags (namely replacing mov reg,0 with xor).
|
||||
extern void MOV( const x86Register16& to, u16 imm, bool preserve_flags=false );
|
||||
extern void MOV( const ModSibStrict<2>& sibdest, u16 imm );
|
||||
extern void iMOV( const x86Register16& to, u16 imm, bool preserve_flags=false );
|
||||
extern void iMOV( const ModSibStrict<2>& sibdest, u16 imm );
|
||||
|
||||
// ---------- 8 Bit Interface -----------
|
||||
extern void MOV( const x86Register8& to, const x86Register8& from );
|
||||
extern void MOV( const ModSibBase& sibdest, const x86Register8& from );
|
||||
extern void MOV( const x86Register8& to, const ModSibBase& sibsrc );
|
||||
extern void MOV( const x86Register8& to, const void* src );
|
||||
extern void MOV( const void* dest, const x86Register8& from );
|
||||
extern void iMOV( const x86Register8& to, const x86Register8& from );
|
||||
extern void iMOV( const ModSibBase& sibdest, const x86Register8& from );
|
||||
extern void iMOV( const x86Register8& to, const ModSibBase& sibsrc );
|
||||
extern void iMOV( const x86Register8& to, const void* src );
|
||||
extern void iMOV( const void* dest, const x86Register8& from );
|
||||
|
||||
extern void MOV( const x86Register8& to, u8 imm, bool preserve_flags=false );
|
||||
extern void MOV( const ModSibStrict<1>& sibdest, u8 imm );
|
||||
extern void iMOV( const x86Register8& to, u8 imm, bool preserve_flags=false );
|
||||
extern void iMOV( const ModSibStrict<1>& sibdest, u8 imm );
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// JMP / Jcc Instructions!
|
||||
|
||||
#define DEFINE_FORWARD_JUMP( label, cond ) \
|
||||
template< typename OperandType > \
|
||||
class iForward##label : public iForwardJump<OperandType> \
|
||||
{ \
|
||||
public: \
|
||||
iForward##label() : iForwardJump<OperandType>( cond ) {} \
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Note: typedefs below are defined individually in order to appease Intellisense
|
||||
// resolution. Including them into the class definition macro above breaks it.
|
||||
|
||||
typedef iForwardJump<s8> iForwardJump8;
|
||||
typedef iForwardJump<s32> iForwardJump32;
|
||||
|
||||
|
||||
DEFINE_FORWARD_JUMP( JA, Jcc_Above );
|
||||
DEFINE_FORWARD_JUMP( JB, Jcc_Below );
|
||||
DEFINE_FORWARD_JUMP( JAE, Jcc_AboveOrEqual );
|
||||
DEFINE_FORWARD_JUMP( JBE, Jcc_BelowOrEqual );
|
||||
|
||||
typedef iForwardJA<s8> iForwardJA8;
|
||||
typedef iForwardJA<s32> iForwardJA32;
|
||||
typedef iForwardJB<s8> iForwardJB8;
|
||||
typedef iForwardJB<s32> iForwardJB32;
|
||||
typedef iForwardJAE<s8> iForwardJAE8;
|
||||
typedef iForwardJAE<s32> iForwardJAE32;
|
||||
typedef iForwardJBE<s8> iForwardJBE8;
|
||||
typedef iForwardJBE<s32> iForwardJBE32;
|
||||
|
||||
DEFINE_FORWARD_JUMP( JG, Jcc_Greater );
|
||||
DEFINE_FORWARD_JUMP( JL, Jcc_Less );
|
||||
DEFINE_FORWARD_JUMP( JGE, Jcc_GreaterOrEqual );
|
||||
DEFINE_FORWARD_JUMP( JLE, Jcc_LessOrEqual );
|
||||
|
||||
typedef iForwardJG<s8> iForwardJG8;
|
||||
typedef iForwardJG<s32> iForwardJG32;
|
||||
typedef iForwardJL<s8> iForwardJL8;
|
||||
typedef iForwardJL<s32> iForwardJL32;
|
||||
typedef iForwardJGE<s8> iForwardJGE8;
|
||||
typedef iForwardJGE<s32> iForwardJGE32;
|
||||
typedef iForwardJLE<s8> iForwardJLE8;
|
||||
typedef iForwardJLE<s32> iForwardJLE32;
|
||||
|
||||
DEFINE_FORWARD_JUMP( JZ, Jcc_Zero );
|
||||
DEFINE_FORWARD_JUMP( JE, Jcc_Equal );
|
||||
DEFINE_FORWARD_JUMP( JNZ, Jcc_NotZero );
|
||||
DEFINE_FORWARD_JUMP( JNE, Jcc_NotEqual );
|
||||
|
||||
typedef iForwardJZ<s8> iForwardJZ8;
|
||||
typedef iForwardJZ<s32> iForwardJZ32;
|
||||
typedef iForwardJE<s8> iForwardJE8;
|
||||
typedef iForwardJE<s32> iForwardJE32;
|
||||
typedef iForwardJNZ<s8> iForwardJNZ8;
|
||||
typedef iForwardJNZ<s32> iForwardJNZ32;
|
||||
typedef iForwardJNE<s8> iForwardJNE8;
|
||||
typedef iForwardJNE<s32> iForwardJNE32;
|
||||
|
||||
DEFINE_FORWARD_JUMP( JS, Jcc_Signed );
|
||||
DEFINE_FORWARD_JUMP( JNS, Jcc_Unsigned );
|
||||
|
||||
typedef iForwardJS<s8> iForwardJS8;
|
||||
typedef iForwardJS<s32> iForwardJS32;
|
||||
typedef iForwardJNS<s8> iForwardJNS8;
|
||||
typedef iForwardJNS<s32> iForwardJNS32;
|
||||
|
||||
DEFINE_FORWARD_JUMP( JO, Jcc_Overflow );
|
||||
DEFINE_FORWARD_JUMP( JNO, Jcc_NotOverflow );
|
||||
|
||||
typedef iForwardJO<s8> iForwardJO8;
|
||||
typedef iForwardJO<s32> iForwardJO32;
|
||||
typedef iForwardJNO<s8> iForwardJNO8;
|
||||
typedef iForwardJNO<s32> iForwardJNO32;
|
||||
|
||||
DEFINE_FORWARD_JUMP( JC, Jcc_Carry );
|
||||
DEFINE_FORWARD_JUMP( JNC, Jcc_NotCarry );
|
||||
|
||||
typedef iForwardJC<s8> iForwardJC8;
|
||||
typedef iForwardJC<s32> iForwardJC32;
|
||||
typedef iForwardJNC<s8> iForwardJNC8;
|
||||
typedef iForwardJNC<s32> iForwardJNC32;
|
||||
|
||||
DEFINE_FORWARD_JUMP( JPE, Jcc_ParityEven );
|
||||
DEFINE_FORWARD_JUMP( JPO, Jcc_ParityOdd );
|
||||
|
||||
typedef iForwardJPE<s8> iForwardJPE8;
|
||||
typedef iForwardJPE<s32> iForwardJPE32;
|
||||
typedef iForwardJPO<s8> iForwardJPO8;
|
||||
typedef iForwardJPO<s32> iForwardJPO32;
|
||||
}
|
||||
|
||||
|
|
|
@ -36,157 +36,83 @@
|
|||
#include "System.h"
|
||||
#include "ix86_internal.h"
|
||||
|
||||
namespace x86Emitter {
|
||||
|
||||
// Another Work-in-Progress!!
|
||||
|
||||
|
||||
/*
|
||||
emitterT void x86SetPtr( u8* ptr )
|
||||
// ------------------------------------------------------------------------
|
||||
void iSmartJump::SetTarget()
|
||||
{
|
||||
x86Ptr = ptr;
|
||||
jASSUME( !m_written );
|
||||
if( m_written )
|
||||
throw Exception::InvalidOperation( "Attempted to set SmartJump label multiple times." );
|
||||
|
||||
m_target = iGetPtr();
|
||||
if( m_baseptr == NULL ) return;
|
||||
|
||||
iSetPtr( m_baseptr );
|
||||
u8* const saveme = m_baseptr + GetMaxInstructionSize();
|
||||
iJccKnownTarget( m_cc, m_target, true );
|
||||
|
||||
// Copy recompiled data inward if the jump instruction didn't fill the
|
||||
// alloted buffer (means that we optimized things to a j8!)
|
||||
|
||||
const int spacer = (sptr)saveme - (sptr)iGetPtr();
|
||||
if( spacer != 0 )
|
||||
{
|
||||
u8* destpos = iGetPtr();
|
||||
const int copylen = (sptr)m_target - (sptr)saveme;
|
||||
|
||||
memcpy_fast( destpos, saveme, copylen );
|
||||
iSetPtr( m_target - spacer );
|
||||
}
|
||||
|
||||
m_written = true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// x86Ptr Label API
|
||||
//
|
||||
|
||||
class x86Label
|
||||
// ------------------------------------------------------------------------
|
||||
// Writes a jump at the current x86Ptr, which targets a pre-established target address.
|
||||
// (usually a backwards jump)
|
||||
//
|
||||
// slideForward - used internally by iSmartJump to indicate that the jump target is going
|
||||
// to slide forward in the event of an 8 bit displacement.
|
||||
//
|
||||
__emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward )
|
||||
{
|
||||
public:
|
||||
class Entry
|
||||
{
|
||||
protected:
|
||||
u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type)
|
||||
u8* m_base; // base address of the instruction (passed to the instruction)
|
||||
int m_cc; // comparison type of the instruction
|
||||
|
||||
public:
|
||||
explicit Entry( int cc ) :
|
||||
m_base( x86Ptr )
|
||||
, m_writebackpos( writebackidx )
|
||||
{
|
||||
}
|
||||
// Calculate the potential j8 displacement first, assuming an instruction length of 2:
|
||||
sptr displacement8 = (sptr)target - ((sptr)iGetPtr() + 2);
|
||||
|
||||
void Commit( const u8* target ) const
|
||||
{
|
||||
//uptr reltarget = (uptr)m_base - (uptr)target;
|
||||
//*((u32*)&m_base[m_writebackpos]) = reltarget;
|
||||
jASSUME( m_emit != NULL );
|
||||
jASSUME( m_base != NULL );
|
||||
return m_emit( m_base, target, m_cc );
|
||||
}
|
||||
};
|
||||
const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0;
|
||||
displacement8 -= slideVal;
|
||||
|
||||
protected:
|
||||
u8* m_target; // x86Ptr target address of this label
|
||||
Entry m_writebacks[8];
|
||||
int m_writeback_curpos;
|
||||
|
||||
public:
|
||||
// creates a label list with no valid target.
|
||||
// Use x86LabelList::Set() to set a target prior to class destruction.
|
||||
x86Label() : m_target()
|
||||
{
|
||||
}
|
||||
|
||||
x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() )
|
||||
{
|
||||
}
|
||||
// if the following assert fails it means we accidentally used slideForard on a backward
|
||||
// jump (which is an invalid operation since there's nothing to slide forward).
|
||||
if( slideForward ) jASSUME( displacement8 >= 0 );
|
||||
|
||||
// Performs all address writebacks on destruction.
|
||||
virtual ~x86Label()
|
||||
if( is_s8( displacement8 ) )
|
||||
{
|
||||
IssueWritebacks();
|
||||
iWrite<u8>( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) );
|
||||
iWrite<s8>( displacement8 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Perform a 32 bit jump instead. :(
|
||||
|
||||
void SetTarget() { m_address = x86Ptr; }
|
||||
void SetTarget( void* addr ) { m_address = (u8*)addr; }
|
||||
|
||||
void Clear()
|
||||
{
|
||||
m_writeback_curpos = 0;
|
||||
}
|
||||
|
||||
// Adds a jump/call instruction to this label for writebacks.
|
||||
void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc )
|
||||
{
|
||||
jASSUME( m_writeback_curpos < MaxWritebacks );
|
||||
m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) );
|
||||
m_writeback_curpos++;
|
||||
}
|
||||
|
||||
void IssueWritebacks() const
|
||||
{
|
||||
const std::list<Entry>::const_iterator& start = m_list_writebacks.
|
||||
for( ; start!=end; start++ )
|
||||
if( comparison == Jcc_Unconditional )
|
||||
iWrite<u8>( 0xe9 );
|
||||
else
|
||||
{
|
||||
Entry& current = *start;
|
||||
u8* donespot = current.Commit();
|
||||
|
||||
// Copy the data from the m_nextinst to the current location,
|
||||
// and update any additional writebacks (but what about multiple labels?!?)
|
||||
|
||||
iWrite<u8>( 0x0f );
|
||||
iWrite<u8>( 0x80 | comparison );
|
||||
}
|
||||
iWrite<s32>( (sptr)target - ((sptr)iGetPtr() + 4) );
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
void JMP( x86Label& dest )
|
||||
{
|
||||
dest.AddWriteback( x86Ptr, emitJMP, 0 );
|
||||
}
|
||||
|
||||
void JLE( x86Label& dest )
|
||||
__emitinline void iJcc( JccComparisonType comparison, void* target )
|
||||
{
|
||||
dest.AddWriteback( x86Ptr, emitJCC, 0 );
|
||||
iJccKnownTarget( comparison, target );
|
||||
}
|
||||
|
||||
void x86SetJ8( u8* j8 )
|
||||
{
|
||||
u32 jump = ( x86Ptr - j8 ) - 1;
|
||||
|
||||
if ( jump > 0x7f ) {
|
||||
Console::Error( "j8 greater than 0x7f!!" );
|
||||
assert(0);
|
||||
}
|
||||
*j8 = (u8)jump;
|
||||
}
|
||||
|
||||
void x86SetJ8A( u8* j8 )
|
||||
{
|
||||
u32 jump = ( x86Ptr - j8 ) - 1;
|
||||
|
||||
if ( jump > 0x7f ) {
|
||||
Console::Error( "j8 greater than 0x7f!!" );
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if( ((uptr)x86Ptr&0xf) > 4 ) {
|
||||
|
||||
uptr newjump = jump + 16-((uptr)x86Ptr&0xf);
|
||||
|
||||
if( newjump <= 0x7f ) {
|
||||
jump = newjump;
|
||||
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
|
||||
}
|
||||
}
|
||||
*j8 = (u8)jump;
|
||||
}
|
||||
|
||||
emitterT void x86SetJ32( u32* j32 )
|
||||
{
|
||||
*j32 = ( x86Ptr - (u8*)j32 ) - 4;
|
||||
}
|
||||
|
||||
emitterT void x86SetJ32A( u32* j32 )
|
||||
{
|
||||
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
|
||||
x86SetJ32(j32);
|
||||
}
|
||||
|
||||
emitterT void x86Align( int bytes )
|
||||
{
|
||||
// forward align
|
||||
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) );
|
||||
}
|
||||
*/
|
||||
}
|
|
@ -57,27 +57,29 @@ static __forceinline ModSibStrict<OperandSize> _mhlp2( x86IntRegType src1, x86In
|
|||
return ModSibStrict<OperandSize>( x86IndexReg(src2), x86IndexReg(src1) );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
#define DEFINE_LEGACY_HELPER( cod, bits ) \
|
||||
emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { cod( _reghlp<bits/8>(to), _reghlp<bits/8>(from) ); } \
|
||||
emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { cod( _reghlp<bits/8>(to), imm ); } \
|
||||
emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { cod( _reghlp<bits/8>(to), (void*)from ); } \
|
||||
emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { cod( (void*)to, _reghlp<bits/8>(from) ); } \
|
||||
emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { cod( ptr##bits[to], imm ); } \
|
||||
emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { cod( _mhlp<bits/8>(to) + offset, imm ); } \
|
||||
emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { cod( _reghlp<bits/8>(to), _mhlp<bits/8>(from) + offset ); } \
|
||||
emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { cod( _mhlp<bits/8>(to) + offset, _reghlp<bits/8>(from) ); } \
|
||||
emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { i##cod( _reghlp<bits/8>(to), _reghlp<bits/8>(from) ); } \
|
||||
emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { i##cod( _reghlp<bits/8>(to), imm ); } \
|
||||
emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { i##cod( _reghlp<bits/8>(to), (void*)from ); } \
|
||||
emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { i##cod( (void*)to, _reghlp<bits/8>(from) ); } \
|
||||
emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { i##cod( ptr##bits[to], imm ); } \
|
||||
emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, imm ); } \
|
||||
emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _reghlp<bits/8>(to), _mhlp<bits/8>(from) + offset ); } \
|
||||
emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, _reghlp<bits/8>(from) ); } \
|
||||
emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \
|
||||
{ cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \
|
||||
{ i##cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \
|
||||
emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \
|
||||
{ cod( _reghlp<bits/8>(to), _mhlp2<bits/8>(from1,from2) + offset ); }
|
||||
{ i##cod( _reghlp<bits/8>(to), _mhlp2<bits/8>(from1,from2) + offset ); }
|
||||
|
||||
#define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \
|
||||
emitterT void cod##bits##CLtoR( x86IntRegType to ) { cod( _reghlp<bits/8>(to), cl ); } \
|
||||
emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { cod( _reghlp<bits/8>(to), imm ); } \
|
||||
emitterT void cod##bits##CLtoM( uptr to ) { cod( ptr##bits[to], cl ); } \
|
||||
emitterT void cod##bits##ItoM( uptr to, u8 imm ) { cod( ptr##bits[to], imm ); } \
|
||||
emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { cod( _mhlp<bits/8>(to) + offset, imm ); } \
|
||||
emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { cod( _mhlp<bits/8>(to) + offset, cl ); }
|
||||
emitterT void cod##bits##CLtoR( x86IntRegType to ) { i##cod( _reghlp<bits/8>(to), cl ); } \
|
||||
emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { i##cod( _reghlp<bits/8>(to), imm ); } \
|
||||
emitterT void cod##bits##CLtoM( uptr to ) { i##cod( ptr##bits[to], cl ); } \
|
||||
emitterT void cod##bits##ItoM( uptr to, u8 imm ) { i##cod( ptr##bits[to], imm ); } \
|
||||
emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, imm ); } \
|
||||
emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, cl ); }
|
||||
|
||||
//emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \
|
||||
// { cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \
|
||||
|
@ -113,32 +115,44 @@ DEFINE_OPCODE_SHIFT_LEGACY( SAR )
|
|||
|
||||
DEFINE_OPCODE_LEGACY( MOV )
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
#define DEFINE_LEGACY_MOVEXTEND( form, srcbits ) \
|
||||
emitterT void MOV##form##X32R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##X( x86Register32( to ), x86Register##srcbits( from ) ); } \
|
||||
emitterT void MOV##form##X32Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \
|
||||
emitterT void MOV##form##X32M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[from] ); }
|
||||
|
||||
DEFINE_LEGACY_MOVEXTEND( S, 16 )
|
||||
DEFINE_LEGACY_MOVEXTEND( Z, 16 )
|
||||
DEFINE_LEGACY_MOVEXTEND( S, 8 )
|
||||
DEFINE_LEGACY_MOVEXTEND( Z, 8 )
|
||||
|
||||
|
||||
// mov r32 to [r32<<scale+from2]
|
||||
emitterT void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
|
||||
{
|
||||
MOV( x86Register32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
|
||||
iMOV( x86Register32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
|
||||
}
|
||||
|
||||
emitterT void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
|
||||
{
|
||||
MOV( x86Register16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
|
||||
iMOV( x86Register16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
|
||||
}
|
||||
|
||||
emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
|
||||
{
|
||||
MOV( x86Register8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
|
||||
iMOV( x86Register8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
|
||||
}
|
||||
|
||||
// Special forms needed by the legacy emitter syntax:
|
||||
|
||||
emitterT void AND32I8toR( x86IntRegType to, s8 from )
|
||||
{
|
||||
AND( _reghlp<4>(to), from );
|
||||
iAND( _reghlp<4>(to), from );
|
||||
}
|
||||
|
||||
emitterT void AND32I8toM( uptr to, s8 from )
|
||||
{
|
||||
AND( ptr8[to], from );
|
||||
iAND( ptr8[to], from );
|
||||
}
|
||||
|
||||
|
||||
|
@ -310,103 +324,6 @@ emitterT void NOP( void )
|
|||
write8(0x90);
|
||||
}
|
||||
|
||||
|
||||
/* movsx r8 to r32 */
|
||||
emitterT void MOVSX32R8toR( x86IntRegType to, x86IntRegType from )
|
||||
{
|
||||
RexRB(0,to,from);
|
||||
write16( 0xBE0F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset )
|
||||
{
|
||||
RexRB(0,to,from);
|
||||
write16( 0xBE0F );
|
||||
WriteRmOffsetFrom(to,from,offset);
|
||||
}
|
||||
|
||||
/* movsx m8 to r32 */
|
||||
emitterT void MOVSX32M8toR( x86IntRegType to, u32 from )
|
||||
{
|
||||
RexR(0,to);
|
||||
write16( 0xBE0F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
/* movsx r16 to r32 */
|
||||
emitterT void MOVSX32R16toR( x86IntRegType to, x86IntRegType from )
|
||||
{
|
||||
RexRB(0,to,from);
|
||||
write16( 0xBF0F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void MOVSX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset )
|
||||
{
|
||||
RexRB(0,to,from);
|
||||
write16( 0xBF0F );
|
||||
WriteRmOffsetFrom(to,from,offset);
|
||||
}
|
||||
|
||||
/* movsx m16 to r32 */
|
||||
emitterT void MOVSX32M16toR( x86IntRegType to, u32 from )
|
||||
{
|
||||
RexR(0,to);
|
||||
write16( 0xBF0F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
/* movzx r8 to r32 */
|
||||
emitterT void MOVZX32R8toR( x86IntRegType to, x86IntRegType from )
|
||||
{
|
||||
RexRB(0,to,from);
|
||||
write16( 0xB60F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset )
|
||||
{
|
||||
RexRB(0,to,from);
|
||||
write16( 0xB60F );
|
||||
WriteRmOffsetFrom(to,from,offset);
|
||||
}
|
||||
|
||||
/* movzx m8 to r32 */
|
||||
emitterT void MOVZX32M8toR( x86IntRegType to, u32 from )
|
||||
{
|
||||
RexR(0,to);
|
||||
write16( 0xB60F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
/* movzx r16 to r32 */
|
||||
emitterT void MOVZX32R16toR( x86IntRegType to, x86IntRegType from )
|
||||
{
|
||||
RexRB(0,to,from);
|
||||
write16( 0xB70F );
|
||||
ModRM( 3, to, from );
|
||||
}
|
||||
|
||||
emitterT void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset )
|
||||
{
|
||||
RexRB(0,to,from);
|
||||
write16( 0xB70F );
|
||||
WriteRmOffsetFrom(to,from,offset);
|
||||
}
|
||||
|
||||
/* movzx m16 to r32 */
|
||||
emitterT void MOVZX32M16toR( x86IntRegType to, u32 from )
|
||||
{
|
||||
RexR(0,to);
|
||||
write16( 0xB70F );
|
||||
ModRM( 0, to, DISP32 );
|
||||
write32( MEMADDR(from, 4) );
|
||||
}
|
||||
|
||||
/* cmovbe r32 to r32 */
|
||||
emitterT void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from )
|
||||
{
|
||||
|
|
|
@ -18,9 +18,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
extern void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs
|
||||
extern void cpudetectInit();//this is all that needs to be called and will fill up the below structs
|
||||
|
||||
typedef struct CAPABILITIES CAPABILITIES;
|
||||
//cpu capabilities structure
|
||||
struct CAPABILITIES {
|
||||
u32 hasFloatingPointUnit;
|
||||
|
@ -137,9 +136,9 @@ namespace x86Emitter
|
|||
// single-line functions anyway.
|
||||
//
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
#define __emitinline
|
||||
# define __emitinline
|
||||
#else
|
||||
#define __emitinline __forceinline
|
||||
# define __emitinline __forceinline
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
@ -148,13 +147,18 @@ namespace x86Emitter
|
|||
# define __noinline
|
||||
#endif
|
||||
|
||||
|
||||
static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field)
|
||||
static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field)
|
||||
|
||||
class x86AddressInfo;
|
||||
class ModSibBase;
|
||||
|
||||
extern void iSetPtr( void* ptr );
|
||||
extern u8* iGetPtr();
|
||||
extern void iAlignPtr( uint bytes );
|
||||
extern void iAdvancePtr( uint bytes );
|
||||
|
||||
|
||||
static __forceinline void write8( u8 val )
|
||||
{
|
||||
iWrite( val );
|
||||
|
@ -195,7 +199,7 @@ namespace x86Emitter
|
|||
x86Register(): Id( -1 ) {}
|
||||
explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
|
||||
|
||||
bool IsEmpty() const { return Id == -1; }
|
||||
bool IsEmpty() const { return Id < 0; }
|
||||
|
||||
// Returns true if the register is a valid accumulator: Eax, Ax, Al.
|
||||
bool IsAccumulator() const { return Id == 0; }
|
||||
|
@ -220,7 +224,7 @@ namespace x86Emitter
|
|||
// ------------------------------------------------------------------------
|
||||
// Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which
|
||||
// means it finds undeclared variables when MSVC does not (Since MSVC compiles templates
|
||||
// when they are actually used). In practice this sucks since it means we have to move all'
|
||||
// when they are actually used). In practice this sucks since it means we have to move all
|
||||
// our variable and function prototypes from a nicely/neatly unified location to being strewn
|
||||
// all about the the templated code in haphazard fashion. Yay.. >_<
|
||||
//
|
||||
|
@ -476,6 +480,118 @@ namespace x86Emitter
|
|||
extern const x86IndexerTypeExplicit<2> ptr16;
|
||||
extern const x86IndexerTypeExplicit<1> ptr8;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// JccComparisonType - enumerated possibilities for inspired code branching!
|
||||
//
|
||||
enum JccComparisonType
|
||||
{
|
||||
Jcc_Unknown = -2,
|
||||
Jcc_Unconditional = -1,
|
||||
Jcc_Overflow = 0x0,
|
||||
Jcc_NotOverflow = 0x1,
|
||||
Jcc_Below = 0x2,
|
||||
Jcc_Carry = 0x2,
|
||||
Jcc_AboveOrEqual = 0x3,
|
||||
Jcc_NotCarry = 0x3,
|
||||
Jcc_Zero = 0x4,
|
||||
Jcc_Equal = 0x4,
|
||||
Jcc_NotZero = 0x5,
|
||||
Jcc_NotEqual = 0x5,
|
||||
Jcc_BelowOrEqual = 0x6,
|
||||
Jcc_Above = 0x7,
|
||||
Jcc_Signed = 0x8,
|
||||
Jcc_Unsigned = 0x9,
|
||||
Jcc_ParityEven = 0xa,
|
||||
Jcc_ParityOdd = 0xb,
|
||||
Jcc_Less = 0xc,
|
||||
Jcc_GreaterOrEqual = 0xd,
|
||||
Jcc_LessOrEqual = 0xe,
|
||||
Jcc_Greater = 0xf,
|
||||
};
|
||||
|
||||
// Not supported yet:
|
||||
//E3 cb JECXZ rel8 Jump short if ECX register is 0.
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// iSmartJump
|
||||
// This class provides an interface for generating forward-based j8's or j32's "smartly"
|
||||
// as per the measured displacement distance. If the displacement is a valid s8, then
|
||||
// a j8 is inserted, else a j32.
|
||||
//
|
||||
// Performance Analysis: j8's use 4 less byes per opcode, and thus can provide
|
||||
// minor speed benefits in the form of L1/L2 cache clutter. They're also notably faster
|
||||
// on P4's, and mildly faster on AMDs. (Core2's and i7's don't care)
|
||||
//
|
||||
class iSmartJump
|
||||
{
|
||||
protected:
|
||||
u8* m_target; // x86Ptr target address of this label
|
||||
u8* m_baseptr; // base address of the instruction (passed to the instruction emitter)
|
||||
JccComparisonType m_cc; // comparison type of the instruction
|
||||
bool m_written; // set true when the jump is written (at which point the object becomes invalid)
|
||||
|
||||
public:
|
||||
|
||||
const int GetMaxInstructionSize() const
|
||||
{
|
||||
jASSUME( m_cc != Jcc_Unknown );
|
||||
return ( m_cc == Jcc_Unconditional ) ? 5 : 6;
|
||||
}
|
||||
|
||||
// Creates a backward jump label which will be passed into a Jxx instruction (or few!)
|
||||
// later on, and the current x86Ptr is recorded as the target [thus making the class
|
||||
// creation point the jump target].
|
||||
iSmartJump()
|
||||
{
|
||||
m_target = iGetPtr();
|
||||
m_baseptr = NULL;
|
||||
m_cc = Jcc_Unknown;
|
||||
m_written = false;
|
||||
}
|
||||
|
||||
// ccType - Comparison type to be written back to the jump instruction position.
|
||||
//
|
||||
iSmartJump( JccComparisonType ccType )
|
||||
{
|
||||
jASSUME( ccType != Jcc_Unknown );
|
||||
m_target = NULL;
|
||||
m_baseptr = iGetPtr();
|
||||
m_cc = ccType;
|
||||
m_written = false;
|
||||
iAdvancePtr( GetMaxInstructionSize() );
|
||||
}
|
||||
|
||||
JccComparisonType GetCondition() const
|
||||
{
|
||||
return m_cc;
|
||||
}
|
||||
|
||||
u8* GetTarget() const
|
||||
{
|
||||
return m_target;
|
||||
}
|
||||
|
||||
void SetTarget();
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
// iForwardJump
// Forward jump whose displacement width is fixed by the template parameter
// (OperandType is the signed displacement type; sizeof selects rel8 vs rel32).
// Constructor and SetTarget are defined out-of-line, not visible in this section.
//
template< typename OperandType >
class iForwardJump
{
public:
	// Size in bytes of the jump's displacement operand (1 for s8, 4 for s32).
	static const uint OperandSize = sizeof( OperandType );

	// pointer to base of the instruction *Following* the jump.  The jump address will be
	// relative to this address.
	s8* const BasePtr;

public:
	// cctype - condition for the jump; defaults to an unconditional JMP.
	iForwardJump( JccComparisonType cctype = Jcc_Unconditional );

	// Writes the displacement back into the emitted jump instruction.
	// (presumably targets the current emitter position -- definition not visible here)
	void SetTarget() const;
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
namespace Internal
|
||||
|
@ -678,13 +794,6 @@ namespace x86Emitter
|
|||
}
|
||||
};
|
||||
|
||||
// if the immediate is zero, we can replace the instruction, or ignore it
|
||||
// entirely, depending on the instruction being issued. That's what we do here.
|
||||
// (returns FALSE if no optimization is performed)
|
||||
// [TODO] : Work-in-progress!
|
||||
//template< G1Type InstType, typename RegType >
|
||||
//static __forceinline void _optimize_imm0( RegType to );
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
//
|
||||
template< G1Type InstType >
|
||||
|
@ -789,7 +898,6 @@ namespace x86Emitter
|
|||
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
|
||||
void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); }
|
||||
|
||||
|
||||
Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds.
|
||||
};
|
||||
|
||||
|
@ -799,22 +907,85 @@ namespace x86Emitter
|
|||
// importing Internal into x86Emitter, which done at the header file level would defeat
|
||||
// the purpose!)
|
||||
|
||||
extern const Group1ImplAll<G1Type_ADD> ADD;
|
||||
extern const Group1ImplAll<G1Type_OR> OR;
|
||||
extern const Group1ImplAll<G1Type_ADC> ADC;
|
||||
extern const Group1ImplAll<G1Type_SBB> SBB;
|
||||
extern const Group1ImplAll<G1Type_AND> AND;
|
||||
extern const Group1ImplAll<G1Type_SUB> SUB;
|
||||
extern const Group1ImplAll<G1Type_XOR> XOR;
|
||||
extern const Group1ImplAll<G1Type_CMP> CMP;
|
||||
extern const Group1ImplAll<G1Type_ADD> iADD;
|
||||
extern const Group1ImplAll<G1Type_OR> iOR;
|
||||
extern const Group1ImplAll<G1Type_ADC> iADC;
|
||||
extern const Group1ImplAll<G1Type_SBB> iSBB;
|
||||
extern const Group1ImplAll<G1Type_AND> iAND;
|
||||
extern const Group1ImplAll<G1Type_SUB> iSUB;
|
||||
extern const Group1ImplAll<G1Type_XOR> iXOR;
|
||||
extern const Group1ImplAll<G1Type_CMP> iCMP;
|
||||
|
||||
extern const Group2ImplAll<G2Type_ROL> ROL;
|
||||
extern const Group2ImplAll<G2Type_ROR> ROR;
|
||||
extern const Group2ImplAll<G2Type_RCL> RCL;
|
||||
extern const Group2ImplAll<G2Type_RCR> RCR;
|
||||
extern const Group2ImplAll<G2Type_SHL> SHL;
|
||||
extern const Group2ImplAll<G2Type_SHR> SHR;
|
||||
extern const Group2ImplAll<G2Type_SAR> SAR;
|
||||
extern const Group2ImplAll<G2Type_ROL> iROL;
|
||||
extern const Group2ImplAll<G2Type_ROR> iROR;
|
||||
extern const Group2ImplAll<G2Type_RCL> iRCL;
|
||||
extern const Group2ImplAll<G2Type_RCR> iRCR;
|
||||
extern const Group2ImplAll<G2Type_SHL> iSHL;
|
||||
extern const Group2ImplAll<G2Type_SHR> iSHR;
|
||||
extern const Group2ImplAll<G2Type_SAR> iSAR;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
// Mov with sign/zero extension implementations (MOVSX / MOVZX):
//
// DestOperandSize / SrcOperandSize are in bytes (4/2/1).
//
template< int DestOperandSize, int SrcOperandSize >
class MovExtendImpl
{
protected:
	// True when the source operand is a byte (selects the 0x0F 0xB6/0xBE opcode forms).
	static bool Is8BitOperand()	{ return SrcOperandSize == 1; }
	// Emits the 0x66 operand-size prefix when the destination is 16 bits wide.
	static void prefix16()		{ if( DestOperandSize == 2 ) iWrite<u8>( 0x66 ); }

	// Writes the two-byte opcode.  Base 0x0F 0xB6 is MOVZX r, r/m8; OR'ing in 1
	// selects the r/m16 source form (0xB7/0xBF), and OR'ing in 8 selects sign
	// extension (MOVSX: 0xBE/0xBF).
	static __forceinline void emit_base( bool SignExtend )
	{
		prefix16();
		iWrite<u8>( 0x0f );
		iWrite<u8>( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) );
	}

public:
	MovExtendImpl() {}	// For the love of GCC. (explicit ctor for a memberless class)

	// Register-to-register form: opcode followed by a mod=11 ModRM byte.
	static __emitinline void Emit( const x86Register<DestOperandSize>& to, const x86Register<SrcOperandSize>& from, bool SignExtend )
	{
		emit_base( SignExtend );
		ModRM( 3, from.Id, to.Id );
	}

	// Memory-source form: opcode followed by the ModRM/SIB encoding of sibsrc.
	static __emitinline void Emit( const x86Register<DestOperandSize>& to, const ModSibStrict<SrcOperandSize>& sibsrc, bool SignExtend )
	{
		emit_base( SignExtend );
		EmitSibMagic( to.Id, sibsrc );
	}
};
|
||||
|
||||
// ------------------------------------------------------------------------
// Overload dispatcher for the extend-moves; SignExtend selects MOVSX (true)
// or MOVZX (false) at compile time.
template< bool SignExtend >
class MovExtendImplAll
{
protected:
	typedef MovExtendImpl<4, 2> m_16to32;	// 16-bit source -> 32-bit dest
	typedef MovExtendImpl<4, 1> m_8to32;	// 8-bit source -> 32-bit dest

public:
	// 32 <- 16, register source
	__forceinline void operator()( const x86Register32& to, const x86Register16& from ) const	{ m_16to32::Emit( to, from, SignExtend ); }
	// 32 <- 16, memory source
	__noinline void operator()( const x86Register32& to, const ModSibStrict<2>& sibsrc ) const	{ m_16to32::Emit( to, sibsrc, SignExtend ); }

	// 32 <- 8, register source
	__forceinline void operator()( const x86Register32& to, const x86Register8& from ) const	{ m_8to32::Emit( to, from, SignExtend ); }
	// 32 <- 8, memory source
	__noinline void operator()( const x86Register32& to, const ModSibStrict<1>& sibsrc ) const	{ m_8to32::Emit( to, sibsrc, SignExtend ); }

	MovExtendImplAll() {}	// don't ask.  (explicit ctor appeases GCC for memberless classes)
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const MovExtendImplAll<true> iMOVSX;
|
||||
extern const MovExtendImplAll<false> iMOVZX;
|
||||
|
||||
|
||||
// if the immediate is zero, we can replace the instruction, or ignore it
|
||||
// entirely, depending on the instruction being issued. That's what we do here.
|
||||
// (returns FALSE if no optimization is performed)
|
||||
// [TODO] : Work-in-progress!
|
||||
//template< G1Type InstType, typename RegType >
|
||||
//static __forceinline void _optimize_imm0( RegType to );
|
||||
|
||||
/*template< G1Type InstType, typename RegType >
|
||||
static __forceinline void _optimize_imm0( const RegType& to )
|
||||
|
@ -822,26 +993,26 @@ namespace x86Emitter
|
|||
switch( InstType )
|
||||
{
|
||||
// ADD, SUB, and OR can be ignored if the imm is zero..
|
||||
case G1Type_ADD:
|
||||
case G1Type_SUB:
|
||||
case G1Type_OR:
|
||||
return true;
|
||||
case G1Type_ADD:
|
||||
case G1Type_SUB:
|
||||
case G1Type_OR:
|
||||
return true;
|
||||
|
||||
// ADC and SBB can never be ignored (could have carry bits)
|
||||
// XOR behavior is distinct as well [or is it the same as NEG or NOT?]
|
||||
case G1Type_ADC:
|
||||
case G1Type_SBB:
|
||||
case G1Type_XOR:
|
||||
return false;
|
||||
case G1Type_ADC:
|
||||
case G1Type_SBB:
|
||||
case G1Type_XOR:
|
||||
return false;
|
||||
|
||||
// replace AND with XOR (or SUB works too.. whatever!)
|
||||
case G1Type_AND:
|
||||
XOR( to, to );
|
||||
case G1Type_AND:
|
||||
iXOR( to, to );
|
||||
return true;
|
||||
|
||||
// replace CMP with OR reg,reg:
|
||||
case G1Type_CMP:
|
||||
OR( to, to );
|
||||
case G1Type_CMP:
|
||||
iOR( to, to );
|
||||
return true;
|
||||
|
||||
jNO_DEFAULT
|
||||
|
|
Loading…
Reference in New Issue