Added CMOV to the emitter, renamed x86Struct stuff to iStruct, renamed the XMMREGS / X86REGS / MMXREGS defines to iREGCNT_XMM / iREGCNT_GPR / iREGCNT_MMX, and undid a couple of u32 optimizations that could have caused unexpected behavior in the future, if we ever decided to employ some particularly obscure case of self-modifying code.
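A call-site sketch of the rename, plus the new CMOV front-end:

    for (i=0; i<XMMREGS; i++)       // before
    for (i=0; i<iREGCNT_XMM; i++)   // after

    iCMOVZ( eax, ecx );             // cmovz eax, ecx
    iCMOV( Jcc_Below, eax, ecx );   // generic form; condition chosen at runtime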

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@985 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-04-15 21:00:32 +00:00
parent 4d2adcae9f
commit f228a91c93
19 changed files with 1014 additions and 1002 deletions

View File

@ -2925,6 +2925,18 @@
RelativePath="..\..\x86\ix86\ix86_fpu.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_impl_group1.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_impl_group2.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_impl_movs.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_inlines.inl"
>

View File

@ -40,10 +40,10 @@ u32 g_recWriteback = 0;
char g_globalXMMLocked = 0;
#endif
_xmmregs xmmregs[XMMREGS], s_saveXMMregs[XMMREGS];
_xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM];
// X86 caching
_x86regs x86regs[X86REGS], s_saveX86regs[X86REGS];
_x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
#include <vector>
using namespace std;
@ -119,16 +119,16 @@ int _getFreeXMMreg()
int i, tempi;
u32 bestcount = 0x10000;
for (i=0; i<XMMREGS; i++) {
if (xmmregs[(i+s_xmmchecknext)%XMMREGS].inuse == 0) {
int ret = (s_xmmchecknext+i)%XMMREGS;
s_xmmchecknext = (s_xmmchecknext+i+1)%XMMREGS;
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[(i+s_xmmchecknext)%iREGCNT_XMM].inuse == 0) {
int ret = (s_xmmchecknext+i)%iREGCNT_XMM;
s_xmmchecknext = (s_xmmchecknext+i+1)%iREGCNT_XMM;
return ret;
}
}
// check for dead regs
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
if( !(g_pCurInstInfo->regs[xmmregs[i].reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)) ) {
@ -139,7 +139,7 @@ int _getFreeXMMreg()
}
// check for future xmm usage
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
if( !(g_pCurInstInfo->regs[xmmregs[i].reg] & EEINST_XMM) ) {
@ -151,7 +151,7 @@ int _getFreeXMMreg()
tempi = -1;
bestcount = 0xffff;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type != XMMTYPE_TEMP) {
@ -196,7 +196,7 @@ int _allocVFtoXMMreg(VURegs *VU, int xmmreg, int vfreg, int mode) {
int i;
int readfromreg = -1;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if ((xmmregs[i].inuse == 0) || (xmmregs[i].type != XMMTYPE_VFREG) ||
(xmmregs[i].reg != vfreg) || (xmmregs[i].VU != XMM_CONV_VU(VU)))
continue;
@ -250,7 +250,7 @@ int _checkXMMreg(int type, int reg, int mode)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse && (xmmregs[i].type == (type&0xff)) && (xmmregs[i].reg == reg)) {
if ( !(xmmregs[i].mode & MODE_READ) ) {
@ -279,7 +279,7 @@ int _allocACCtoXMMreg(VURegs *VU, int xmmreg, int mode) {
int i;
int readfromreg = -1;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_ACC) continue;
if (xmmregs[i].VU != XMM_CONV_VU(VU) ) continue;
@ -335,7 +335,7 @@ int _allocACCtoXMMreg(VURegs *VU, int xmmreg, int mode) {
int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode) {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_FPREG) continue;
if (xmmregs[i].reg != fpreg) continue;
@ -372,7 +372,7 @@ int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode)
{
int i;
for (i=0; i<XMMREGS; i++)
for (i=0; i<iREGCNT_XMM; i++)
{
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_GPRREG) continue;
@ -478,7 +478,7 @@ int _allocFPACCtoXMMreg(int xmmreg, int mode)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_FPACC) continue;
@ -516,7 +516,7 @@ int _allocFPACCtoXMMreg(int xmmreg, int mode)
void _addNeededVFtoXMMreg(int vfreg) {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_VFREG) continue;
if (xmmregs[i].reg != vfreg) continue;
@ -530,7 +530,7 @@ void _addNeededGPRtoXMMreg(int gprreg)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_GPRREG) continue;
if (xmmregs[i].reg != gprreg) continue;
@ -544,7 +544,7 @@ void _addNeededGPRtoXMMreg(int gprreg)
void _addNeededACCtoXMMreg() {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_ACC) continue;
@ -557,7 +557,7 @@ void _addNeededACCtoXMMreg() {
void _addNeededFPtoXMMreg(int fpreg) {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_FPREG) continue;
if (xmmregs[i].reg != fpreg) continue;
@ -571,7 +571,7 @@ void _addNeededFPtoXMMreg(int fpreg) {
void _addNeededFPACCtoXMMreg() {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_FPACC) continue;
@ -584,7 +584,7 @@ void _addNeededFPACCtoXMMreg() {
void _clearNeededXMMregs() {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if( xmmregs[i].needed ) {
@ -605,7 +605,7 @@ void _deleteVFtoXMMreg(int reg, int vu, int flush)
int i;
VURegs *VU = vu ? &VU1 : &VU0;
for (i=0; i<XMMREGS; i++)
for (i=0; i<iREGCNT_XMM; i++)
{
if (xmmregs[i].inuse && (xmmregs[i].type == XMMTYPE_VFREG) &&
(xmmregs[i].reg == reg) && (xmmregs[i].VU == vu))
@ -627,13 +627,13 @@ void _deleteVFtoXMMreg(int reg, int vu, int flush)
// xyz, don't destroy w
int t0reg;
for (t0reg = 0; t0reg < XMMREGS; ++t0reg)
for (t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg)
{
if (!xmmregs[t0reg].inuse )
break;
}
if (t0reg < XMMREGS )
if (t0reg < iREGCNT_XMM )
{
SSE_MOVHLPS_XMM_to_XMM(t0reg, i);
SSE_MOVLPS_XMM_to_M64(VU_VFx_ADDR(xmmregs[i].reg), i);
@ -675,7 +675,7 @@ void _deleteACCtoXMMreg(int vu, int flush)
int i;
VURegs *VU = vu ? &VU1 : &VU0;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse && (xmmregs[i].type == XMMTYPE_ACC) && (xmmregs[i].VU == vu)) {
switch(flush) {
@ -691,11 +691,11 @@ void _deleteACCtoXMMreg(int vu, int flush)
if( xmmregs[i].mode & MODE_VUZ ) {
// xyz, don't destroy w
int t0reg;
for(t0reg = 0; t0reg < XMMREGS; ++t0reg ) {
for(t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg ) {
if( !xmmregs[t0reg].inuse ) break;
}
if( t0reg < XMMREGS ) {
if( t0reg < iREGCNT_XMM ) {
SSE_MOVHLPS_XMM_to_XMM(t0reg, i);
SSE_MOVLPS_XMM_to_M64(VU_ACCx_ADDR, i);
SSE_MOVSS_XMM_to_M32(VU_ACCx_ADDR+8, t0reg);
@ -735,7 +735,7 @@ void _deleteACCtoXMMreg(int vu, int flush)
void _deleteGPRtoXMMreg(int reg, int flush)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_GPRREG && xmmregs[i].reg == reg ) {
@ -769,7 +769,7 @@ void _deleteGPRtoXMMreg(int reg, int flush)
void _deleteFPtoXMMreg(int reg, int flush)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_FPREG && xmmregs[i].reg == reg ) {
switch(flush) {
case 0:
@ -795,7 +795,7 @@ void _deleteFPtoXMMreg(int reg, int flush)
void _freeXMMreg(int xmmreg)
{
assert( xmmreg < XMMREGS );
assert( xmmreg < iREGCNT_XMM );
if (!xmmregs[xmmreg].inuse) return;
@ -810,11 +810,11 @@ void _freeXMMreg(int xmmreg)
{
// don't destroy w
int t0reg;
for(t0reg = 0; t0reg < XMMREGS; ++t0reg ) {
for(t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg ) {
if( !xmmregs[t0reg].inuse ) break;
}
if( t0reg < XMMREGS )
if( t0reg < iREGCNT_XMM )
{
SSE_MOVHLPS_XMM_to_XMM(t0reg, xmmreg);
SSE_MOVLPS_XMM_to_M64(VU_VFx_ADDR(xmmregs[xmmreg].reg), xmmreg);
@ -852,11 +852,11 @@ void _freeXMMreg(int xmmreg)
// don't destroy w
int t0reg;
for(t0reg = 0; t0reg < XMMREGS; ++t0reg ) {
for(t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg ) {
if( !xmmregs[t0reg].inuse ) break;
}
if( t0reg < XMMREGS )
if( t0reg < iREGCNT_XMM )
{
SSE_MOVHLPS_XMM_to_XMM(t0reg, xmmreg);
SSE_MOVLPS_XMM_to_M64(VU_ACCx_ADDR, xmmreg);
@ -909,7 +909,7 @@ void _freeXMMreg(int xmmreg)
int _getNumXMMwrite()
{
int num = 0, i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if( xmmregs[i].inuse && (xmmregs[i].mode&MODE_WRITE) ) ++num;
}
@ -920,12 +920,12 @@ u8 _hasFreeXMMreg()
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (!xmmregs[i].inuse) return 1;
}
// check for dead regs
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
if( !EEINST_ISLIVEXMM(xmmregs[i].reg) ) {
@ -935,7 +935,7 @@ u8 _hasFreeXMMreg()
}
// check for dead regs
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
if( !(g_pCurInstInfo->regs[xmmregs[i].reg]&EEINST_USED) ) {
@ -951,12 +951,12 @@ void _moveXMMreg(int xmmreg)
int i;
if( !xmmregs[xmmreg].inuse ) return;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse) continue;
break;
}
if( i == XMMREGS ) {
if( i == iREGCNT_XMM ) {
_freeXMMreg(xmmreg);
return;
}
@ -971,7 +971,7 @@ void _flushXMMregs()
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
assert( xmmregs[i].type != XMMTYPE_TEMP );
@ -988,7 +988,7 @@ void _freeXMMregs()
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
assert( xmmregs[i].type != XMMTYPE_TEMP );

View File

@ -114,7 +114,7 @@ struct _x86regs {
u32 extra; // extra info assoc with the reg
};
extern _x86regs x86regs[X86REGS], s_saveX86regs[X86REGS];
extern _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
uptr _x86GetAddr(int type, int reg);
void _initX86regs();
@ -287,7 +287,7 @@ extern u32 g_recWriteback; // used for jumps (VUrec mess!)
extern u32 g_cpuRegHasLive1, g_cpuPrevRegHasLive1;
extern u32 g_cpuRegHasSignExt, g_cpuPrevRegHasSignExt;
extern _xmmregs xmmregs[XMMREGS], s_saveXMMregs[XMMREGS];
extern _xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM];
extern u16 g_x86AllocCounter;
extern u16 g_xmmAllocCounter;
@ -361,7 +361,7 @@ void _recMove128MtoRmOffset(u32 offset, u32 from);
// a negative shift is for sign extension
extern int _signExtendGPRtoMMX(x86MMXRegType to, u32 gprreg, int shift);
extern _mmxregs mmxregs[MMXREGS], s_saveMMXregs[MMXREGS];
extern _mmxregs mmxregs[iREGCNT_MMX], s_saveMMXregs[iREGCNT_MMX];
extern u16 x86FpuState;
extern void iDumpRegisters(u32 startpc, u32 temp);

View File

@ -1640,7 +1640,7 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
}
if( regd == EEREC_ACC ) {
assert( EEREC_TEMP < XMMREGS );
assert( EEREC_TEMP < iREGCNT_XMM );
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); }

View File

@ -223,7 +223,7 @@ public:
u32 vuxyz; // corresponding bit is set if reg's xyz channels are used only
u32 vuxy; // corresponding bit is set if reg's xy channels are used only
_xmmregs startregs[XMMREGS], endregs[XMMREGS];
_xmmregs startregs[iREGCNT_XMM], endregs[iREGCNT_XMM];
int nStartx86, nEndx86; // indices into s_vecRegArray
int allocX86Regs;
@ -571,7 +571,7 @@ void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
if( (*itblock)->nStartx86 >= 0 ) {
pregs = &s_vecRegArray[(*itblock)->nStartx86];
fprintf(f, "STR: ");
for(i = 0; i < X86REGS; ++i) {
for(i = 0; i < iREGCNT_GPR; ++i) {
if( pregs[i].inuse ) fprintf(f, "%.2d ", pregs[i].reg);
else fprintf(f, "-1 ");
}
@ -581,7 +581,7 @@ void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
if( (*itblock)->nEndx86 >= 0 ) {
fprintf(f, "END: ");
pregs = &s_vecRegArray[(*itblock)->nEndx86];
for(i = 0; i < X86REGS; ++i) {
for(i = 0; i < iREGCNT_GPR; ++i) {
if( pregs[i].inuse ) fprintf(f, "%.2d ", pregs[i].reg);
else fprintf(f, "-1 ");
}
@ -1879,14 +1879,14 @@ void VuBaseBlock::AssignVFRegs()
if( type & BLOCKTYPE_ANALYZED ) {
// check if changed
for(i = 0; i < XMMREGS; ++i) {
for(i = 0; i < iREGCNT_XMM; ++i) {
if( xmmregs[i].inuse != startregs[i].inuse )
break;
if( xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type) )
break;
}
if( i == XMMREGS ) return; // nothing changed
if( i == iREGCNT_XMM ) return; // nothing changed
}
u8* oldX86 = x86Ptr;
@ -1904,7 +1904,7 @@ void VuBaseBlock::AssignVFRegs()
// redo the counters so that the proper regs are released
for(int j = 0; j < XMMREGS; ++j) {
for(int j = 0; j < iREGCNT_XMM; ++j) {
if( xmmregs[j].inuse ) {
if( xmmregs[j].type == XMMTYPE_VFREG ) {
int count = 0;
@ -2119,10 +2119,10 @@ void VuBaseBlock::AssignVIRegs(int parent)
// child
assert( allocX86Regs == -1 );
allocX86Regs = s_vecRegArray.size();
s_vecRegArray.resize(allocX86Regs+X86REGS);
s_vecRegArray.resize(allocX86Regs+iREGCNT_GPR);
_x86regs* pregs = &s_vecRegArray[allocX86Regs];
memset(pregs, 0, sizeof(_x86regs)*X86REGS);
memset(pregs, 0, sizeof(_x86regs)*iREGCNT_GPR);
assert( parents.size() > 0 );
@ -2210,10 +2210,10 @@ static void SuperVUAssignRegs()
// assign the regs
int regid = s_vecRegArray.size();
s_vecRegArray.resize(regid+X86REGS);
s_vecRegArray.resize(regid+iREGCNT_GPR);
_x86regs* mergedx86 = &s_vecRegArray[regid];
memset(mergedx86, 0, sizeof(_x86regs)*X86REGS);
memset(mergedx86, 0, sizeof(_x86regs)*iREGCNT_GPR);
if( !bfirst ) {
*(u32*)usedregs = *((u32*)usedregs+1) = *((u32*)usedregs+2) = *((u32*)usedregs+3) = 0;
@ -2221,7 +2221,7 @@ static void SuperVUAssignRegs()
FORIT(itblock2, s_markov.children) {
assert( (*itblock2)->allocX86Regs >= 0 );
_x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs];
for(int i = 0; i < X86REGS; ++i) {
for(int i = 0; i < iREGCNT_GPR; ++i) {
if( pregs[i].inuse && pregs[i].reg < 16) {
//assert( pregs[i].reg < 16);
usedregs[pregs[i].reg]++;
@ -2237,7 +2237,7 @@ static void SuperVUAssignRegs()
mergedx86[num].reg = i;
mergedx86[num].type = (s_vu?X86TYPE_VU1:0)|X86TYPE_VI;
mergedx86[num].mode = MODE_READ;
if( ++num >= X86REGS )
if( ++num >= iREGCNT_GPR )
break;
if( num == ESP )
++num;
@ -2559,7 +2559,7 @@ void svudispfntemp()
// frees all regs taking into account the livevars
void SuperVUFreeXMMregs(u32* livevars)
{
for(int i = 0; i < XMMREGS; ++i) {
for(int i = 0; i < iREGCNT_XMM; ++i) {
if( xmmregs[i].inuse ) {
// same reg
if( (xmmregs[i].mode & MODE_WRITE) ) {
@ -2772,7 +2772,7 @@ void VuBaseBlock::Recompile()
#ifdef SUPERVU_X86CACHING
if( nEndx86 >= 0 ) {
_x86regs* endx86 = &s_vecRegArray[nEndx86];
for(int i = 0; i < X86REGS; ++i) {
for(int i = 0; i < iREGCNT_GPR; ++i) {
if( endx86[i].inuse ) {
if( s_JumpX86 == i && x86regs[s_JumpX86].inuse ) {
@ -3239,7 +3239,7 @@ void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
#ifdef SUPERVU_X86CACHING
// redo the counters so that the proper regs are released
for(int j = 0; j < X86REGS; ++j) {
for(int j = 0; j < iREGCNT_GPR; ++j) {
if( x86regs[j].inuse && X86_ISVI(x86regs[j].type) ) {
int count = 0;
itinst2 = itinst;

View File

@ -78,16 +78,16 @@ int _getFreeX86reg(int mode)
int i, tempi;
u32 bestcount = 0x10000;
int maxreg = (mode&MODE_8BITREG)?4:X86REGS;
int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR;
for (i=0; i<X86REGS; i++) {
int reg = (g_x86checknext+i)%X86REGS;
for (i=0; i<iREGCNT_GPR; i++) {
int reg = (g_x86checknext+i)%iREGCNT_GPR;
if( reg == 0 || reg == ESP ) continue;
if( reg >= maxreg ) continue;
if( (mode&MODE_NOFRAME) && reg==EBP ) continue;
if (x86regs[reg].inuse == 0) {
g_x86checknext = (reg+1)%X86REGS;
g_x86checknext = (reg+1)%iREGCNT_GPR;
return reg;
}
}
@ -207,16 +207,16 @@ int _allocX86reg(int x86reg, int type, int reg, int mode)
// don't alloc EAX and ESP,EBP if MODE_NOFRAME
int oldmode = mode;
int noframe = mode&MODE_NOFRAME;
int maxreg = (mode&MODE_8BITREG)?4:X86REGS;
int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR;
mode &= ~(MODE_NOFRAME|MODE_8BITREG);
int readfromreg = -1;
if( type != X86TYPE_TEMP ) {
if( maxreg < X86REGS ) {
if( maxreg < iREGCNT_GPR ) {
// make sure reg isn't in the higher regs
for(i = maxreg; i < X86REGS; ++i) {
for(i = maxreg; i < iREGCNT_GPR; ++i) {
if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg) continue;
if( mode & MODE_READ ) {
@ -324,7 +324,7 @@ int _checkX86reg(int type, int reg, int mode)
{
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type) {
if( !(x86regs[i].mode & MODE_READ) && (mode&MODE_READ) ) {
@ -348,7 +348,7 @@ void _addNeededX86reg(int type, int reg)
{
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (!x86regs[i].inuse || x86regs[i].reg != reg || x86regs[i].type != type ) continue;
x86regs[i].counter = g_x86AllocCounter++;
@ -359,7 +359,7 @@ void _addNeededX86reg(int type, int reg)
void _clearNeededX86regs() {
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (x86regs[i].needed ) {
if( x86regs[i].inuse && (x86regs[i].mode&MODE_WRITE) )
x86regs[i].mode |= MODE_READ;
@ -372,7 +372,7 @@ void _deleteX86reg(int type, int reg, int flush)
{
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type) {
switch(flush) {
case 0:
@ -401,7 +401,7 @@ void _deleteX86reg(int type, int reg, int flush)
void _freeX86reg(int x86reg)
{
assert( x86reg >= 0 && x86reg < X86REGS );
assert( x86reg >= 0 && x86reg < iREGCNT_GPR );
if( x86regs[x86reg].inuse && (x86regs[x86reg].mode&MODE_WRITE) ) {
x86regs[x86reg].mode &= ~MODE_WRITE;
@ -419,7 +419,7 @@ void _freeX86reg(int x86reg)
void _freeX86regs() {
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (!x86regs[i].inuse) continue;
_freeX86reg(i);
@ -459,16 +459,16 @@ int _getFreeMMXreg()
int tempi = -1;
u32 bestcount = 0x10000;
for (i=0; i<MMXREGS; i++) {
if (mmxregs[(s_mmxchecknext+i)%MMXREGS].inuse == 0) {
int ret = (s_mmxchecknext+i)%MMXREGS;
s_mmxchecknext = (s_mmxchecknext+i+1)%MMXREGS;
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[(s_mmxchecknext+i)%iREGCNT_MMX].inuse == 0) {
int ret = (s_mmxchecknext+i)%iREGCNT_MMX;
s_mmxchecknext = (s_mmxchecknext+i+1)%iREGCNT_MMX;
return ret;
}
}
// check for dead regs
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { // mmxregs[i] is unsigned, and MMX_GPR == 0, so the first part is always true.
if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR] & (EEINST_LIVE0|EEINST_LIVE1)) ) {
@ -483,7 +483,7 @@ int _getFreeMMXreg()
}
// check for future xmm usage
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) {
if( !(g_pCurInstInfo->regs[mmxregs[i].reg] & EEINST_MMX) ) {
@ -493,7 +493,7 @@ int _getFreeMMXreg()
}
}
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg != MMX_TEMP) {
@ -523,7 +523,7 @@ int _allocMMXreg(int mmxreg, int reg, int mode)
int i;
if( reg != MMX_TEMP ) {
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse == 0 || mmxregs[i].reg != reg ) continue;
if( MMX_ISGPR(reg)) {
@ -602,7 +602,7 @@ int _allocMMXreg(int mmxreg, int reg, int mode)
int _checkMMXreg(int reg, int mode)
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse && mmxregs[i].reg == reg ) {
if( !(mmxregs[i].mode & MODE_READ) && (mode&MODE_READ) ) {
@ -635,7 +635,7 @@ void _addNeededMMXreg(int reg)
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse == 0) continue;
if (mmxregs[i].reg != reg) continue;
@ -648,7 +648,7 @@ void _clearNeededMMXregs()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if( mmxregs[i].needed ) {
// setup read to any just written regs
if( mmxregs[i].inuse && (mmxregs[i].mode&MODE_WRITE) )
@ -661,7 +661,7 @@ void _clearNeededMMXregs()
void _deleteMMXreg(int reg, int flush)
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse && mmxregs[i].reg == reg ) {
@ -696,7 +696,7 @@ void _deleteMMXreg(int reg, int flush)
int _getNumMMXwrite()
{
int num = 0, i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if( mmxregs[i].inuse && (mmxregs[i].mode&MODE_WRITE) ) ++num;
}
@ -706,12 +706,12 @@ int _getNumMMXwrite()
u8 _hasFreeMMXreg()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (!mmxregs[i].inuse) return 1;
}
// check for dead regs
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) {
if( !EEINST_ISLIVE64(mmxregs[i].reg-MMX_GPR) ) {
@ -721,7 +721,7 @@ u8 _hasFreeMMXreg()
}
// check for dead regs
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) {
if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR]&EEINST_USED) ) {
@ -735,7 +735,7 @@ u8 _hasFreeMMXreg()
void _freeMMXreg(int mmxreg)
{
assert( mmxreg < MMXREGS );
assert( mmxreg < iREGCNT_MMX );
if (!mmxregs[mmxreg].inuse) return;
if (mmxregs[mmxreg].mode & MODE_WRITE ) {
@ -762,12 +762,12 @@ void _moveMMXreg(int mmxreg)
int i;
if( !mmxregs[mmxreg].inuse ) return;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse) continue;
break;
}
if( i == MMXREGS ) {
if( i == iREGCNT_MMX ) {
_freeMMXreg(mmxreg);
return;
}
@ -783,7 +783,7 @@ void _flushMMXregs()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse == 0) continue;
if( mmxregs[i].mode & MODE_WRITE ) {
@ -807,7 +807,7 @@ void _flushMMXregs()
void _freeMMXregs()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse == 0) continue;
assert( mmxregs[i].reg != MMX_TEMP );

View File

@ -301,7 +301,7 @@ void _eeFlushAllUnused()
}
//TODO when used info is done for FPU and VU0
for(i = 0; i < XMMREGS; ++i) {
for(i = 0; i < iREGCNT_XMM; ++i) {
if( xmmregs[i].inuse && xmmregs[i].type != XMMTYPE_GPRREG )
_freeXMMreg(i);
}
@ -394,7 +394,7 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr)
int _flushXMMunused()
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (!xmmregs[i].inuse || xmmregs[i].needed || !(xmmregs[i].mode&MODE_WRITE) ) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
@ -413,7 +413,7 @@ int _flushXMMunused()
int _flushMMXunused()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (!mmxregs[i].inuse || mmxregs[i].needed || !(mmxregs[i].mode&MODE_WRITE) ) continue;
if( MMX_ISGPR(mmxregs[i].reg) ) {
@ -1217,7 +1217,7 @@ void recompileNextInstruction(int delayslot)
g_pCurInstInfo++;
for(i = 0; i < MMXREGS; ++i) {
for(i = 0; i < iREGCNT_MMX; ++i) {
if( mmxregs[i].inuse ) {
assert( MMX_ISGPR(mmxregs[i].reg) );
count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, XMMTYPE_GPRREG, mmxregs[i].reg-MMX_GPR);
@ -1226,7 +1226,7 @@ void recompileNextInstruction(int delayslot)
}
}
for(i = 0; i < XMMREGS; ++i) {
for(i = 0; i < iREGCNT_XMM; ++i) {
if( xmmregs[i].inuse ) {
count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, xmmregs[i].type, xmmregs[i].reg);
if( count > 0 ) xmmregs[i].counter = 1000-count;
@ -1587,7 +1587,7 @@ StartRecomp:
// see how many stores there are
u32 j;
// use xmmregs since only supporting lwc1,lq,swc1,sq
for(j = i+8; j < s_nEndBlock && j < i+4*XMMREGS; j += 4 ) {
for(j = i+8; j < s_nEndBlock && j < i+4*iREGCNT_XMM; j += 4 ) {
u32 nncode = *(u32*)PSM(j);
if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) ||
_eeLoadWritesRs(nncode))
@ -1596,7 +1596,7 @@ StartRecomp:
if( j > i+8 ) {
u32 num = (j-i)>>2; // number of stores that can coissue
assert( num <= XMMREGS );
assert( num <= iREGCNT_XMM );
g_pCurInstInfo[0].numpeeps = num-1;
g_pCurInstInfo[0].info |= EEINSTINFO_COREC;

View File

@ -62,7 +62,7 @@ __threadlocal u8 *x86Ptr;
__threadlocal u8 *j8Ptr[32];
__threadlocal u32 *j32Ptr[32];
__threadlocal XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT };
__threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT };
namespace x86Emitter {
@ -73,22 +73,22 @@ const x86IndexerTypeExplicit<1> ptr8;
// ------------------------------------------------------------------------
template< int OperandSize > const x86Register<OperandSize> x86Register<OperandSize>::Empty;
template< int OperandSize > const iRegister<OperandSize> iRegister<OperandSize>::Empty;
const x86IndexReg x86IndexReg::Empty;
const x86Register32
const iRegister32
eax( 0 ), ebx( 3 ),
ecx( 1 ), edx( 2 ),
esi( 6 ), edi( 7 ),
ebp( 5 ), esp( 4 );
const x86Register16
const iRegister16
ax( 0 ), bx( 3 ),
cx( 1 ), dx( 2 ),
si( 6 ), di( 7 ),
bp( 5 ), sp( 4 );
const x86Register8
const iRegister8
al( 0 ), cl( 1 ),
dl( 2 ), bl( 3 ),
ah( 4 ), ch( 5 ),
@ -96,28 +96,8 @@ const x86Register8
namespace Internal
{
const Group1ImplAll<G1Type_ADD> iADD;
const Group1ImplAll<G1Type_OR> iOR;
const Group1ImplAll<G1Type_ADC> iADC;
const Group1ImplAll<G1Type_SBB> iSBB;
const Group1ImplAll<G1Type_AND> iAND;
const Group1ImplAll<G1Type_SUB> iSUB;
const Group1ImplAll<G1Type_XOR> iXOR;
const Group1ImplAll<G1Type_CMP> iCMP;
const Group2ImplAll<G2Type_ROL> iROL;
const Group2ImplAll<G2Type_ROR> iROR;
const Group2ImplAll<G2Type_RCL> iRCL;
const Group2ImplAll<G2Type_RCR> iRCR;
const Group2ImplAll<G2Type_SHL> iSHL;
const Group2ImplAll<G2Type_SHR> iSHR;
const Group2ImplAll<G2Type_SAR> iSAR;
const MovExtendImplAll<true> iMOVSX;
const MovExtendImplAll<false> iMOVZX;
// Performance note: VC++ wants to use byte/word register form for the following
// ModRM/SibSB constructors if we use iWrite<u8>, and furthermore unrolls the
// ModRM/SibSB constructors when we use iWrite<u8>, and furthermore unrolls the
// the shift using a series of ADDs for the following results:
// add cl,cl
// add cl,cl
@ -130,21 +110,38 @@ namespace Internal
// register aliases and false dependencies. (although may have been ideal for early-
// brand P4s with a broken barrel shifter?). The workaround is to do our own manual
// x86Ptr access and update using a u32 instead of u8. Thanks to little endianness,
// the same end result is achieved and no false dependencies are generated.
// the same end result is achieved and no false dependencies are generated. The draw-
// back is that it clobbers 3 bytes past the end of the write, which could cause a
// headache for someone who himself is doing some kind of headache-inducing amount of
// recompiler SMC. So we don't do a work-around, and just hope for the compiler to
// stop sucking someday instead. :)
//
// (btw, I know this isn't a critical performance item by any means, but it's
// annoying simply because it *should* be an easy thing to optimize)
__forceinline void ModRM( uint mod, uint reg, uint rm )
{
*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
x86Ptr++;
iWrite<u8>( (mod << 6) | (reg << 3) | rm );
//*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
//x86Ptr++;
}
__forceinline void SibSB( u32 ss, u32 index, u32 base )
{
*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
x86Ptr++;
iWrite<u8>( (ss << 6) | (index << 3) | base );
//*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
//x86Ptr++;
}
__forceinline void iWriteDisp( int regfield, s32 displacement )
{
ModRM( 0, regfield, ModRm_UseDisp32 );
iWrite<s32>( displacement );
}
__forceinline void iWriteDisp( int regfield, const void* address )
{
iWriteDisp( regfield, (s32)address );
}
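// Usage sketch: iWriteDisp encodes the mod=00, rm=101 "[disp32]" addressing
// form. For "mov ecx, [g_var]" ('g_var' being a hypothetical u32 global), the
// generic path amounts to:
//
//   iWrite<u8>( 0x8b );            // opcode: mov r32, r/m32
//   iWriteDisp( ecx.Id, &g_var );  // ModRM 0x0D + 32-bit absolute address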
// ------------------------------------------------------------------------
@ -172,7 +169,7 @@ namespace Internal
// regfield - register field to be written to the ModRm. This is either a register specifier
// or an opcode extension. In either case, the instruction determines the value for us.
//
__forceinline void EmitSibMagic( uint regfield, const ModSibBase& info )
void EmitSibMagic( uint regfield, const ModSibBase& info )
{
jASSUME( regfield < 8 );
@ -188,8 +185,7 @@ namespace Internal
if( info.Index.IsEmpty() )
{
ModRM( 0, regfield, ModRm_UseDisp32 );
iWrite<s32>( info.Displacement );
iWriteDisp( regfield, info.Displacement );
return;
}
else
@ -227,14 +223,63 @@ namespace Internal
if( displacement_size != 0 )
{
*(s32*)x86Ptr = info.Displacement;
x86Ptr += (displacement_size == 1) ? 1 : 4;
if( displacement_size == 1 )
iWrite<s8>( info.Displacement );
else
iWrite<s32>( info.Displacement );
}
}
}
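// Displacement sizing in practice (register/offset values illustrative):
//   [esi+0x10]   -> fits s8, one displacement byte follows the ModRM/SIB
//   [esi+0x1000] -> needs s32, four displacement bytes follow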
using namespace Internal;
const Group1ImplAll<G1Type_ADD> iADD;
const Group1ImplAll<G1Type_OR> iOR;
const Group1ImplAll<G1Type_ADC> iADC;
const Group1ImplAll<G1Type_SBB> iSBB;
const Group1ImplAll<G1Type_AND> iAND;
const Group1ImplAll<G1Type_SUB> iSUB;
const Group1ImplAll<G1Type_XOR> iXOR;
const Group1ImplAll<G1Type_CMP> iCMP;
const Group2ImplAll<G2Type_ROL> iROL;
const Group2ImplAll<G2Type_ROR> iROR;
const Group2ImplAll<G2Type_RCL> iRCL;
const Group2ImplAll<G2Type_RCR> iRCR;
const Group2ImplAll<G2Type_SHL> iSHL;
const Group2ImplAll<G2Type_SHR> iSHR;
const Group2ImplAll<G2Type_SAR> iSAR;
const MovExtendImplAll<true> iMOVSX;
const MovExtendImplAll<false> iMOVZX;
const CMovImplGeneric iCMOV;
const CMovImplAll<Jcc_Above> iCMOVA;
const CMovImplAll<Jcc_AboveOrEqual> iCMOVAE;
const CMovImplAll<Jcc_Below> iCMOVB;
const CMovImplAll<Jcc_BelowOrEqual> iCMOVBE;
const CMovImplAll<Jcc_Greater> iCMOVG;
const CMovImplAll<Jcc_GreaterOrEqual> iCMOVGE;
const CMovImplAll<Jcc_Less> iCMOVL;
const CMovImplAll<Jcc_LessOrEqual> iCMOVLE;
const CMovImplAll<Jcc_Zero> iCMOVZ;
const CMovImplAll<Jcc_Equal> iCMOVE;
const CMovImplAll<Jcc_NotZero> iCMOVNZ;
const CMovImplAll<Jcc_NotEqual> iCMOVNE;
const CMovImplAll<Jcc_Overflow> iCMOVO;
const CMovImplAll<Jcc_NotOverflow> iCMOVNO;
const CMovImplAll<Jcc_Carry> iCMOVC;
const CMovImplAll<Jcc_NotCarry> iCMOVNC;
const CMovImplAll<Jcc_Signed> iCMOVS;
const CMovImplAll<Jcc_Unsigned> iCMOVNS;
const CMovImplAll<Jcc_ParityEven> iCMOVPE;
const CMovImplAll<Jcc_ParityOdd> iCMOVPO;
// ------------------------------------------------------------------------
// Assigns the current emitter buffer target address.
// This is provided instead of using x86Ptr directly, since we may in the future find
@ -390,18 +435,20 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
if( displacement_size != 0 )
{
*(s32*)x86Ptr = src.Displacement;
x86Ptr += (displacement_size == 1) ? 1 : 4;
if( displacement_size == 1 )
iWrite<s8>( src.Displacement );
else
iWrite<s32>( src.Displacement );
}
}
__emitinline void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags )
__emitinline void iLEA( iRegister32 to, const ModSibBase& src, bool preserve_flags )
{
EmitLeaMagic( to, src, preserve_flags );
}
__emitinline void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags )
__emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_flags )
{
write8( 0x66 );
EmitLeaMagic( to, src, preserve_flags );
@ -410,7 +457,7 @@ __emitinline void LEA( x86Register16 to, const ModSibBase& src, bool preserve_fl
//////////////////////////////////////////////////////////////////////////////////////////
// MOV instruction Implementation
template< typename ImmType, typename SibMagicType >
template< typename ImmType >
class MovImpl
{
public:
@ -422,7 +469,7 @@ protected:
public:
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from )
static __forceinline void Emit( const iRegister<OperandSize>& to, const iRegister<OperandSize>& from )
{
if( to == from ) return; // ignore redundant MOVs.
@ -432,7 +479,7 @@ public:
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const ModSibBase& dest, const x86Register<OperandSize>& from )
static __forceinline void Emit( const ModSibBase& dest, const iRegister<OperandSize>& from )
{
prefix16();
@ -447,12 +494,12 @@ public:
else
{
iWrite<u8>( Is8BitOperand() ? 0x88 : 0x89 );
SibMagicType::Emit( from.Id, dest );
EmitSibMagic( from.Id, dest );
}
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& src )
static __forceinline void Emit( const iRegister<OperandSize>& to, const ModSibBase& src )
{
prefix16();
@ -467,12 +514,50 @@ public:
else
{
iWrite<u8>( Is8BitOperand() ? 0x8a : 0x8b );
SibMagicType::Emit( to.Id, src );
EmitSibMagic( to.Id, src );
}
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
static __forceinline void Emit( void* dest, const iRegister<OperandSize>& from )
{
prefix16();
// mov eax has a special form when writing directly to a DISP32 address
if( from.IsAccumulator() )
{
iWrite<u8>( Is8BitOperand() ? 0xa2 : 0xa3 );
iWrite<s32>( (s32)dest );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0x88 : 0x89 );
iWriteDisp( from.Id, dest );
}
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const iRegister<OperandSize>& to, const void* src )
{
prefix16();
// mov eax has a special form when reading directly from a DISP32 address
if( to.IsAccumulator() )
{
iWrite<u8>( Is8BitOperand() ? 0xa0 : 0xa1 );
iWrite<s32>( (s32)src );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0x8a : 0x8b );
iWriteDisp( to.Id, src );
}
}
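// Byte-level sketch of the two paths above ('g_var' is a hypothetical u32
// global, <a32> its 32-bit absolute address):
//
//   Emit( eax, (void*)&g_var );   // A1 <a32>     -- accumulator short form
//   Emit( ecx, (void*)&g_var );   // 8B 0D <a32>  -- generic ModRM disp32 form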
// ------------------------------------------------------------------------
static __forceinline void Emit( const iRegister<OperandSize>& to, ImmType imm )
{
// Note: MOV does not have (reg16/32,imm8) forms.
@ -486,20 +571,16 @@ public:
{
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xc6 : 0xc7 );
SibMagicType::Emit( 0, dest );
EmitSibMagic( 0, dest );
iWrite<ImmType>( imm );
}
};
namespace Internal
{
typedef MovImpl<u32,SibMagic> MOV32;
typedef MovImpl<u16,SibMagic> MOV16;
typedef MovImpl<u8,SibMagic> MOV8;
typedef MovImpl<u32,SibMagicInline> MOV32i;
typedef MovImpl<u16,SibMagicInline> MOV16i;
typedef MovImpl<u8,SibMagicInline> MOV8i;
typedef MovImpl<u32> MOV32;
typedef MovImpl<u16> MOV16;
typedef MovImpl<u8> MOV8;
}
// Inlining Notes:
@ -512,96 +593,72 @@ namespace Internal
// TODO : Turn this into a macro after it's been debugged and accuracy-approved! :D
// ---------- 32 Bit Interface -----------
__forceinline void iMOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); }
__forceinline void iMOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); }
__forceinline void iMOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
__forceinline void iMOV( const iRegister32& to, const iRegister32& from ) { MOV32::Emit( to, from ); }
__forceinline void iMOV( const iRegister32& to, const void* src ) { MOV32::Emit( to, ptr32[src] ); }
__forceinline void iMOV( void* dest, const iRegister32& from ) { MOV32::Emit( ptr32[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const iRegister32& from ) { MOV32::Emit( sibdest, from ); }
__noinline void iMOV( const iRegister32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }
void iMOV( const x86Register32& to, u32 imm, bool preserve_flags )
void iMOV( const iRegister32& to, u32 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
iXOR( to, to );
else
MOV32i::Emit( to, imm );
MOV32::Emit( to, imm );
}
// ---------- 16 Bit Interface -----------
__forceinline void iMOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); }
__forceinline void iMOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); }
__forceinline void iMOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
__forceinline void iMOV( const iRegister16& to, const iRegister16& from ) { MOV16::Emit( to, from ); }
__forceinline void iMOV( const iRegister16& to, const void* src ) { MOV16::Emit( to, ptr16[src] ); }
__forceinline void iMOV( void* dest, const iRegister16& from ) { MOV16::Emit( ptr16[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const iRegister16& from ) { MOV16::Emit( sibdest, from ); }
__noinline void iMOV( const iRegister16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }
void iMOV( const x86Register16& to, u16 imm, bool preserve_flags )
void iMOV( const iRegister16& to, u16 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
iXOR( to, to );
else
MOV16i::Emit( to, imm );
MOV16::Emit( to, imm );
}
// ---------- 8 Bit Interface -----------
__forceinline void iMOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
__forceinline void iMOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); }
__forceinline void iMOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
__forceinline void iMOV( const iRegister8& to, const iRegister8& from ) { MOV8::Emit( to, from ); }
__forceinline void iMOV( const iRegister8& to, const void* src ) { MOV8::Emit( to, ptr8[src] ); }
__forceinline void iMOV( void* dest, const iRegister8& from ) { MOV8::Emit( ptr8[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const iRegister8& from ) { MOV8::Emit( sibdest, from ); }
__noinline void iMOV( const iRegister8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }
void iMOV( const x86Register8& to, u8 imm, bool preserve_flags )
void iMOV( const iRegister8& to, u8 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
iXOR( to, to );
else
MOV8i::Emit( to, imm );
MOV8::Emit( to, imm );
}
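// Usage sketch ('g_var' is a hypothetical u32 global):
//
//   iMOV( eax, &g_var );      // mov eax, [g_var]
//   iMOV( &g_var, eax );      // mov [g_var], eax
//   iMOV( edx, 0, false );    // emitted as xor edx, edx (flags clobbered)
//   iMOV( edx, 0, true );     // mov edx, 0 (preserve_flags: no xor shortcut)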
//////////////////////////////////////////////////////////////////////////////////////////
// Miscellaneous Section!
// Various Instructions with no parameter and no special encoding logic.
//
__forceinline void RET() { write8( 0xC3 ); }
__forceinline void CBW() { write16( 0x9866 ); }
__forceinline void CWD() { write8( 0x98 ); }
__forceinline void CDQ() { write8( 0x99 ); }
__forceinline void CWDE() { write8( 0x98 ); }
__forceinline void LAHF() { write8( 0x9f ); }
__forceinline void SAHF() { write8( 0x9e ); }
//////////////////////////////////////////////////////////////////////////////////////////
// Push / Pop Emitters
//
// Note: pushad/popad implementations are intentionally left out. The instructions are
// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
__forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); }
__emitinline void POP( const ModSibBase& from )
__emitinline void iPOP( const ModSibBase& from )
{
iWrite<u8>( 0x8f );
Internal::EmitSibMagic( 0, from );
}
__forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); }
__forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); }
__emitinline void PUSH( const ModSibBase& from )
__emitinline void iPUSH( const ModSibBase& from )
{
iWrite<u8>( 0xff );
Internal::EmitSibMagic( 6, from );
}
// pushes the EFLAGS register onto the stack
__forceinline void PUSHFD() { write8( 0x9C ); }
// pops the EFLAGS register from the stack
__forceinline void POPFD() { write8( 0x9D ); }
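// Usage sketch (byte comments illustrative):
//
//   PUSH( eax );      // 50
//   PUSH( 0x100 );    // 68 00 01 00 00  (always the imm32 form)
//   POP( eax );       // 58
//   PUSHFD();         // 9C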
}

View File

@ -42,15 +42,6 @@
// ix86_inlines.inl file when it is known that inlining of ModSib functions is
// wanted).
//
//
// Important when Using the New Emitter:
// Make sure there is *no* data in use or of importance past the end of the
// current x86Ptr. Ie, don't do fancy x86Ptr rewind tricks of your own. The
// emitter uses optimized writes which will clobber data past the end of the
// instruction it's emitting, so even if you know for sure the instruction you
// are writing is 5 bytes, the emitter will likely emit 9 bytes and then re-
// wind the x86Ptr to the end of the instruction.
//
#pragma once

View File

@ -0,0 +1,179 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
// Instructions implemented in this header are as follows -->>
enum G1Type
{
G1Type_ADD=0,
G1Type_OR,
G1Type_ADC,
G1Type_SBB,
G1Type_AND,
G1Type_SUB,
G1Type_XOR,
G1Type_CMP
};
// -------------------------------------------------------------------
template< typename ImmType, G1Type InstType >
class Group1Impl
{
public:
static const uint OperandSize = sizeof(ImmType);
Group1Impl() {} // because GCC doesn't like static classes
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __emitinline void Emit( const iRegister<OperandSize>& to, const iRegister<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
ModRM( 3, from.Id, to.Id );
}
static __emitinline void Emit( const ModSibBase& sibdest, const iRegister<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
EmitSibMagic( from.Id, sibdest );
}
static __emitinline void Emit( const iRegister<OperandSize>& to, const ModSibBase& sibsrc )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 2 : 3) | (InstType<<3) );
EmitSibMagic( to.Id, sibsrc );
}
static __emitinline void Emit( void* dest, const iRegister<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
iWriteDisp( from.Id, dest );
}
static __emitinline void Emit( const iRegister<OperandSize>& to, const void* src )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 2 : 3) | (InstType<<3) );
iWriteDisp( to.Id, src );
}
static __emitinline void Emit( const iRegister<OperandSize>& to, ImmType imm )
{
prefix16();
if( !Is8BitOperand() && is_s8( imm ) )
{
iWrite<u8>( 0x83 );
ModRM( 3, InstType, to.Id );
iWrite<s8>( imm );
}
else
{
if( to.IsAccumulator() )
iWrite<u8>( (Is8BitOperand() ? 4 : 5) | (InstType<<3) );
else
{
iWrite<u8>( Is8BitOperand() ? 0x80 : 0x81 );
ModRM( 3, InstType, to.Id );
}
iWrite<ImmType>( imm );
}
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, ImmType imm )
{
if( Is8BitOperand() )
{
iWrite<u8>( 0x80 );
EmitSibMagic( InstType, sibdest );
iWrite<ImmType>( imm );
}
else
{
prefix16();
iWrite<u8>( is_s8( imm ) ? 0x83 : 0x81 );
EmitSibMagic( InstType, sibdest );
if( is_s8( imm ) )
iWrite<s8>( imm );
else
iWrite<ImmType>( imm );
}
}
};
// -------------------------------------------------------------------
//
template< G1Type InstType >
class Group1ImplAll
{
protected:
typedef Group1Impl<u32, InstType> m_32;
typedef Group1Impl<u16, InstType> m_16;
typedef Group1Impl<u8, InstType> m_8;
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
__forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( to, from ); }
__forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( to, src ); }
__forceinline void operator()( void* dest, const iRegister32& from ) const { m_32::Emit( dest, from ); }
__noinline void operator()( const ModSibBase& sibdest, const iRegister32& from ) const { m_32::Emit( sibdest, from ); }
__noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const{ m_32::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); }
void operator()( const iRegister32& to, u32 imm, bool needs_flags=false ) const
{
//if( needs_flags || (imm != 0) || !_optimize_imm0() )
m_32::Emit( to, imm );
}
// ---------- 16 Bit Interface -----------
__forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( to, from ); }
__forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( to, src ); }
__forceinline void operator()( void* dest, const iRegister16& from ) const { m_16::Emit( dest, from ); }
__noinline void operator()( const ModSibBase& sibdest, const iRegister16& from ) const { m_16::Emit( sibdest, from ); }
__noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const{ m_16::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); }
void operator()( const iRegister16& to, u16 imm, bool needs_flags=false ) const { m_16::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); }
__forceinline void operator()( const iRegister8& to, const void* src ) const { m_8::Emit( to, src ); }
__forceinline void operator()( void* dest, const iRegister8& from ) const { m_8::Emit( dest, from ); }
__noinline void operator()( const ModSibBase& sibdest, const iRegister8& from ) const { m_8::Emit( sibdest, from ); }
__noinline void operator()( const iRegister8& to, const ModSibBase& sibsrc ) const{ m_8::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const iRegister8& to, u8 imm, bool needs_flags=false ) const { m_8::Emit( to, imm ); }
Group1ImplAll() {} // Why does GCC need these?
};
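// Usage sketch of the Group1 front-ends (byte comments illustrative):
//
//   iADD( eax, ecx );      // 01 C8
//   iADD( eax, 1 );        // 83 C0 01        -- imm fits s8: short form
//   iCMP( eax, 0x1000 );   // 3D 00 10 00 00  -- accumulator special form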

View File

@ -0,0 +1,151 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
// Instructions implemented in this header are as follows -->>
enum G2Type
{
G2Type_ROL=0,
G2Type_ROR,
G2Type_RCL,
G2Type_RCR,
G2Type_SHL,
G2Type_SHR,
G2Type_Unused,
G2Type_SAR
};
// -------------------------------------------------------------------
// Group 2 (shift) instructions have no Sib/ModRM forms.
// Optimization Note: For Imm forms, we ignore the instruction if the shift count is zero.
// This is a safe optimization since any zero-value shift does not affect any flags.
//
template< typename ImmType, G2Type InstType >
class Group2Impl
{
public:
static const uint OperandSize = sizeof(ImmType);
Group2Impl() {} // For the love of GCC.
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __emitinline void Emit( const iRegister<OperandSize>& to, const iRegister8& from )
{
jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?)
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
ModRM( 3, InstType, to.Id );
}
static __emitinline void Emit( const iRegister<OperandSize>& to, u8 imm )
{
if( imm == 0 ) return;
prefix16();
if( imm == 1 )
{
// special encoding of 1's
iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
ModRM( 3, InstType, to.Id );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
ModRM( 3, InstType, to.Id );
iWrite<u8>( imm );
}
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, const iRegister8& from )
{
jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?)
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
EmitSibMagic( from.Id, sibdest );
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, u8 imm )
{
if( imm == 0 ) return;
prefix16();
if( imm == 1 )
{
// special encoding of 1's
iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
EmitSibMagic( InstType, sibdest );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
EmitSibMagic( InstType, sibdest );
iWrite<u8>( imm );
}
}
};
// -------------------------------------------------------------------
//
template< G2Type InstType >
class Group2ImplAll
{
protected:
typedef Group2Impl<u32, InstType> m_32;
typedef Group2Impl<u16, InstType> m_16;
typedef Group2Impl<u8, InstType> m_8;
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis-
// cretion of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
__forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_32::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, const iRegister8& from ) const { m_32::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); }
void operator()( const iRegister32& to, u8 imm ) const { m_32::Emit( to, imm ); }
// ---------- 16 Bit Interface -----------
__forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_16::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, const iRegister8& from ) const { m_16::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); }
void operator()( const iRegister16& to, u8 imm ) const { m_16::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, const iRegister8& from ) const { m_8::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const iRegister8& to, u8 imm ) const { m_8::Emit( to, imm ); }
Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds.
};
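// Usage sketch (byte comments illustrative):
//
//   iSHL( eax, 1 );    // D1 E0  -- special shift-by-one encoding
//   iSHL( eax, 4 );    // C1 E0 04
//   iSHR( eax, cl );   // D3 E8  -- cl is the only valid shift register
//   iSAR( eax, 0 );    // emits nothing; zero shifts are elided (flags unaffected)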

View File

@ -0,0 +1,157 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// Header: ix86_impl_movs.h -- covers cmov and movsx/movzx.
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
//////////////////////////////////////////////////////////////////////////////////////////
// CMOV !! [in all of its disappointing lack of glory]
//
template< int OperandSize >
class CMovImpl
{
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
static __forceinline void emit_base( JccComparisonType cc )
{
jASSUME( cc >= 0 && cc <= 0x0f );
prefix16();
write8( 0x0f );
write8( 0x40 | cc );
}
public:
CMovImpl() {}
static __emitinline void Emit( JccComparisonType cc, const iRegister<OperandSize>& to, const iRegister<OperandSize>& from )
{
emit_base( cc );
ModRM( 3, to.Id, from.Id );
}
static __emitinline void Emit( JccComparisonType cc, const iRegister<OperandSize>& to, const void* src )
{
emit_base( cc );
iWriteDisp( to.Id, src );
}
static __emitinline void Emit( JccComparisonType cc, const iRegister<OperandSize>& to, const ModSibBase& sibsrc )
{
emit_base( cc );
EmitSibMagic( to.Id, sibsrc );
}
};
// ------------------------------------------------------------------------
class CMovImplGeneric
{
protected:
typedef CMovImpl<4> m_32;
typedef CMovImpl<2> m_16;
public:
__forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); }
__forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); }
__noinline void operator()( JccComparisonType ccType, const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); }
__forceinline void operator()( JccComparisonType ccType, const iRegister16& to, const iRegister16& from ) const { m_16::Emit( ccType, to, from ); }
__forceinline void operator()( JccComparisonType ccType, const iRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); }
__noinline void operator()( JccComparisonType ccType, const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); }
CMovImplGeneric() {} // don't ask.
};
// ------------------------------------------------------------------------
template< JccComparisonType ccType >
class CMovImplAll
{
protected:
typedef CMovImpl<4> m_32;
typedef CMovImpl<2> m_16;
public:
__forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); }
__forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); }
__noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); }
__forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( ccType, to, from ); }
__forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); }
__noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); }
CMovImplAll() {} // don't ask.
};
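// Usage sketch, assuming JccComparisonType values match the hardware condition
// codes (implied by the 0x40|cc encoding above):
//
//   iCMOVZ( eax, ecx );             // 0F 44 C1  -- cmovz eax, ecx
//   iCMOV( Jcc_Below, eax, ecx );   // 0F 42 C1  -- condition picked at runtime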
//////////////////////////////////////////////////////////////////////////////////////////
// Mov with sign/zero extension implementations (movsx / movzx)
//
template< int DestOperandSize, int SrcOperandSize >
class MovExtendImpl
{
protected:
static bool Is8BitOperand() { return SrcOperandSize == 1; }
static void prefix16() { if( DestOperandSize == 2 ) iWrite<u8>( 0x66 ); }
static __forceinline void emit_base( bool SignExtend )
{
prefix16();
iWrite<u8>( 0x0f );
iWrite<u8>( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) );
}
public:
MovExtendImpl() {} // For the love of GCC.
static __emitinline void Emit( const iRegister<DestOperandSize>& to, const iRegister<SrcOperandSize>& from, bool SignExtend )
{
emit_base( SignExtend );
ModRM( 3, to.Id, from.Id ); // reg field is the destination for 0F B6/B7/BE/BF /r
}
static __emitinline void Emit( const iRegister<DestOperandSize>& to, const ModSibStrict<SrcOperandSize>& sibsrc, bool SignExtend )
{
emit_base( SignExtend );
EmitSibMagic( to.Id, sibsrc );
}
};
// ------------------------------------------------------------------------
template< bool SignExtend >
class MovExtendImplAll
{
protected:
typedef MovExtendImpl<4, 2> m_16to32;
typedef MovExtendImpl<4, 1> m_8to32;
typedef MovExtendImpl<2, 1> m_8to16;
public:
__forceinline void operator()( const iRegister32& to, const iRegister16& from ) const { m_16to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const iRegister32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); }
__forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_8to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const iRegister32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); }
__forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_8to16::Emit( to, from, SignExtend ); }
__noinline void operator()( const iRegister16& to, const ModSibStrict<1>& sibsrc ) const { m_8to16::Emit( to, sibsrc, SignExtend ); }
MovExtendImplAll() {} // don't ask.
};
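// Usage sketch (illustrative only): the opcode byte is built above as
// 0xb6 | wide-bit | sign-bit, so these frontends should resolve to the
// expected 0F BE /r (movsx r32,r/m8) and 0F B7 /r (movzx r32,r/m16) forms:
//
//    iMOVSX( eax, bl );           // movsx eax, bl
//    iMOVZX( eax, ptr16[edx] );   // movzx eax, word ptr [edx]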

View File

@ -53,29 +53,29 @@ namespace x86Emitter
//////////////////////////////////////////////////////////////////////////////////////////
// x86Register Method Implementations
//
__forceinline x86AddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const
__forceinline iAddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const
{
return x86AddressInfo( *this, right );
return iAddressInfo( *this, right );
}
__forceinline x86AddressInfo x86IndexReg::operator+( const x86AddressInfo& right ) const
__forceinline iAddressInfo x86IndexReg::operator+( const iAddressInfo& right ) const
{
return right + *this;
}
__forceinline x86AddressInfo x86IndexReg::operator+( s32 right ) const
__forceinline iAddressInfo x86IndexReg::operator+( s32 right ) const
{
return x86AddressInfo( *this, right );
return iAddressInfo( *this, right );
}
__forceinline x86AddressInfo x86IndexReg::operator*( u32 right ) const
__forceinline iAddressInfo x86IndexReg::operator*( u32 right ) const
{
return x86AddressInfo( Empty, *this, right );
return iAddressInfo( Empty, *this, right );
}
__forceinline x86AddressInfo x86IndexReg::operator<<( u32 shift ) const
__forceinline iAddressInfo x86IndexReg::operator<<( u32 shift ) const
{
return x86AddressInfo( Empty, *this, 1<<shift );
return iAddressInfo( Empty, *this, 1<<shift );
}
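// Illustrative compositions (these operators are what give the ptr[...] forms
// used throughout the legacy wrappers their meaning):
//
//    x86IndexReg(ebx) + 8;                    // base + displacement
//    x86IndexReg(ebx) + x86IndexReg(ecx)*4;   // base + scaled index
//    x86IndexReg(ebx)<<2;                     // scale via shift: factor 4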
//////////////////////////////////////////////////////////////////////////////////////////
@ -83,7 +83,7 @@ namespace x86Emitter
//
// ------------------------------------------------------------------------
__forceinline ModSibBase::ModSibBase( const x86AddressInfo& src ) :
__forceinline ModSibBase::ModSibBase( const iAddressInfo& src ) :
Base( src.Base ),
Index( src.Index ),
Scale( src.Factor ),
@ -181,9 +181,9 @@ namespace x86Emitter
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86AddressInfo Method Implementations
// iAddressInfo Method Implementations
//
__forceinline x86AddressInfo& x86AddressInfo::Add( const x86IndexReg& src )
__forceinline iAddressInfo& iAddressInfo::Add( const x86IndexReg& src )
{
if( src == Index )
{
@ -214,7 +214,7 @@ namespace x86Emitter
}
// ------------------------------------------------------------------------
__forceinline x86AddressInfo& x86AddressInfo::Add( const x86AddressInfo& src )
__forceinline iAddressInfo& iAddressInfo::Add( const iAddressInfo& src )
{
Add( src.Base );
Add( src.Displacement );

View File

@ -43,76 +43,80 @@ namespace x86Emitter
// forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs
// instead.
extern void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags=false );
extern void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags=false );
extern void iLEA( iRegister32 to, const ModSibBase& src, bool preserve_flags=false );
extern void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_flags=false );
// ----- Push / Pop Instructions -----
// Note: pushad/popad implementations are intentionally left out. The instructions are
// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
extern void POP( x86Register32 from );
extern void POP( const ModSibBase& from );
extern void iPOP( const ModSibBase& from );
extern void iPUSH( const ModSibBase& from );
extern void PUSH( u32 imm );
extern void PUSH( x86Register32 from );
extern void PUSH( const ModSibBase& from );
static __forceinline void iPOP( iRegister32 from ) { write8( 0x58 | from.Id ); }
static __forceinline void iPOP( void* from ) { iPOP( ptr[from] ); }
static __forceinline void POP( void* from ) { POP( ptr[from] ); }
static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); }
static __forceinline void iPUSH( u32 imm ) { write8( 0x68 ); write32( imm ); }
static __forceinline void iPUSH( iRegister32 from ) { write8( 0x50 | from.Id ); }
static __forceinline void iPUSH( void* from ) { iPUSH( ptr[from] ); }
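// Sketch (illustrative): per the note above, a former PUSHAD/POPAD pair is
// better spelled as explicit pushes and pops, saving only what's live:
//
//    iPUSH( eax ); iPUSH( ecx ); iPUSH( edx );   // caller-saved set only
//    // ... clobbering code ...
//    iPOP( edx ); iPOP( ecx ); iPOP( eax );      // restore in reverse order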
// ------------------------------------------------------------------------
using Internal::iADD;
using Internal::iOR;
using Internal::iADC;
using Internal::iSBB;
using Internal::iAND;
using Internal::iSUB;
using Internal::iXOR;
using Internal::iCMP;
// pushes the EFLAGS register onto the stack
static __forceinline void iPUSHFD() { write8( 0x9C ); }
// pops the EFLAGS register from the stack
static __forceinline void iPOPFD() { write8( 0x9D ); }
using Internal::iROL;
using Internal::iROR;
using Internal::iRCL;
using Internal::iRCR;
using Internal::iSHL;
using Internal::iSHR;
using Internal::iSAR;
// ----- Miscellaneous Instructions -----
// Various Instructions with no parameter and no special encoding logic.
using Internal::iMOVSX;
using Internal::iMOVZX;
__forceinline void iRET() { write8( 0xC3 ); }
__forceinline void iCBW() { write16( 0x9866 ); }
__forceinline void iCWD() { write8( 0x98 ); }
__forceinline void iCDQ() { write8( 0x99 ); }
__forceinline void iCWDE() { write8( 0x98 ); }
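// (encoding note: iCWD and iCWDE both emit 0x98 above; in 32-bit code 0x98
// decodes as CWDE, while a true CWD needs the operand-size prefix: 66 99.)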
__forceinline void iLAHF() { write8( 0x9f ); }
__forceinline void iSAHF() { write8( 0x9e ); }
__forceinline void iSTC() { write8( 0xF9 ); }
__forceinline void iCLC() { write8( 0xF8 ); }
// NOP 1-byte
__forceinline void iNOP() { write8(0x90); }
//////////////////////////////////////////////////////////////////////////////////////////
// MOV instructions!
// ---------- 32 Bit Interface -----------
extern void iMOV( const x86Register32& to, const x86Register32& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register32& from );
extern void iMOV( const x86Register32& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register32& to, const void* src );
extern void iMOV( const void* dest, const x86Register32& from );
extern void iMOV( const iRegister32& to, const iRegister32& from );
extern void iMOV( const ModSibBase& sibdest, const iRegister32& from );
extern void iMOV( const iRegister32& to, const ModSibBase& sibsrc );
extern void iMOV( const iRegister32& to, const void* src );
extern void iMOV( void* dest, const iRegister32& from );
// preserve_flags - set to true to disable optimizations which could alter the state of
// the flags (namely replacing mov reg,0 with xor).
extern void iMOV( const x86Register32& to, u32 imm, bool preserve_flags=false );
extern void iMOV( const iRegister32& to, u32 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<4>& sibdest, u32 imm );
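// Sketch (illustrative): the preserve_flags switch only matters for a zero
// immediate, where the optimizer may substitute the shorter xor encoding:
//
//    iMOV( eax, 0 );          // may emit xor eax,eax (clobbers EFLAGS)
//    iMOV( eax, 0, true );    // forced to a literal mov eax,0 (flags intact)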
// ---------- 16 Bit Interface -----------
extern void iMOV( const x86Register16& to, const x86Register16& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register16& from );
extern void iMOV( const x86Register16& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register16& to, const void* src );
extern void iMOV( const void* dest, const x86Register16& from );
extern void iMOV( const iRegister16& to, const iRegister16& from );
extern void iMOV( const ModSibBase& sibdest, const iRegister16& from );
extern void iMOV( const iRegister16& to, const ModSibBase& sibsrc );
extern void iMOV( const iRegister16& to, const void* src );
extern void iMOV( void* dest, const iRegister16& from );
// preserve_flags - set to true to disable optimizations which could alter the state of
// the flags (namely replacing mov reg,0 with xor).
extern void iMOV( const x86Register16& to, u16 imm, bool preserve_flags=false );
extern void iMOV( const iRegister16& to, u16 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<2>& sibdest, u16 imm );
// ---------- 8 Bit Interface -----------
extern void iMOV( const x86Register8& to, const x86Register8& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register8& from );
extern void iMOV( const x86Register8& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register8& to, const void* src );
extern void iMOV( const void* dest, const x86Register8& from );
extern void iMOV( const iRegister8& to, const iRegister8& from );
extern void iMOV( const ModSibBase& sibdest, const iRegister8& from );
extern void iMOV( const iRegister8& to, const ModSibBase& sibsrc );
extern void iMOV( const iRegister8& to, const void* src );
extern void iMOV( void* dest, const iRegister8& from );
extern void iMOV( const x86Register8& to, u8 imm, bool preserve_flags=false );
extern void iMOV( const iRegister8& to, u8 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<1>& sibdest, u8 imm );
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -35,9 +35,9 @@
using namespace x86Emitter;
template< int OperandSize >
static __forceinline x86Register<OperandSize> _reghlp( x86IntRegType src )
static __forceinline iRegister<OperandSize> _reghlp( x86IntRegType src )
{
return x86Register<OperandSize>( src );
return iRegister<OperandSize>( src );
}
static __forceinline ModSibBase _mrmhlp( x86IntRegType src )
@ -116,31 +116,34 @@ DEFINE_OPCODE_SHIFT_LEGACY( SAR )
DEFINE_OPCODE_LEGACY( MOV )
// ------------------------------------------------------------------------
#define DEFINE_LEGACY_MOVEXTEND( form, srcbits ) \
emitterT void MOV##form##X32R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##X( x86Register32( to ), x86Register##srcbits( from ) ); } \
emitterT void MOV##form##X32Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \
emitterT void MOV##form##X32M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[from] ); }
#define DEFINE_LEGACY_MOVEXTEND( form, destbits, srcbits ) \
emitterT void MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form( iRegister##destbits( to ), iRegister##srcbits( from ) ); } \
emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \
emitterT void MOV##form##destbits##M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[from] ); }
DEFINE_LEGACY_MOVEXTEND( S, 16 )
DEFINE_LEGACY_MOVEXTEND( Z, 16 )
DEFINE_LEGACY_MOVEXTEND( S, 8 )
DEFINE_LEGACY_MOVEXTEND( Z, 8 )
DEFINE_LEGACY_MOVEXTEND( SX, 32, 16 )
DEFINE_LEGACY_MOVEXTEND( ZX, 32, 16 )
DEFINE_LEGACY_MOVEXTEND( SX, 32, 8 )
DEFINE_LEGACY_MOVEXTEND( ZX, 32, 8 )
DEFINE_LEGACY_MOVEXTEND( SX, 16, 8 )
DEFINE_LEGACY_MOVEXTEND( ZX, 16, 8 )
// mov r32 to [r32<<scale+from2]
emitterT void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
iMOV( x86Register32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( iRegister32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
emitterT void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
iMOV( x86Register16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( iRegister16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
iMOV( x86Register8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( iRegister8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
// Special forms needed by the legacy emitter syntax:
@ -155,6 +158,11 @@ emitterT void AND32I8toM( uptr to, s8 from )
iAND( ptr8[to], from );
}
/* cmove r32 to r32*/
emitterT void CMOVE32RtoR( x86IntRegType to, x86IntRegType from )
{
iCMOVE( iRegister32(to), iRegister32(from) );
}
// Note: the 'to' field can either be a register or a special opcode extension specifier
@ -224,23 +232,6 @@ emitterT u32* J32Rel( int cc, u32 to )
return (u32*)( x86Ptr - 4 );
}
emitterT void CMOV32RtoR( int cc, int to, int from )
{
RexRB(0, to, from);
write8( 0x0F );
write8( cc );
ModRM( 3, to, from );
}
emitterT void CMOV32MtoR( int cc, int to, uptr from )
{
RexR(0, to);
write8( 0x0F );
write8( cc );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
////////////////////////////////////////////////////
emitterT void x86SetPtr( u8* ptr )
{
@ -308,213 +299,9 @@ emitterT void x86Align( int bytes )
/* IX86 instructions */
/********************/
emitterT void STC( void )
{
write8( 0xF9 );
}
emitterT void CLC( void )
{
write8( 0xF8 );
}
// NOP 1-byte
emitterT void NOP( void )
{
write8(0x90);
}
/* cmovbe r32 to r32 */
emitterT void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x46, to, from );
}
/* cmovbe m32 to r32*/
emitterT void CMOVBE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x46, to, from );
}
/* cmovb r32 to r32 */
emitterT void CMOVB32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x42, to, from );
}
/* cmovb m32 to r32*/
emitterT void CMOVB32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x42, to, from );
}
/* cmovae r32 to r32 */
emitterT void CMOVAE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x43, to, from );
}
/* cmovae m32 to r32*/
emitterT void CMOVAE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x43, to, from );
}
/* cmova r32 to r32 */
emitterT void CMOVA32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x47, to, from );
}
/* cmova m32 to r32*/
emitterT void CMOVA32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x47, to, from );
}
/* cmovo r32 to r32 */
emitterT void CMOVO32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x40, to, from );
}
/* cmovo m32 to r32 */
emitterT void CMOVO32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x40, to, from );
}
/* cmovp r32 to r32 */
emitterT void CMOVP32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4A, to, from );
}
/* cmovp m32 to r32 */
emitterT void CMOVP32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4A, to, from );
}
/* cmovs r32 to r32 */
emitterT void CMOVS32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x48, to, from );
}
/* cmovs m32 to r32 */
emitterT void CMOVS32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x48, to, from );
}
/* cmovno r32 to r32 */
emitterT void CMOVNO32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x41, to, from );
}
/* cmovno m32 to r32 */
emitterT void CMOVNO32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x41, to, from );
}
/* cmovnp r32 to r32 */
emitterT void CMOVNP32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4B, to, from );
}
/* cmovnp m32 to r32 */
emitterT void CMOVNP32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4B, to, from );
}
/* cmovns r32 to r32 */
emitterT void CMOVNS32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x49, to, from );
}
/* cmovns m32 to r32 */
emitterT void CMOVNS32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x49, to, from );
}
/* cmovne r32 to r32 */
emitterT void CMOVNE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x45, to, from );
}
/* cmovne m32 to r32*/
emitterT void CMOVNE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x45, to, from );
}
/* cmove r32 to r32*/
emitterT void CMOVE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x44, to, from );
}
/* cmove m32 to r32*/
emitterT void CMOVE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x44, to, from );
}
/* cmovg r32 to r32*/
emitterT void CMOVG32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4F, to, from );
}
/* cmovg m32 to r32*/
emitterT void CMOVG32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4F, to, from );
}
/* cmovge r32 to r32*/
emitterT void CMOVGE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4D, to, from );
}
/* cmovge m32 to r32*/
emitterT void CMOVGE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4D, to, from );
}
/* cmovl r32 to r32*/
emitterT void CMOVL32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4C, to, from );
}
/* cmovl m32 to r32*/
emitterT void CMOVL32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4C, to, from );
}
/* cmovle r32 to r32*/
emitterT void CMOVLE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4E, to, from );
}
/* cmovle m32 to r32*/
emitterT void CMOVLE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4E, to, from );
}
emitterT void STC( void ) { iSTC(); }
emitterT void CLC( void ) { iCLC(); }
emitterT void NOP( void ) { iNOP(); }
////////////////////////////////////
// arithmetic instructions /
@ -1173,34 +960,31 @@ emitterT void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); }
emitterT void SETE8R( x86IntRegType to ) { SET8R(0x94, to); }
/* push imm32 */
emitterT void PUSH32I( u32 from ) { PUSH( from ); }
emitterT void PUSH32I( u32 from ) { iPUSH( from ); }
/* push r32 */
emitterT void PUSH32R( x86IntRegType from ) { PUSH( x86Register32( from ) ); }
emitterT void PUSH32R( x86IntRegType from ) { iPUSH( iRegister32( from ) ); }
/* push m32 */
emitterT void PUSH32M( u32 from )
{
PUSH( ptr[from] );
iPUSH( ptr[from] );
}
/* pop r32 */
emitterT void POP32R( x86IntRegType from ) { POP( x86Register32( from ) ); }
/* pushfd */
emitterT void POP32R( x86IntRegType from ) { iPOP( iRegister32( from ) ); }
emitterT void PUSHFD( void ) { write8( 0x9C ); }
/* popfd */
emitterT void POPFD( void ) { write8( 0x9D ); }
emitterT void RET( void ) { /*write8( 0xf3 ); <-- K8 opt?*/ write8( 0xC3 ); }
emitterT void RET( void ) { iRET(); }
emitterT void CBW( void ) { write16( 0x9866 ); }
emitterT void CWD( void ) { write8( 0x98 ); }
emitterT void CDQ( void ) { write8( 0x99 ); }
emitterT void CWDE() { write8(0x98); }
emitterT void CBW( void ) { iCBW(); }
emitterT void CWD( void ) { iCWD(); }
emitterT void CDQ( void ) { iCDQ(); }
emitterT void CWDE() { iCWDE(); }
emitterT void LAHF() { write8(0x9f); }
emitterT void SAHF() { write8(0x9e); }
emitterT void LAHF() { iLAHF(); }
emitterT void SAHF() { iSAHF(); }
emitterT void BT32ItoR( x86IntRegType to, u8 from )
{
@ -1230,34 +1014,34 @@ emitterT void BSWAP32R( x86IntRegType to )
emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset)
{
LEA( x86Register32( to ), ptr[x86IndexReg(from)+offset] );
iLEA( iRegister32( to ), ptr[x86IndexReg(from)+offset] );
}
emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1)
{
LEA( x86Register32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
iLEA( iRegister32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
}
// Don't inline recursive functions
emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale)
{
LEA( x86Register32( to ), ptr[x86IndexReg(from)*(1<<scale)] );
iLEA( iRegister32( to ), ptr[x86IndexReg(from)*(1<<scale)] );
}
// to = from + offset
emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, s16 offset)
{
LEA( x86Register16( to ), ptr[x86IndexReg(from)+offset] );
iLEA( iRegister16( to ), ptr[x86IndexReg(from)+offset] );
}
// to = from0 + from1
emitterT void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1)
{
LEA( x86Register16( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
iLEA( iRegister16( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
}
// to = from << scale (max is 3)
emitterT void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale)
{
LEA( x86Register16( to ), ptr[x86IndexReg(from)*(1<<scale)] );
iLEA( iRegister16( to ), ptr[x86IndexReg(from)*(1<<scale)] );
}

View File

@ -56,11 +56,7 @@ emitterT void ModRM( uint mod, uint reg, uint rm )
jASSUME( mod < 4 );
jASSUME( reg < 8 );
jASSUME( rm < 8 );
//write8( (mod << 6) | (reg << 3) | rm );
*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
x86Ptr++;
write8( (mod << 6) | (reg << 3) | rm );
}
emitterT void SibSB( uint ss, uint index, uint base )
@ -71,8 +67,5 @@ emitterT void SibSB( uint ss, uint index, uint base )
jASSUME( ss < 4 );
jASSUME( index < 8 );
jASSUME( base < 8 );
//write8( (ss << 6) | (index << 3) | base );
*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
x86Ptr++;
write8( (ss << 6) | (index << 3) | base );
}
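// Note on the reverted fast path (see the removed lines above): the old form
// stored a full u32 while advancing x86Ptr by only one byte, so three bytes
// past the current stream position were written speculatively and merely
// assumed to be overwritten by later emits. write8() touches exactly the byte
// being emitted, which is the safer contract for the code stream.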

View File

@ -37,21 +37,21 @@ static const bool AlwaysUseMovaps = true;
//------------------------------------------------------------------
#define SSEMtoR( code, overb ) \
assert( to < XMMREGS ), \
assert( to < iREGCNT_XMM ), \
RexR(0, to), \
write16( code ), \
ModRM( 0, to, DISP32 ), \
write32( MEMADDR(from, 4 + overb) )
#define SSERtoM( code, overb ) \
assert( from < XMMREGS), \
assert( from < iREGCNT_XMM), \
RexR(0, from), \
write16( code ), \
ModRM( 0, from, DISP32 ), \
write32( MEMADDR(to, 4 + overb) )
#define SSE_SS_MtoR( code, overb ) \
assert( to < XMMREGS ), \
assert( to < iREGCNT_XMM ), \
write8( 0xf3 ), \
RexR(0, to), \
write16( code ), \
@ -59,7 +59,7 @@ static const bool AlwaysUseMovaps = true;
write32( MEMADDR(from, 4 + overb) )
#define SSE_SS_RtoM( code, overb ) \
assert( from < XMMREGS), \
assert( from < iREGCNT_XMM), \
write8( 0xf3 ), \
RexR(0, from), \
write16( code ), \
@ -67,7 +67,7 @@ static const bool AlwaysUseMovaps = true;
write32( MEMADDR(to, 4 + overb) )
#define SSERtoR( code ) \
assert( to < XMMREGS && from < XMMREGS), \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
RexRB(0, to, from), \
write16( code ), \
ModRM( 3, to, from )
@ -85,21 +85,21 @@ static const bool AlwaysUseMovaps = true;
SSERtoR( code )
#define _SSERtoR66( code ) \
assert( to < XMMREGS && from < XMMREGS), \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
write8( 0x66 ), \
RexRB(0, from, to), \
write16( code ), \
ModRM( 3, from, to )
#define SSE_SS_RtoR( code ) \
assert( to < XMMREGS && from < XMMREGS), \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
write8( 0xf3 ), \
RexRB(0, to, from), \
write16( code ), \
ModRM( 3, to, from )
#define SSE_SD_MtoR( code, overb ) \
assert( to < XMMREGS ) , \
assert( to < iREGCNT_XMM ) , \
write8( 0xf2 ), \
RexR(0, to), \
write16( code ), \
@ -107,7 +107,7 @@ static const bool AlwaysUseMovaps = true;
write32( MEMADDR(from, 4 + overb) ) \
#define SSE_SD_RtoM( code, overb ) \
assert( from < XMMREGS) , \
assert( from < iREGCNT_XMM) , \
write8( 0xf2 ), \
RexR(0, from), \
write16( code ), \
@ -115,7 +115,7 @@ static const bool AlwaysUseMovaps = true;
write32( MEMADDR(to, 4 + overb) ) \
#define SSE_SD_RtoR( code ) \
assert( to < XMMREGS && from < XMMREGS) , \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM) , \
write8( 0xf2 ), \
RexRB(0, to, from), \
write16( code ), \

View File

@ -28,7 +28,7 @@ u8 g_globalMMXSaved = 0;
u8 g_globalXMMSaved = 0;
PCSX2_ALIGNED16( static u64 g_globalMMXData[8] );
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] );
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*iREGCNT_XMM] );
/////////////////////////////////////////////////////////////////////

View File

@ -91,10 +91,10 @@ extern CPUINFO cpuinfo;
#define __threadlocal __thread
#endif
// x86 opcode descriptors
#define XMMREGS 8
#define X86REGS 8
#define MMXREGS 8
// Register counts for x86/32 mode:
static const uint iREGCNT_XMM = 8;
static const uint iREGCNT_GPR = 8;
static const uint iREGCNT_MMX = 8;
enum XMMSSEType
{
@ -104,10 +104,10 @@ enum XMMSSEType
};
extern __threadlocal u8 *x86Ptr;
extern __threadlocal u8 *j8Ptr[32];
extern __threadlocal u32 *j32Ptr[32];
extern __threadlocal u8 *j8Ptr[32]; // deprecated item. Use local u8* vars instead.
extern __threadlocal u32 *j32Ptr[32]; // deprecated item. Use local u32* vars instead.
extern __threadlocal XMMSSEType g_xmmtypes[XMMREGS];
extern __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM];
//------------------------------------------------------------------
// templated version of is_s8 is required, so that u16's get correct sign extension treatment.
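// Minimal sketch of the idea (the real definition lives elsewhere in the
// emitter headers; this is an assumption about its shape, not a quote):
//
//    template< typename T >
//    static __forceinline bool is_s8( T imm ) { return (s8)imm == imm; }
//
// Templating matters because a u16 argument squeezed through a fixed s32/u32
// parameter would get the wrong sign treatment before the test ever ran.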
@ -150,7 +150,7 @@ namespace x86Emitter
static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field)
static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field)
class x86AddressInfo;
class iAddressInfo;
class ModSibBase;
extern void iSetPtr( void* ptr );
@ -188,33 +188,33 @@ namespace x86Emitter
//////////////////////////////////////////////////////////////////////////////////////////
//
template< int OperandSize >
class x86Register
class iRegister
{
public:
static const x86Register Empty; // defined as an empty/unused value (-1)
static const iRegister Empty; // defined as an empty/unused value (-1)
int Id;
x86Register( const x86Register<OperandSize>& src ) : Id( src.Id ) {}
x86Register(): Id( -1 ) {}
explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
iRegister( const iRegister<OperandSize>& src ) : Id( src.Id ) {}
iRegister(): Id( -1 ) {}
explicit iRegister( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
bool IsEmpty() const { return Id < 0; }
// Returns true if the register is a valid accumulator: Eax, Ax, Al.
bool IsAccumulator() const { return Id == 0; }
bool operator==( const x86Register<OperandSize>& src ) const
bool operator==( const iRegister<OperandSize>& src ) const
{
return (Id == src.Id);
}
bool operator!=( const x86Register<OperandSize>& src ) const
bool operator!=( const iRegister<OperandSize>& src ) const
{
return (Id != src.Id);
}
x86Register<OperandSize>& operator=( const x86Register<OperandSize>& src )
iRegister<OperandSize>& operator=( const iRegister<OperandSize>& src )
{
Id = src.Id;
return *this;
@ -229,62 +229,62 @@ namespace x86Emitter
// all about the templated code in haphazard fashion. Yay.. >_<
//
typedef x86Register<4> x86Register32;
typedef x86Register<2> x86Register16;
typedef x86Register<1> x86Register8;
typedef iRegister<4> iRegister32;
typedef iRegister<2> iRegister16;
typedef iRegister<1> iRegister8;
extern const x86Register32 eax;
extern const x86Register32 ebx;
extern const x86Register32 ecx;
extern const x86Register32 edx;
extern const x86Register32 esi;
extern const x86Register32 edi;
extern const x86Register32 ebp;
extern const x86Register32 esp;
extern const iRegister32 eax;
extern const iRegister32 ebx;
extern const iRegister32 ecx;
extern const iRegister32 edx;
extern const iRegister32 esi;
extern const iRegister32 edi;
extern const iRegister32 ebp;
extern const iRegister32 esp;
extern const x86Register16 ax;
extern const x86Register16 bx;
extern const x86Register16 cx;
extern const x86Register16 dx;
extern const x86Register16 si;
extern const x86Register16 di;
extern const x86Register16 bp;
extern const x86Register16 sp;
extern const iRegister16 ax;
extern const iRegister16 bx;
extern const iRegister16 cx;
extern const iRegister16 dx;
extern const iRegister16 si;
extern const iRegister16 di;
extern const iRegister16 bp;
extern const iRegister16 sp;
extern const x86Register8 al;
extern const x86Register8 cl;
extern const x86Register8 dl;
extern const x86Register8 bl;
extern const x86Register8 ah;
extern const x86Register8 ch;
extern const x86Register8 dh;
extern const x86Register8 bh;
extern const iRegister8 al;
extern const iRegister8 cl;
extern const iRegister8 dl;
extern const iRegister8 bl;
extern const iRegister8 ah;
extern const iRegister8 ch;
extern const iRegister8 dh;
extern const iRegister8 bh;
//////////////////////////////////////////////////////////////////////////////////////////
// Use 32 bit registers as our index register (for ModSib memory address calculations)
// Only x86IndexReg provides operators for constructing x86AddressInfo types.
class x86IndexReg : public x86Register32
// Only x86IndexReg provides operators for constructing iAddressInfo types.
class x86IndexReg : public iRegister32
{
public:
static const x86IndexReg Empty; // defined as an empty/unused value (-1)
public:
x86IndexReg(): x86Register32() {}
x86IndexReg( const x86IndexReg& src ) : x86Register32( src.Id ) {}
x86IndexReg( const x86Register32& src ) : x86Register32( src ) {}
explicit x86IndexReg( int regId ) : x86Register32( regId ) {}
x86IndexReg(): iRegister32() {}
x86IndexReg( const x86IndexReg& src ) : iRegister32( src.Id ) {}
x86IndexReg( const iRegister32& src ) : iRegister32( src ) {}
explicit x86IndexReg( int regId ) : iRegister32( regId ) {}
// Returns true if the register is the stack pointer: ESP.
bool IsStackPointer() const { return Id == 4; }
x86AddressInfo operator+( const x86IndexReg& right ) const;
x86AddressInfo operator+( const x86AddressInfo& right ) const;
x86AddressInfo operator+( s32 right ) const;
iAddressInfo operator+( const x86IndexReg& right ) const;
iAddressInfo operator+( const iAddressInfo& right ) const;
iAddressInfo operator+( s32 right ) const;
x86AddressInfo operator*( u32 factor ) const;
x86AddressInfo operator<<( u32 shift ) const;
iAddressInfo operator*( u32 factor ) const;
iAddressInfo operator<<( u32 shift ) const;
x86IndexReg& operator=( const x86Register32& src )
x86IndexReg& operator=( const iRegister32& src )
{
Id = src.Id;
return *this;
@ -293,7 +293,7 @@ namespace x86Emitter
//////////////////////////////////////////////////////////////////////////////////////////
//
class x86AddressInfo
class iAddressInfo
{
public:
x86IndexReg Base; // base register (no scale)
@ -302,7 +302,7 @@ namespace x86Emitter
s32 Displacement; // address displacement
public:
__forceinline x86AddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) :
__forceinline iAddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) :
Base( base ),
Index( index ),
Factor( factor ),
@ -310,7 +310,7 @@ namespace x86Emitter
{
}
__forceinline explicit x86AddressInfo( const x86IndexReg& base, int displacement=0 ) :
__forceinline explicit iAddressInfo( const x86IndexReg& base, int displacement=0 ) :
Base( base ),
Index(),
Factor(0),
@ -318,7 +318,7 @@ namespace x86Emitter
{
}
__forceinline explicit x86AddressInfo( s32 displacement ) :
__forceinline explicit iAddressInfo( s32 displacement ) :
Base(),
Index(),
Factor(0),
@ -326,24 +326,24 @@ namespace x86Emitter
{
}
static x86AddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 );
static iAddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 );
public:
bool IsByteSizeDisp() const { return is_s8( Displacement ); }
__forceinline x86AddressInfo& Add( s32 imm )
__forceinline iAddressInfo& Add( s32 imm )
{
Displacement += imm;
return *this;
}
__forceinline x86AddressInfo& Add( const x86IndexReg& src );
__forceinline x86AddressInfo& Add( const x86AddressInfo& src );
__forceinline iAddressInfo& Add( const x86IndexReg& src );
__forceinline iAddressInfo& Add( const iAddressInfo& src );
__forceinline x86AddressInfo operator+( const x86IndexReg& right ) const { return x86AddressInfo( *this ).Add( right ); }
__forceinline x86AddressInfo operator+( const x86AddressInfo& right ) const { return x86AddressInfo( *this ).Add( right ); }
__forceinline x86AddressInfo operator+( s32 imm ) const { return x86AddressInfo( *this ).Add( imm ); }
__forceinline x86AddressInfo operator-( s32 imm ) const { return x86AddressInfo( *this ).Add( -imm ); }
__forceinline iAddressInfo operator+( const x86IndexReg& right ) const { return iAddressInfo( *this ).Add( right ); }
__forceinline iAddressInfo operator+( const iAddressInfo& right ) const { return iAddressInfo( *this ).Add( right ); }
__forceinline iAddressInfo operator+( s32 imm ) const { return iAddressInfo( *this ).Add( imm ); }
__forceinline iAddressInfo operator-( s32 imm ) const { return iAddressInfo( *this ).Add( -imm ); }
};
//////////////////////////////////////////////////////////////////////////////////////////
@ -351,12 +351,12 @@ namespace x86Emitter
//
// This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means
// that the Base, Index, Scale, and Displacement values are all in the correct arrange-
// ments, and it serves as a type-safe layer between the x86Register's operators (which
// generate x86AddressInfo types) and the emitter's ModSib instruction forms. Without this,
// the x86Register would pass as a ModSib type implicitly, and that would cause ambiguity
// ments, and it serves as a type-safe layer between the iRegister's operators (which
// generate iAddressInfo types) and the emitter's ModSib instruction forms. Without this,
// the iRegister would pass as a ModSib type implicitly, and that would cause ambiguity
// on a number of instructions.
//
// End users should always use x86AddressInfo instead.
// End users should always use iAddressInfo instead.
//
class ModSibBase
{
@ -367,7 +367,7 @@ namespace x86Emitter
s32 Displacement; // offset applied to the Base/Index registers.
public:
explicit ModSibBase( const x86AddressInfo& src );
explicit ModSibBase( const iAddressInfo& src );
explicit ModSibBase( s32 disp );
ModSibBase( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 );
@ -394,7 +394,7 @@ namespace x86Emitter
class ModSibStrict : public ModSibBase
{
public:
__forceinline explicit ModSibStrict( const x86AddressInfo& src ) : ModSibBase( src ) {}
__forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibBase( src ) {}
__forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {}
__forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) :
ModSibBase( base, index, scale, displacement ) {}
@ -423,7 +423,7 @@ namespace x86Emitter
return ModSibBase( src, x86IndexReg::Empty );
}
__forceinline ModSibBase operator[]( const x86AddressInfo& src ) const
__forceinline ModSibBase operator[]( const iAddressInfo& src ) const
{
return ModSibBase( src );
}
@ -457,7 +457,7 @@ namespace x86Emitter
return ModSibStrict<OperandSize>( src, x86IndexReg::Empty );
}
__forceinline ModSibStrict<OperandSize> operator[]( const x86AddressInfo& src ) const
__forceinline ModSibStrict<OperandSize> operator[]( const iAddressInfo& src ) const
{
return ModSibStrict<OperandSize>( src );
}
@ -598,387 +598,14 @@ namespace x86Emitter
{
extern void ModRM( uint mod, uint reg, uint rm );
extern void SibSB( u32 ss, u32 index, u32 base );
extern void iWriteDisp( int regfield, s32 displacement );
extern void iWriteDisp( int regfield, const void* address );
extern void EmitSibMagic( uint regfield, const ModSibBase& info );
struct SibMagic
{
static void Emit( uint regfield, const ModSibBase& info )
{
EmitSibMagic( regfield, info );
}
};
struct SibMagicInline
{
static __forceinline void Emit( uint regfield, const ModSibBase& info )
{
EmitSibMagic( regfield, info );
}
};
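// The two dispatchers above let the Impl templates below choose between an
// out-of-line and a force-inlined EmitSibMagic without duplicating any Emit
// bodies: the SibMagicType template parameter is the only difference between
// the m_32 and m_32i style typedefs used by the *ImplAll frontends.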
enum G1Type
{
G1Type_ADD=0,
G1Type_OR,
G1Type_ADC,
G1Type_SBB,
G1Type_AND,
G1Type_SUB,
G1Type_XOR,
G1Type_CMP
};
enum G2Type
{
G2Type_ROL=0,
G2Type_ROR,
G2Type_RCL,
G2Type_RCR,
G2Type_SHL,
G2Type_SHR,
G2Type_Unused,
G2Type_SAR
};
// -------------------------------------------------------------------
template< typename ImmType, G1Type InstType, typename SibMagicType >
class Group1Impl
{
public:
static const uint OperandSize = sizeof(ImmType);
Group1Impl() {} // because GCC doesn't like static classes
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __emitinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
ModRM( 3, from.Id, to.Id );
}
static __emitinline void Emit( const ModSibBase& sibdest, const x86Register<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
SibMagicType::Emit( from.Id, sibdest );
}
static __emitinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& sibsrc )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 2 : 3) | (InstType<<3) );
SibMagicType::Emit( to.Id, sibsrc );
}
static __emitinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
{
prefix16();
if( !Is8BitOperand() && is_s8( imm ) )
{
iWrite<u8>( 0x83 );
ModRM( 3, InstType, to.Id );
iWrite<s8>( imm );
}
else
{
if( to.IsAccumulator() )
iWrite<u8>( (Is8BitOperand() ? 4 : 5) | (InstType<<3) );
else
{
iWrite<u8>( Is8BitOperand() ? 0x80 : 0x81 );
ModRM( 3, InstType, to.Id );
}
iWrite<ImmType>( imm );
}
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, ImmType imm )
{
if( Is8BitOperand() )
{
iWrite<u8>( 0x80 );
SibMagicType::Emit( InstType, sibdest );
iWrite<ImmType>( imm );
}
else
{
prefix16();
iWrite<u8>( is_s8( imm ) ? 0x83 : 0x81 );
SibMagicType::Emit( InstType, sibdest );
if( is_s8( imm ) )
iWrite<s8>( imm );
else
iWrite<ImmType>( imm );
}
}
};
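// Illustrative encodings produced by the immediate-size logic above, using
// ADD as the group-1 example:
//
//    iADD( ecx, 4 );       // 83 C1 04           -- sign-extended imm8 form
//    iADD( ecx, 0x400 );   // 81 C1 00 04 00 00  -- full imm32 form
//    iADD( eax, 0x400 );   // 05 00 04 00 00     -- accumulator short form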
// -------------------------------------------------------------------
// Group 2 (shift) instructions have no Sib/ModRM forms.
// Note: For Imm forms, we ignore the instruction if the shift count is zero. This
// is a safe optimization since any zero-value shift does not affect any flags.
//
template< typename ImmType, G2Type InstType, typename SibMagicType >
class Group2Impl
{
public:
static const uint OperandSize = sizeof(ImmType);
Group2Impl() {} // For the love of GCC.
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __emitinline void Emit( const x86Register<OperandSize>& to, const x86Register8& from )
{
jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?)
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
ModRM( 3, InstType, to.Id );
}
static __emitinline void Emit( const x86Register<OperandSize>& to, u8 imm )
{
if( imm == 0 ) return;
prefix16();
if( imm == 1 )
{
// special encoding of 1's
iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
ModRM( 3, InstType, to.Id );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
ModRM( 3, InstType, to.Id );
iWrite<u8>( imm );
}
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, const x86Register8& from )
{
jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?)
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
SibMagicType::Emit( from.Id, sibdest );
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, u8 imm )
{
if( imm == 0 ) return;
prefix16();
if( imm == 1 )
{
// special encoding of 1's
iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
SibMagicType::Emit( InstType, sibdest );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
SibMagicType::Emit( InstType, sibdest );
iWrite<u8>( imm );
}
}
};
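// Illustrative consequence of the zero-count early-out above:
//
//    iSHL( eax, 0 );   // emits nothing -- stream and flags untouched
//    iSHL( eax, 1 );   // short "shift by 1" form: D1 E0
//    iSHL( eax, 3 );   // imm8 form: C1 E0 03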
// -------------------------------------------------------------------
//
template< G1Type InstType >
class Group1ImplAll
{
protected:
typedef Group1Impl<u32, InstType, SibMagic> m_32;
typedef Group1Impl<u16, InstType, SibMagic> m_16;
typedef Group1Impl<u8, InstType, SibMagic> m_8;
typedef Group1Impl<u32, InstType, SibMagicInline> m_32i;
typedef Group1Impl<u16, InstType, SibMagicInline> m_16i;
typedef Group1Impl<u8, InstType, SibMagicInline> m_8i;
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the
// discretion of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
__forceinline void operator()( const x86Register32& to, const x86Register32& from ) const { m_32i::Emit( to, from ); }
__forceinline void operator()( const x86Register32& to, const void* src ) const { m_32i::Emit( to, ptr32[src] ); }
__forceinline void operator()( const void* dest, const x86Register32& from ) const { m_32i::Emit( ptr32[dest], from ); }
__noinline void operator()( const ModSibBase& sibdest, const x86Register32& from ) const { m_32::Emit( sibdest, from ); }
__noinline void operator()( const x86Register32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); }
void operator()( const x86Register32& to, u32 imm, bool needs_flags=false ) const
{
//if( needs_flags || (imm != 0) || !_optimize_imm0() )
m_32i::Emit( to, imm );
}
// ---------- 16 Bit Interface -----------
__forceinline void operator()( const x86Register16& to, const x86Register16& from ) const { m_16i::Emit( to, from ); }
__forceinline void operator()( const x86Register16& to, const void* src ) const { m_16i::Emit( to, ptr16[src] ); }
__forceinline void operator()( const void* dest, const x86Register16& from ) const { m_16i::Emit( ptr16[dest], from ); }
__noinline void operator()( const ModSibBase& sibdest, const x86Register16& from ) const { m_16::Emit( sibdest, from ); }
__noinline void operator()( const x86Register16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); }
void operator()( const x86Register16& to, u16 imm, bool needs_flags=false ) const { m_16i::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void operator()( const x86Register8& to, const x86Register8& from ) const { m_8i::Emit( to, from ); }
__forceinline void operator()( const x86Register8& to, const void* src ) const { m_8i::Emit( to, ptr8[src] ); }
__forceinline void operator()( const void* dest, const x86Register8& from ) const { m_8i::Emit( ptr8[dest], from ); }
__noinline void operator()( const ModSibBase& sibdest, const x86Register8& from ) const { m_8::Emit( sibdest, from ); }
__noinline void operator()( const x86Register8& to, const ModSibBase& sibsrc ) const { m_8::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const x86Register8& to, u8 imm, bool needs_flags=false ) const { m_8i::Emit( to, imm ); }
Group1ImplAll() {} // Why does GCC need these?
};
// -------------------------------------------------------------------
//
template< G2Type InstType >
class Group2ImplAll
{
protected:
typedef Group2Impl<u32, InstType, SibMagic> m_32;
typedef Group2Impl<u16, InstType, SibMagic> m_16;
typedef Group2Impl<u8, InstType, SibMagic> m_8;
typedef Group2Impl<u32, InstType, SibMagicInline> m_32i;
typedef Group2Impl<u16, InstType, SibMagicInline> m_16i;
typedef Group2Impl<u8, InstType, SibMagicInline> m_8i;
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the
// discretion of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
__forceinline void operator()( const x86Register32& to, const x86Register8& from ) const{ m_32i::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, const x86Register8& from ) const{ m_32::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); }
void operator()( const x86Register32& to, u8 imm ) const { m_32i::Emit( to, imm ); }
// ---------- 16 Bit Interface -----------
__forceinline void operator()( const x86Register16& to, const x86Register8& from ) const{ m_16i::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, const x86Register8& from ) const{ m_16::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); }
void operator()( const x86Register16& to, u8 imm ) const { m_16i::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void operator()( const x86Register8& to, const x86Register8& from ) const{ m_8i::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, const x86Register8& from ) const{ m_8::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); }
Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds.
};
// Define the externals for Group1/2 instructions here (inside the Internal namespace),
// and then import them into the x86Emitter namespace later. Done because it saves a
// lot of Internal:: namespace resolution mess, and is better than the alternative of
// importing Internal into x86Emitter, which, done at the header file level, would
// defeat the purpose.
extern const Group1ImplAll<G1Type_ADD> iADD;
extern const Group1ImplAll<G1Type_OR> iOR;
extern const Group1ImplAll<G1Type_ADC> iADC;
extern const Group1ImplAll<G1Type_SBB> iSBB;
extern const Group1ImplAll<G1Type_AND> iAND;
extern const Group1ImplAll<G1Type_SUB> iSUB;
extern const Group1ImplAll<G1Type_XOR> iXOR;
extern const Group1ImplAll<G1Type_CMP> iCMP;
extern const Group2ImplAll<G2Type_ROL> iROL;
extern const Group2ImplAll<G2Type_ROR> iROR;
extern const Group2ImplAll<G2Type_RCL> iRCL;
extern const Group2ImplAll<G2Type_RCR> iRCR;
extern const Group2ImplAll<G2Type_SHL> iSHL;
extern const Group2ImplAll<G2Type_SHR> iSHR;
extern const Group2ImplAll<G2Type_SAR> iSAR;
//////////////////////////////////////////////////////////////////////////////////////////
// Mov with sign/zero extension implementations:
//
template< int DestOperandSize, int SrcOperandSize >
class MovExtendImpl
{
protected:
static bool Is8BitOperand() { return SrcOperandSize == 1; }
static void prefix16() { if( DestOperandSize == 2 ) iWrite<u8>( 0x66 ); }
static __forceinline void emit_base( bool SignExtend )
{
prefix16();
iWrite<u8>( 0x0f );
iWrite<u8>( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) );
}
public:
MovExtendImpl() {} // For the love of GCC.
static __emitinline void Emit( const x86Register<DestOperandSize>& to, const x86Register<SrcOperandSize>& from, bool SignExtend )
{
emit_base( SignExtend );
ModRM( 3, from.Id, to.Id );
}
static __emitinline void Emit( const x86Register<DestOperandSize>& to, const ModSibStrict<SrcOperandSize>& sibsrc, bool SignExtend )
{
emit_base( SignExtend );
EmitSibMagic( to.Id, sibsrc );
}
};
// ------------------------------------------------------------------------
template< bool SignExtend >
class MovExtendImplAll
{
protected:
typedef MovExtendImpl<4, 2> m_16to32;
typedef MovExtendImpl<4, 1> m_8to32;
public:
__forceinline void operator()( const x86Register32& to, const x86Register16& from ) const { m_16to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const x86Register32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); }
__forceinline void operator()( const x86Register32& to, const x86Register8& from ) const { m_8to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const x86Register32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); }
MovExtendImplAll() {} // don't ask.
};
// ------------------------------------------------------------------------
extern const MovExtendImplAll<true> iMOVSX;
extern const MovExtendImplAll<false> iMOVZX;
#include "ix86_impl_group1.h"
#include "ix86_impl_group2.h"
#include "ix86_impl_movs.h"
// if the immediate is zero, we can replace the instruction, or ignore it
// entirely, depending on the instruction being issued. That's what we do here.
@ -1021,6 +648,63 @@ namespace x86Emitter
}*/
}
// ------------------------------------------------------------------------
// ----- Group 1 Instruction Class -----
extern const Internal::Group1ImplAll<Internal::G1Type_ADD> iADD;
extern const Internal::Group1ImplAll<Internal::G1Type_OR> iOR;
extern const Internal::Group1ImplAll<Internal::G1Type_ADC> iADC;
extern const Internal::Group1ImplAll<Internal::G1Type_SBB> iSBB;
extern const Internal::Group1ImplAll<Internal::G1Type_AND> iAND;
extern const Internal::Group1ImplAll<Internal::G1Type_SUB> iSUB;
extern const Internal::Group1ImplAll<Internal::G1Type_XOR> iXOR;
extern const Internal::Group1ImplAll<Internal::G1Type_CMP> iCMP;
// ----- Group 2 Instruction Class -----
// Optimization Note: For Imm forms, we ignore the instruction if the shift count is
// zero. This is a safe optimization since any zero-value shift does not affect any
// flags.
extern const Internal::Group2ImplAll<Internal::G2Type_ROL> iROL;
extern const Internal::Group2ImplAll<Internal::G2Type_ROR> iROR;
extern const Internal::Group2ImplAll<Internal::G2Type_RCL> iRCL;
extern const Internal::Group2ImplAll<Internal::G2Type_RCR> iRCR;
extern const Internal::Group2ImplAll<Internal::G2Type_SHL> iSHL;
extern const Internal::Group2ImplAll<Internal::G2Type_SHR> iSHR;
extern const Internal::Group2ImplAll<Internal::G2Type_SAR> iSAR;
extern const Internal::MovExtendImplAll<true> iMOVSX;
extern const Internal::MovExtendImplAll<false> iMOVZX;
extern const Internal::CMovImplGeneric iCMOV;
extern const Internal::CMovImplAll<Jcc_Above> iCMOVA;
extern const Internal::CMovImplAll<Jcc_AboveOrEqual> iCMOVAE;
extern const Internal::CMovImplAll<Jcc_Below> iCMOVB;
extern const Internal::CMovImplAll<Jcc_BelowOrEqual> iCMOVBE;
extern const Internal::CMovImplAll<Jcc_Greater> iCMOVG;
extern const Internal::CMovImplAll<Jcc_GreaterOrEqual> iCMOVGE;
extern const Internal::CMovImplAll<Jcc_Less> iCMOVL;
extern const Internal::CMovImplAll<Jcc_LessOrEqual> iCMOVLE;
extern const Internal::CMovImplAll<Jcc_Zero> iCMOVZ;
extern const Internal::CMovImplAll<Jcc_Equal> iCMOVE;
extern const Internal::CMovImplAll<Jcc_NotZero> iCMOVNZ;
extern const Internal::CMovImplAll<Jcc_NotEqual> iCMOVNE;
extern const Internal::CMovImplAll<Jcc_Overflow> iCMOVO;
extern const Internal::CMovImplAll<Jcc_NotOverflow> iCMOVNO;
extern const Internal::CMovImplAll<Jcc_Carry> iCMOVC;
extern const Internal::CMovImplAll<Jcc_NotCarry> iCMOVNC;
extern const Internal::CMovImplAll<Jcc_Signed> iCMOVS;
extern const Internal::CMovImplAll<Jcc_Unsigned> iCMOVNS;
extern const Internal::CMovImplAll<Jcc_ParityEven> iCMOVPE;
extern const Internal::CMovImplAll<Jcc_ParityOdd> iCMOVPO;
}
#include "ix86_inlines.inl"