EE: very minor VTLB optimisation. It is probably a smaller optimisation than r4803 (but it shouldn't have any possibility of slowing down any code).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4807 96395faa-99c1-11dd-bbfe-3dabce05a288
2011-07-16 02:38:30 +00:00 · 2011-07-16 02:38:30 +00:00 · 3286f0079e
parent 9b66620287
commit 3286f0079e
1 changed files with 37 additions and 31 deletions
--- a/pcsx2/x86/ix86-32/recVTLB.cpp
+++ b/pcsx2/x86/ix86-32/recVTLB.cpp
@ -247,22 +247,23 @@ static __pagealigned u8 m_IndirectDispatchers[__pagesize];
 // mode        - 0 for read, 1 for write!
 // operandsize - 0 thru 4 represents 8, 16, 32, 64, and 128 bits.
 //
-static u8* GetIndirectDispatcherPtr( int mode, int operandsize )
+static u8* GetIndirectDispatcherPtr( int mode, int operandsize, bool sign = false )
 {
 	// Each dispatcher is aligned to 64 bytes.  The actual dispatchers are only like
 	// 20-some bytes each, but 64 byte alignment on functions that are called
 	// more frequently than a hot sex hotline at 1:15am is probably a good thing.

-	// 5*64?  Because 5 operand types per each mode :D
+	// 7*64?  That is the per-mode stride: 5 operand widths, plus 2 extra sign-extending slots used only by 8 and 16 bit reads

-	return &m_IndirectDispatchers[(mode*(5*64)) + (operandsize*64)];
+	assert(mode || operandsize >= 2 ? !sign : true);
+	return &m_IndirectDispatchers[(mode*(7*64)) + (sign*5*64) + (operandsize*64)];
 }

 // ------------------------------------------------------------------------
 // Generates a JS instruction that targets the appropriate templated instance of
 // the vtlb Indirect Dispatcher.
 //
-static void DynGen_IndirectDispatch( int mode, int bits )
+static void DynGen_IndirectDispatch( int mode, int bits, bool sign = false )
 {
 	int szidx;
 	switch( bits )
@ -274,7 +275,7 @@ static void DynGen_IndirectDispatch( int mode, int bits )
 		case 128:	szidx=4;	break;
 		jNO_DEFAULT;
 	}
-	xJS( GetIndirectDispatcherPtr( mode, szidx ) );
+	xJS( GetIndirectDispatcherPtr( mode, szidx, sign ) );
 }

 // One-time initialization procedure.  Multiple subsequent calls during the lifespan of the
@ -296,7 +297,9 @@ void vtlb_dynarec_init()
 	{
 		for( int bits=0; bits<5; ++bits )
 		{
-			xSetPtr( GetIndirectDispatcherPtr( mode, bits ) );
+			for (int sign = 0; sign < (!mode && bits < 2 ? 2 : 1); sign++)
+			{
+				xSetPtr( GetIndirectDispatcherPtr( mode, bits, !!sign ) );

 				xMOVZX( eax, al );
 				xSUB( ecx, 0x80000000 );
@ -305,9 +308,29 @@ void vtlb_dynarec_init()
 				// jump to the indirect handler, which is a __fastcall C++ function.
 				// [ecx is address, edx is data]
 				xCALL( ptr32[(eax*4) + vtlbdata.RWFT[bits][mode]] );
+
+				if (!mode)
+				{
+					if (bits == 0)
+					{
+						if (sign)
+							xMOVSX(eax, al);
+						else
+							xMOVZX(eax, al);
+					}
+					else if (bits == 1)
+					{
+						if (sign)
+							xMOVSX(eax, ax);
+						else
+							xMOVZX(eax, ax);
+					}
+				}
+
 				xJMP( ebx );
 			}
 		}
+	}

 	HostSys::MemProtectStatic( m_IndirectDispatchers, PageAccess_ExecOnly() );
 }
@ -336,27 +359,10 @@ void vtlb_DynGenRead32(u32 bits, bool sign)

 	uptr* writeback = DynGen_PrepRegs();

-	DynGen_IndirectDispatch( 0, bits );
+	DynGen_IndirectDispatch( 0, bits, sign && bits < 32 );
 	DynGen_DirectRead( bits, sign );

 	*writeback = (uptr)xGetPtr();
-
-	// perform sign extension on the result:
-
-	if( bits == 8 )
-	{
-		if( sign )
-			xMOVSX( eax, al );
-		else
-			xMOVZX( eax, al );
-	}
-	else if( bits == 16 )
-	{
-		if( sign )
-			xMOVSX( eax, ax );
-		else
-			xMOVZX( eax, ax );
-	}
 }

 // ------------------------------------------------------------------------