Minor Optimization: Added Const support to Vtlb's Load and Store implementations.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@627 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-02-27 06:40:05 +00:00
parent a5ab9ad28f
commit e205999744
4 changed files with 276 additions and 164 deletions

View File

@ -37,7 +37,6 @@ static std::string disOut;
#ifdef PCSX2_DEVBUILD #ifdef PCSX2_DEVBUILD
static void debugI() static void debugI()
{ {
//CPU_LOG("%s\n", disR5900Current.getCString());
if (cpuRegs.GPR.n.r0.UD[0] || cpuRegs.GPR.n.r0.UD[1]) Console::Error("R0 is not zero!!!!"); if (cpuRegs.GPR.n.r0.UD[0] || cpuRegs.GPR.n.r0.UD[1]) Console::Error("R0 is not zero!!!!");
} }
#else #else
@ -64,6 +63,17 @@ static void execI()
// } // }
//} //}
// Another method of instruction dumping:
/*if( cpuRegs.cycle > 0x4f24d714 )
{
//CPU_LOG( "%s\n", disR5900Current.getCString());
disOut.clear();
opcode.disasm( disOut );
disOut += '\n';
CPU_LOG( disOut.c_str() );
}*/
cpuBlockCycles += opcode.cycles; cpuBlockCycles += opcode.cycles;
cpuRegs.pc += 4; cpuRegs.pc += 4;

View File

@ -61,6 +61,10 @@ extern void vtlb_DynGenWrite(u32 sz);
extern void vtlb_DynGenRead32(u32 bits, bool sign); extern void vtlb_DynGenRead32(u32 bits, bool sign);
extern void vtlb_DynGenRead64(u32 sz); extern void vtlb_DynGenRead64(u32 sz);
extern void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const );
extern void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const );
extern void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const );
namespace vtlb_private namespace vtlb_private
{ {
static const uint VTLB_PAGE_BITS = 12; static const uint VTLB_PAGE_BITS = 12;

View File

@ -2081,27 +2081,36 @@ void recLoad64( u32 bits, bool sign )
_deleteEEreg(_Rs_, 1); _deleteEEreg(_Rs_, 1);
_eeOnLoadWrite(_Rt_); _eeOnLoadWrite(_Rt_);
EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension -> what does this really do ? EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension
_deleteEEreg(_Rt_, 0); _deleteEEreg(_Rt_, 0);
// Load ECX with the source memory address that we're reading from. // Load EDX with the destination.
MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); // 64/128 bit modes load the result directly into the cpuRegs.GPR struct.
if ( _Imm_ != 0 )
ADD32ItoR( ECX, _Imm_ );
if( bits == 128 ) // force 16 byte alignment on 128 bit reads if( _Rt_ )
AND32I8toR(ECX,0xF0); MOV32ItoR(EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] );
// Load EDX with the destination. 64/128 bit modes load the result directly into
// the cpuRegs.GPR struct.
if ( _Rt_ )
MOV32ItoR(EDX, (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] );
else else
MOV32ItoR(EDX, (int)&dummyValue[0] ); MOV32ItoR(EDX, (uptr)&dummyValue[0] );
vtlb_DynGenRead64(bits); if( IS_EECONSTREG( _Rs_ ) )
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
if( bits == 128 ) srcadr &= ~0x0f;
vtlb_DynGenRead64_Const( bits, srcadr );
}
else
{
// Load ECX with the source memory address that we're reading from.
MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
if ( _Imm_ != 0 )
ADD32ItoR( ECX, _Imm_ );
if( bits == 128 ) // force 16 byte alignment on 128 bit reads
AND32I8toR(ECX,0xF0);
vtlb_DynGenRead64(bits);
}
} }
void recLoad32(u32 bits,bool sign) void recLoad32(u32 bits,bool sign)
@ -2115,40 +2124,26 @@ void recLoad32(u32 bits,bool sign)
_eeOnLoadWrite(_Rt_); _eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0); _deleteEEreg(_Rt_, 0);
// Load ECX with the source memory address that we're reading from.
MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
if ( _Imm_ != 0 )
ADD32ItoR( ECX, _Imm_ );
// 8/16/32 bit modes return the loaded value in EAX. // 8/16/32 bit modes return the loaded value in EAX.
//MOV32ItoR(EDX, (int)&dummyValue[0] );
vtlb_DynGenRead32(bits, sign); if( IS_EECONSTREG( _Rs_ ) )
if ( _Rt_ )
{ {
// Perform sign extension if needed u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
vtlb_DynGenRead32_Const( bits, sign, srcadr );
//MOV32MtoR( EAX, (int)&dummyValue[0] ); //ewww, lame ! movsx /zx has r/m forms too ... }
/*if (bits==8) else
{ {
if (sign) // Load ECX with the source memory address that we're reading from.
//MOVSX32M8toR( EAX, (int)&dummyValue[0] ); MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
MOVSX32R8toR( EAX, EAX ); if ( _Imm_ != 0 )
//else ADD32ItoR( ECX, _Imm_ );
//MOVZX32M8toR( EAX, (int)&dummyValue[0] );
//MOVZX32R8toR( EAX, EAX ); vtlb_DynGenRead32(bits, sign);
} }
else if (bits==16)
{
if (sign)
//MOVSX32M16toR( EAX, (int)&dummyValue[0] );
MOVSX32R16toR( EAX, EAX );
//else
//MOVZX32M16toR( EAX, (int)&dummyValue[0] );
//MOVZX32R16toR( EAX, EAX );
}*/
if( _Rt_ )
{
// EAX holds the loaded value, so sign extend as needed:
if (sign) if (sign)
CDQ(); CDQ();
else else
@ -2463,6 +2458,7 @@ void recLQ( void )
ADD32ItoR( ESP, 8 ); ADD32ItoR( ESP, 8 );
*/ */
} }
void recStore(u32 sz) void recStore(u32 sz)
{ {
//no int 3? i love to get my hands dirty ;p - Raz //no int 3? i love to get my hands dirty ;p - Raz
@ -2470,16 +2466,15 @@ void recStore(u32 sz)
_deleteEEreg(_Rs_, 1); _deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1); _deleteEEreg(_Rt_, 1);
MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); // Performance note: Const prop for the store address is good, always.
if ( _Imm_ != 0 ) // Constprop for the value being stored is not really worthwhile (better to use register
{ // allocation -- simpler code and just as fast)
ADD32ItoR( ECX, _Imm_);
}
if (sz==128) // Load EDX first with the value being written, or the address of the value
{ // being written (64/128 bit modes). TODO: use register allocation, if the
AND32I8toR(ECX,0xF0); // value is allocated to a register.
}
if (sz<64) if (sz<64)
{ {
@ -2492,23 +2487,29 @@ void recStore(u32 sz)
{ {
MOV32ItoR(EDX,(int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); MOV32ItoR(EDX,(int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]);
} }
vtlb_DynGenWrite(sz);
/* // Load ECX with the destination address, or issue a direct optimized write
if (sz==8) // if the address is a constant propagation.
CALLFunc( (int)memWrite8 );
else if (sz==16) if( GPR_IS_CONST1( _Rs_ ) )
CALLFunc( (int)memWrite16 ); {
else if (sz==32) u32 dstadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
CALLFunc( (int)memWrite32 ); if( sz == 128 ) dstadr &= ~0x0f;
else if (sz==64) vtlb_DynGenWrite_Const( sz, dstadr );
CALLFunc( (int)memWrite64 ); }
else if (sz==128) else
CALLFunc( (int)memWrite128 ); {
*/ MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
if ( _Imm_ != 0 )
ADD32ItoR(ECX, _Imm_);
if (sz==128)
AND32I8toR(ECX,0xF0);
vtlb_DynGenWrite(sz);
}
} }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recSB( void ) void recSB( void )
{ {

View File

@ -27,6 +27,21 @@
using namespace vtlb_private; using namespace vtlb_private;
// NOTICE: This function *destroys* EAX!!
// Moves 128 bits of memory from the source register ptr to the dest register ptr.
// (used as an equivalent to movaps, when a free XMM register is unavailable for some reason)
void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
{
MOV32RmtoR(EAX,srcRm);
MOV32RtoRm(destRm,EAX);
MOV32RmtoROffset(EAX,srcRm,4);
MOV32RtoRmOffset(destRm,EAX,4);
MOV32RmtoROffset(EAX,srcRm,8);
MOV32RtoRmOffset(destRm,EAX,8);
MOV32RmtoROffset(EAX,srcRm,12);
MOV32RtoRmOffset(destRm,EAX,12);
}
/* /*
// Pseudo-Code For the following Dynarec Implementations --> // Pseudo-Code For the following Dynarec Implementations -->
@ -78,18 +93,31 @@ using namespace vtlb_private;
*/ */
//////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Load Implementations
//ecx = addr static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
//edx = ptr
void vtlb_DynGenRead64(u32 bits)
{ {
MOV32RtoR(EAX,ECX); switch( bits )
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
ADD32RtoR(ECX,EAX);
u8* _fullread=JS8(0);
switch(bits)
{ {
case 8:
if( sign )
MOVSX32Rm8toR(EAX,ECX);
else
MOVZX32Rm8toR(EAX,ECX);
break;
case 16:
if( sign )
MOVSX32Rm16toR(EAX,ECX);
else
MOVZX32Rm16toR(EAX,ECX);
break;
case 32:
MOV32RmtoR(EAX,ECX);
break;
case 64: case 64:
if( _hasFreeMMXreg() ) if( _hasFreeMMXreg() )
{ {
@ -106,7 +134,7 @@ void vtlb_DynGenRead64(u32 bits)
MOV32RmtoROffset(EAX,ECX,4); MOV32RmtoROffset(EAX,ECX,4);
MOV32RtoRmOffset(EDX,EAX,4); MOV32RtoRmOffset(EDX,EAX,4);
} }
break; break;
case 128: case 128:
if( _hasFreeXMMreg() ) if( _hasFreeXMMreg() )
@ -121,31 +149,25 @@ void vtlb_DynGenRead64(u32 bits)
// Could put in an MMX optimization here as well, but no point really. // Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg. // It's almost never used since there's almost always a free XMM reg.
MOV32RmtoR(EAX,ECX); MOV128_MtoM( EDX, ECX ); // dest <- src!
MOV32RtoRm(EDX,EAX);
MOV32RmtoROffset(EAX,ECX,4);
MOV32RtoRmOffset(EDX,EAX,4);
MOV32RmtoROffset(EAX,ECX,8);
MOV32RtoRmOffset(EDX,EAX,8);
MOV32RmtoROffset(EAX,ECX,12);
MOV32RtoRmOffset(EDX,EAX,12);
} }
break; break;
jNO_DEFAULT jNO_DEFAULT
} }
}
u8* cont=JMP8(0); static void _vtlb_DynGen_IndirectRead( u32 bits )
x86SetJ8(_fullread); {
int szidx; int szidx;
switch(bits) switch( bits )
{ {
case 64: szidx=3; break; case 8: szidx=0; break;
case 128: szidx=4; break; case 16: szidx=1; break;
case 32: szidx=2; break;
case 64: szidx=3; break;
case 128: szidx=4; break;
jNO_DEFAULT jNO_DEFAULT
} }
@ -155,12 +177,33 @@ void vtlb_DynGenRead64(u32 bits)
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2); MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2);
SUB32ItoR(ECX,0x80000000); SUB32ItoR(ECX,0x80000000);
CALL32R(EAX); CALL32R(EAX);
}
// Recompiled input registers:
// ecx = source addr to read from
// edx = ptr to dest to write to
void vtlb_DynGenRead64(u32 bits)
{
jASSUME( bits == 64 || bits == 128 );
MOV32RtoR(EAX,ECX);
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
ADD32RtoR(ECX,EAX);
u8* _fullread = JS8(0);
_vtlb_DynGen_DirectRead( bits, false );
u8* cont = JMP8(0);
x86SetJ8(_fullread);
_vtlb_DynGen_IndirectRead( bits );
x86SetJ8(cont); x86SetJ8(cont);
} }
// ecx - source address to read from // Recompiled input registers:
// Returns read value in eax. // ecx - source address to read from
// Returns read value in eax.
void vtlb_DynGenRead32(u32 bits, bool sign) void vtlb_DynGenRead32(u32 bits, bool sign)
{ {
jASSUME( bits <= 32 ); jASSUME( bits <= 32 );
@ -169,49 +212,13 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
SHR32ItoR(EAX,VTLB_PAGE_BITS); SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
ADD32RtoR(ECX,EAX); ADD32RtoR(ECX,EAX);
u8* _fullread=JS8(0); u8* _fullread = JS8(0);
switch(bits) _vtlb_DynGen_DirectRead( bits, sign );
{ u8* cont = JMP8(0);
case 8:
if( sign )
MOVSX32Rm8toR(EAX,ECX);
else
MOVZX32Rm8toR(EAX,ECX);
break;
case 16:
if( sign )
MOVSX32Rm16toR(EAX,ECX);
else
MOVZX32Rm16toR(EAX,ECX);
break;
case 32:
MOV32RmtoR(EAX,ECX);
break;
jNO_DEFAULT
}
u8* cont=JMP8(0);
x86SetJ8(_fullread); x86SetJ8(_fullread);
int szidx; _vtlb_DynGen_IndirectRead( bits );
switch(bits)
{
case 8: szidx=0; break;
case 16: szidx=1; break;
case 32: szidx=2; break;
jNO_DEFAULT
}
MOVZX32R8toR(EAX,EAX);
SUB32RtoR(ECX,EAX);
//eax=[funct+eax]
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2);
SUB32ItoR(ECX,0x80000000);
CALL32R(EAX);
// perform sign extension on the result: // perform sign extension on the result:
@ -233,25 +240,83 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
x86SetJ8(cont); x86SetJ8(cont);
} }
void vtlb_DynGenWrite(u32 sz) void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
{ {
MOV32RtoR(EAX,ECX); jASSUME( bits == 64 || bits == 128 );
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); void* vmv_ptr = &vmap[addr_const>>VTLB_PAGE_BITS];
ADD32RtoR(ECX,EAX);
u8* _full=JS8(0); MOV32MtoR(EAX,(uptr)vmv_ptr);
switch(sz) MOV32ItoR(ECX,addr_const);
ADD32RtoR(ECX,EAX); // ecx=ppf
u8* _fullread = JS8(0);
_vtlb_DynGen_DirectRead( bits, false );
u8* cont = JMP8(0);
x86SetJ8(_fullread);
_vtlb_DynGen_IndirectRead( bits );
x86SetJ8(cont);
}
// Recompiled input registers:
// ecx - source address to read from
// Returns read value in eax.
void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
{
jASSUME( bits <= 32 );
void* vmv_ptr = &vmap[addr_const>>VTLB_PAGE_BITS];
MOV32MtoR(EAX,(uptr)vmv_ptr);
MOV32ItoR(ECX,addr_const);
ADD32RtoR(ECX,EAX); // ecx=ppf
u8* _fullread = JS8(0);
_vtlb_DynGen_DirectRead( bits, sign );
u8* cont = JMP8(0);
x86SetJ8(_fullread);
_vtlb_DynGen_IndirectRead( bits );
// perform sign extension on the result:
if( bits==8 )
{
if( sign )
MOVSX32R8toR(EAX,EAX);
else
MOVZX32R8toR(EAX,EAX);
}
else if( bits==16 )
{
if( sign )
MOVSX32R16toR(EAX,EAX);
else
MOVZX32R16toR(EAX,EAX);
}
x86SetJ8(cont);
}
//////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Store Implementations
static void _vtlb_DynGen_DirectWrite( u32 bits )
{
switch(bits)
{ {
//8 , 16, 32 : data on EDX //8 , 16, 32 : data on EDX
case 8: case 8:
MOV8RtoRm(ECX,EDX); MOV8RtoRm(ECX,EDX);
break; break;
case 16: case 16:
MOV16RtoRm(ECX,EDX); MOV16RtoRm(ECX,EDX);
break; break;
case 32: case 32:
MOV32RtoRm(ECX,EDX); MOV32RtoRm(ECX,EDX);
break; break;
case 64: case 64:
if( _hasFreeMMXreg() ) if( _hasFreeMMXreg() )
@ -269,7 +334,7 @@ void vtlb_DynGenWrite(u32 sz)
MOV32RmtoROffset(EAX,EDX,4); MOV32RmtoROffset(EAX,EDX,4);
MOV32RtoRmOffset(ECX,EAX,4); MOV32RtoRmOffset(ECX,EAX,4);
} }
break; break;
case 128: case 128:
if( _hasFreeXMMreg() ) if( _hasFreeXMMreg() )
@ -284,23 +349,16 @@ void vtlb_DynGenWrite(u32 sz)
// Could put in an MMX optimization here as well, but no point really. // Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg. // It's almost never used since there's almost always a free XMM reg.
MOV32RmtoR(EAX,EDX); MOV128_MtoM( ECX, EDX ); // dest <- src!
MOV32RtoRm(ECX,EAX);
MOV32RmtoROffset(EAX,EDX,4);
MOV32RtoRmOffset(ECX,EAX,4);
MOV32RmtoROffset(EAX,EDX,8);
MOV32RtoRmOffset(ECX,EAX,8);
MOV32RmtoROffset(EAX,EDX,12);
MOV32RtoRmOffset(ECX,EAX,12);
} }
break; break;
} }
u8* cont=JMP8(0); }
x86SetJ8(_full);
static void _vtlb_DynGen_IndirectWrite( u32 bits )
{
int szidx=0; int szidx=0;
switch(sz) switch( bits )
{ {
case 8: szidx=0; break; case 8: szidx=0; break;
case 16: szidx=1; break; case 16: szidx=1; break;
@ -314,6 +372,45 @@ void vtlb_DynGenWrite(u32 sz)
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][1],2); MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][1],2);
SUB32ItoR(ECX,0x80000000); SUB32ItoR(ECX,0x80000000);
CALL32R(EAX); CALL32R(EAX);
}
void vtlb_DynGenWrite(u32 sz)
{
MOV32RtoR(EAX,ECX);
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
ADD32RtoR(ECX,EAX);
u8* _full=JS8(0);
_vtlb_DynGen_DirectWrite( sz );
u8* cont = JMP8(0);
x86SetJ8(_full);
_vtlb_DynGen_IndirectWrite( sz );
x86SetJ8(cont);
}
// Generates code for a store instruction, where the address is a known constant.
void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
{
// Important: It's not technically safe to do a const lookup of the VTLB here, since
// the VTLB could feasibly be remapped by other recompiled code at any time.
// So we're limited in exactly how much we can pre-calcuate.
void* vmv_ptr = &vmap[addr_const>>VTLB_PAGE_BITS];
MOV32MtoR(EAX,(uptr)vmv_ptr);
MOV32ItoR(ECX,addr_const);
ADD32RtoR(ECX,EAX); // ecx=ppf
u8* _full = JS8(0);
_vtlb_DynGen_DirectWrite( bits );
u8* cont = JMP8(0);
x86SetJ8(_full);
_vtlb_DynGen_IndirectWrite( bits );
x86SetJ8(cont); x86SetJ8(cont);
} }