Implemented MMX/XMM optimizations for VTLB's vtlb_DynGenRead and vtlb_DynGenWrite functions (small speedup).

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@648 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2009-01-28 05:49:25 +00:00 committed by Gregory Hainaut
parent 3bc7465b23
commit c09f69f242
3 changed files with 51 additions and 83 deletions

View File

@ -21,6 +21,7 @@
#include "vtlb.h"
#include "COP0.h"
#include "x86/ix86/ix86.h"
#include "iCore.h"
using namespace R5900;
@ -481,12 +482,12 @@ void vtlb_Term()
//nothing to do for now
}
#include "iR5900.h"
//ecx = addr
//edx = ptr
void vtlb_DynGenRead(u32 sz,int freereg)
void vtlb_DynGenRead(u32 sz)
{
freereg=-1;
/*
u32 vmv=vmap[addr>>VTLB_PAGE_BITS];
s32 ppf=addr+vmv;
@ -554,10 +555,12 @@ void vtlb_DynGenRead(u32 sz,int freereg)
break;
case 64:
if (freereg>0)
if( _hasFreeMMXreg() )
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoROffset(freereg,ECX,0);
MOVQRtoRmOffset(EDX,freereg,0);
_freeMMXreg(freereg);
}
else
{
@ -568,11 +571,14 @@ void vtlb_DynGenRead(u32 sz,int freereg)
MOV32RtoRmOffset(EDX,EAX,4);
}
break;
case 128:
if (freereg>0)
if( _hasFreeXMMreg() )
{
SSE_MOVAPSRmtoROffset(freereg,ECX,0);
SSE_MOVAPSRtoRmOffset(EDX,freereg,0);
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoROffset(freereg,ECX,0);
SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
_freeXMMreg(freereg);
}
else
{
@ -590,6 +596,7 @@ void vtlb_DynGenRead(u32 sz,int freereg)
}
break;
}
u8* cont=JMP8(0);
x86SetJ8(_fullread);
int szidx=0;
@ -612,7 +619,7 @@ void vtlb_DynGenRead(u32 sz,int freereg)
x86SetJ8(cont);
}
void vtlb_DynGenWrite(u32 sz,int freereg)
void vtlb_DynGenWrite(u32 sz)
{
MOV32RtoR(EAX,ECX);
SHR32ItoR(EAX,VTLB_PAGE_BITS);
@ -631,29 +638,14 @@ void vtlb_DynGenWrite(u32 sz,int freereg)
case 32:
MOV32RtoRm(ECX,EDX);
break;
/*
case 64:
//write8(0xCC);
POP32R(EAX);
MOV32RtoRm(ECX,EAX);
POP32R(EAX);
MOV32RtoRmOffset(ECX,EAX,4);
break;*/
case 64:
case 128:
if (freereg>0)
if( _hasFreeMMXreg() )
{
if (sz==64)
{
MOVQRmtoROffset(freereg,EDX,0);
MOVQRtoRmOffset(ECX,freereg,0);
}
else
{
SSE_MOVAPSRmtoROffset(freereg,EDX,0);
SSE_MOVAPSRtoRmOffset(ECX,freereg,0);
}
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoROffset(freereg,EDX,0);
MOVQRtoRmOffset(ECX,freereg,0);
_freeMMXreg( freereg );
}
else
{
@ -662,13 +654,27 @@ void vtlb_DynGenWrite(u32 sz,int freereg)
MOV32RmtoROffset(EAX,EDX,4);
MOV32RtoRmOffset(ECX,EAX,4);
if (sz==128)
{
MOV32RmtoROffset(EAX,EDX,8);
MOV32RtoRmOffset(ECX,EAX,8);
MOV32RmtoROffset(EAX,EDX,12);
MOV32RtoRmOffset(ECX,EAX,12);
}
}
break;
case 128:
if( _hasFreeXMMreg() )
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoROffset(freereg,EDX,0);
SSE2_MOVDQARtoRmOffset(ECX,freereg,0);
_freeXMMreg( freereg );
}
else
{
MOV32RmtoR(EAX,EDX);
MOV32RtoRm(ECX,EAX);
MOV32RmtoROffset(EAX,EDX,4);
MOV32RtoRmOffset(ECX,EAX,4);
MOV32RmtoROffset(EAX,EDX,8);
MOV32RtoRmOffset(ECX,EAX,8);
MOV32RmtoROffset(EAX,EDX,12);
MOV32RtoRmOffset(ECX,EAX,12);
}
break;
}

View File

@ -59,8 +59,8 @@ void __fastcall vtlb_memWrite32(u32 mem, u32 value);
void __fastcall vtlb_memWrite64(u32 mem, const u64* value);
void __fastcall vtlb_memWrite128(u32 mem, const u64* value);
void vtlb_DynGenWrite(u32 sz,int freereg);
void vtlb_DynGenRead(u32 sz,int freereg);
extern void vtlb_DynGenWrite(u32 sz);
extern void vtlb_DynGenRead(u32 sz);
#endif

View File

@ -2071,33 +2071,6 @@ void SetFastMemory(int bSetFast)
// nothing
}
// Wraps a call to vtlb_DynGenRead / vtlb_DynGenWrite, reserving a temporary
// MMX or XMM register beforehand (when one is free) and releasing it afterwards.
//   Read - true dispatches to vtlb_DynGenRead, false to vtlb_DynGenWrite.
//   sz   - size selector forwarded to the callee; only 64 (MMX) and 128 (XMM)
//          ever cause a temporary register to be reserved.
static __forceinline void vtlb_DynGenOp(bool Read,u32 sz)
{
	// -1 is forwarded to the callee when no temporary register was reserved.
	int scratch = -1;

	switch (sz)
	{
		case 64:
			if (_hasFreeMMXreg())
				scratch = _allocMMXreg(-1, MMX_TEMP, 0);
			break;

		case 128:
			if (_hasFreeXMMreg())
				scratch = _allocTempXMMreg(XMMT_FPS, -1);
			break;

		default:
			break;
	}

	if (!Read)
		vtlb_DynGenWrite(sz, scratch);
	else
		vtlb_DynGenRead(sz, scratch);

	// Nothing was reserved, so there is nothing to hand back.
	if (scratch == -1)
		return;

	// A reserved register is an XMM one for sz == 128 and an MMX one otherwise.
	if (sz != 128)
		_freeMMXreg(scratch);
	else
		_freeXMMreg(scratch);
}
void recLoad(u32 sz,bool sx)
{
//no int 3? i love to get my hands dirty ;p - Raz
@ -2105,33 +2078,25 @@ void recLoad(u32 sz,bool sx)
_deleteEEreg(_Rs_, 1);
_eeOnLoadWrite(_Rt_);
if (sz>=64)
{
EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension -> what does this really do ?
}
_deleteEEreg(_Rt_, 0);
MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
if ( _Imm_ != 0 )
{
ADD32ItoR( ECX, _Imm_ );
}
if (sz==128)
{
AND32I8toR(ECX,0xF0);
}
if ( _Rt_ && sz>=64)
{
MOV32ItoR(EDX, (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] );
}
else
{
MOV32ItoR(EDX, (int)&dummyValue[0] );
}
vtlb_DynGenOp(true,sz);
vtlb_DynGenRead(sz);
/*
if (sz==8)
@ -2151,7 +2116,7 @@ void recLoad(u32 sz,bool sx)
MOV32MtoR( EAX, (int)&dummyValue[0] ); //ewww, lame ! movsx /zx has r/m forms too ...
if (sz==8)
{
if (sx)
if (sx)
MOVSX32R8toR( EAX, EAX );
else
MOVZX32R8toR( EAX, EAX );
@ -2507,7 +2472,7 @@ void recStore(u32 sz)
}
vtlb_DynGenOp(false,sz);
vtlb_DynGenWrite(sz);
/*
if (sz==8)
@ -2682,7 +2647,7 @@ void recLWC1( void )
MOV32ItoR(EDX, (int)&fpuRegs.fpr[ _Rt_ ].UL ); //no 0 for fpu ?
//CALLFunc( (int)memRead32 );
vtlb_DynGenOp(true,32);
vtlb_DynGenRead(32);
}
////////////////////////////////////////////////////
@ -2698,8 +2663,7 @@ void recSWC1( void )
}
MOV32MtoR(EDX, (int)&fpuRegs.fpr[ _Rt_ ].UL );
//CALLFunc( (int)memWrite32 );
vtlb_DynGenOp(false,32);
vtlb_DynGenWrite(32);
}
////////////////////////////////////////////////////
@ -2727,8 +2691,7 @@ void recLQC2( void )
{
MOV32ItoR(EDX, (int)&dummyValue[0] );
}
//CALLFunc( (int)memRead128 );
vtlb_DynGenOp(true,128);
vtlb_DynGenRead(128);
}
////////////////////////////////////////////////////
@ -2744,8 +2707,7 @@ void recSQC2( void )
}
MOV32ItoR(EDX, (int)&VU0.VF[_Ft_].UD[0] );
//CALLFunc( (int)memWrite128 );
vtlb_DynGenOp(false,128);
vtlb_DynGenWrite(128);
}
#endif