From c5b366095a2c27b0d307fc0bcb3b39c97906a2da Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 19 Mar 2009 10:20:56 +0000 Subject: [PATCH] Fix for some random slowdown introduced in 815 (bad cacheline coloring caused a random 30% speed drop when not using speedhacks) >_< git-svn-id: http://pcsx2.googlecode.com/svn/trunk@819 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/PS2Etypes.h | 2 +- pcsx2/vtlb.cpp | 78 ++++++++++++++++------------------- pcsx2/vtlb.h | 15 +++++-- pcsx2/x86/ix86-32/recVTLB.cpp | 16 +++---- 4 files changed, 57 insertions(+), 54 deletions(-) diff --git a/common/include/PS2Etypes.h b/common/include/PS2Etypes.h index 258deaaded..9dceb3cd0a 100644 --- a/common/include/PS2Etypes.h +++ b/common/include/PS2Etypes.h @@ -82,7 +82,7 @@ typedef unsigned int uint; // Note: building the 'extern' into PCSX2_ALIGNED16_DECL fixes Visual Assist X's intellisense. #define PCSX2_ALIGNED(alig,x) __declspec(align(alig)) x -#define PCSX2_ALIGNED_EXTERN(alig,x) __declspec(align(alig)) x +#define PCSX2_ALIGNED_EXTERN(alig,x) extern __declspec(align(alig)) x #define PCSX2_ALIGNED16(x) __declspec(align(16)) x #define PCSX2_ALIGNED16_EXTERN(x) extern __declspec(align(16)) x diff --git a/pcsx2/vtlb.cpp b/pcsx2/vtlb.cpp index f2e3c82ba4..5bc06b18a9 100644 --- a/pcsx2/vtlb.cpp +++ b/pcsx2/vtlb.cpp @@ -50,13 +50,7 @@ using namespace vtlb_private; namespace vtlb_private { - s32 pmap[VTLB_PMAP_ITEMS]; //512KB - s32 vmap[VTLB_VMAP_ITEMS]; //4MB - - // first indexer -- 8/16/32/64/128 bit tables [values 0-4] - // second indexer -- read/write [0 or 1] - // third indexer -- 128 possible handlers! - void* RWFT[5][2][128]; + PCSX2_ALIGNED( 64, MapData vtlbdata ); } vtlbHandler vtlbHandlerCount=0; @@ -101,7 +95,7 @@ callfunction: template __forceinline DataType __fastcall MemOp_r0(u32 addr) { - u32 vmv=vmap[addr>>VTLB_PAGE_BITS]; + u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS]; s32 ppf=addr+vmv; if (!(ppf<0)) @@ -111,13 +105,13 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr) u32 hand=(u8)vmv; u32 paddr=ppf-hand+0x80000000; //SysPrintf("Translated 0x%08X to 0x%08X\n",addr,paddr); - //return reinterpret_cast::HandlerType*>(RWFT[TemplateHelper::sidx][0][hand])(paddr,data); + //return reinterpret_cast::HandlerType*>(vtlbdata.RWFT[TemplateHelper::sidx][0][hand])(paddr,data); switch( DataSize ) { - case 8: return ((vtlbMemR8FP*)RWFT[0][0][hand])(paddr); - case 16: return ((vtlbMemR16FP*)RWFT[1][0][hand])(paddr); - case 32: return ((vtlbMemR32FP*)RWFT[2][0][hand])(paddr); + case 8: return ((vtlbMemR8FP*)vtlbdata.RWFT[0][0][hand])(paddr); + case 16: return ((vtlbMemR16FP*)vtlbdata.RWFT[1][0][hand])(paddr); + case 32: return ((vtlbMemR32FP*)vtlbdata.RWFT[2][0][hand])(paddr); jNO_DEFAULT; } @@ -127,7 +121,7 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr) template __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data) { - u32 vmv=vmap[addr>>VTLB_PAGE_BITS]; + u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS]; s32 ppf=addr+vmv; if (!(ppf<0)) @@ -146,8 +140,8 @@ __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data) switch( DataSize ) { - case 64: ((vtlbMemR64FP*)RWFT[3][0][hand])(paddr, data); break; - case 128: ((vtlbMemR128FP*)RWFT[4][0][hand])(paddr, data); break; + case 64: ((vtlbMemR64FP*)vtlbdata.RWFT[3][0][hand])(paddr, data); break; + case 128: ((vtlbMemR128FP*)vtlbdata.RWFT[4][0][hand])(paddr, data); break; jNO_DEFAULT; } @@ -157,7 +151,7 @@ __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data) template __forceinline void __fastcall MemOp_w0(u32 addr, DataType data) { - u32 vmv=vmap[addr>>VTLB_PAGE_BITS]; + u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS]; s32 ppf=addr+vmv; if (!(ppf<0)) { @@ -172,9 +166,9 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data) switch( DataSize ) { - case 8: return ((vtlbMemW8FP*)RWFT[0][1][hand])(paddr, (u8)data); - case 16: return ((vtlbMemW16FP*)RWFT[1][1][hand])(paddr, (u16)data); - case 32: return ((vtlbMemW32FP*)RWFT[2][1][hand])(paddr, (u32)data); + case 8: return ((vtlbMemW8FP*)vtlbdata.RWFT[0][1][hand])(paddr, (u8)data); + case 16: return ((vtlbMemW16FP*)vtlbdata.RWFT[1][1][hand])(paddr, (u16)data); + case 32: return ((vtlbMemW32FP*)vtlbdata.RWFT[2][1][hand])(paddr, (u32)data); jNO_DEFAULT; } @@ -184,7 +178,7 @@ template __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data) { verify(DataSize==128 || DataSize==64); - u32 vmv=vmap[addr>>VTLB_PAGE_BITS]; + u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS]; s32 ppf=addr+vmv; if (!(ppf<0)) { @@ -200,8 +194,8 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data) //SysPrintf("Translated 0x%08X to 0x%08X\n",addr,paddr); switch( DataSize ) { - case 64: return ((vtlbMemW64FP*)RWFT[3][1][hand])(paddr, data); - case 128: return ((vtlbMemW128FP*)RWFT[4][1][hand])(paddr, data); + case 64: return ((vtlbMemW64FP*)vtlbdata.RWFT[3][1][hand])(paddr, data); + case 128: return ((vtlbMemW128FP*)vtlbdata.RWFT[4][1][hand])(paddr, data); jNO_DEFAULT; } @@ -352,17 +346,17 @@ vtlbHandler vtlb_RegisterHandler( vtlbMemR8FP* r8,vtlbMemR16FP* r16,vtlbMemR32FP //write the code :p vtlbHandler rv=vtlbHandlerCount++; - RWFT[0][0][rv] = (r8!=0) ? r8:vtlbDefaultPhyRead8; - RWFT[1][0][rv] = (r16!=0) ? r16:vtlbDefaultPhyRead16; - RWFT[2][0][rv] = (r32!=0) ? r32:vtlbDefaultPhyRead32; - RWFT[3][0][rv] = (r64!=0) ? r64:vtlbDefaultPhyRead64; - RWFT[4][0][rv] = (r128!=0) ? r128:vtlbDefaultPhyRead128; + vtlbdata.RWFT[0][0][rv] = (r8!=0) ? r8:vtlbDefaultPhyRead8; + vtlbdata.RWFT[1][0][rv] = (r16!=0) ? r16:vtlbDefaultPhyRead16; + vtlbdata.RWFT[2][0][rv] = (r32!=0) ? r32:vtlbDefaultPhyRead32; + vtlbdata.RWFT[3][0][rv] = (r64!=0) ? r64:vtlbDefaultPhyRead64; + vtlbdata.RWFT[4][0][rv] = (r128!=0) ? r128:vtlbDefaultPhyRead128; - RWFT[0][1][rv] = (w8!=0) ? w8:vtlbDefaultPhyWrite8; - RWFT[1][1][rv] = (w16!=0) ? w16:vtlbDefaultPhyWrite16; - RWFT[2][1][rv] = (w32!=0) ? w32:vtlbDefaultPhyWrite32; - RWFT[3][1][rv] = (w64!=0) ? w64:vtlbDefaultPhyWrite64; - RWFT[4][1][rv] = (w128!=0) ? w128:vtlbDefaultPhyWrite128; + vtlbdata.RWFT[0][1][rv] = (w8!=0) ? w8:vtlbDefaultPhyWrite8; + vtlbdata.RWFT[1][1][rv] = (w16!=0) ? w16:vtlbDefaultPhyWrite16; + vtlbdata.RWFT[2][1][rv] = (w32!=0) ? w32:vtlbDefaultPhyWrite32; + vtlbdata.RWFT[3][1][rv] = (w64!=0) ? w64:vtlbDefaultPhyWrite64; + vtlbdata.RWFT[4][1][rv] = (w128!=0) ? w128:vtlbDefaultPhyWrite128; return rv; } @@ -382,7 +376,7 @@ void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size) while(size>0) { - pmap[start>>VTLB_PAGE_BITS]=value; + vtlbdata.pmap[start>>VTLB_PAGE_BITS]=value; start+=VTLB_PAGE_SIZE; size-=VTLB_PAGE_SIZE; @@ -407,7 +401,7 @@ void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize) while(blocksz>0) { - pmap[start>>VTLB_PAGE_BITS]=ptr; + vtlbdata.pmap[start>>VTLB_PAGE_BITS]=ptr; start+=VTLB_PAGE_SIZE; ptr+=VTLB_PAGE_SIZE; @@ -425,7 +419,7 @@ void vtlb_Mirror(u32 new_region,u32 start,u32 size) while(size>0) { - pmap[start>>VTLB_PAGE_BITS]=pmap[new_region>>VTLB_PAGE_BITS]; + vtlbdata.pmap[start>>VTLB_PAGE_BITS]=vtlbdata.pmap[new_region>>VTLB_PAGE_BITS]; start+=VTLB_PAGE_SIZE; new_region+=VTLB_PAGE_SIZE; @@ -435,10 +429,10 @@ void vtlb_Mirror(u32 new_region,u32 start,u32 size) __forceinline void* vtlb_GetPhyPtr(u32 paddr) { - if (paddr>=VTLB_PMAP_SZ || pmap[paddr>>VTLB_PAGE_BITS]<0) + if (paddr>=VTLB_PMAP_SZ || vtlbdata.pmap[paddr>>VTLB_PAGE_BITS]<0) return NULL; else - return reinterpret_cast(pmap[paddr>>VTLB_PAGE_BITS]+(paddr&VTLB_PAGE_MASK)); + return reinterpret_cast(vtlbdata.pmap[paddr>>VTLB_PAGE_BITS]+(paddr&VTLB_PAGE_MASK)); } //virtual mappings @@ -462,11 +456,11 @@ void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz) } else { - pme=pmap[paddr>>VTLB_PAGE_BITS]; + pme=vtlbdata.pmap[paddr>>VTLB_PAGE_BITS]; if (pme<0) pme|=paddr;// top bit is set anyway ... } - vmap[vaddr>>VTLB_PAGE_BITS]=pme-vaddr; + vtlbdata.vmap[vaddr>>VTLB_PAGE_BITS]=pme-vaddr; vaddr+=VTLB_PAGE_SIZE; paddr+=VTLB_PAGE_SIZE; sz-=VTLB_PAGE_SIZE; @@ -480,7 +474,7 @@ void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz) u32 bu8=(u32)buffer; while(sz>0) { - vmap[vaddr>>VTLB_PAGE_BITS]=bu8-vaddr; + vtlbdata.vmap[vaddr>>VTLB_PAGE_BITS]=bu8-vaddr; vaddr+=VTLB_PAGE_SIZE; bu8+=VTLB_PAGE_SIZE; sz-=VTLB_PAGE_SIZE; @@ -500,7 +494,7 @@ void vtlb_VMapUnmap(u32 vaddr,u32 sz) } handl|=vaddr; // top bit is set anyway ... handl|=0x80000000; - vmap[vaddr>>VTLB_PAGE_BITS]=handl-vaddr; + vtlbdata.vmap[vaddr>>VTLB_PAGE_BITS]=handl-vaddr; vaddr+=VTLB_PAGE_SIZE; sz-=VTLB_PAGE_SIZE; } @@ -510,7 +504,7 @@ void vtlb_VMapUnmap(u32 vaddr,u32 sz) void vtlb_Init() { vtlbHandlerCount=0; - memzero_obj(RWFT); + memzero_obj(vtlbdata.RWFT); //Register default handlers //Unmapped Virt handlers _MUST_ be registered first. diff --git a/pcsx2/vtlb.h b/pcsx2/vtlb.h index 84ba374d08..5571d6b835 100644 --- a/pcsx2/vtlb.h +++ b/pcsx2/vtlb.h @@ -75,9 +75,18 @@ namespace vtlb_private static const uint VTLB_PMAP_SZ = 0x20000000; static const uint VTLB_VMAP_ITEMS = 0x100000000ULL / VTLB_PAGE_SIZE; - extern void* RWFT[5][2][128]; - extern s32 pmap[VTLB_PMAP_ITEMS]; //512KB - extern s32 vmap[VTLB_VMAP_ITEMS]; //4MB + struct MapData + { + s32 pmap[VTLB_PMAP_ITEMS]; //512KB + s32 vmap[VTLB_VMAP_ITEMS]; //4MB + + // first indexer -- 8/16/32/64/128 bit tables [values 0-4] + // second indexer -- read/write [0 or 1] + // third indexer -- 128 possible handlers! + void* RWFT[5][2][128]; + }; + + PCSX2_ALIGNED_EXTERN( 64, MapData vtlbdata ); } #endif diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 05889e7ce3..90047888b2 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -173,7 +173,7 @@ static void _vtlb_DynGen_IndirectRead( u32 bits ) MOVZX32R8toR(EAX,EAX); SUB32RtoR(ECX,EAX); //eax=[funct+eax] - MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2); + MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.RWFT[szidx][0],2); SUB32ItoR(ECX,0x80000000); CALL32R(EAX); } @@ -187,7 +187,7 @@ void vtlb_DynGenRead64(u32 bits) MOV32RtoR(EAX,ECX); SHR32ItoR(EAX,VTLB_PAGE_BITS); - MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); + MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); u8* _fullread = JS8(0); @@ -209,7 +209,7 @@ void vtlb_DynGenRead32(u32 bits, bool sign) MOV32RtoR(EAX,ECX); SHR32ItoR(EAX,VTLB_PAGE_BITS); - MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); + MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); u8* _fullread = JS8(0); @@ -243,7 +243,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) { jASSUME( bits == 64 || bits == 128 ); - void* vmv_ptr = &vmap[addr_const>>VTLB_PAGE_BITS]; + void* vmv_ptr = &vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; MOV32MtoR(EAX,(uptr)vmv_ptr); MOV32ItoR(ECX,addr_const); @@ -266,7 +266,7 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ) { jASSUME( bits <= 32 ); - void* vmv_ptr = &vmap[addr_const>>VTLB_PAGE_BITS]; + void* vmv_ptr = &vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; MOV32MtoR(EAX,(uptr)vmv_ptr); MOV32ItoR(ECX,addr_const); @@ -368,7 +368,7 @@ static void _vtlb_DynGen_IndirectWrite( u32 bits ) MOVZX32R8toR(EAX,EAX); SUB32RtoR(ECX,EAX); //eax=[funct+eax] - MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][1],2); + MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.RWFT[szidx][1],2); SUB32ItoR(ECX,0x80000000); CALL32R(EAX); } @@ -377,7 +377,7 @@ void vtlb_DynGenWrite(u32 sz) { MOV32RtoR(EAX,ECX); SHR32ItoR(EAX,VTLB_PAGE_BITS); - MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); + MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); u8* _full=JS8(0); @@ -398,7 +398,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) // the VTLB could feasibly be remapped by other recompiled code at any time. // So we're limited in exactly how much we can pre-calcuate. - void* vmv_ptr = &vmap[addr_const>>VTLB_PAGE_BITS]; + void* vmv_ptr = &vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; MOV32MtoR(EAX,(uptr)vmv_ptr); MOV32ItoR(ECX,addr_const);