diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index a2c7d44406..1213232f4b 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -163,11 +163,19 @@ vtlbHandler tlb_fallback_6; vtlbHandler tlb_fallback_7; vtlbHandler tlb_fallback_8; -vtlbHandler vu0_micro_mem; -vtlbHandler vu1_micro_mem; +vtlbHandler vu0_micro_mem[2]; // 0 - dynarec, 1 - interpreter +vtlbHandler vu1_micro_mem[2]; // 0 - dynarec, 1 - interpreter vtlbHandler hw_by_page[0x10]; +// Used to remap the VUmicro memory according to the VU0/VU1 dynarec setting. +// (the VU memory operations are different for recs vs. interpreters) +void memMapVUmicro() +{ + vtlb_MapHandler(vu0_micro_mem[CHECK_VU0REC ? 0 : 1],0x11000000,0x00004000); + vtlb_MapHandler(vu1_micro_mem[CHECK_VU1REC ? 0 : 1],0x11008000,0x00004000); +} + void memMapPhy() { //Main mem @@ -185,15 +193,7 @@ void memMapPhy() //IOP mem vtlb_MapBlock(psxM,0x1c000000,0x00800000); - //VU0:Micro - //vtlb_MapBlock(VU0.Micro,0x11000000,0x00004000,0x1000); - vtlb_MapHandler(vu0_micro_mem,0x11000000,0x00004000); - //VU0:Mem vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000); - //VU1:Micro - //vtlb_MapBlock(VU1.Micro,0x11008000,0x00004000); - vtlb_MapHandler(vu1_micro_mem,0x11008000,0x00004000); - //VU1:Mem vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000); //These fallback to mem* stuff ... 
@@ -447,7 +447,20 @@ void __fastcall _ext_memWrite128(u32 mem, const u64 *value) } #define vtlb_RegisterHandlerTempl1(nam,t) vtlb_RegisterHandler(nam##Read8,nam##Read16,nam##Read32,nam##Read64,nam##Read128, \ - nam##Write8,nam##Write16,nam##Write32,nam##Write64,nam##Write128); + nam##Write8,nam##Write16,nam##Write32,nam##Write64,nam##Write128) + +#define vtlb_RegisterHandlerTempl2(nam,t,rec) vtlb_RegisterHandler(nam##Read8,nam##Read16,nam##Read32,nam##Read64,nam##Read128, \ + nam##Write8,nam##Write16,nam##Write32,nam##Write64,nam##Write128) + +typedef void __fastcall ClearFunc_t( u32 addr, u32 qwc ); + +template +static __forceinline ClearFunc_t& GetClearFunc() +{ + return dynarec ? + (( vunum==0 ) ? VU0micro::recClear : VU1micro::recClear) + : (( vunum==0 ) ? VU0micro::intClear : VU1micro::intClear); +} template mem8_t __fastcall vuMicroRead8(u32 addr) @@ -498,89 +511,74 @@ void __fastcall vuMicroRead128(u32 addr,mem128_t* data) // [TODO] : Profile this code and see how often the VUs get written, and how // often it changes the values being written (invoking a cpuClear). -template +template void __fastcall vuMicroWrite8(u32 addr,mem8_t data) { - addr&=(vunum==0)?0xfff:0x3fff; - VURegs* vu=(vunum==0)?&VU0:&VU1; + addr &= (vunum==0) ? 0xfff : 0x3fff; + VURegs& vu = (vunum==0) ? VU0 : VU1; - if (vu->Micro[addr]!=data) + if (vu.Micro[addr]!=data) { - vu->Micro[addr]=data; + vu.Micro[addr]=data; - if (vunum==0) - CpuVU0->Clear(addr&(~7),1); - else - CpuVU1->Clear(addr&(~7),1); + GetClearFunc()(addr&(~7),1); } } -template +template void __fastcall vuMicroWrite16(u32 addr,mem16_t data) { - addr&=(vunum==0)?0xfff:0x3fff; - VURegs* vu=(vunum==0)?&VU0:&VU1; + addr &= (vunum==0) ? 0xfff : 0x3fff; + VURegs& vu = (vunum==0) ? 
VU0 : VU1; - if (*(u16*)&vu->Micro[addr]!=data) + if (*(u16*)&vu.Micro[addr]!=data) { - *(u16*)&vu->Micro[addr]=data; + *(u16*)&vu.Micro[addr]=data; - if (vunum==0) - CpuVU0->Clear(addr&(~7),1); - else - CpuVU1->Clear(addr&(~7),1); + GetClearFunc()(addr&(~7),1); } } -template +template void __fastcall vuMicroWrite32(u32 addr,mem32_t data) { - addr&=(vunum==0)?0xfff:0x3fff; - VURegs* vu=(vunum==0)?&VU0:&VU1; + addr &= (vunum==0) ? 0xfff : 0x3fff; + VURegs& vu = (vunum==0) ? VU0 : VU1; - if (*(u32*)&vu->Micro[addr]!=data) + if (*(u32*)&vu.Micro[addr]!=data) { - *(u32*)&vu->Micro[addr]=data; + *(u32*)&vu.Micro[addr]=data; - if (vunum==0) - CpuVU0->Clear(addr&(~7),1); - else - CpuVU1->Clear(addr&(~7),1); + GetClearFunc()(addr&(~7),1); } } -template +template void __fastcall vuMicroWrite64(u32 addr,const mem64_t* data) { - addr&=(vunum==0)?0xfff:0x3fff; - VURegs* vu=(vunum==0)?&VU0:&VU1; + addr &= (vunum==0) ? 0xfff : 0x3fff; + VURegs& vu = (vunum==0) ? VU0 : VU1; - if (*(u64*)&vu->Micro[addr]!=data[0]) + if (*(u64*)&vu.Micro[addr]!=data[0]) { - *(u64*)&vu->Micro[addr]=data[0]; + *(u64*)&vu.Micro[addr]=data[0]; - if (vunum==0) - CpuVU0->Clear(addr,1); - else - CpuVU1->Clear(addr,1); + GetClearFunc()(addr,1); } } -template +template void __fastcall vuMicroWrite128(u32 addr,const mem128_t* data) { - addr&=(vunum==0)?0xfff:0x3fff; - VURegs* vu=(vunum==0)?&VU0:&VU1; + addr &= (vunum==0) ? 0xfff : 0x3fff; + VURegs& vu = (vunum==0) ? 
VU0 : VU1; - if (*(u64*)&vu->Micro[addr]!=data[0] || *(u64*)&vu->Micro[addr+8]!=data[1]) + if (*(u64*)&vu.Micro[addr]!=data[0] || *(u64*)&vu.Micro[addr+8]!=data[1]) { - *(u64*)&vu->Micro[addr]=data[0]; - *(u64*)&vu->Micro[addr+8]=data[1]; + *(u64*)&vu.Micro[addr]=data[0]; + *(u64*)&vu.Micro[addr+8]=data[1]; - if (vunum==0) - CpuVU0->Clear(addr,2); - else - CpuVU1->Clear(addr,2); + GetClearFunc()(addr,2); } } @@ -605,7 +603,7 @@ void memClearPageAddr(u32 vaddr) } /////////////////////////////////////////////////////////////////////////// -// VTLB Memory Init / Reset / Shutdown +// PS2 Memory Init / Reset / Shutdown static const uint m_allMemSize = Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2 + Ps2MemSize::ERom + @@ -707,8 +705,13 @@ void memReset() tlb_fallback_7=vtlb_RegisterHandlerTempl1(_ext_mem,7); tlb_fallback_8=vtlb_RegisterHandlerTempl1(_ext_mem,8); - vu0_micro_mem=vtlb_RegisterHandlerTempl1(vuMicro,0); - vu1_micro_mem=vtlb_RegisterHandlerTempl1(vuMicro,1); + // Dynarec versions of VUs + vu0_micro_mem[0] = vtlb_RegisterHandlerTempl2(vuMicro,0,true); + vu1_micro_mem[0] = vtlb_RegisterHandlerTempl2(vuMicro,1,true); + + // Interpreter versions of VUs + vu0_micro_mem[1] = vtlb_RegisterHandlerTempl2(vuMicro,0,false); + vu1_micro_mem[1] = vtlb_RegisterHandlerTempl2(vuMicro,1,false); ////////////////////////////////////////////////////// // psHw Optimized Mappings @@ -719,7 +722,7 @@ void memReset() tlb_fallback_1 = vtlb_RegisterHandler( _ext_memRead8<1>, _ext_memRead16<1>, hwRead32_page_other, _ext_memRead64<1>, _ext_memRead128<1>, _ext_memWrite8<1>, _ext_memWrite16<1>, hwWrite32_page_other, _ext_memWrite64<1>, _ext_memWrite128<1> - ); + ); hw_by_page[0x0] = vtlb_RegisterHandler( _ext_memRead8<1>, _ext_memRead16<1>, hwRead32_page_00, _ext_memRead64<1>, _ext_memRead128<1>, @@ -763,6 +766,7 @@ void memReset() //vtlb_VMapUnmap(0x20000000,0x60000000); memMapPhy(); + memMapVUmicro(); memMapKernelMem(); memMapSupervisorMem(); memMapUserMem(); diff --git 
a/pcsx2/Memory.h b/pcsx2/Memory.h index dd372f00d0..07d14cba17 100644 --- a/pcsx2/Memory.h +++ b/pcsx2/Memory.h @@ -230,21 +230,22 @@ extern u8 g_RealGSMem[0x2000]; #define PSMu32(mem) (*(u32*)PSM(mem)) #define PSMu64(mem) (*(u64*)PSM(mem)) -void memAlloc(); -void memReset(); // clears PS2 ram and loads the bios. Throws Exception::FileNotFound on error. -void memSetKernelMode(); -void memSetSupervisorMode(); -void memSetUserMode(); -void memSetPageAddr(u32 vaddr, u32 paddr); -void memClearPageAddr(u32 vaddr); -void memShutdown(); +extern void memAlloc(); +extern void memReset(); // clears PS2 ram and loads the bios. Throws Exception::FileNotFound on error. +extern void memShutdown(); +extern void memSetKernelMode(); +extern void memSetSupervisorMode(); +extern void memSetUserMode(); +extern void memSetPageAddr(u32 vaddr, u32 paddr); +extern void memClearPageAddr(u32 vaddr); + +extern void memMapVUmicro(); + #ifdef __LINUX__ void SysPageFaultExceptionFilter( int signal, siginfo_t *info, void * ); void __fastcall InstallLinuxExceptionHandler(); void __fastcall ReleaseLinuxExceptionHandler(); -#endif - -#ifdef _WIN32 +#else int SysPageFaultExceptionFilter(EXCEPTION_POINTERS* eps); #endif @@ -258,13 +259,6 @@ void mmap_ResetBlockTracking(); extern void __fastcall memRead8(u32 mem, u8 *out); extern void __fastcall memRead16(u32 mem, u16 *out); extern void __fastcall memRead32(u32 mem, u32 *out); -/*int __fastcall _memRead64(u32 mem, u64 *out); -int __fastcall _memRead128(u32 mem, u64 *out); -void __fastcall _memWrite8 (u32 mem, u8 value); -void __fastcall _memWrite16(u32 mem, u16 value); -void __fastcall _memWrite32(u32 mem, u32 value); -void __fastcall _memWrite64(u32 mem, u64 value); -void __fastcall _memWrite128(u32 mem, u64 *value);*/ #define memRead64 vtlb_memRead64 #define memRead128 vtlb_memRead128 diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index 7c98b13eab..f36f7472d3 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -522,7 +522,7 @@ __forceinline bool 
_cpuBranchTest_Shared() { // We're in a BranchTest. All dynarec registers are flushed // so there is no need to freeze registers here. - CpuVU0->ExecuteBlock(); + CpuVU0.ExecuteBlock(); // This might be needed to keep the EE and VU0 in sync. // A better fix will require hefty changes to the VU recs. -_- diff --git a/pcsx2/SPR.cpp b/pcsx2/SPR.cpp index d32f479b56..398a556ad6 100644 --- a/pcsx2/SPR.cpp +++ b/pcsx2/SPR.cpp @@ -51,11 +51,11 @@ static void TestClearVUs(u32 madr, u32 size) if( madr >= 0x11000000 ) { if( madr < 0x11004000 ) { DbgCon::Notice("scratch pad clearing vu0"); - CpuVU0->Clear(madr&0xfff, size); + CpuVU0.Clear(madr&0xfff, size); } else if( madr >= 0x11008000 && madr < 0x1100c000 ) { DbgCon::Notice("scratch pad clearing vu1\n"); - CpuVU1->Clear(madr&0x3fff, size); + CpuVU1.Clear(madr&0x3fff, size); } } } diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp index ef70ae34d7..ab6d86bf89 100644 --- a/pcsx2/System.cpp +++ b/pcsx2/System.cpp @@ -248,7 +248,7 @@ void SysAllocateDynarecs() try { - recVU0.Allocate(); + VU0micro::recAlloc(); } catch( Exception::BaseException& ex ) { @@ -260,12 +260,12 @@ void SysAllocateDynarecs() ); g_Session.ForceDisableVU0rec = true; - recVU0.Shutdown(); + VU0micro::recShutdown(); } try { - recVU1.Allocate(); + VU1micro::recAlloc(); } catch( Exception::BaseException& ex ) { @@ -277,7 +277,7 @@ void SysAllocateDynarecs() ); g_Session.ForceDisableVU1rec = true; - recVU1.Shutdown(); + VU1micro::recShutdown(); } // If both VUrecs failed, then make sure the SuperVU is totally closed out: diff --git a/pcsx2/VU0.cpp b/pcsx2/VU0.cpp index e8b2b69824..6a3ad14df1 100644 --- a/pcsx2/VU0.cpp +++ b/pcsx2/VU0.cpp @@ -79,7 +79,7 @@ void _vu0WaitMicro() { VU0.flags&= ~VUFLAG_MFLAGSET; do { - CpuVU0->ExecuteBlock(); + CpuVU0.ExecuteBlock(); // knockout kings 2002 loops here if( VU0.cycle-startcycle > 0x1000 ) { Console::Notice("VU0 perma-stall, breaking execution..."); // (email zero if gfx are bad) @@ -351,7 +351,7 @@ void vu0Finish() int i 
= 0; while(i++ < 32) { - CpuVU0->ExecuteBlock(); + CpuVU0.ExecuteBlock(); if(!(VU0.VI[REG_VPU_STAT].UL & 0x1)) break; } diff --git a/pcsx2/VU0micro.cpp b/pcsx2/VU0micro.cpp index f6cdb710ea..59bc4130f7 100644 --- a/pcsx2/VU0micro.cpp +++ b/pcsx2/VU0micro.cpp @@ -84,7 +84,7 @@ void vu0ExecMicro(u32 addr) { if (addr != -1) VU0.VI[REG_TPC].UL = addr; _vuExecMicroDebug(VU0); - CpuVU0->ExecuteBlock(); + CpuVU0.ExecuteBlock(); // If the VU0 program didn't finish then we'll want to finish it up // pretty soon. This fixes vmhacks in some games (Naruto Ultimate Ninja 2) diff --git a/pcsx2/VU0microInterp.cpp b/pcsx2/VU0microInterp.cpp index d6112fcc26..ceca15dcad 100644 --- a/pcsx2/VU0microInterp.cpp +++ b/pcsx2/VU0microInterp.cpp @@ -193,58 +193,61 @@ void vu0Exec(VURegs* VU) if (VU->VF[0].f.w != 1.0f) DbgCon::Error("VF[0].w != 1.0!!!!\n"); } -static void intAlloc() +namespace VU0micro { -} - -static void intReset() -{ -} - -static void intStep() -{ - vu0Exec( &VU0 ); -} - -static void intExecuteBlock() -{ - int i; - -#ifdef _DEBUG - int prevbranch; -#endif - - for (i = 128; i--;) { - - if ((VU0.VI[REG_VPU_STAT].UL & 0x1) == 0) - break; - -#ifdef _DEBUG - prevbranch = vu0branch; -#endif - vu0Exec(&VU0); + static void intAlloc() + { } - if( i < 0 && (VU0.branch || VU0.ebit) ) { - // execute one more - vu0Exec(&VU0); + static void intShutdown() + { + } + + void __fastcall intClear(u32 Addr, u32 Size) + { + } + + static void intReset() + { + } + + static void intStep() + { + vu0Exec( &VU0 ); + } + + static void intExecuteBlock() + { + int i; + + #ifdef _DEBUG + int prevbranch; + #endif + + for (i = 128; i--;) { + + if ((VU0.VI[REG_VPU_STAT].UL & 0x1) == 0) + break; + + #ifdef _DEBUG + prevbranch = vu0branch; + #endif + vu0Exec(&VU0); + } + + if( i < 0 && (VU0.branch || VU0.ebit) ) { + // execute one more + vu0Exec(&VU0); + } } } -static void intClear(u32 Addr, u32 Size) -{ -} +using namespace VU0micro; -static void intShutdown() +const VUmicroCpu intVU0 = { -} - -VUmicroCpu 
intVU0 = -{ - intAlloc -, intReset + intReset , intStep , intExecuteBlock , intClear -, intShutdown }; diff --git a/pcsx2/VU1micro.cpp b/pcsx2/VU1micro.cpp index 5a2e0a4b75..d76c768e6d 100644 --- a/pcsx2/VU1micro.cpp +++ b/pcsx2/VU1micro.cpp @@ -78,7 +78,7 @@ void vu1ExecMicro(u32 addr) while(VU0.VI[REG_VPU_STAT].UL & 0x100) { VUM_LOG("vu1ExecMicro > Stalling until current microprogram finishes"); - CpuVU1->ExecuteBlock(); + CpuVU1.ExecuteBlock(); } VUM_LOG("vu1ExecMicro %x\n", addr); @@ -90,7 +90,7 @@ void vu1ExecMicro(u32 addr) if (addr != -1) VU1.VI[REG_TPC].UL = addr; _vuExecMicroDebug(VU1); - CpuVU1->ExecuteBlock(); + CpuVU1.ExecuteBlock(); } _vuRegsTables(VU1, VU1regs); diff --git a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp index 553cdd5554..4d5fcd3791 100644 --- a/pcsx2/VU1microInterp.cpp +++ b/pcsx2/VU1microInterp.cpp @@ -179,57 +179,59 @@ void vu1Exec(VURegs* VU) if (VU->VF[0].f.w != 1.0f) DbgCon::Error("VF[0].w != 1.0!!!!\n"); } -static void intAlloc() +namespace VU1micro { -} - -static void intReset() -{ -} - -static void intStep() -{ - vu1Exec( &VU1 ); -} - -static void intExecuteBlock() -{ - int i; -#ifdef _DEBUG - int prevbranch; -#endif - - for (i = 128; i--;) { - if ((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0) - break; - -#ifdef _DEBUG - prevbranch = vu1branch; -#endif - vu1Exec(&VU1); + void intAlloc() + { } - if( i < 0 && (VU1.branch || VU1.ebit) ) { - // execute one more - vu1Exec(&VU1); + void __fastcall intClear(u32 Addr, u32 Size) + { + } + + void intShutdown() + { + } + + static void intReset() + { + } + + static void intStep() + { + vu1Exec( &VU1 ); + } + + static void intExecuteBlock() + { + int i; + #ifdef _DEBUG + int prevbranch; + #endif + + for (i = 128; i--;) { + if ((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0) + break; + + #ifdef _DEBUG + prevbranch = vu1branch; + #endif + vu1Exec(&VU1); + } + + if( i < 0 && (VU1.branch || VU1.ebit) ) { + // execute one more + vu1Exec(&VU1); + } } } +using namespace VU1micro; -static void 
intClear(u32 Addr, u32 Size) +const VUmicroCpu intVU1 = { -} - -static void intShutdown() -{ -} - -VUmicroCpu intVU1 = -{ - intAlloc -, intReset + intReset , intStep , intExecuteBlock , intClear -, intShutdown }; diff --git a/pcsx2/VUmicro.h b/pcsx2/VUmicro.h index 1c71cf18a1..71fbc99982 100644 --- a/pcsx2/VUmicro.h +++ b/pcsx2/VUmicro.h @@ -23,21 +23,43 @@ struct VUmicroCpu { - void (*Allocate)(); // throws exceptions on failure. void (*Reset)(); void (*Step)(); void (*ExecuteBlock)(); // VUs should support block-level execution only. - void (*Clear)(u32 Addr, u32 Size); - void (*Shutdown)(); // deallocates memory reserved by Allocate + void (__fastcall *Clear)(u32 Addr, u32 Size); }; -extern VUmicroCpu *CpuVU0; -extern VUmicroCpu intVU0; -extern VUmicroCpu recVU0; +extern VUmicroCpu CpuVU0; +extern const VUmicroCpu intVU0; +extern const VUmicroCpu recVU0; -extern VUmicroCpu *CpuVU1; -extern VUmicroCpu intVU1; -extern VUmicroCpu recVU1; +extern VUmicroCpu CpuVU1; +extern const VUmicroCpu intVU1; +extern const VUmicroCpu recVU1; + +namespace VU0micro +{ + extern void recAlloc(); + extern void recShutdown(); + extern void __fastcall recClear(u32 Addr, u32 Size); + + // Note: Interpreter functions are dummies -- they don't actually do anything. + extern void intAlloc(); + extern void intShutdown(); + extern void __fastcall intClear(u32 Addr, u32 Size); +} + +namespace VU1micro +{ + extern void recAlloc(); + extern void recShutdown(); + extern void __fastcall recClear(u32 Addr, u32 Size); + + // Note: Interpreter functions are dummies -- they don't actually do anything. + extern void intAlloc(); + extern void intShutdown(); + extern void __fastcall intClear(u32 Addr, u32 Size); +} ///////////////////////////////////////////////////////////////// // These functions initialize memory for both VUs. 
diff --git a/pcsx2/VUmicroMem.cpp b/pcsx2/VUmicroMem.cpp index 194b93dfa6..fe1352c355 100644 --- a/pcsx2/VUmicroMem.cpp +++ b/pcsx2/VUmicroMem.cpp @@ -31,8 +31,11 @@ extern PSMEMORYBLOCK s_psVuMem; extern PSMEMORYMAP *memLUT; #endif -VUmicroCpu *CpuVU0; -VUmicroCpu *CpuVU1; +// The following CpuVU objects are value types instead of handles or pointers because they are +// modified on the fly to implement VU1 Skip. + +VUmicroCpu CpuVU0; // contains a working copy of the VU0 cpu functions/API +VUmicroCpu CpuVU1; // contains a working copy of the VU1 cpu functions/API static void DummyExecuteVU1Block(void) { @@ -40,32 +43,27 @@ static void DummyExecuteVU1Block(void) VU1.vifRegs->stat &= ~4; // also reset the bit (grandia 3 works) } -void (*recVU1EB)(), (*intVU1EB)(); - void vu1MicroEnableSkip() { - CpuVU1->ExecuteBlock = DummyExecuteVU1Block; + CpuVU1.ExecuteBlock = DummyExecuteVU1Block; } void vu1MicroDisableSkip() { - CpuVU1->ExecuteBlock = CHECK_VU1REC ? recVU1EB : intVU1EB; + CpuVU1.ExecuteBlock = CHECK_VU1REC ? recVU1.ExecuteBlock : intVU1.ExecuteBlock; } bool vu1MicroIsSkipping() { - return CpuVU1->ExecuteBlock == DummyExecuteVU1Block; + return CpuVU1.ExecuteBlock == DummyExecuteVU1Block; } void vuMicroCpuReset() { - recVU1EB = recVU1.ExecuteBlock; - intVU1EB = intVU1.ExecuteBlock; - - CpuVU0 = CHECK_VU0REC ? &recVU0 : &intVU0; - CpuVU1 = CHECK_VU1REC ? &recVU1 : &intVU1; - CpuVU0->Reset(); - CpuVU1->Reset(); + CpuVU0 = CHECK_VU0REC ? recVU0 : intVU0; + CpuVU1 = CHECK_VU1REC ? recVU1 : intVU1; + CpuVU0.Reset(); + CpuVU1.Reset(); // SuperVUreset will do nothing is none of the recs are initialized. // But it's needed if one or the other is initialized. 
@@ -180,6 +178,8 @@ void vuMicroMemReset() jASSUME( VU0.Mem != NULL ); jASSUME( VU1.Mem != NULL ); + memMapVUmicro(); + // === VU0 Initialization === memzero_obj(VU0.ACC); memzero_obj(VU0.VF); @@ -221,7 +221,6 @@ void vuMicroMemReset() // VU1.VI = (REG_VI*)(VU0.Mem + 0x4200); VU1.vuExec = vu1Exec; VU1.vifRegs = vif1Regs; - } void SaveState::vuMicroFreeze() diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 141ab8b468..8693a2d269 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -219,7 +219,7 @@ __forceinline void vif1FLUSH() { if( VU0.VI[REG_VPU_STAT].UL & 0x100 ) { do { - CpuVU1->ExecuteBlock(); + CpuVU1.ExecuteBlock(); } while(VU0.VI[REG_VPU_STAT].UL & 0x100); g_vifCycles+= (VU1.cycle - _cycles)*BIAS; @@ -813,7 +813,7 @@ static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size) { }*/ if (memcmp(VU0.Micro + addr, data, size << 2)) { memcpy_fast(VU0.Micro + addr, data, size << 2); - CpuVU0->Clear(addr, size); + CpuVU0.Clear(addr, size); } } @@ -1487,7 +1487,7 @@ static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size) { assert( VU1.Micro > 0 ); if (memcmp(VU1.Micro + addr, data, size << 2)) { memcpy_fast(VU1.Micro + addr, data, size << 2); - CpuVU1->Clear(addr, size); + CpuVU1.Clear(addr, size); } } diff --git a/pcsx2/vtlb.cpp b/pcsx2/vtlb.cpp index 90619d29ae..a7c7941acc 100644 --- a/pcsx2/vtlb.cpp +++ b/pcsx2/vtlb.cpp @@ -1,3 +1,21 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2008 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + /* EE physical map : [0000 0000,1000 0000) -> Ram (mirrored ?) @@ -20,10 +38,9 @@ #include "Common.h" #include "vtlb.h" #include "COP0.h" -#include "x86/ix86/ix86.h" -#include "iCore.h" using namespace R5900; +using namespace vtlb_private; #ifdef PCSX2_DEVBUILD #define verify(x) {if (!(x)) { (*(u8*)0)=3; }} @@ -31,21 +48,16 @@ using namespace R5900; #define verify jASSUME #endif -static const uint VTLB_PAGE_BITS =12; -static const uint VTLB_PAGE_MASK=(4095); -static const uint VTLB_PAGE_SIZE=(4096); - -static const uint VTLB_PMAP_ITEMS=(0x20000000/VTLB_PAGE_SIZE); -static const uint VTLB_PMAP_SZ=0x20000000; -static const uint VTLB_VMAP_ITEMS=(0x100000000ULL/VTLB_PAGE_SIZE); -static s32 pmap[VTLB_PMAP_ITEMS]; //512KB -static s32 vmap[VTLB_VMAP_ITEMS]; //4MB - -// first indexer -- 8/16/32/64/128 bit tables [values 0-4] -// second indexer -- read/write [0 or 1] -// third indexer -- 128 pages of memory! -static void* RWFT[5][2][128]; +namespace vtlb_private +{ + s32 pmap[VTLB_PMAP_ITEMS]; //512KB + s32 vmap[VTLB_VMAP_ITEMS]; //4MB + // first indexer -- 8/16/32/64/128 bit tables [values 0-4] + // second indexer -- read/write [0 or 1] + // third indexer -- 128 pages of memory! + void* RWFT[5][2][128]; +} vtlbHandler vtlbHandlerCount=0; @@ -81,7 +93,11 @@ callfunction: jmp [readfunctions8-0x800000+eax]; }*/ -// For 8, 16, and 32 bit accesses +///////////////////////////////////////////////////////////////////////// +// Interpreter Implementations of VTLB Memory Operations. +// See recVTLB.cpp for the dynarec versions. + +// Interpreted VTLB lookup for 8, 16, and 32 bit accesses template __forceinline DataType __fastcall MemOp_r0(u32 addr) { @@ -107,7 +123,7 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr) } } -// For 64 and 128 bit accesses. 
+// Interpreted VTLB lookup for 64 and 128 bit accesses. template __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data) { @@ -234,26 +250,36 @@ void __fastcall vtlb_memWrite128(u32 mem, const mem128_t *value) } // Some functions used by interpreters and stuff... +// These maintain a "consistent" API with 64/128 reads. void __fastcall memRead8(u32 mem, u8 *out) { *out = vtlb_memRead8( mem ); } void __fastcall memRead16(u32 mem, u16 *out) { *out = vtlb_memRead16( mem ); } void __fastcall memRead32(u32 mem, u32 *out) { *out = vtlb_memRead32( mem ); } +///////////////////////////////////////////////////////////////////////// +// Error / TLB Miss Handlers +// +// Generates a VtlbMiss Exception static __forceinline void vtlb_Miss(u32 addr,u32 mode) { - SysPrintf("vtlb miss : addr 0x%X, mode %d\n",addr,mode); + Console::Error("vtlb miss : addr 0x%X, mode %d", params addr,mode); verify(false); + if (mode==0) cpuTlbMissR(addr, cpuRegs.branch); else cpuTlbMissW(addr, cpuRegs.branch); } + +// Just dies a horrible death for now. 
+// static __forceinline void vtlb_BusError(u32 addr,u32 mode) { - SysPrintf("vtlb bus error : addr 0x%X, mode %d\n",addr,mode); + Console::Error("vtlb bus error : addr 0x%X, mode %d\n",params addr,mode); verify(false); } -///// + +///// Virtual Mapping Errors (TLB Miss) template mem8_t __fastcall vtlbUnmappedVRead8(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; } template @@ -274,7 +300,8 @@ template void __fastcall vtlbUnmappedVWrite64(u32 addr,const mem64_t* data) { vtlb_Miss(addr|saddr,1); } template void __fastcall vtlbUnmappedVWrite128(u32 addr,const mem128_t* data) { vtlb_Miss(addr|saddr,1); } -///// + +///// Physical Mapping Errors (Bus Error) template mem8_t __fastcall vtlbUnmappedPRead8(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; } template @@ -295,19 +322,34 @@ template void __fastcall vtlbUnmappedPWrite64(u32 addr,const mem64_t* data) { vtlb_BusError(addr|saddr,1); } template void __fastcall vtlbUnmappedPWrite128(u32 addr,const mem128_t* data) { vtlb_BusError(addr|saddr,1); } -///// -mem8_t __fastcall vtlbDefaultPhyRead8(u32 addr) { SysPrintf("vtlbDefaultPhyRead8: 0x%X\n",addr); verify(false); return -1; } -mem16_t __fastcall vtlbDefaultPhyRead16(u32 addr) { SysPrintf("vtlbDefaultPhyRead16: 0x%X\n",addr); verify(false); return -1; } -mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr) { SysPrintf("vtlbDefaultPhyRead32: 0x%X\n",addr); verify(false); return -1; } -void __fastcall vtlbDefaultPhyRead64(u32 addr,mem64_t* data) { SysPrintf("vtlbDefaultPhyRead64: 0x%X\n",addr); verify(false); } -void __fastcall vtlbDefaultPhyRead128(u32 addr,mem128_t* data) { SysPrintf("vtlbDefaultPhyRead128: 0x%X\n",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite8(u32 addr,mem8_t data) { SysPrintf("vtlbDefaultPhyWrite8: 0x%X\n",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite16(u32 addr,mem16_t data) { SysPrintf("vtlbDefaultPhyWrite16: 0x%X\n",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite32(u32 addr,mem32_t data) { 
SysPrintf("vtlbDefaultPhyWrite32: 0x%X\n",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { SysPrintf("vtlbDefaultPhyWrite64: 0x%X\n",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { SysPrintf("vtlbDefaultPhyWrite128: 0x%X\n",addr); verify(false); } -///// +///// VTLB mapping errors (unmapped address spaces) +mem8_t __fastcall vtlbDefaultPhyRead8(u32 addr) { Console::Error("vtlbDefaultPhyRead8: 0x%X",params addr); verify(false); return -1; } +mem16_t __fastcall vtlbDefaultPhyRead16(u32 addr) { Console::Error("vtlbDefaultPhyRead16: 0x%X",params addr); verify(false); return -1; } +mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr) { Console::Error("vtlbDefaultPhyRead32: 0x%X",params addr); verify(false); return -1; } +void __fastcall vtlbDefaultPhyRead64(u32 addr,mem64_t* data) { Console::Error("vtlbDefaultPhyRead64: 0x%X",params addr); verify(false); } +void __fastcall vtlbDefaultPhyRead128(u32 addr,mem128_t* data) { Console::Error("vtlbDefaultPhyRead128: 0x%X",params addr); verify(false); } + +void __fastcall vtlbDefaultPhyWrite8(u32 addr,mem8_t data) { Console::Error("vtlbDefaultPhyWrite8: 0x%X",params addr); verify(false); } +void __fastcall vtlbDefaultPhyWrite16(u32 addr,mem16_t data) { Console::Error("vtlbDefaultPhyWrite16: 0x%X",params addr); verify(false); } +void __fastcall vtlbDefaultPhyWrite32(u32 addr,mem32_t data) { Console::Error("vtlbDefaultPhyWrite32: 0x%X",params addr); verify(false); } +void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { Console::Error("vtlbDefaultPhyWrite64: 0x%X",params addr); verify(false); } +void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console::Error("vtlbDefaultPhyWrite128: 0x%X",params addr); verify(false); } + + +///////////////////////////////////////////////////////////////////////// +// VTLB Public API -- Init/Term/RegisterHandler stuff +// + + +// Registers a handler into the VTLB's 
internal handler array. The handler defines specific behavior +// for how memory pages bound to the handler are read from / written to. If any of the handler pointers +// are NULL, the memory operations will be mapped to the BusError handler (thus generating BusError +// exceptions if the emulated app attempts to access them). +// +// Note: All handlers persist across calls to vtlb_Reset(), but are wiped/invalidated by calls to vtlb_Init() +// +// Returns a handle for the newly created handler. See vtlb_MapHandler for use of the return value. vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP* r32,vltbMemR64FP* r64,vltbMemR128FP* r128, vltbMemW8FP* w8,vltbMemW16FP* w16,vltbMemW32FP* w32,vltbMemW64FP* w64,vltbMemW128FP* w128) { @@ -329,6 +371,13 @@ vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP return rv; } +// Maps the given handler (created with vtlb_RegisterHandler) to the specified memory region. +// New mappings always assume priority over previous mappings, so place "generic" mappings for +// large areas of memory first, and then specialize specific small regions of memory afterward. +// A single handler can be mapped to many different regions by using multiple calls to this +// function. +// +// The memory region start and size parameters must be pagesize aligned. 
void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size) { verify(0==(start&VTLB_PAGE_MASK)); @@ -343,6 +392,7 @@ void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size) size-=VTLB_PAGE_SIZE; } } + void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize) { s32 baseint=(s32)base; @@ -370,6 +420,7 @@ void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize) } } } + void vtlb_Mirror(u32 new_region,u32 start,u32 size) { verify(0==(new_region&VTLB_PAGE_MASK)); @@ -394,6 +445,7 @@ __forceinline void* vtlb_GetPhyPtr(u32 paddr) return reinterpret_cast(pmap[paddr>>VTLB_PAGE_BITS]+(paddr&VTLB_PAGE_MASK)); } + //virtual mappings //TODO: Add invalid paddr checks void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz) @@ -459,14 +511,14 @@ void vtlb_VMapUnmap(u32 vaddr,u32 sz) } } +// Clears vtlb handlers and memory mappings. void vtlb_Init() { - //Reset all vars to default values vtlbHandlerCount=0; memzero_obj(RWFT); //Register default handlers - //Unmapped Virt handlers _MUST_ be registed first. + //Unmapped Virt handlers _MUST_ be registered first. //On address translation the top bit cannot be preserved.This is not normaly a problem since //the physical address space can be 'compressed' to just 29 bits.However, to properly handle exceptions //there must be a way to get the full address back.Thats why i use these 2 functions and encode the hi bit directly into em :) @@ -499,6 +551,8 @@ void vtlb_Init() vtlb_VMapUnmap((VTLB_VMAP_ITEMS-1)*VTLB_PAGE_SIZE,VTLB_PAGE_SIZE); } +// Performs a COP0-level reset of the PS2's TLB. +// This function should probably be part of the COP0 rather than here in VTLB. 
void vtlb_Reset() { for(int i=0; i<48; i++) UnmapTLB(i); @@ -509,284 +563,4 @@ void vtlb_Term() //nothing to do for now } -#include "iR5900.h" - -//ecx = addr -//edx = ptr -void vtlb_DynGenRead64(u32 bits) -{ - /* - u32 vmv=vmap[addr>>VTLB_PAGE_BITS]; - s32 ppf=addr+vmv; - if (!(ppf<0)) - { - data[0]=*reinterpret_cast(ppf); - if (DataSize==128) - data[1]=*reinterpret_cast(ppf+8); - return 0; - } - else - { - //has to: translate, find function, call function - u32 hand=(u8)vmv; - u32 paddr=ppf-hand+0x80000000; - //SysPrintf("Translted 0x%08X to 0x%08X\n",addr,paddr); - return reinterpret_cast::HandlerType*>(RWFT[TemplateHelper::sidx][0][hand])(paddr,data); - } - - mov eax,ecx; - shr eax,VTLB_PAGE_BITS; - mov eax,[eax*4+vmap]; - add ecx,eax; - js _fullread; - - //these are wrong order, just an example ... - mov [eax],ecx; - mov ecx,[edx]; - mov [eax+4],ecx; - mov ecx,[edx+4]; - mov [eax+4+4],ecx; - mov ecx,[edx+4+4]; - mov [eax+4+4+4+4],ecx; - mov ecx,[edx+4+4+4+4]; - ///.... - - jmp cont; - _fullread: - movzx eax,al; - sub ecx,eax; - sub ecx,0x80000000; - call [eax+stuff]; - cont: - ........ 
- - */ - MOV32RtoR(EAX,ECX); - SHR32ItoR(EAX,VTLB_PAGE_BITS); - MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); - ADD32RtoR(ECX,EAX); - u8* _fullread=JS8(0); - switch(bits) - { - case 64: - if( _hasFreeMMXreg() ) - { - const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); - MOVQRmtoROffset(freereg,ECX,0); - MOVQRtoRmOffset(EDX,freereg,0); - _freeMMXreg(freereg); - } - else - { - MOV32RmtoR(EAX,ECX); - MOV32RtoRm(EDX,EAX); - - MOV32RmtoROffset(EAX,ECX,4); - MOV32RtoRmOffset(EDX,EAX,4); - } - break; - - case 128: - if( _hasFreeXMMreg() ) - { - const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); - SSE2_MOVDQARmtoROffset(freereg,ECX,0); - SSE2_MOVDQARtoRmOffset(EDX,freereg,0); - _freeXMMreg(freereg); - } - else - { - MOV32RmtoR(EAX,ECX); - MOV32RtoRm(EDX,EAX); - - MOV32RmtoROffset(EAX,ECX,4); - MOV32RtoRmOffset(EDX,EAX,4); - - MOV32RmtoROffset(EAX,ECX,8); - MOV32RtoRmOffset(EDX,EAX,8); - - MOV32RmtoROffset(EAX,ECX,12); - MOV32RtoRmOffset(EDX,EAX,12); - } - break; - - jNO_DEFAULT - } - - u8* cont=JMP8(0); - x86SetJ8(_fullread); - int szidx; - - switch(bits) - { - case 64: szidx=3; break; - case 128: szidx=4; break; - jNO_DEFAULT - } - - MOVZX32R8toR(EAX,EAX); - SUB32RtoR(ECX,EAX); - //eax=[funct+eax] - MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2); - SUB32ItoR(ECX,0x80000000); - CALL32R(EAX); - - x86SetJ8(cont); -} - -// ecx - source address to read from -// Returns read value in eax. 
-void vtlb_DynGenRead32(u32 bits, bool sign) -{ - jASSUME( bits <= 32 ); - - MOV32RtoR(EAX,ECX); - SHR32ItoR(EAX,VTLB_PAGE_BITS); - MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); - ADD32RtoR(ECX,EAX); - u8* _fullread=JS8(0); - - switch(bits) - { - case 8: - if( sign ) - MOVSX32Rm8toR(EAX,ECX); - else - MOVZX32Rm8toR(EAX,ECX); - break; - - case 16: - if( sign ) - MOVSX32Rm16toR(EAX,ECX); - else - MOVZX32Rm16toR(EAX,ECX); - break; - - case 32: - MOV32RmtoR(EAX,ECX); - break; - - jNO_DEFAULT - } - - u8* cont=JMP8(0); - x86SetJ8(_fullread); - int szidx; - - switch(bits) - { - case 8: szidx=0; break; - case 16: szidx=1; break; - case 32: szidx=2; break; - jNO_DEFAULT - } - - MOVZX32R8toR(EAX,EAX); - SUB32RtoR(ECX,EAX); - //eax=[funct+eax] - MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2); - SUB32ItoR(ECX,0x80000000); - CALL32R(EAX); - - // perform sign extension on the result: - - if( bits==8 ) - { - if( sign ) - MOVSX32R8toR(EAX,EAX); - else - MOVZX32R8toR(EAX,EAX); - } - else if( bits==16 ) - { - if( sign ) - MOVSX32R16toR(EAX,EAX); - else - MOVZX32R16toR(EAX,EAX); - } - - x86SetJ8(cont); -} - -void vtlb_DynGenWrite(u32 sz) -{ - MOV32RtoR(EAX,ECX); - SHR32ItoR(EAX,VTLB_PAGE_BITS); - MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); - ADD32RtoR(ECX,EAX); - u8* _full=JS8(0); - switch(sz) - { - //8 , 16, 32 : data on EDX - case 8: - MOV8RtoRm(ECX,EDX); - break; - case 16: - MOV16RtoRm(ECX,EDX); - break; - case 32: - MOV32RtoRm(ECX,EDX); - break; - - case 64: - if( _hasFreeMMXreg() ) - { - const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); - MOVQRmtoROffset(freereg,EDX,0); - MOVQRtoRmOffset(ECX,freereg,0); - _freeMMXreg( freereg ); - } - else - { - MOV32RmtoR(EAX,EDX); - MOV32RtoRm(ECX,EAX); - - MOV32RmtoROffset(EAX,EDX,4); - MOV32RtoRmOffset(ECX,EAX,4); - } - break; - - case 128: - if( _hasFreeXMMreg() ) - { - const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); - SSE2_MOVDQARmtoROffset(freereg,EDX,0); - SSE2_MOVDQARtoRmOffset(ECX,freereg,0); - _freeXMMreg( freereg ); - } - else - { - 
MOV32RmtoR(EAX,EDX); - MOV32RtoRm(ECX,EAX); - MOV32RmtoROffset(EAX,EDX,4); - MOV32RtoRmOffset(ECX,EAX,4); - MOV32RmtoROffset(EAX,EDX,8); - MOV32RtoRmOffset(ECX,EAX,8); - MOV32RmtoROffset(EAX,EDX,12); - MOV32RtoRmOffset(ECX,EAX,12); - } - break; - } - u8* cont=JMP8(0); - x86SetJ8(_full); - int szidx=0; - - switch(sz) - { - case 8: szidx=0; break; - case 16: szidx=1; break; - case 32: szidx=2; break; - case 64: szidx=3; break; - case 128: szidx=4; break; - } - MOVZX32R8toR(EAX,EAX); - SUB32RtoR(ECX,EAX); - //eax=[funct+eax] - MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][1],2); - SUB32ItoR(ECX,0x80000000); - CALL32R(EAX); - - x86SetJ8(cont); -} - #endif // PCSX2_VIRTUAL_MEM diff --git a/pcsx2/vtlb.h b/pcsx2/vtlb.h index 583a19b355..4c031de32a 100644 --- a/pcsx2/vtlb.h +++ b/pcsx2/vtlb.h @@ -27,41 +27,56 @@ typedef void __fastcall vltbMemW128FP(u32 addr,const mem128_t* data); typedef u32 vtlbHandler; -void vtlb_Init(); -void vtlb_Reset(); -void vtlb_Term(); +extern void vtlb_Init(); +extern void vtlb_Reset(); +extern void vtlb_Term(); //physical stuff vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP* r32,vltbMemR64FP* r64,vltbMemR128FP* r128, vltbMemW8FP* w8,vltbMemW16FP* w16,vltbMemW32FP* w32,vltbMemW64FP* w64,vltbMemW128FP* w128); -void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size); -void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize=0); +extern void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size); +extern void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize=0); extern void* vtlb_GetPhyPtr(u32 paddr); -//void vtlb_Mirror(u32 new_region,u32 start,u32 size); // -> not working yet :( +//extern void vtlb_Mirror(u32 new_region,u32 start,u32 size); // -> not working yet :( //virtual mappings -void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz); -void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz); -void vtlb_VMapUnmap(u32 vaddr,u32 sz); +extern void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz); +extern void 
vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz); +extern void vtlb_VMapUnmap(u32 vaddr,u32 sz); //Memory functions -u8 __fastcall vtlb_memRead8(u32 mem); -u16 __fastcall vtlb_memRead16(u32 mem); -u32 __fastcall vtlb_memRead32(u32 mem); -void __fastcall vtlb_memRead64(u32 mem, u64 *out); -void __fastcall vtlb_memRead128(u32 mem, u64 *out); -void __fastcall vtlb_memWrite8 (u32 mem, u8 value); -void __fastcall vtlb_memWrite16(u32 mem, u16 value); -void __fastcall vtlb_memWrite32(u32 mem, u32 value); -void __fastcall vtlb_memWrite64(u32 mem, const u64* value); -void __fastcall vtlb_memWrite128(u32 mem, const u64* value); +extern u8 __fastcall vtlb_memRead8(u32 mem); +extern u16 __fastcall vtlb_memRead16(u32 mem); +extern u32 __fastcall vtlb_memRead32(u32 mem); +extern void __fastcall vtlb_memRead64(u32 mem, u64 *out); +extern void __fastcall vtlb_memRead128(u32 mem, u64 *out); +extern void __fastcall vtlb_memWrite8 (u32 mem, u8 value); +extern void __fastcall vtlb_memWrite16(u32 mem, u16 value); +extern void __fastcall vtlb_memWrite32(u32 mem, u32 value); +extern void __fastcall vtlb_memWrite64(u32 mem, const u64* value); +extern void __fastcall vtlb_memWrite128(u32 mem, const u64* value); extern void vtlb_DynGenWrite(u32 sz); extern void vtlb_DynGenRead32(u32 bits, bool sign); extern void vtlb_DynGenRead64(u32 sz); +namespace vtlb_private +{ + static const uint VTLB_PAGE_BITS = 12; + static const uint VTLB_PAGE_MASK = 4095; + static const uint VTLB_PAGE_SIZE = 4096; + + static const uint VTLB_PMAP_ITEMS = 0x20000000 / VTLB_PAGE_SIZE; + static const uint VTLB_PMAP_SZ = 0x20000000; + static const uint VTLB_VMAP_ITEMS = 0x100000000ULL / VTLB_PAGE_SIZE; + + extern void* RWFT[5][2][128]; + extern s32 pmap[VTLB_PMAP_ITEMS]; //512KB + extern s32 vmap[VTLB_VMAP_ITEMS]; //4MB +} + #endif #endif diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index de0b23a638..7dc26ce769 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj 
+++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2696,6 +2696,10 @@ RelativePath="..\..\MemoryVM.cpp" > + + diff --git a/pcsx2/x86/iCOP2.cpp b/pcsx2/x86/iCOP2.cpp index b361f7394e..6e909a6280 100644 --- a/pcsx2/x86/iCOP2.cpp +++ b/pcsx2/x86/iCOP2.cpp @@ -199,7 +199,7 @@ static void recCTC2(s32 info) MOV32ItoM((uptr)&VU0.VI[_Fs_].UL,g_cpuConstRegs[_Rt_].UL[0]); //PUSH32I( -1 ); iFlushCall(FLUSH_NOCONST); - CALLFunc((uptr)CpuVU0->ExecuteBlock); + CALLFunc((uptr)CpuVU0.ExecuteBlock); //CALLFunc((uptr)vu0ExecMicro); //ADD32ItoR( ESP, 4 ); break; diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index 85d7acf9cc..c8a5131a2d 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -26,12 +26,21 @@ namespace VU0micro { - - static void recAlloc() + void recAlloc() { SuperVUAlloc(0); } + void __fastcall recClear(u32 Addr, u32 Size) + { + SuperVUClear(Addr, Size*4, 0); + } + + void recShutdown() + { + SuperVUDestroy( 0 ); + } + static void recReset() { SuperVUReset(0); @@ -54,26 +63,14 @@ namespace VU0micro SuperVUExecuteProgram(VU0.VI[ REG_TPC ].UL & 0xfff, 0); FreezeXMMRegs(0); } - - static void recClear(u32 Addr, u32 Size) - { - SuperVUClear(Addr, Size*4, 0); - } - - static void recShutdown() - { - SuperVUDestroy( 0 ); - } } using namespace VU0micro; -VUmicroCpu recVU0 = +const VUmicroCpu recVU0 = { - recAlloc -, recReset + recReset , recStep , recExecuteBlock , recClear -, recShutdown }; diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 235b9b770f..5c766e8d5e 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -33,6 +33,22 @@ extern u32 vudump; namespace VU1micro { + void recAlloc() + { + SuperVUAlloc(1); + } + + void __fastcall recClear( u32 Addr, u32 Size ) + { + assert( (Addr&7) == 0 ); + SuperVUClear(Addr, Size*4, 1); + } + + void recShutdown() + { + SuperVUDestroy( 1 ); + } + // commented out because I'm not sure it actually works anymore with SuperVU (air) /*static void iVU1DumpBlock() { @@ -64,16 
+80,11 @@ namespace VU1micro fclose( f ); }*/ - static void recAlloc() - { - SuperVUAlloc(1); - } - static void recReset() { SuperVUReset(1); - // these shouldn't be needed, but shouldn't hurt anythign either. + // these shouldn't be needed, but shouldn't hurt anything either. x86FpuState = FPU_STATE; iCWstate = 0; } @@ -111,28 +122,14 @@ namespace VU1micro } while( VU0.VI[ REG_VPU_STAT ].UL&0x100 ); FreezeXMMRegs(0); } - - static void recClear( u32 Addr, u32 Size ) - { - assert( (Addr&7) == 0 ); - SuperVUClear(Addr, Size*4, 1); - } - - static void recShutdown() - { - SuperVUDestroy( 1 ); - } - } using namespace VU1micro; -VUmicroCpu recVU1 = +const VUmicroCpu recVU1 = { - recAlloc -, recReset + recReset , recStep , recExecuteBlock , recClear -, recShutdown }; diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 15464ab77c..62250154fc 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -443,7 +443,7 @@ void SuperVUReset(int vuindex) } // clear the block and any joining blocks -void SuperVUClear(u32 startpc, u32 size, int vuindex) +__forceinline void SuperVUClear(u32 startpc, u32 size, int vuindex) { vector::iterator itrange; list::iterator it = s_listVUHeaders[vuindex].begin(); diff --git a/pcsx2/x86/iVUzerorec.h b/pcsx2/x86/iVUzerorec.h index 2e8ce1b0d3..9af1d42dbe 100644 --- a/pcsx2/x86/iVUzerorec.h +++ b/pcsx2/x86/iVUzerorec.h @@ -31,20 +31,20 @@ extern void SuperVUReset(int vuindex); // if vuindex is -1, resets everything #ifdef __LINUX__ extern "C" { #endif -void SuperVUExecuteProgram(u32 startpc, int vuindex); -void SuperVUEndProgram(); -void svudispfntemp(); +extern void SuperVUExecuteProgram(u32 startpc, int vuindex); +extern void SuperVUEndProgram(); +extern void svudispfntemp(); #ifdef __LINUX__ } #endif -void SuperVUClear(u32 startpc, u32 size, int vuindex); +extern void SuperVUClear(u32 startpc, u32 size, int vuindex); // read = 0, will write to reg // read = 1, will read from reg // read = 2, addr of previously 
written reg (used for status and clip flags) -u32 SuperVUGetVIAddr(int reg, int read); +extern u32 SuperVUGetVIAddr(int reg, int read); // if p == 0, flush q else flush p; if wait is != 0, waits for p/q -void SuperVUFlush(int p, int wait); +extern void SuperVUFlush(int p, int wait); #endif diff --git a/pcsx2/x86/ix86-32/aR5900-32.S b/pcsx2/x86/ix86-32/aR5900-32.S index 1e2ae01135..ee65cebeb0 100644 --- a/pcsx2/x86/ix86-32/aR5900-32.S +++ b/pcsx2/x86/ix86-32/aR5900-32.S @@ -12,11 +12,16 @@ #define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot #define BASEBLOCK_SIZE 2 // in dwords -#define PCOFFSET 0x2a8 +#define PCOFFSET 0x2a8 // this must always match what Pcsx2 displays at startup #define REG_PC %ecx #define REG_BLOCK %esi +////////////////////////////////////////////////////////////////////////// +// Recompiles the next block, and links the old block directly to it. +// This is a one-shot execution for any block which uses it. Once the block +// has been statically linked to the new block, this function will be bypassed +// .globl Dispatcher Dispatcher: # EDX contains the jump addr to modify @@ -24,48 +29,59 @@ Dispatcher: # calc PC_GETBLOCK # ((BASEBLOCK*)(recLUT[((u32)(x)) >> 16] + (sizeof(BASEBLOCK)/4)*((x) & 0xffff))) - mov %eax, dword ptr [cpuRegs + PCOFFSET] - mov REG_BLOCK, %eax - mov REG_PC, %eax - shr %eax, 16 - and REG_BLOCK, 0xffff - shl %eax, 2 - add %eax, dword ptr [recLUT] - shl REG_BLOCK, 1 - add REG_BLOCK, dword ptr [%eax] - + #mov %eax, dword ptr [cpuRegs + PCOFFSET] + #mov REG_BLOCK, %eax + #mov REG_PC, %eax + #shr %eax, 16 + #and REG_BLOCK, 0xffff + #shl %eax, 2 + #add %eax, dword ptr [recLUT] + #shl REG_BLOCK, 1 + #add REG_BLOCK, dword ptr [%eax] + + mov %eax,dword ptr [cpuRegs+PCOFFSET] + mov %ecx,%eax + mov REG_PC,%eax + shr %eax,10h + and %ecx,0FFFFh + mov %edx,dword ptr [recLUT] + mov %eax,dword ptr [edx+eax*4] + lea %ecx,[eax+ecx*2] + // check if startpc == cpuRegs.pc //and %ecx, 0x5fffffff // remove higher bits - cmp REG_PC, dword ptr
[REG_BLOCK+BLOCKTYPE_STARTPC] + cmp REG_PC, dword ptr [%ecx+BLOCKTYPE_STARTPC] je Dispatcher_CheckPtr // recompile push REG_BLOCK push REG_PC // pc call recRecompile - add %esp, 4 // pop old param - pop REG_BLOCK + add %esp, 4 + pop %eax // eax is now the REG_BLOCK Dispatcher_CheckPtr: - mov REG_BLOCK, dword ptr [REG_BLOCK] + mov %eax, dword ptr [%eax] #ifdef _DEBUG - test REG_BLOCK, REG_BLOCK + test %eax, %eax jnz Dispatcher_CallFn // throw an exception int 10 Dispatcher_CallFn: #endif - - and REG_BLOCK, 0x0fffffff - mov %edx, REG_BLOCK + and %eax, 0x0fffffff pop %ecx // x86Ptr to mod + mov %edx, %eax sub %edx, %ecx sub %edx, 4 mov dword ptr [%ecx], %edx - jmp REG_BLOCK + jmp %eax +////////////////////////////////////////////////////////////////////////// +// edx - baseblock->startpc +// stack - x86Ptr .globl DispatcherClear DispatcherClear: // EDX contains the current pc @@ -119,58 +135,47 @@ DispatcherClear_Recompile: jmp %eax - +////////////////////////////////////////////////////////////////////////// // called when jumping to variable pc address +// This is basically the same as Dispatcher but without the part at the end +// that modifies the block's jmp instruction. 
(ie, no static block linking) + .globl DispatcherReg DispatcherReg: - //s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc); - mov %edx, dword ptr [cpuRegs+PCOFFSET] - mov %ecx, %edx - - shr %edx, 14 - and %edx, 0xfffffffc - add %edx, [recLUT] - mov %edx, dword ptr [%edx] + mov %eax,dword ptr [cpuRegs+PCOFFSET] + mov %ecx,%eax + mov REG_PC,%eax + shr %eax,10h + and %ecx,0FFFFh + mov %edx,dword ptr [recLUT] + mov %eax,dword ptr [edx+eax*4] + lea %ecx,[eax+ecx*2] - mov %eax, %ecx - and %eax, 0xfffc - // %edx += 2*%eax - shl %eax, 1 - add %edx, %eax - // check if startpc == cpuRegs.pc - mov %eax, %ecx - //and %eax, 0x5fffffff // remove higher bits - cmp %eax, dword ptr [%edx+BLOCKTYPE_STARTPC] - jne DispatcherReg_recomp + //and %ecx, 0x5fffffff // remove higher bits + cmp REG_PC, dword ptr [%ecx+BLOCKTYPE_STARTPC] + je Dispatcher_CheckPtr - mov %eax, dword ptr [%edx] + // recompile + push REG_BLOCK + push REG_PC // pc + call recRecompile + add %esp, 4 + pop %eax // eax is now the REG_BLOCK +Dispatcher_CheckPtr: + mov %eax, dword ptr [%eax] #ifdef _DEBUG test %eax, %eax - jnz CallFn2 - # throw an exception + jnz Dispatcher_CallFn + // throw an exception int 10 -CallFn2: - +Dispatcher_CallFn: #endif - and %eax, 0x0fffffff - jmp %eax // fnptr - -DispatcherReg_recomp: - sub %esp, 8 - mov dword ptr [%esp+4], %edx - mov dword ptr [%esp], %ecx - call recRecompile - mov %edx, dword ptr [%esp+4] - add %esp, 8 - - mov %eax, dword ptr [%edx] - and %eax, 0x0fffffff - jmp %eax // fnptr + jmp %eax .globl _StartPerfCounter diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp new file mode 100644 index 0000000000..bffd06d30d --- /dev/null +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -0,0 +1,318 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2008 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, 
or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "PrecompiledHeader.h" + +#include "Common.h" +#include "vtlb.h" + +#include "x86/ix86/ix86.h" +#include "iCore.h" +#include "iR5900.h" + +using namespace vtlb_private; + +/* + // Pseudo-Code For the following Dynarec Implementations --> + + u32 vmv=vmap[addr>>VTLB_PAGE_BITS]; + s32 ppf=addr+vmv; + if (!(ppf<0)) + { + data[0]=*reinterpret_cast(ppf); + if (DataSize==128) + data[1]=*reinterpret_cast(ppf+8); + return 0; + } + else + { + //has to: translate, find function, call function + u32 hand=(u8)vmv; + u32 paddr=ppf-hand+0x80000000; + //SysPrintf("Translted 0x%08X to 0x%08X\n",addr,paddr); + return reinterpret_cast::HandlerType*>(RWFT[TemplateHelper::sidx][0][hand])(paddr,data); + } + + // And in ASM it looks something like this --> + + mov eax,ecx; + shr eax,VTLB_PAGE_BITS; + mov eax,[eax*4+vmap]; + add ecx,eax; + js _fullread; + + //these are wrong order, just an example ... + mov [eax],ecx; + mov ecx,[edx]; + mov [eax+4],ecx; + mov ecx,[edx+4]; + mov [eax+4+4],ecx; + mov ecx,[edx+4+4]; + mov [eax+4+4+4+4],ecx; + mov ecx,[edx+4+4+4+4]; + ///.... + + jmp cont; + _fullread: + movzx eax,al; + sub ecx,eax; + sub ecx,0x80000000; + call [eax+stuff]; + cont: + ........ 
+ + */ + + +//ecx = addr +//edx = ptr +void vtlb_DynGenRead64(u32 bits) +{ + MOV32RtoR(EAX,ECX); + SHR32ItoR(EAX,VTLB_PAGE_BITS); + MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); + ADD32RtoR(ECX,EAX); + u8* _fullread=JS8(0); + switch(bits) + { + case 64: + if( _hasFreeMMXreg() ) + { + const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); + MOVQRmtoROffset(freereg,ECX,0); + MOVQRtoRmOffset(EDX,freereg,0); + _freeMMXreg(freereg); + } + else + { + MOV32RmtoR(EAX,ECX); + MOV32RtoRm(EDX,EAX); + + MOV32RmtoROffset(EAX,ECX,4); + MOV32RtoRmOffset(EDX,EAX,4); + } + break; + + case 128: + if( _hasFreeXMMreg() ) + { + const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); + SSE2_MOVDQARmtoROffset(freereg,ECX,0); + SSE2_MOVDQARtoRmOffset(EDX,freereg,0); + _freeXMMreg(freereg); + } + else + { + // Could put in an MMX optimization here as well, but no point really. + // It's almost never used since there's almost always a free XMM reg. + + MOV32RmtoR(EAX,ECX); + MOV32RtoRm(EDX,EAX); + + MOV32RmtoROffset(EAX,ECX,4); + MOV32RtoRmOffset(EDX,EAX,4); + + MOV32RmtoROffset(EAX,ECX,8); + MOV32RtoRmOffset(EDX,EAX,8); + + MOV32RmtoROffset(EAX,ECX,12); + MOV32RtoRmOffset(EDX,EAX,12); + } + break; + + jNO_DEFAULT + } + + u8* cont=JMP8(0); + x86SetJ8(_fullread); + int szidx; + + switch(bits) + { + case 64: szidx=3; break; + case 128: szidx=4; break; + jNO_DEFAULT + } + + MOVZX32R8toR(EAX,EAX); + SUB32RtoR(ECX,EAX); + //eax=[funct+eax] + MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2); + SUB32ItoR(ECX,0x80000000); + CALL32R(EAX); + + x86SetJ8(cont); +} + +// ecx - source address to read from +// Returns read value in eax. 
+void vtlb_DynGenRead32(u32 bits, bool sign) +{ + jASSUME( bits <= 32 ); + + MOV32RtoR(EAX,ECX); + SHR32ItoR(EAX,VTLB_PAGE_BITS); + MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); + ADD32RtoR(ECX,EAX); + u8* _fullread=JS8(0); + + switch(bits) + { + case 8: + if( sign ) + MOVSX32Rm8toR(EAX,ECX); + else + MOVZX32Rm8toR(EAX,ECX); + break; + + case 16: + if( sign ) + MOVSX32Rm16toR(EAX,ECX); + else + MOVZX32Rm16toR(EAX,ECX); + break; + + case 32: + MOV32RmtoR(EAX,ECX); + break; + + jNO_DEFAULT + } + + u8* cont=JMP8(0); + x86SetJ8(_fullread); + int szidx; + + switch(bits) + { + case 8: szidx=0; break; + case 16: szidx=1; break; + case 32: szidx=2; break; + jNO_DEFAULT + } + + MOVZX32R8toR(EAX,EAX); + SUB32RtoR(ECX,EAX); + //eax=[funct+eax] + MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2); + SUB32ItoR(ECX,0x80000000); + CALL32R(EAX); + + // perform sign extension on the result: + + if( bits==8 ) + { + if( sign ) + MOVSX32R8toR(EAX,EAX); + else + MOVZX32R8toR(EAX,EAX); + } + else if( bits==16 ) + { + if( sign ) + MOVSX32R16toR(EAX,EAX); + else + MOVZX32R16toR(EAX,EAX); + } + + x86SetJ8(cont); +} + +void vtlb_DynGenWrite(u32 sz) +{ + MOV32RtoR(EAX,ECX); + SHR32ItoR(EAX,VTLB_PAGE_BITS); + MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2); + ADD32RtoR(ECX,EAX); + u8* _full=JS8(0); + switch(sz) + { + //8 , 16, 32 : data on EDX + case 8: + MOV8RtoRm(ECX,EDX); + break; + case 16: + MOV16RtoRm(ECX,EDX); + break; + case 32: + MOV32RtoRm(ECX,EDX); + break; + + case 64: + if( _hasFreeMMXreg() ) + { + const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); + MOVQRmtoROffset(freereg,EDX,0); + MOVQRtoRmOffset(ECX,freereg,0); + _freeMMXreg( freereg ); + } + else + { + MOV32RmtoR(EAX,EDX); + MOV32RtoRm(ECX,EAX); + + MOV32RmtoROffset(EAX,EDX,4); + MOV32RtoRmOffset(ECX,EAX,4); + } + break; + + case 128: + if( _hasFreeXMMreg() ) + { + const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); + SSE2_MOVDQARmtoROffset(freereg,EDX,0); + SSE2_MOVDQARtoRmOffset(ECX,freereg,0); + _freeXMMreg( freereg ); + } + else + { + 
// Could put in an MMX optimization here as well, but no point really. + // It's almost never used since there's almost always a free XMM reg. + + MOV32RmtoR(EAX,EDX); + MOV32RtoRm(ECX,EAX); + MOV32RmtoROffset(EAX,EDX,4); + MOV32RtoRmOffset(ECX,EAX,4); + MOV32RmtoROffset(EAX,EDX,8); + MOV32RtoRmOffset(ECX,EAX,8); + MOV32RmtoROffset(EAX,EDX,12); + MOV32RtoRmOffset(ECX,EAX,12); + } + break; + } + u8* cont=JMP8(0); + x86SetJ8(_full); + int szidx=0; + + switch(sz) + { + case 8: szidx=0; break; + case 16: szidx=1; break; + case 32: szidx=2; break; + case 64: szidx=3; break; + case 128: szidx=4; break; + } + MOVZX32R8toR(EAX,EAX); + SUB32RtoR(ECX,EAX); + //eax=[funct+eax] + MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][1],2); + SUB32ItoR(ECX,0x80000000); + CALL32R(EAX); + + x86SetJ8(cont); +}