Optimized VTLB's mappings for VU0/VU1. Seems like a pretty big speedup to me, but my PC sucks too much on VU1-intensive games for me to be sure. :)

Bugfix: VUSkip works again!  However, frameskipping is still not where it should be... seems to like to skip too much (grr!)

Linux: Committed a new version of aR5900-32.S, which should work better than the old version (but I can't really test it).

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@651 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2009-01-29 04:06:48 +00:00 committed by Gregory Hainaut
parent e4e8b6dbaf
commit 9a5da04452
23 changed files with 776 additions and 642 deletions

View File

@ -163,11 +163,19 @@ vtlbHandler tlb_fallback_6;
vtlbHandler tlb_fallback_7;
vtlbHandler tlb_fallback_8;
vtlbHandler vu0_micro_mem;
vtlbHandler vu1_micro_mem;
vtlbHandler vu0_micro_mem[2]; // 0 - dynarec, 1 - interpreter
vtlbHandler vu1_micro_mem[2]; // 0 - dynarec, 1 - interpreter
vtlbHandler hw_by_page[0x10];
// Re-binds the VU0/VU1 micro memory pages to the handler set that matches the current
// VU0/VU1 dynarec settings (the VU memory operations differ between recs and interpreters).
void memMapVUmicro()
{
	// Slot 0 of each handler pair is the dynarec flavor, slot 1 the interpreter flavor.
	const int vu0Slot = CHECK_VU0REC ? 0 : 1;
	vtlb_MapHandler(vu0_micro_mem[vu0Slot], 0x11000000, 0x00004000);

	const int vu1Slot = CHECK_VU1REC ? 0 : 1;
	vtlb_MapHandler(vu1_micro_mem[vu1Slot], 0x11008000, 0x00004000);
}
void memMapPhy()
{
//Main mem
@ -185,15 +193,7 @@ void memMapPhy()
//IOP mem
vtlb_MapBlock(psxM,0x1c000000,0x00800000);
//VU0:Micro
//vtlb_MapBlock(VU0.Micro,0x11000000,0x00004000,0x1000);
vtlb_MapHandler(vu0_micro_mem,0x11000000,0x00004000);
//VU0:Mem
vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000);
//VU1:Micro
//vtlb_MapBlock(VU1.Micro,0x11008000,0x00004000);
vtlb_MapHandler(vu1_micro_mem,0x11008000,0x00004000);
//VU1:Mem
vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000);
//These fallback to mem* stuff ...
@ -447,7 +447,20 @@ void __fastcall _ext_memWrite128(u32 mem, const u64 *value)
}
#define vtlb_RegisterHandlerTempl1(nam,t) vtlb_RegisterHandler(nam##Read8<t>,nam##Read16<t>,nam##Read32<t>,nam##Read64<t>,nam##Read128<t>, \
nam##Write8<t>,nam##Write16<t>,nam##Write32<t>,nam##Write64<t>,nam##Write128<t>);
nam##Write8<t>,nam##Write16<t>,nam##Write32<t>,nam##Write64<t>,nam##Write128<t>)
#define vtlb_RegisterHandlerTempl2(nam,t,rec) vtlb_RegisterHandler(nam##Read8<t>,nam##Read16<t>,nam##Read32<t>,nam##Read64<t>,nam##Read128<t>, \
nam##Write8<t,rec>,nam##Write16<t,rec>,nam##Write32<t,rec>,nam##Write64<t,rec>,nam##Write128<t,rec>)
typedef void __fastcall ClearFunc_t( u32 addr, u32 qwc );
// Selects, from the template arguments, which VU "Clear" routine a micro-memory
// write handler should invoke:
//   vunum   - 0 picks the VU0micro routine, any other value picks VU1micro.
//   dynarec - true picks the recompiler's recClear, false the interpreter's intClear.
// Returns a reference to the chosen function.
template<int vunum, bool dynarec>
static __forceinline ClearFunc_t& GetClearFunc()
{
	if( dynarec )
		return ( vunum==0 ) ? VU0micro::recClear : VU1micro::recClear;

	return ( vunum==0 ) ? VU0micro::intClear : VU1micro::intClear;
}
template<int vunum>
mem8_t __fastcall vuMicroRead8(u32 addr)
@ -498,89 +511,74 @@ void __fastcall vuMicroRead128(u32 addr,mem128_t* data)
// [TODO] : Profile this code and see how often the VUs get written, and how
// often it changes the values being written (invoking a cpuClear).
template<int vunum>
template<int vunum, bool dynrec>
void __fastcall vuMicroWrite8(u32 addr,mem8_t data)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (vu->Micro[addr]!=data)
if (vu.Micro[addr]!=data)
{
vu->Micro[addr]=data;
vu.Micro[addr]=data;
if (vunum==0)
CpuVU0->Clear(addr&(~7),1);
else
CpuVU1->Clear(addr&(~7),1);
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
}
}
template<int vunum>
template<int vunum, bool dynrec>
void __fastcall vuMicroWrite16(u32 addr,mem16_t data)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (*(u16*)&vu->Micro[addr]!=data)
if (*(u16*)&vu.Micro[addr]!=data)
{
*(u16*)&vu->Micro[addr]=data;
*(u16*)&vu.Micro[addr]=data;
if (vunum==0)
CpuVU0->Clear(addr&(~7),1);
else
CpuVU1->Clear(addr&(~7),1);
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
}
}
template<int vunum>
template<int vunum, bool dynrec>
void __fastcall vuMicroWrite32(u32 addr,mem32_t data)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (*(u32*)&vu->Micro[addr]!=data)
if (*(u32*)&vu.Micro[addr]!=data)
{
*(u32*)&vu->Micro[addr]=data;
*(u32*)&vu.Micro[addr]=data;
if (vunum==0)
CpuVU0->Clear(addr&(~7),1);
else
CpuVU1->Clear(addr&(~7),1);
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
}
}
template<int vunum>
template<int vunum, bool dynrec>
void __fastcall vuMicroWrite64(u32 addr,const mem64_t* data)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (*(u64*)&vu->Micro[addr]!=data[0])
if (*(u64*)&vu.Micro[addr]!=data[0])
{
*(u64*)&vu->Micro[addr]=data[0];
*(u64*)&vu.Micro[addr]=data[0];
if (vunum==0)
CpuVU0->Clear(addr,1);
else
CpuVU1->Clear(addr,1);
GetClearFunc<vunum, dynrec>()(addr,1);
}
}
template<int vunum>
template<int vunum, bool dynrec>
void __fastcall vuMicroWrite128(u32 addr,const mem128_t* data)
{
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (*(u64*)&vu->Micro[addr]!=data[0] || *(u64*)&vu->Micro[addr+8]!=data[1])
if (*(u64*)&vu.Micro[addr]!=data[0] || *(u64*)&vu.Micro[addr+8]!=data[1])
{
*(u64*)&vu->Micro[addr]=data[0];
*(u64*)&vu->Micro[addr+8]=data[1];
*(u64*)&vu.Micro[addr]=data[0];
*(u64*)&vu.Micro[addr+8]=data[1];
if (vunum==0)
CpuVU0->Clear(addr,2);
else
CpuVU1->Clear(addr,2);
GetClearFunc<vunum, dynrec>()(addr,2);
}
}
@ -605,7 +603,7 @@ void memClearPageAddr(u32 vaddr)
}
///////////////////////////////////////////////////////////////////////////
// VTLB Memory Init / Reset / Shutdown
// PS2 Memory Init / Reset / Shutdown
static const uint m_allMemSize =
Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2 + Ps2MemSize::ERom +
@ -707,8 +705,13 @@ void memReset()
tlb_fallback_7=vtlb_RegisterHandlerTempl1(_ext_mem,7);
tlb_fallback_8=vtlb_RegisterHandlerTempl1(_ext_mem,8);
vu0_micro_mem=vtlb_RegisterHandlerTempl1(vuMicro,0);
vu1_micro_mem=vtlb_RegisterHandlerTempl1(vuMicro,1);
// Dynarec versions of VUs
vu0_micro_mem[0] = vtlb_RegisterHandlerTempl2(vuMicro,0,true);
vu1_micro_mem[0] = vtlb_RegisterHandlerTempl2(vuMicro,1,true);
// Interpreter versions of VUs
vu0_micro_mem[1] = vtlb_RegisterHandlerTempl2(vuMicro,0,false);
vu1_micro_mem[1] = vtlb_RegisterHandlerTempl2(vuMicro,1,false);
//////////////////////////////////////////////////////
// psHw Optimized Mappings
@ -719,7 +722,7 @@ void memReset()
tlb_fallback_1 = vtlb_RegisterHandler(
_ext_memRead8<1>, _ext_memRead16<1>, hwRead32_page_other, _ext_memRead64<1>, _ext_memRead128<1>,
_ext_memWrite8<1>, _ext_memWrite16<1>, hwWrite32_page_other, _ext_memWrite64<1>, _ext_memWrite128<1>
);
);
hw_by_page[0x0] = vtlb_RegisterHandler(
_ext_memRead8<1>, _ext_memRead16<1>, hwRead32_page_00, _ext_memRead64<1>, _ext_memRead128<1>,
@ -763,6 +766,7 @@ void memReset()
//vtlb_VMapUnmap(0x20000000,0x60000000);
memMapPhy();
memMapVUmicro();
memMapKernelMem();
memMapSupervisorMem();
memMapUserMem();

View File

@ -230,21 +230,22 @@ extern u8 g_RealGSMem[0x2000];
#define PSMu32(mem) (*(u32*)PSM(mem))
#define PSMu64(mem) (*(u64*)PSM(mem))
void memAlloc();
void memReset(); // clears PS2 ram and loads the bios. Throws Exception::FileNotFound on error.
void memSetKernelMode();
void memSetSupervisorMode();
void memSetUserMode();
void memSetPageAddr(u32 vaddr, u32 paddr);
void memClearPageAddr(u32 vaddr);
void memShutdown();
extern void memAlloc();
extern void memReset(); // clears PS2 ram and loads the bios. Throws Exception::FileNotFound on error.
extern void memShutdown();
extern void memSetKernelMode();
extern void memSetSupervisorMode();
extern void memSetUserMode();
extern void memSetPageAddr(u32 vaddr, u32 paddr);
extern void memClearPageAddr(u32 vaddr);
extern void memMapVUmicro();
#ifdef __LINUX__
void SysPageFaultExceptionFilter( int signal, siginfo_t *info, void * );
void __fastcall InstallLinuxExceptionHandler();
void __fastcall ReleaseLinuxExceptionHandler();
#endif
#ifdef _WIN32
#else
int SysPageFaultExceptionFilter(EXCEPTION_POINTERS* eps);
#endif
@ -258,13 +259,6 @@ void mmap_ResetBlockTracking();
extern void __fastcall memRead8(u32 mem, u8 *out);
extern void __fastcall memRead16(u32 mem, u16 *out);
extern void __fastcall memRead32(u32 mem, u32 *out);
/*int __fastcall _memRead64(u32 mem, u64 *out);
int __fastcall _memRead128(u32 mem, u64 *out);
void __fastcall _memWrite8 (u32 mem, u8 value);
void __fastcall _memWrite16(u32 mem, u16 value);
void __fastcall _memWrite32(u32 mem, u32 value);
void __fastcall _memWrite64(u32 mem, u64 value);
void __fastcall _memWrite128(u32 mem, u64 *value);*/
#define memRead64 vtlb_memRead64
#define memRead128 vtlb_memRead128

View File

@ -522,7 +522,7 @@ __forceinline bool _cpuBranchTest_Shared()
{
// We're in a BranchTest. All dynarec registers are flushed
// so there is no need to freeze registers here.
CpuVU0->ExecuteBlock();
CpuVU0.ExecuteBlock();
// This might be needed to keep the EE and VU0 in sync.
// A better fix will require hefty changes to the VU recs. -_-

View File

@ -51,11 +51,11 @@ static void TestClearVUs(u32 madr, u32 size)
if( madr >= 0x11000000 ) {
if( madr < 0x11004000 ) {
DbgCon::Notice("scratch pad clearing vu0");
CpuVU0->Clear(madr&0xfff, size);
CpuVU0.Clear(madr&0xfff, size);
}
else if( madr >= 0x11008000 && madr < 0x1100c000 ) {
DbgCon::Notice("scratch pad clearing vu1\n");
CpuVU1->Clear(madr&0x3fff, size);
CpuVU1.Clear(madr&0x3fff, size);
}
}
}

View File

@ -248,7 +248,7 @@ void SysAllocateDynarecs()
try
{
recVU0.Allocate();
VU0micro::recAlloc();
}
catch( Exception::BaseException& ex )
{
@ -260,12 +260,12 @@ void SysAllocateDynarecs()
);
g_Session.ForceDisableVU0rec = true;
recVU0.Shutdown();
VU0micro::recShutdown();
}
try
{
recVU1.Allocate();
VU1micro::recAlloc();
}
catch( Exception::BaseException& ex )
{
@ -277,7 +277,7 @@ void SysAllocateDynarecs()
);
g_Session.ForceDisableVU1rec = true;
recVU1.Shutdown();
VU1micro::recShutdown();
}
// If both VUrecs failed, then make sure the SuperVU is totally closed out:

View File

@ -79,7 +79,7 @@ void _vu0WaitMicro() {
VU0.flags&= ~VUFLAG_MFLAGSET;
do {
CpuVU0->ExecuteBlock();
CpuVU0.ExecuteBlock();
// knockout kings 2002 loops here
if( VU0.cycle-startcycle > 0x1000 ) {
Console::Notice("VU0 perma-stall, breaking execution..."); // (email zero if gfx are bad)
@ -351,7 +351,7 @@ void vu0Finish()
int i = 0;
while(i++ < 32) {
CpuVU0->ExecuteBlock();
CpuVU0.ExecuteBlock();
if(!(VU0.VI[REG_VPU_STAT].UL & 0x1))
break;
}

View File

@ -84,7 +84,7 @@ void vu0ExecMicro(u32 addr) {
if (addr != -1) VU0.VI[REG_TPC].UL = addr;
_vuExecMicroDebug(VU0);
CpuVU0->ExecuteBlock();
CpuVU0.ExecuteBlock();
// If the VU0 program didn't finish then we'll want to finish it up
// pretty soon. This fixes vmhacks in some games (Naruto Ultimate Ninja 2)

View File

@ -193,58 +193,61 @@ void vu0Exec(VURegs* VU)
if (VU->VF[0].f.w != 1.0f) DbgCon::Error("VF[0].w != 1.0!!!!\n");
}
static void intAlloc()
namespace VU0micro
{
}
static void intReset()
{
}
static void intStep()
{
vu0Exec( &VU0 );
}
static void intExecuteBlock()
{
int i;
#ifdef _DEBUG
int prevbranch;
#endif
for (i = 128; i--;) {
if ((VU0.VI[REG_VPU_STAT].UL & 0x1) == 0)
break;
#ifdef _DEBUG
prevbranch = vu0branch;
#endif
vu0Exec(&VU0);
static void intAlloc()
{
}
if( i < 0 && (VU0.branch || VU0.ebit) ) {
// execute one more
vu0Exec(&VU0);
static void intShutdown()
{
}
void __fastcall intClear(u32 Addr, u32 Size)
{
}
static void intReset()
{
}
static void intStep()
{
vu0Exec( &VU0 );
}
static void intExecuteBlock()
{
int i;
#ifdef _DEBUG
int prevbranch;
#endif
for (i = 128; i--;) {
if ((VU0.VI[REG_VPU_STAT].UL & 0x1) == 0)
break;
#ifdef _DEBUG
prevbranch = vu0branch;
#endif
vu0Exec(&VU0);
}
if( i < 0 && (VU0.branch || VU0.ebit) ) {
// execute one more
vu0Exec(&VU0);
}
}
}
static void intClear(u32 Addr, u32 Size)
{
}
using namespace VU0micro;
static void intShutdown()
const VUmicroCpu intVU0 =
{
}
VUmicroCpu intVU0 =
{
intAlloc
, intReset
intReset
, intStep
, intExecuteBlock
, intClear
, intShutdown
};

View File

@ -78,7 +78,7 @@ void vu1ExecMicro(u32 addr)
while(VU0.VI[REG_VPU_STAT].UL & 0x100)
{
VUM_LOG("vu1ExecMicro > Stalling until current microprogram finishes");
CpuVU1->ExecuteBlock();
CpuVU1.ExecuteBlock();
}
VUM_LOG("vu1ExecMicro %x\n", addr);
@ -90,7 +90,7 @@ void vu1ExecMicro(u32 addr)
if (addr != -1) VU1.VI[REG_TPC].UL = addr;
_vuExecMicroDebug(VU1);
CpuVU1->ExecuteBlock();
CpuVU1.ExecuteBlock();
}
_vuRegsTables(VU1, VU1regs);

View File

@ -179,57 +179,59 @@ void vu1Exec(VURegs* VU)
if (VU->VF[0].f.w != 1.0f) DbgCon::Error("VF[0].w != 1.0!!!!\n");
}
static void intAlloc()
namespace VU1micro
{
}
static void intReset()
{
}
static void intStep()
{
vu1Exec( &VU1 );
}
static void intExecuteBlock()
{
int i;
#ifdef _DEBUG
int prevbranch;
#endif
for (i = 128; i--;) {
if ((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0)
break;
#ifdef _DEBUG
prevbranch = vu1branch;
#endif
vu1Exec(&VU1);
void intAlloc()
{
}
if( i < 0 && (VU1.branch || VU1.ebit) ) {
// execute one more
vu1Exec(&VU1);
void __fastcall intClear(u32 Addr, u32 Size)
{
}
void intShutdown()
{
}
static void intReset()
{
}
static void intStep()
{
vu1Exec( &VU1 );
}
static void intExecuteBlock()
{
int i;
#ifdef _DEBUG
int prevbranch;
#endif
for (i = 128; i--;) {
if ((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0)
break;
#ifdef _DEBUG
prevbranch = vu1branch;
#endif
vu1Exec(&VU1);
}
if( i < 0 && (VU1.branch || VU1.ebit) ) {
// execute one more
vu1Exec(&VU1);
}
}
}
using namespace VU1micro;
static void intClear(u32 Addr, u32 Size)
const VUmicroCpu intVU1 =
{
}
static void intShutdown()
{
}
VUmicroCpu intVU1 =
{
intAlloc
, intReset
intReset
, intStep
, intExecuteBlock
, intClear
, intShutdown
};

View File

@ -23,21 +23,43 @@
struct VUmicroCpu
{
void (*Allocate)(); // throws exceptions on failure.
void (*Reset)();
void (*Step)();
void (*ExecuteBlock)(); // VUs should support block-level execution only.
void (*Clear)(u32 Addr, u32 Size);
void (*Shutdown)(); // deallocates memory reserved by Allocate
void (__fastcall *Clear)(u32 Addr, u32 Size);
};
extern VUmicroCpu *CpuVU0;
extern VUmicroCpu intVU0;
extern VUmicroCpu recVU0;
extern VUmicroCpu CpuVU0;
extern const VUmicroCpu intVU0;
extern const VUmicroCpu recVU0;
extern VUmicroCpu *CpuVU1;
extern VUmicroCpu intVU1;
extern VUmicroCpu recVU1;
extern VUmicroCpu CpuVU1;
extern const VUmicroCpu intVU1;
extern const VUmicroCpu recVU1;
// Directly callable VU0 entry points, declared by name so that code can bind to a
// specific implementation (recompiler or interpreter) instead of going through the
// CpuVU0 function table.
namespace VU0micro
{
// Recompiler allocation; callers wrap this in a try/catch for Exception::BaseException.
extern void recAlloc();
// Recompiler teardown; invoked by the allocation failure path after recAlloc throws.
extern void recShutdown();
// Recompiler notification that VU0 micro memory at Addr changed.
// NOTE(review): the unit of Size is not visible here -- confirm against the rec implementation.
extern void __fastcall recClear(u32 Addr, u32 Size);
// Note: Interpreter functions are dummies -- they don't actually do anything.
extern void intAlloc();
extern void intShutdown();
extern void __fastcall intClear(u32 Addr, u32 Size);
}
// Directly callable VU1 entry points, declared by name so that code can bind to a
// specific implementation (recompiler or interpreter) instead of going through the
// CpuVU1 function table.
namespace VU1micro
{
// Recompiler allocation; callers wrap this in a try/catch for Exception::BaseException.
extern void recAlloc();
// Recompiler teardown; invoked by the allocation failure path after recAlloc throws.
extern void recShutdown();
// Recompiler notification that VU1 micro memory at Addr changed.
// NOTE(review): the unit of Size is not visible here -- confirm against the rec implementation.
extern void __fastcall recClear(u32 Addr, u32 Size);
// Note: Interpreter functions are dummies -- they don't actually do anything.
extern void intAlloc();
extern void intShutdown();
extern void __fastcall intClear(u32 Addr, u32 Size);
}
/////////////////////////////////////////////////////////////////
// These functions initialize memory for both VUs.

View File

@ -31,8 +31,11 @@ extern PSMEMORYBLOCK s_psVuMem;
extern PSMEMORYMAP *memLUT;
#endif
VUmicroCpu *CpuVU0;
VUmicroCpu *CpuVU1;
// The following CpuVU objects are value types instead of handles or pointers because they are
// modified on the fly to implement VU1 Skip.
VUmicroCpu CpuVU0; // contains a working copy of the VU0 cpu functions/API
VUmicroCpu CpuVU1; // contains a working copy of the VU1 cpu functions/API
static void DummyExecuteVU1Block(void)
{
@ -40,32 +43,27 @@ static void DummyExecuteVU1Block(void)
VU1.vifRegs->stat &= ~4; // also reset the bit (grandia 3 works)
}
void (*recVU1EB)(), (*intVU1EB)();
void vu1MicroEnableSkip()
{
CpuVU1->ExecuteBlock = DummyExecuteVU1Block;
CpuVU1.ExecuteBlock = DummyExecuteVU1Block;
}
void vu1MicroDisableSkip()
{
CpuVU1->ExecuteBlock = CHECK_VU1REC ? recVU1EB : intVU1EB;
CpuVU1.ExecuteBlock = CHECK_VU1REC ? recVU1.ExecuteBlock : intVU1.ExecuteBlock;
}
bool vu1MicroIsSkipping()
{
return CpuVU1->ExecuteBlock == DummyExecuteVU1Block;
return CpuVU1.ExecuteBlock == DummyExecuteVU1Block;
}
void vuMicroCpuReset()
{
recVU1EB = recVU1.ExecuteBlock;
intVU1EB = intVU1.ExecuteBlock;
CpuVU0 = CHECK_VU0REC ? &recVU0 : &intVU0;
CpuVU1 = CHECK_VU1REC ? &recVU1 : &intVU1;
CpuVU0->Reset();
CpuVU1->Reset();
CpuVU0 = CHECK_VU0REC ? recVU0 : intVU0;
CpuVU1 = CHECK_VU1REC ? recVU1 : intVU1;
CpuVU0.Reset();
CpuVU1.Reset();
// SuperVUreset will do nothing if none of the recs are initialized.
// But it's needed if one or the other is initialized.
@ -180,6 +178,8 @@ void vuMicroMemReset()
jASSUME( VU0.Mem != NULL );
jASSUME( VU1.Mem != NULL );
memMapVUmicro();
// === VU0 Initialization ===
memzero_obj(VU0.ACC);
memzero_obj(VU0.VF);
@ -221,7 +221,6 @@ void vuMicroMemReset()
// VU1.VI = (REG_VI*)(VU0.Mem + 0x4200);
VU1.vuExec = vu1Exec;
VU1.vifRegs = vif1Regs;
}
void SaveState::vuMicroFreeze()

View File

@ -219,7 +219,7 @@ __forceinline void vif1FLUSH() {
if( VU0.VI[REG_VPU_STAT].UL & 0x100 ) {
do {
CpuVU1->ExecuteBlock();
CpuVU1.ExecuteBlock();
} while(VU0.VI[REG_VPU_STAT].UL & 0x100);
g_vifCycles+= (VU1.cycle - _cycles)*BIAS;
@ -813,7 +813,7 @@ static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size) {
}*/
if (memcmp(VU0.Micro + addr, data, size << 2)) {
memcpy_fast(VU0.Micro + addr, data, size << 2);
CpuVU0->Clear(addr, size);
CpuVU0.Clear(addr, size);
}
}
@ -1487,7 +1487,7 @@ static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size) {
assert( VU1.Micro > 0 );
if (memcmp(VU1.Micro + addr, data, size << 2)) {
memcpy_fast(VU1.Micro + addr, data, size << 2);
CpuVU1->Clear(addr, size);
CpuVU1.Clear(addr, size);
}
}

View File

@ -1,3 +1,21 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2008 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
/*
EE physical map :
[0000 0000,1000 0000) -> Ram (mirrored ?)
@ -20,10 +38,9 @@
#include "Common.h"
#include "vtlb.h"
#include "COP0.h"
#include "x86/ix86/ix86.h"
#include "iCore.h"
using namespace R5900;
using namespace vtlb_private;
#ifdef PCSX2_DEVBUILD
#define verify(x) {if (!(x)) { (*(u8*)0)=3; }}
@ -31,21 +48,16 @@ using namespace R5900;
#define verify jASSUME
#endif
static const uint VTLB_PAGE_BITS =12;
static const uint VTLB_PAGE_MASK=(4095);
static const uint VTLB_PAGE_SIZE=(4096);
static const uint VTLB_PMAP_ITEMS=(0x20000000/VTLB_PAGE_SIZE);
static const uint VTLB_PMAP_SZ=0x20000000;
static const uint VTLB_VMAP_ITEMS=(0x100000000ULL/VTLB_PAGE_SIZE);
static s32 pmap[VTLB_PMAP_ITEMS]; //512KB
static s32 vmap[VTLB_VMAP_ITEMS]; //4MB
// first indexer -- 8/16/32/64/128 bit tables [values 0-4]
// second indexer -- read/write [0 or 1]
// third indexer -- 128 pages of memory!
static void* RWFT[5][2][128];
namespace vtlb_private
{
s32 pmap[VTLB_PMAP_ITEMS]; //512KB
s32 vmap[VTLB_VMAP_ITEMS]; //4MB
// first indexer -- 8/16/32/64/128 bit tables [values 0-4]
// second indexer -- read/write [0 or 1]
// third indexer -- 128 pages of memory!
void* RWFT[5][2][128];
}
vtlbHandler vtlbHandlerCount=0;
@ -81,7 +93,11 @@ callfunction:
jmp [readfunctions8-0x800000+eax];
}*/
// For 8, 16, and 32 bit accesses
/////////////////////////////////////////////////////////////////////////
// Interpreter Implementations of VTLB Memory Operations.
// See recVTLB.cpp for the dynarec versions.
// Interpreted VTLB lookup for 8, 16, and 32 bit accesses
template<int DataSize,typename DataType>
__forceinline DataType __fastcall MemOp_r0(u32 addr)
{
@ -107,7 +123,7 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr)
}
}
// For 64 and 128 bit accesses.
// Interpreted VTLB lookup for 64 and 128 bit accesses.
template<int DataSize,typename DataType>
__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
{
@ -234,26 +250,36 @@ void __fastcall vtlb_memWrite128(u32 mem, const mem128_t *value)
}
// Pointer-output wrappers around the 8/16/32-bit VTLB reads, used by interpreters
// and similar callers. They give the narrow reads the same "result via out-pointer"
// shape as the 64/128-bit reads.
void __fastcall memRead8(u32 mem, u8 *out)
{
	const u8 value = vtlb_memRead8( mem );
	*out = value;
}

void __fastcall memRead16(u32 mem, u16 *out)
{
	const u16 value = vtlb_memRead16( mem );
	*out = value;
}

void __fastcall memRead32(u32 mem, u32 *out)
{
	const u32 value = vtlb_memRead32( mem );
	*out = value;
}
/////////////////////////////////////////////////////////////////////////
// Error / TLB Miss Handlers
//
// Generates a VtlbMiss Exception
static __forceinline void vtlb_Miss(u32 addr,u32 mode)
{
SysPrintf("vtlb miss : addr 0x%X, mode %d\n",addr,mode);
Console::Error("vtlb miss : addr 0x%X, mode %d", params addr,mode);
verify(false);
if (mode==0)
cpuTlbMissR(addr, cpuRegs.branch);
else
cpuTlbMissW(addr, cpuRegs.branch);
}
// Just dies a horrible death for now.
//
static __forceinline void vtlb_BusError(u32 addr,u32 mode)
{
SysPrintf("vtlb bus error : addr 0x%X, mode %d\n",addr,mode);
Console::Error("vtlb bus error : addr 0x%X, mode %d\n",params addr,mode);
verify(false);
}
/////
///// Virtual Mapping Errors (TLB Miss)
template<u32 saddr>
mem8_t __fastcall vtlbUnmappedVRead8(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; }
template<u32 saddr>
@ -274,7 +300,8 @@ template<u32 saddr>
void __fastcall vtlbUnmappedVWrite64(u32 addr,const mem64_t* data) { vtlb_Miss(addr|saddr,1); }
template<u32 saddr>
void __fastcall vtlbUnmappedVWrite128(u32 addr,const mem128_t* data) { vtlb_Miss(addr|saddr,1); }
/////
///// Physical Mapping Errors (Bus Error)
template<u32 saddr>
mem8_t __fastcall vtlbUnmappedPRead8(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; }
template<u32 saddr>
@ -295,19 +322,34 @@ template<u32 saddr>
void __fastcall vtlbUnmappedPWrite64(u32 addr,const mem64_t* data) { vtlb_BusError(addr|saddr,1); }
template<u32 saddr>
void __fastcall vtlbUnmappedPWrite128(u32 addr,const mem128_t* data) { vtlb_BusError(addr|saddr,1); }
/////
mem8_t __fastcall vtlbDefaultPhyRead8(u32 addr) { SysPrintf("vtlbDefaultPhyRead8: 0x%X\n",addr); verify(false); return -1; }
mem16_t __fastcall vtlbDefaultPhyRead16(u32 addr) { SysPrintf("vtlbDefaultPhyRead16: 0x%X\n",addr); verify(false); return -1; }
mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr) { SysPrintf("vtlbDefaultPhyRead32: 0x%X\n",addr); verify(false); return -1; }
void __fastcall vtlbDefaultPhyRead64(u32 addr,mem64_t* data) { SysPrintf("vtlbDefaultPhyRead64: 0x%X\n",addr); verify(false); }
void __fastcall vtlbDefaultPhyRead128(u32 addr,mem128_t* data) { SysPrintf("vtlbDefaultPhyRead128: 0x%X\n",addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite8(u32 addr,mem8_t data) { SysPrintf("vtlbDefaultPhyWrite8: 0x%X\n",addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite16(u32 addr,mem16_t data) { SysPrintf("vtlbDefaultPhyWrite16: 0x%X\n",addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite32(u32 addr,mem32_t data) { SysPrintf("vtlbDefaultPhyWrite32: 0x%X\n",addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { SysPrintf("vtlbDefaultPhyWrite64: 0x%X\n",addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { SysPrintf("vtlbDefaultPhyWrite128: 0x%X\n",addr); verify(false); }
/////
///// VTLB mapping errors (unmapped address spaces)
mem8_t __fastcall vtlbDefaultPhyRead8(u32 addr) { Console::Error("vtlbDefaultPhyRead8: 0x%X",params addr); verify(false); return -1; }
mem16_t __fastcall vtlbDefaultPhyRead16(u32 addr) { Console::Error("vtlbDefaultPhyRead16: 0x%X",params addr); verify(false); return -1; }
mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr) { Console::Error("vtlbDefaultPhyRead32: 0x%X",params addr); verify(false); return -1; }
void __fastcall vtlbDefaultPhyRead64(u32 addr,mem64_t* data) { Console::Error("vtlbDefaultPhyRead64: 0x%X",params addr); verify(false); }
void __fastcall vtlbDefaultPhyRead128(u32 addr,mem128_t* data) { Console::Error("vtlbDefaultPhyRead128: 0x%X",params addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite8(u32 addr,mem8_t data) { Console::Error("vtlbDefaultPhyWrite8: 0x%X",params addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite16(u32 addr,mem16_t data) { Console::Error("vtlbDefaultPhyWrite16: 0x%X",params addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite32(u32 addr,mem32_t data) { Console::Error("vtlbDefaultPhyWrite32: 0x%X",params addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { Console::Error("vtlbDefaultPhyWrite64: 0x%X",params addr); verify(false); }
void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console::Error("vtlbDefaultPhyWrite128: 0x%X",params addr); verify(false); }
/////////////////////////////////////////////////////////////////////////
// VTLB Public API -- Init/Term/RegisterHandler stuff
//
// Registers a handler into the VTLB's internal handler array. The handler defines specific behavior
// for how memory pages bound to the handler are read from / written to. If any of the handler pointers
// are NULL, the memory operations will be mapped to the BusError handler (thus generating BusError
// exceptions if the emulated app attempts to access them).
//
// Note: All handlers persist across calls to vtlb_Reset(), but are wiped/invalidated by calls to vtlb_Init()
//
// Returns a handle for the newly created handler. See vtlb_MapHandler for use of the return value.
vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP* r32,vltbMemR64FP* r64,vltbMemR128FP* r128,
vltbMemW8FP* w8,vltbMemW16FP* w16,vltbMemW32FP* w32,vltbMemW64FP* w64,vltbMemW128FP* w128)
{
@ -329,6 +371,13 @@ vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP
return rv;
}
// Maps the given handler (created with vtlb_RegisterHandler) to the specified memory region.
// New mappings always assume priority over previous mappings, so place "generic" mappings for
// large areas of memory first, and then specialize specific small regions of memory afterward.
// A single handler can be mapped to many different regions by using multiple calls to this
// function.
//
// The memory region start and size parameters must be pagesize aligned.
void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size)
{
verify(0==(start&VTLB_PAGE_MASK));
@ -343,6 +392,7 @@ void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size)
size-=VTLB_PAGE_SIZE;
}
}
void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize)
{
s32 baseint=(s32)base;
@ -370,6 +420,7 @@ void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize)
}
}
}
void vtlb_Mirror(u32 new_region,u32 start,u32 size)
{
verify(0==(new_region&VTLB_PAGE_MASK));
@ -394,6 +445,7 @@ __forceinline void* vtlb_GetPhyPtr(u32 paddr)
return reinterpret_cast<void*>(pmap[paddr>>VTLB_PAGE_BITS]+(paddr&VTLB_PAGE_MASK));
}
//virtual mappings
//TODO: Add invalid paddr checks
void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz)
@ -459,14 +511,14 @@ void vtlb_VMapUnmap(u32 vaddr,u32 sz)
}
}
// Clears vtlb handlers and memory mappings.
void vtlb_Init()
{
//Reset all vars to default values
vtlbHandlerCount=0;
memzero_obj(RWFT);
//Register default handlers
//Unmapped Virt handlers _MUST_ be registed first.
//Unmapped Virt handlers _MUST_ be registered first.
//On address translation the top bit cannot be preserved. This is not normally a problem since
//the physical address space can be 'compressed' to just 29 bits. However, to properly handle exceptions
//there must be a way to get the full address back. That's why I use these 2 functions and encode the hi bit directly into them :)
@ -499,6 +551,8 @@ void vtlb_Init()
vtlb_VMapUnmap((VTLB_VMAP_ITEMS-1)*VTLB_PAGE_SIZE,VTLB_PAGE_SIZE);
}
// Performs a COP0-level reset of the PS2's TLB.
// This function should probably be part of the COP0 rather than here in VTLB.
void vtlb_Reset()
{
for(int i=0; i<48; i++) UnmapTLB(i);
@ -509,284 +563,4 @@ void vtlb_Term()
//nothing to do for now
}
#include "iR5900.h"
// Emits x86 code that performs a 64- or 128-bit VTLB read.
//   bits - access width to generate code for; only 64 and 128 have cases
//          (anything else lands in jNO_DEFAULT).
// Register contract of the generated code:
//ecx = addr
//edx = ptr
// The emitted sequence adds vmap[addr >> VTLB_PAGE_BITS] to the address. A non-negative
// sum is used as a direct pointer and the data is copied inline to [edx]; a negative sum
// means the page is handler-mapped, and the low byte of the vmap entry indexes the read
// handler table RWFT[szidx][0], which is then called.
// NOTE(review): operand meanings of the emitter macros are inferred from their names and
// from the reference listing below -- confirm against the ix86 emitter headers.
void vtlb_DynGenRead64(u32 bits)
{
// Reference C logic and rough pseudo-assembly for what gets emitted:
/*
u32 vmv=vmap[addr>>VTLB_PAGE_BITS];
s32 ppf=addr+vmv;
if (!(ppf<0))
{
data[0]=*reinterpret_cast<DataType*>(ppf);
if (DataSize==128)
data[1]=*reinterpret_cast<DataType*>(ppf+8);
return 0;
}
else
{
//has to: translate, find function, call function
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
//SysPrintf("Translted 0x%08X to 0x%08X\n",addr,paddr);
return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data);
}
mov eax,ecx;
shr eax,VTLB_PAGE_BITS;
mov eax,[eax*4+vmap];
add ecx,eax;
js _fullread;
//these are wrong order, just an example ...
mov [eax],ecx;
mov ecx,[edx];
mov [eax+4],ecx;
mov ecx,[edx+4];
mov [eax+4+4],ecx;
mov ecx,[edx+4+4];
mov [eax+4+4+4+4],ecx;
mov ecx,[edx+4+4+4+4];
///....
jmp cont;
_fullread:
movzx eax,al;
sub ecx,eax;
sub ecx,0x80000000;
call [eax+stuff];
cont:
........
*/
// eax = vmap entry for the page; ecx = addr + entry; sign set -> take the handler path.
MOV32RtoR(EAX,ECX);
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
ADD32RtoR(ECX,EAX);
u8* _fullread=JS8(0);
// Direct path: copy from [ecx] to [edx]. A temporary MMX/XMM register is used when one
// is free; otherwise the copy is done in 32-bit pieces through eax.
switch(bits)
{
case 64:
if( _hasFreeMMXreg() )
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoROffset(freereg,ECX,0);
MOVQRtoRmOffset(EDX,freereg,0);
_freeMMXreg(freereg);
}
else
{
MOV32RmtoR(EAX,ECX);
MOV32RtoRm(EDX,EAX);
MOV32RmtoROffset(EAX,ECX,4);
MOV32RtoRmOffset(EDX,EAX,4);
}
break;
case 128:
if( _hasFreeXMMreg() )
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoROffset(freereg,ECX,0);
SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
_freeXMMreg(freereg);
}
else
{
MOV32RmtoR(EAX,ECX);
MOV32RtoRm(EDX,EAX);
MOV32RmtoROffset(EAX,ECX,4);
MOV32RtoRmOffset(EDX,EAX,4);
MOV32RmtoROffset(EAX,ECX,8);
MOV32RtoRmOffset(EDX,EAX,8);
MOV32RmtoROffset(EAX,ECX,12);
MOV32RtoRmOffset(EDX,EAX,12);
}
break;
jNO_DEFAULT
}
// Direct path is done: jump over the handler path emitted next.
u8* cont=JMP8(0);
x86SetJ8(_fullread);
// Handler path: pick the RWFT size index (3 = 64 bit, 4 = 128 bit).
int szidx;
switch(bits)
{
case 64: szidx=3; break;
case 128: szidx=4; break;
jNO_DEFAULT
}
// eax still holds the vmap entry: its low byte is the handler index. Remove it from ecx
// and undo the 0x80000000 bias to recover the physical address passed to the handler.
MOVZX32R8toR(EAX,EAX);
SUB32RtoR(ECX,EAX);
//eax=[funct+eax]
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2);
SUB32ItoR(ECX,0x80000000);
CALL32R(EAX);
x86SetJ8(cont);
}
// Emits x86 code that performs an 8-, 16- or 32-bit VTLB read.
//   bits - access width to generate code for (8, 16 or 32; guarded by jASSUME below).
//   sign - for 8/16-bit reads: true emits sign-extending moves, false zero-extending ones.
// Register contract of the generated code:
// ecx - source address to read from
// Returns read value in eax.
// NOTE(review): operand meanings of the emitter macros are inferred from their names --
// confirm against the ix86 emitter headers.
void vtlb_DynGenRead32(u32 bits, bool sign)
{
jASSUME( bits <= 32 );
// eax = vmap entry for the page; ecx = addr + entry; sign set -> take the handler path.
MOV32RtoR(EAX,ECX);
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
ADD32RtoR(ECX,EAX);
u8* _fullread=JS8(0);
// Direct path: load straight from [ecx] into eax, extending to 32 bits as requested.
switch(bits)
{
case 8:
if( sign )
MOVSX32Rm8toR(EAX,ECX);
else
MOVZX32Rm8toR(EAX,ECX);
break;
case 16:
if( sign )
MOVSX32Rm16toR(EAX,ECX);
else
MOVZX32Rm16toR(EAX,ECX);
break;
case 32:
MOV32RmtoR(EAX,ECX);
break;
jNO_DEFAULT
}
// Direct path is done: jump over the handler path emitted next.
u8* cont=JMP8(0);
x86SetJ8(_fullread);
// Handler path: pick the RWFT size index (0 = 8 bit, 1 = 16 bit, 2 = 32 bit).
int szidx;
switch(bits)
{
case 8: szidx=0; break;
case 16: szidx=1; break;
case 32: szidx=2; break;
jNO_DEFAULT
}
// eax still holds the vmap entry: its low byte is the handler index. Remove it from ecx
// and undo the 0x80000000 bias before calling the read handler from RWFT[szidx][0].
MOVZX32R8toR(EAX,EAX);
SUB32RtoR(ECX,EAX);
//eax=[funct+eax]
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2);
SUB32ItoR(ECX,0x80000000);
CALL32R(EAX);
// perform sign extension on the result:
// (only the handler path needs this; the direct path above already extended its load)
if( bits==8 )
{
if( sign )
MOVSX32R8toR(EAX,EAX);
else
MOVZX32R8toR(EAX,EAX);
}
else if( bits==16 )
{
if( sign )
MOVSX32R16toR(EAX,EAX);
else
MOVZX32R16toR(EAX,EAX);
}
x86SetJ8(cont);
}
// Emits x86 code that performs a VTLB write of the given width.
//   sz - access width in bits; cases exist for 8, 16, 32, 64 and 128.
// Register contract of the generated code:
//   ecx - destination address
//   edx - the value itself for 8/16/32-bit writes; for 64/128-bit writes the code reads
//         the data through edx, i.e. it is treated as a pointer to the source data.
// As with the read generators, vmap[addr >> VTLB_PAGE_BITS] is added to the address: a
// non-negative sum is written to directly, a negative sum dispatches to the write handler
// selected by the low byte of the vmap entry (table RWFT[szidx][1]).
// NOTE(review): unlike the read generators, the switches here have no jNO_DEFAULT; an
// unsupported sz emits no direct store and falls back to szidx 0 -- confirm callers only
// pass the five supported widths.
// NOTE(review): operand meanings of the emitter macros are inferred from their names --
// confirm against the ix86 emitter headers.
void vtlb_DynGenWrite(u32 sz)
{
// eax = vmap entry for the page; ecx = addr + entry; sign set -> take the handler path.
MOV32RtoR(EAX,ECX);
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
ADD32RtoR(ECX,EAX);
u8* _full=JS8(0);
// Direct path: store to [ecx].
switch(sz)
{
//8 , 16, 32 : data on EDX
case 8:
MOV8RtoRm(ECX,EDX);
break;
case 16:
MOV16RtoRm(ECX,EDX);
break;
case 32:
MOV32RtoRm(ECX,EDX);
break;
// 64 / 128 : copy from [edx] to [ecx], through a temp MMX/XMM register when one is
// free, otherwise in 32-bit pieces through eax.
case 64:
if( _hasFreeMMXreg() )
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoROffset(freereg,EDX,0);
MOVQRtoRmOffset(ECX,freereg,0);
_freeMMXreg( freereg );
}
else
{
MOV32RmtoR(EAX,EDX);
MOV32RtoRm(ECX,EAX);
MOV32RmtoROffset(EAX,EDX,4);
MOV32RtoRmOffset(ECX,EAX,4);
}
break;
case 128:
if( _hasFreeXMMreg() )
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoROffset(freereg,EDX,0);
SSE2_MOVDQARtoRmOffset(ECX,freereg,0);
_freeXMMreg( freereg );
}
else
{
MOV32RmtoR(EAX,EDX);
MOV32RtoRm(ECX,EAX);
MOV32RmtoROffset(EAX,EDX,4);
MOV32RtoRmOffset(ECX,EAX,4);
MOV32RmtoROffset(EAX,EDX,8);
MOV32RtoRmOffset(ECX,EAX,8);
MOV32RmtoROffset(EAX,EDX,12);
MOV32RtoRmOffset(ECX,EAX,12);
}
break;
}
// Direct path is done: jump over the handler path emitted next.
u8* cont=JMP8(0);
x86SetJ8(_full);
// Handler path: pick the RWFT size index for this width.
int szidx=0;
switch(sz)
{
case 8: szidx=0; break;
case 16: szidx=1; break;
case 32: szidx=2; break;
case 64: szidx=3; break;
case 128: szidx=4; break;
}
// eax still holds the vmap entry: its low byte is the handler index. Remove it from ecx
// and undo the 0x80000000 bias before calling the write handler; edx is left untouched
// so the handler receives the data (or data pointer) as emitted by the caller.
MOVZX32R8toR(EAX,EAX);
SUB32RtoR(ECX,EAX);
//eax=[funct+eax]
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][1],2);
SUB32ItoR(ECX,0x80000000);
CALL32R(EAX);
x86SetJ8(cont);
}
#endif // PCSX2_VIRTUAL_MEM

View File

@ -27,41 +27,56 @@ typedef void __fastcall vltbMemW128FP(u32 addr,const mem128_t* data);
typedef u32 vtlbHandler;
void vtlb_Init();
void vtlb_Reset();
void vtlb_Term();
extern void vtlb_Init();
extern void vtlb_Reset();
extern void vtlb_Term();
//physical stuff
vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP* r32,vltbMemR64FP* r64,vltbMemR128FP* r128,
vltbMemW8FP* w8,vltbMemW16FP* w16,vltbMemW32FP* w32,vltbMemW64FP* w64,vltbMemW128FP* w128);
void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size);
void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize=0);
extern void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size);
extern void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize=0);
extern void* vtlb_GetPhyPtr(u32 paddr);
//void vtlb_Mirror(u32 new_region,u32 start,u32 size); // -> not working yet :(
//extern void vtlb_Mirror(u32 new_region,u32 start,u32 size); // -> not working yet :(
//virtual mappings
void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz);
void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz);
void vtlb_VMapUnmap(u32 vaddr,u32 sz);
extern void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz);
extern void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz);
extern void vtlb_VMapUnmap(u32 vaddr,u32 sz);
//Memory functions
u8 __fastcall vtlb_memRead8(u32 mem);
u16 __fastcall vtlb_memRead16(u32 mem);
u32 __fastcall vtlb_memRead32(u32 mem);
void __fastcall vtlb_memRead64(u32 mem, u64 *out);
void __fastcall vtlb_memRead128(u32 mem, u64 *out);
void __fastcall vtlb_memWrite8 (u32 mem, u8 value);
void __fastcall vtlb_memWrite16(u32 mem, u16 value);
void __fastcall vtlb_memWrite32(u32 mem, u32 value);
void __fastcall vtlb_memWrite64(u32 mem, const u64* value);
void __fastcall vtlb_memWrite128(u32 mem, const u64* value);
extern u8 __fastcall vtlb_memRead8(u32 mem);
extern u16 __fastcall vtlb_memRead16(u32 mem);
extern u32 __fastcall vtlb_memRead32(u32 mem);
extern void __fastcall vtlb_memRead64(u32 mem, u64 *out);
extern void __fastcall vtlb_memRead128(u32 mem, u64 *out);
extern void __fastcall vtlb_memWrite8 (u32 mem, u8 value);
extern void __fastcall vtlb_memWrite16(u32 mem, u16 value);
extern void __fastcall vtlb_memWrite32(u32 mem, u32 value);
extern void __fastcall vtlb_memWrite64(u32 mem, const u64* value);
extern void __fastcall vtlb_memWrite128(u32 mem, const u64* value);
extern void vtlb_DynGenWrite(u32 sz);
extern void vtlb_DynGenRead32(u32 bits, bool sign);
extern void vtlb_DynGenRead64(u32 sz);
namespace vtlb_private
{
static const uint VTLB_PAGE_BITS = 12;
static const uint VTLB_PAGE_MASK = 4095;
static const uint VTLB_PAGE_SIZE = 4096;
static const uint VTLB_PMAP_ITEMS = 0x20000000 / VTLB_PAGE_SIZE;
static const uint VTLB_PMAP_SZ = 0x20000000;
static const uint VTLB_VMAP_ITEMS = 0x100000000ULL / VTLB_PAGE_SIZE;
extern void* RWFT[5][2][128];
extern s32 pmap[VTLB_PMAP_ITEMS]; //512KB
extern s32 vmap[VTLB_VMAP_ITEMS]; //4MB
}
#endif
#endif

View File

@ -2696,6 +2696,10 @@
RelativePath="..\..\MemoryVM.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86-32\recVTLB.cpp"
>
</File>
<File
RelativePath="..\..\vtlb.cpp"
>

View File

@ -199,7 +199,7 @@ static void recCTC2(s32 info)
MOV32ItoM((uptr)&VU0.VI[_Fs_].UL,g_cpuConstRegs[_Rt_].UL[0]);
//PUSH32I( -1 );
iFlushCall(FLUSH_NOCONST);
CALLFunc((uptr)CpuVU0->ExecuteBlock);
CALLFunc((uptr)CpuVU0.ExecuteBlock);
//CALLFunc((uptr)vu0ExecMicro);
//ADD32ItoR( ESP, 4 );
break;

View File

@ -26,12 +26,21 @@
namespace VU0micro
{
static void recAlloc()
void recAlloc()
{
SuperVUAlloc(0);
}
void __fastcall recClear(u32 Addr, u32 Size)
{
SuperVUClear(Addr, Size*4, 0);
}
void recShutdown()
{
SuperVUDestroy( 0 );
}
static void recReset()
{
SuperVUReset(0);
@ -54,26 +63,14 @@ namespace VU0micro
SuperVUExecuteProgram(VU0.VI[ REG_TPC ].UL & 0xfff, 0);
FreezeXMMRegs(0);
}
static void recClear(u32 Addr, u32 Size)
{
SuperVUClear(Addr, Size*4, 0);
}
static void recShutdown()
{
SuperVUDestroy( 0 );
}
}
using namespace VU0micro;
VUmicroCpu recVU0 =
const VUmicroCpu recVU0 =
{
recAlloc
, recReset
recReset
, recStep
, recExecuteBlock
, recClear
, recShutdown
};

View File

@ -33,6 +33,22 @@ extern u32 vudump;
namespace VU1micro
{
void recAlloc()
{
SuperVUAlloc(1);
}
void __fastcall recClear( u32 Addr, u32 Size )
{
assert( (Addr&7) == 0 );
SuperVUClear(Addr, Size*4, 1);
}
void recShutdown()
{
SuperVUDestroy( 1 );
}
// commented out because I'm not sure it actually works anymore with SuperVU (air)
/*static void iVU1DumpBlock()
{
@ -64,16 +80,11 @@ namespace VU1micro
fclose( f );
}*/
static void recAlloc()
{
SuperVUAlloc(1);
}
static void recReset()
{
SuperVUReset(1);
// these shouldn't be needed, but shouldn't hurt anythign either.
// these shouldn't be needed, but shouldn't hurt anything either.
x86FpuState = FPU_STATE;
iCWstate = 0;
}
@ -111,28 +122,14 @@ namespace VU1micro
} while( VU0.VI[ REG_VPU_STAT ].UL&0x100 );
FreezeXMMRegs(0);
}
static void recClear( u32 Addr, u32 Size )
{
assert( (Addr&7) == 0 );
SuperVUClear(Addr, Size*4, 1);
}
static void recShutdown()
{
SuperVUDestroy( 1 );
}
}
using namespace VU1micro;
VUmicroCpu recVU1 =
const VUmicroCpu recVU1 =
{
recAlloc
, recReset
recReset
, recStep
, recExecuteBlock
, recClear
, recShutdown
};

View File

@ -443,7 +443,7 @@ void SuperVUReset(int vuindex)
}
// clear the block and any joining blocks
void SuperVUClear(u32 startpc, u32 size, int vuindex)
__forceinline void SuperVUClear(u32 startpc, u32 size, int vuindex)
{
vector<VuFunctionHeader::RANGE>::iterator itrange;
list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();

View File

@ -31,20 +31,20 @@ extern void SuperVUReset(int vuindex); // if vuindex is -1, resets everything
#ifdef __LINUX__
extern "C" {
#endif
void SuperVUExecuteProgram(u32 startpc, int vuindex);
void SuperVUEndProgram();
void svudispfntemp();
extern void SuperVUExecuteProgram(u32 startpc, int vuindex);
extern void SuperVUEndProgram();
extern void svudispfntemp();
#ifdef __LINUX__
}
#endif
void SuperVUClear(u32 startpc, u32 size, int vuindex);
extern void SuperVUClear(u32 startpc, u32 size, int vuindex);
// read = 0, will write to reg
// read = 1, will read from reg
// read = 2, addr of previously written reg (used for status and clip flags)
u32 SuperVUGetVIAddr(int reg, int read);
extern u32 SuperVUGetVIAddr(int reg, int read);
// if p == 0, flush q else flush p; if wait is != 0, waits for p/q
void SuperVUFlush(int p, int wait);
extern void SuperVUFlush(int p, int wait);
#endif

View File

@ -12,11 +12,16 @@
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
#define BASEBLOCK_SIZE 2 // in dwords
#define PCOFFSET 0x2a8
#define PCOFFSET 0x2a8 // this must always match what Pcsx2 displays at startup
#define REG_PC %ecx
#define REG_BLOCK %esi
//////////////////////////////////////////////////////////////////////////
// Recompiles the next block, and links the old block directly to it.
// This is a one-shot execution for any block which uses it. Once the block
// has been statically linked to the new block, this function will be bypassed
//
.globl Dispatcher
Dispatcher:
# EDX contains the jump addr to modify
@ -24,48 +29,59 @@ Dispatcher:
# calc PC_GETBLOCK
# ((BASEBLOCK*)(recLUT[((u32)(x)) >> 16] + (sizeof(BASEBLOCK)/4)*((x) & 0xffff)))
mov %eax, dword ptr [cpuRegs + PCOFFSET]
mov REG_BLOCK, %eax
mov REG_PC, %eax
shr %eax, 16
and REG_BLOCK, 0xffff
shl %eax, 2
add %eax, dword ptr [recLUT]
shl REG_BLOCK, 1
add REG_BLOCK, dword ptr [%eax]
#mov %eax, dword ptr [cpuRegs + PCOFFSET]
#mov REG_BLOCK, %eax
#mov REG_PC, %eax
#shr %eax, 16
#and REG_BLOCK, 0xffff
#shl %eax, 2
#add %eax, dword ptr [recLUT]
#shl REG_BLOCK, 1
#add REG_BLOCK, dword ptr [%eax]
mov %eax,dword ptr [cpuRegs+PCOFFSET]
mov %ecx,%eax
mov REG_PC,%eax
shr %eax,10h
and %ecx,0FFFFh
mov %edx,dword ptr [recLUT]
mov %eax,dword ptr [edx+eax*4]
lea %ecx,[eax+ecx*2]
// check if startpc == cpuRegs.pc
//and %ecx, 0x5fffffff // remove higher bits
cmp REG_PC, dword ptr [REG_BLOCK+BLOCKTYPE_STARTPC]
cmp REG_PC, dword ptr [%ecx+BLOCKTYPE_STARTPC]
je Dispatcher_CheckPtr
// recompile
push REG_BLOCK
push REG_PC // pc
call recRecompile
add %esp, 4 // pop old param
pop REG_BLOCK
add %esp, 4
pop %eax // eax is now the REG_BLOCK
Dispatcher_CheckPtr:
mov REG_BLOCK, dword ptr [REG_BLOCK]
mov %eax, dword ptr [%eax]
#ifdef _DEBUG
test REG_BLOCK, REG_BLOCK
test %eax, %eax
jnz Dispatcher_CallFn
// throw an exception
int 10
Dispatcher_CallFn:
#endif
and REG_BLOCK, 0x0fffffff
mov %edx, REG_BLOCK
and %eax, 0x0fffffff
pop %ecx // x86Ptr to mod
mov %edx, %eax
sub %edx, %ecx
sub %edx, 4
mov dword ptr [%ecx], %edx
jmp REG_BLOCK
jmp %eax
//////////////////////////////////////////////////////////////////////////
// edx - baseblock->startpc
// stack - x86Ptr
.globl DispatcherClear
DispatcherClear:
// EDX contains the current pc
@ -119,58 +135,47 @@ DispatcherClear_Recompile:
jmp %eax
//////////////////////////////////////////////////////////////////////////
// called when jumping to variable pc address
// This is basically the same as Dispatcher but without the part at the end
// that modifies the block's jmp instruction. (ie, no static block linking)
.globl DispatcherReg
DispatcherReg:
//s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc);
mov %edx, dword ptr [cpuRegs+PCOFFSET]
mov %ecx, %edx
shr %edx, 14
and %edx, 0xfffffffc
add %edx, [recLUT]
mov %edx, dword ptr [%edx]
mov %eax,dword ptr [cpuRegs+PCOFFSET]
mov %ecx,%eax
mov REG_PC,%eax
shr %eax,10h
and %ecx,0FFFFh
mov %edx,dword ptr [recLUT]
mov %eax,dword ptr [edx+eax*4]
lea %ecx,[eax+ecx*2]
mov %eax, %ecx
and %eax, 0xfffc
// %edx += 2*%eax
shl %eax, 1
add %edx, %eax
// check if startpc == cpuRegs.pc
mov %eax, %ecx
//and %eax, 0x5fffffff // remove higher bits
cmp %eax, dword ptr [%edx+BLOCKTYPE_STARTPC]
jne DispatcherReg_recomp
//and %ecx, 0x5fffffff // remove higher bits
cmp REG_PC, dword ptr [%ecx+BLOCKTYPE_STARTPC]
je Dispatcher_CheckPtr
mov %eax, dword ptr [%edx]
// recompile
push REG_BLOCK
push REG_PC // pc
call recRecompile
add %esp, 4
pop %eax // eax is now the REG_BLOCK
Dispatcher_CheckPtr:
mov %eax, dword ptr [%eax]
#ifdef _DEBUG
test %eax, %eax
jnz CallFn2
# throw an exception
jnz Dispatcher_CallFn
// throw an exception
int 10
CallFn2:
Dispatcher_CallFn:
#endif
and %eax, 0x0fffffff
jmp %eax // fnptr
DispatcherReg_recomp:
sub %esp, 8
mov dword ptr [%esp+4], %edx
mov dword ptr [%esp], %ecx
call recRecompile
mov %edx, dword ptr [%esp+4]
add %esp, 8
mov %eax, dword ptr [%edx]
and %eax, 0x0fffffff
jmp %eax // fnptr
jmp %eax
.globl _StartPerfCounter

View File

@ -0,0 +1,318 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2008 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "PrecompiledHeader.h"
#include "Common.h"
#include "vtlb.h"
#include "x86/ix86/ix86.h"
#include "iCore.h"
#include "iR5900.h"
using namespace vtlb_private;
/*
// Pseudo-Code For the following Dynarec Implementations -->
u32 vmv=vmap[addr>>VTLB_PAGE_BITS];
s32 ppf=addr+vmv;
if (!(ppf<0))
{
data[0]=*reinterpret_cast<DataType*>(ppf);
if (DataSize==128)
data[1]=*reinterpret_cast<DataType*>(ppf+8);
return 0;
}
else
{
//has to: translate, find function, call function
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
//SysPrintf("Translted 0x%08X to 0x%08X\n",addr,paddr);
return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data);
}
// And in ASM it looks something like this -->
mov eax,ecx;
shr eax,VTLB_PAGE_BITS;
mov eax,[eax*4+vmap];
add ecx,eax;
js _fullread;
//these are in the wrong order, just an example ...
mov [eax],ecx;
mov ecx,[edx];
mov [eax+4],ecx;
mov ecx,[edx+4];
mov [eax+4+4],ecx;
mov ecx,[edx+4+4];
mov [eax+4+4+4+4],ecx;
mov ecx,[edx+4+4+4+4];
///....
jmp cont;
_fullread:
movzx eax,al;
sub ecx,eax;
sub ecx,0x80000000;
call [eax+stuff];
cont:
........
*/
// Emits x86 code for a 64- or 128-bit vtlb read.
// Register usage of the generated code:
//   ecx - source address to read from
//   edx - pointer to the buffer that receives the data
// eax is used as scratch.
void vtlb_DynGenRead64(u32 bits)
{
	// eax = vmap[ecx >> VTLB_PAGE_BITS];  ecx += eax
	MOV32RtoR(EAX,ECX);
	SHR32ItoR(EAX,VTLB_PAGE_BITS);
	MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
	ADD32RtoR(ECX,EAX);

	// A negative sum branches to the handler-call path further down.
	u8* toHandler = JS8(0);

	// Direct path: copy from [ecx] to [edx].  The RWFT row for the
	// handler path is picked here as well (a plain C assignment, it
	// emits no code).
	int rwftRow;
	switch( bits )
	{
		case 64:
			rwftRow = 3;
			if( !_hasFreeMMXreg() )
			{
				// No MMX register to spare: copy as two dwords via eax.
				MOV32RmtoR(EAX,ECX);
				MOV32RtoRm(EDX,EAX);
				MOV32RmtoROffset(EAX,ECX,4);
				MOV32RtoRmOffset(EDX,EAX,4);
			}
			else
			{
				const int tmpReg = _allocMMXreg(-1, MMX_TEMP, 0);
				MOVQRmtoROffset(tmpReg,ECX,0);
				MOVQRtoRmOffset(EDX,tmpReg,0);
				_freeMMXreg(tmpReg);
			}
		break;

		case 128:
			rwftRow = 4;
			if( !_hasFreeXMMreg() )
			{
				// Could put in an MMX optimization here as well, but no point really.
				// It's almost never used since there's almost always a free XMM reg.
				MOV32RmtoR(EAX,ECX);
				MOV32RtoRm(EDX,EAX);
				for( int ofs = 4; ofs < 16; ofs += 4 )
				{
					MOV32RmtoROffset(EAX,ECX,ofs);
					MOV32RtoRmOffset(EDX,EAX,ofs);
				}
			}
			else
			{
				const int tmpReg = _allocTempXMMreg( XMMT_INT, -1 );
				SSE2_MOVDQARmtoROffset(tmpReg,ECX,0);
				SSE2_MOVDQARtoRmOffset(EDX,tmpReg,0);
				_freeXMMreg(tmpReg);
			}
		break;

		jNO_DEFAULT
	}

	u8* done = JMP8(0);

	// Handler path: the low byte of eax indexes the read-handler table.
	x86SetJ8( toHandler );
	MOVZX32R8toR(EAX,EAX);
	SUB32RtoR(ECX,EAX);
	// eax = RWFT[rwftRow][0][eax]
	MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[rwftRow][0],2);
	SUB32ItoR(ECX,0x80000000);
	CALL32R(EAX);

	x86SetJ8( done );
}
// Emits x86 code for an 8-, 16- or 32-bit vtlb read.
// Register usage of the generated code:
//   ecx - source address to read from
//   eax - receives the value read; 8/16-bit values are sign- or
//         zero-extended to 32 bits according to 'sign'
void vtlb_DynGenRead32(u32 bits, bool sign)
{
	jASSUME( bits <= 32 );

	// eax = vmap[ecx >> VTLB_PAGE_BITS];  ecx += eax
	MOV32RtoR(EAX,ECX);
	SHR32ItoR(EAX,VTLB_PAGE_BITS);
	MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
	ADD32RtoR(ECX,EAX);

	// A negative sum branches to the handler-call path further down.
	u8* toHandler = JS8(0);

	// Direct path: load straight from [ecx].  The RWFT row for the
	// handler path is picked here as well (a plain C assignment, it
	// emits no code).
	int rwftRow;
	switch( bits )
	{
		case 8:
			rwftRow = 0;
			if( !sign )
			{
				MOVZX32Rm8toR(EAX,ECX);
			}
			else
			{
				MOVSX32Rm8toR(EAX,ECX);
			}
		break;

		case 16:
			rwftRow = 1;
			if( !sign )
			{
				MOVZX32Rm16toR(EAX,ECX);
			}
			else
			{
				MOVSX32Rm16toR(EAX,ECX);
			}
		break;

		case 32:
			rwftRow = 2;
			MOV32RmtoR(EAX,ECX);
		break;

		jNO_DEFAULT
	}

	u8* done = JMP8(0);

	// Handler path: the low byte of eax indexes the read-handler table.
	x86SetJ8( toHandler );
	MOVZX32R8toR(EAX,EAX);
	SUB32RtoR(ECX,EAX);
	// eax = RWFT[rwftRow][0][eax]
	MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[rwftRow][0],2);
	SUB32ItoR(ECX,0x80000000);
	CALL32R(EAX);

	// Widen the handler's 8/16-bit result to the full 32 bits of eax.
	switch( bits )
	{
		case 8:
			if( !sign )
			{
				MOVZX32R8toR(EAX,EAX);
			}
			else
			{
				MOVSX32R8toR(EAX,EAX);
			}
		break;

		case 16:
			if( !sign )
			{
				MOVZX32R16toR(EAX,EAX);
			}
			else
			{
				MOVSX32R16toR(EAX,EAX);
			}
		break;

		default:	// 32 bits: nothing to extend
		break;
	}

	x86SetJ8( done );
}
// Emits x86 code for a vtlb write of 'sz' bits (8, 16, 32, 64 or 128).
// Register usage of the generated code:
//   ecx - destination address
//   edx - the data itself for 8/16/32-bit writes; for 64/128-bit
//         writes the data is loaded from memory at [edx]
// eax is used as scratch.
void vtlb_DynGenWrite(u32 sz)
{
	// eax = vmap[ecx >> VTLB_PAGE_BITS];  ecx += eax
	MOV32RtoR(EAX,ECX);
	SHR32ItoR(EAX,VTLB_PAGE_BITS);
	MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
	ADD32RtoR(ECX,EAX);

	// A negative sum branches to the handler-call path further down.
	u8* toHandler = JS8(0);

	// Direct path: store straight to [ecx].  The RWFT row used by the
	// handler path is chosen alongside (plain C assignment, emits no
	// code); it stays 0 for any size not listed.
	int rwftRow = 0;
	switch( sz )
	{
		case 8:
			rwftRow = 0;
			MOV8RtoRm(ECX,EDX);
		break;

		case 16:
			rwftRow = 1;
			MOV16RtoRm(ECX,EDX);
		break;

		case 32:
			rwftRow = 2;
			MOV32RtoRm(ECX,EDX);
		break;

		case 64:
			rwftRow = 3;
			if( !_hasFreeMMXreg() )
			{
				// No MMX register to spare: copy as two dwords via eax.
				MOV32RmtoR(EAX,EDX);
				MOV32RtoRm(ECX,EAX);
				MOV32RmtoROffset(EAX,EDX,4);
				MOV32RtoRmOffset(ECX,EAX,4);
			}
			else
			{
				const int tmpReg = _allocMMXreg(-1, MMX_TEMP, 0);
				MOVQRmtoROffset(tmpReg,EDX,0);
				MOVQRtoRmOffset(ECX,tmpReg,0);
				_freeMMXreg( tmpReg );
			}
		break;

		case 128:
			rwftRow = 4;
			if( !_hasFreeXMMreg() )
			{
				// Could put in an MMX optimization here as well, but no point really.
				// It's almost never used since there's almost always a free XMM reg.
				MOV32RmtoR(EAX,EDX);
				MOV32RtoRm(ECX,EAX);
				for( int ofs = 4; ofs < 16; ofs += 4 )
				{
					MOV32RmtoROffset(EAX,EDX,ofs);
					MOV32RtoRmOffset(ECX,EAX,ofs);
				}
			}
			else
			{
				const int tmpReg = _allocTempXMMreg( XMMT_INT, -1 );
				SSE2_MOVDQARmtoROffset(tmpReg,EDX,0);
				SSE2_MOVDQARtoRmOffset(ECX,tmpReg,0);
				_freeXMMreg( tmpReg );
			}
		break;
	}

	u8* done = JMP8(0);

	// Handler path: the low byte of eax indexes the write-handler table.
	x86SetJ8( toHandler );
	MOVZX32R8toR(EAX,EAX);
	SUB32RtoR(ECX,EAX);
	// eax = RWFT[rwftRow][1][eax]
	MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[rwftRow][1],2);
	SUB32ItoR(ECX,0x80000000);
	CALL32R(EAX);

	x86SetJ8( done );
}