mirror of https://github.com/PCSX2/pcsx2.git
Optimized VTLB's mappings for VU0/VU1. Seems like a pretty big speedup to me, but my PC sucks too much on VU1-intensive games for me to be sure. :)
Bugfix: VUSkip works again! However, frameskipping is still not where it should be... it seems to like to skip too much (grr!)

Linux: Committed a new version of aR5900-32.S, which should work better than the old version (but I can't really test it).

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@651 a6443dda-0b58-4228-96e9-037be469359c
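Below is a minimal, hypothetical sketch (not the code from this commit) of the remapping idea the diff implements: VU0/VU1 micro memory keeps two registered VTLB handlers per unit -- one whose write paths call the recompiler's recClear, one that calls the interpreter's intClear -- and memMapVUmicro() points the mapping at whichever matches the current rec setting. The simplified handler type and the boolean parameters here are assumptions for illustration; the real code uses CHECK_VU0REC/CHECK_VU1REC and the handler tables shown in the diff.

// Hypothetical illustration only -- simplified from the pattern in the diff below.
typedef unsigned int u32;
typedef u32 vtlbHandler;

// Assumed to exist (declared in vtlb.h in the diff): binds a registered handler
// to a physical address range.
extern void vtlb_MapHandler(vtlbHandler handler, u32 start, u32 size);

// [0] = handler whose writes invoke recClear (dynarec), [1] = interpreter version.
extern vtlbHandler vu0_micro_mem[2];
extern vtlbHandler vu1_micro_mem[2];

// Re-points the VU micro memory mappings whenever the dynarec settings change,
// so that a write to VU program memory clears blocks in the right execution engine.
void memMapVUmicro_sketch(bool vu0UsesRec, bool vu1UsesRec)
{
    vtlb_MapHandler(vu0_micro_mem[vu0UsesRec ? 0 : 1], 0x11000000, 0x00004000);
    vtlb_MapHandler(vu1_micro_mem[vu1UsesRec ? 0 : 1], 0x11008000, 0x00004000);
}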
parent e4e8b6dbaf -> commit 9a5da04452

pcsx2/Memory.cpp | 126
@ -163,11 +163,19 @@ vtlbHandler tlb_fallback_6;
|
|||
vtlbHandler tlb_fallback_7;
|
||||
vtlbHandler tlb_fallback_8;
|
||||
|
||||
vtlbHandler vu0_micro_mem;
|
||||
vtlbHandler vu1_micro_mem;
|
||||
vtlbHandler vu0_micro_mem[2]; // 0 - dynarec, 1 - interpreter
|
||||
vtlbHandler vu1_micro_mem[2]; // 0 - dynarec, 1 - interpreter
|
||||
|
||||
vtlbHandler hw_by_page[0x10];
|
||||
|
||||
// Used to remap the VUmicro memory according to the VU0/VU1 dynarec setting.
|
||||
// (the VU memory operations are different for recs vs. interpreters)
|
||||
void memMapVUmicro()
|
||||
{
|
||||
vtlb_MapHandler(vu0_micro_mem[CHECK_VU0REC ? 0 : 1],0x11000000,0x00004000);
|
||||
vtlb_MapHandler(vu1_micro_mem[CHECK_VU1REC ? 0 : 1],0x11008000,0x00004000);
|
||||
}
|
||||
|
||||
void memMapPhy()
|
||||
{
|
||||
//Main mem
|
||||
|
@ -185,15 +193,7 @@ void memMapPhy()
|
|||
//IOP mem
|
||||
vtlb_MapBlock(psxM,0x1c000000,0x00800000);
|
||||
|
||||
//VU0:Micro
|
||||
//vtlb_MapBlock(VU0.Micro,0x11000000,0x00004000,0x1000);
|
||||
vtlb_MapHandler(vu0_micro_mem,0x11000000,0x00004000);
|
||||
//VU0:Mem
|
||||
vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000);
|
||||
//VU1:Micro
|
||||
//vtlb_MapBlock(VU1.Micro,0x11008000,0x00004000);
|
||||
vtlb_MapHandler(vu1_micro_mem,0x11008000,0x00004000);
|
||||
//VU1:Mem
|
||||
vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000);
|
||||
|
||||
//These fallback to mem* stuff ...
|
||||
|
@ -447,7 +447,20 @@ void __fastcall _ext_memWrite128(u32 mem, const u64 *value)
|
|||
}
|
||||
|
||||
#define vtlb_RegisterHandlerTempl1(nam,t) vtlb_RegisterHandler(nam##Read8<t>,nam##Read16<t>,nam##Read32<t>,nam##Read64<t>,nam##Read128<t>, \
|
||||
nam##Write8<t>,nam##Write16<t>,nam##Write32<t>,nam##Write64<t>,nam##Write128<t>);
|
||||
nam##Write8<t>,nam##Write16<t>,nam##Write32<t>,nam##Write64<t>,nam##Write128<t>)
|
||||
|
||||
#define vtlb_RegisterHandlerTempl2(nam,t,rec) vtlb_RegisterHandler(nam##Read8<t>,nam##Read16<t>,nam##Read32<t>,nam##Read64<t>,nam##Read128<t>, \
|
||||
nam##Write8<t,rec>,nam##Write16<t,rec>,nam##Write32<t,rec>,nam##Write64<t,rec>,nam##Write128<t,rec>)
|
||||
|
||||
typedef void __fastcall ClearFunc_t( u32 addr, u32 qwc );
|
||||
|
||||
template<int vunum, bool dynarec>
|
||||
static __forceinline ClearFunc_t& GetClearFunc()
|
||||
{
|
||||
return dynarec ?
|
||||
(( vunum==0 ) ? VU0micro::recClear : VU1micro::recClear)
|
||||
: (( vunum==0 ) ? VU0micro::intClear : VU1micro::intClear);
|
||||
}
|
||||
|
||||
template<int vunum>
|
||||
mem8_t __fastcall vuMicroRead8(u32 addr)
|
||||
|
@ -498,89 +511,74 @@ void __fastcall vuMicroRead128(u32 addr,mem128_t* data)
|
|||
// [TODO] : Profile this code and see how often the VUs get written, and how
|
||||
// often it changes the values being written (invoking a cpuClear).
|
||||
|
||||
template<int vunum>
|
||||
template<int vunum, bool dynrec>
|
||||
void __fastcall vuMicroWrite8(u32 addr,mem8_t data)
|
||||
{
|
||||
addr&=(vunum==0)?0xfff:0x3fff;
|
||||
VURegs* vu=(vunum==0)?&VU0:&VU1;
|
||||
addr &= (vunum==0) ? 0xfff : 0x3fff;
|
||||
VURegs& vu = (vunum==0) ? VU0 : VU1;
|
||||
|
||||
if (vu->Micro[addr]!=data)
|
||||
if (vu.Micro[addr]!=data)
|
||||
{
|
||||
vu->Micro[addr]=data;
|
||||
vu.Micro[addr]=data;
|
||||
|
||||
if (vunum==0)
|
||||
CpuVU0->Clear(addr&(~7),1);
|
||||
else
|
||||
CpuVU1->Clear(addr&(~7),1);
|
||||
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
|
||||
}
|
||||
}
|
||||
|
||||
template<int vunum>
|
||||
template<int vunum, bool dynrec>
|
||||
void __fastcall vuMicroWrite16(u32 addr,mem16_t data)
|
||||
{
|
||||
addr&=(vunum==0)?0xfff:0x3fff;
|
||||
VURegs* vu=(vunum==0)?&VU0:&VU1;
|
||||
addr &= (vunum==0) ? 0xfff : 0x3fff;
|
||||
VURegs& vu = (vunum==0) ? VU0 : VU1;
|
||||
|
||||
if (*(u16*)&vu->Micro[addr]!=data)
|
||||
if (*(u16*)&vu.Micro[addr]!=data)
|
||||
{
|
||||
*(u16*)&vu->Micro[addr]=data;
|
||||
*(u16*)&vu.Micro[addr]=data;
|
||||
|
||||
if (vunum==0)
|
||||
CpuVU0->Clear(addr&(~7),1);
|
||||
else
|
||||
CpuVU1->Clear(addr&(~7),1);
|
||||
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
|
||||
}
|
||||
}
|
||||
|
||||
template<int vunum>
|
||||
template<int vunum, bool dynrec>
|
||||
void __fastcall vuMicroWrite32(u32 addr,mem32_t data)
|
||||
{
|
||||
addr&=(vunum==0)?0xfff:0x3fff;
|
||||
VURegs* vu=(vunum==0)?&VU0:&VU1;
|
||||
addr &= (vunum==0) ? 0xfff : 0x3fff;
|
||||
VURegs& vu = (vunum==0) ? VU0 : VU1;
|
||||
|
||||
if (*(u32*)&vu->Micro[addr]!=data)
|
||||
if (*(u32*)&vu.Micro[addr]!=data)
|
||||
{
|
||||
*(u32*)&vu->Micro[addr]=data;
|
||||
*(u32*)&vu.Micro[addr]=data;
|
||||
|
||||
if (vunum==0)
|
||||
CpuVU0->Clear(addr&(~7),1);
|
||||
else
|
||||
CpuVU1->Clear(addr&(~7),1);
|
||||
GetClearFunc<vunum, dynrec>()(addr&(~7),1);
|
||||
}
|
||||
}
|
||||
|
||||
template<int vunum>
|
||||
template<int vunum, bool dynrec>
|
||||
void __fastcall vuMicroWrite64(u32 addr,const mem64_t* data)
|
||||
{
|
||||
addr&=(vunum==0)?0xfff:0x3fff;
|
||||
VURegs* vu=(vunum==0)?&VU0:&VU1;
|
||||
addr &= (vunum==0) ? 0xfff : 0x3fff;
|
||||
VURegs& vu = (vunum==0) ? VU0 : VU1;
|
||||
|
||||
if (*(u64*)&vu->Micro[addr]!=data[0])
|
||||
if (*(u64*)&vu.Micro[addr]!=data[0])
|
||||
{
|
||||
*(u64*)&vu->Micro[addr]=data[0];
|
||||
*(u64*)&vu.Micro[addr]=data[0];
|
||||
|
||||
if (vunum==0)
|
||||
CpuVU0->Clear(addr,1);
|
||||
else
|
||||
CpuVU1->Clear(addr,1);
|
||||
GetClearFunc<vunum, dynrec>()(addr,1);
|
||||
}
|
||||
}
|
||||
|
||||
template<int vunum>
|
||||
template<int vunum, bool dynrec>
|
||||
void __fastcall vuMicroWrite128(u32 addr,const mem128_t* data)
|
||||
{
|
||||
addr&=(vunum==0)?0xfff:0x3fff;
|
||||
VURegs* vu=(vunum==0)?&VU0:&VU1;
|
||||
addr &= (vunum==0) ? 0xfff : 0x3fff;
|
||||
VURegs& vu = (vunum==0) ? VU0 : VU1;
|
||||
|
||||
if (*(u64*)&vu->Micro[addr]!=data[0] || *(u64*)&vu->Micro[addr+8]!=data[1])
|
||||
if (*(u64*)&vu.Micro[addr]!=data[0] || *(u64*)&vu.Micro[addr+8]!=data[1])
|
||||
{
|
||||
*(u64*)&vu->Micro[addr]=data[0];
|
||||
*(u64*)&vu->Micro[addr+8]=data[1];
|
||||
*(u64*)&vu.Micro[addr]=data[0];
|
||||
*(u64*)&vu.Micro[addr+8]=data[1];
|
||||
|
||||
if (vunum==0)
|
||||
CpuVU0->Clear(addr,2);
|
||||
else
|
||||
CpuVU1->Clear(addr,2);
|
||||
GetClearFunc<vunum, dynrec>()(addr,2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -605,7 +603,7 @@ void memClearPageAddr(u32 vaddr)
|
|||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// VTLB Memory Init / Reset / Shutdown
|
||||
// PS2 Memory Init / Reset / Shutdown
|
||||
|
||||
static const uint m_allMemSize =
|
||||
Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2 + Ps2MemSize::ERom +
|
||||
|
@ -707,8 +705,13 @@ void memReset()
|
|||
tlb_fallback_7=vtlb_RegisterHandlerTempl1(_ext_mem,7);
|
||||
tlb_fallback_8=vtlb_RegisterHandlerTempl1(_ext_mem,8);
|
||||
|
||||
vu0_micro_mem=vtlb_RegisterHandlerTempl1(vuMicro,0);
|
||||
vu1_micro_mem=vtlb_RegisterHandlerTempl1(vuMicro,1);
|
||||
// Dynarec versions of VUs
|
||||
vu0_micro_mem[0] = vtlb_RegisterHandlerTempl2(vuMicro,0,true);
|
||||
vu1_micro_mem[0] = vtlb_RegisterHandlerTempl2(vuMicro,1,true);
|
||||
|
||||
// Interpreter versions of VUs
|
||||
vu0_micro_mem[1] = vtlb_RegisterHandlerTempl2(vuMicro,0,false);
|
||||
vu1_micro_mem[1] = vtlb_RegisterHandlerTempl2(vuMicro,1,false);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// psHw Optimized Mappings
|
||||
|
@ -719,7 +722,7 @@ void memReset()
|
|||
tlb_fallback_1 = vtlb_RegisterHandler(
|
||||
_ext_memRead8<1>, _ext_memRead16<1>, hwRead32_page_other, _ext_memRead64<1>, _ext_memRead128<1>,
|
||||
_ext_memWrite8<1>, _ext_memWrite16<1>, hwWrite32_page_other, _ext_memWrite64<1>, _ext_memWrite128<1>
|
||||
);
|
||||
);
|
||||
|
||||
hw_by_page[0x0] = vtlb_RegisterHandler(
|
||||
_ext_memRead8<1>, _ext_memRead16<1>, hwRead32_page_00, _ext_memRead64<1>, _ext_memRead128<1>,
|
||||
|
@ -763,6 +766,7 @@ void memReset()
|
|||
//vtlb_VMapUnmap(0x20000000,0x60000000);
|
||||
|
||||
memMapPhy();
|
||||
memMapVUmicro();
|
||||
memMapKernelMem();
|
||||
memMapSupervisorMem();
|
||||
memMapUserMem();
|
||||
|
|
|
@ -230,21 +230,22 @@ extern u8 g_RealGSMem[0x2000];
|
|||
#define PSMu32(mem) (*(u32*)PSM(mem))
|
||||
#define PSMu64(mem) (*(u64*)PSM(mem))
|
||||
|
||||
void memAlloc();
|
||||
void memReset(); // clears PS2 ram and loads the bios. Throws Exception::FileNotFound on error.
|
||||
void memSetKernelMode();
|
||||
void memSetSupervisorMode();
|
||||
void memSetUserMode();
|
||||
void memSetPageAddr(u32 vaddr, u32 paddr);
|
||||
void memClearPageAddr(u32 vaddr);
|
||||
void memShutdown();
|
||||
extern void memAlloc();
|
||||
extern void memReset(); // clears PS2 ram and loads the bios. Throws Exception::FileNotFound on error.
|
||||
extern void memShutdown();
|
||||
extern void memSetKernelMode();
|
||||
extern void memSetSupervisorMode();
|
||||
extern void memSetUserMode();
|
||||
extern void memSetPageAddr(u32 vaddr, u32 paddr);
|
||||
extern void memClearPageAddr(u32 vaddr);
|
||||
|
||||
extern void memMapVUmicro();
|
||||
|
||||
#ifdef __LINUX__
|
||||
void SysPageFaultExceptionFilter( int signal, siginfo_t *info, void * );
|
||||
void __fastcall InstallLinuxExceptionHandler();
|
||||
void __fastcall ReleaseLinuxExceptionHandler();
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#else
|
||||
int SysPageFaultExceptionFilter(EXCEPTION_POINTERS* eps);
|
||||
#endif
|
||||
|
||||
|
@ -258,13 +259,6 @@ void mmap_ResetBlockTracking();
|
|||
extern void __fastcall memRead8(u32 mem, u8 *out);
|
||||
extern void __fastcall memRead16(u32 mem, u16 *out);
|
||||
extern void __fastcall memRead32(u32 mem, u32 *out);
|
||||
/*int __fastcall _memRead64(u32 mem, u64 *out);
|
||||
int __fastcall _memRead128(u32 mem, u64 *out);
|
||||
void __fastcall _memWrite8 (u32 mem, u8 value);
|
||||
void __fastcall _memWrite16(u32 mem, u16 value);
|
||||
void __fastcall _memWrite32(u32 mem, u32 value);
|
||||
void __fastcall _memWrite64(u32 mem, u64 value);
|
||||
void __fastcall _memWrite128(u32 mem, u64 *value);*/
|
||||
|
||||
#define memRead64 vtlb_memRead64
|
||||
#define memRead128 vtlb_memRead128
|
||||
|
|
|
@ -522,7 +522,7 @@ __forceinline bool _cpuBranchTest_Shared()
|
|||
{
|
||||
// We're in a BranchTest. All dynarec registers are flushed
|
||||
// so there is no need to freeze registers here.
|
||||
CpuVU0->ExecuteBlock();
|
||||
CpuVU0.ExecuteBlock();
|
||||
|
||||
// This might be needed to keep the EE and VU0 in sync.
|
||||
// A better fix will require hefty changes to the VU recs. -_-
|
||||
|
|
|
@ -51,11 +51,11 @@ static void TestClearVUs(u32 madr, u32 size)
|
|||
if( madr >= 0x11000000 ) {
|
||||
if( madr < 0x11004000 ) {
|
||||
DbgCon::Notice("scratch pad clearing vu0");
|
||||
CpuVU0->Clear(madr&0xfff, size);
|
||||
CpuVU0.Clear(madr&0xfff, size);
|
||||
}
|
||||
else if( madr >= 0x11008000 && madr < 0x1100c000 ) {
|
||||
DbgCon::Notice("scratch pad clearing vu1\n");
|
||||
CpuVU1->Clear(madr&0x3fff, size);
|
||||
CpuVU1.Clear(madr&0x3fff, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -248,7 +248,7 @@ void SysAllocateDynarecs()
|
|||
|
||||
try
|
||||
{
|
||||
recVU0.Allocate();
|
||||
VU0micro::recAlloc();
|
||||
}
|
||||
catch( Exception::BaseException& ex )
|
||||
{
|
||||
|
@ -260,12 +260,12 @@ void SysAllocateDynarecs()
|
|||
);
|
||||
|
||||
g_Session.ForceDisableVU0rec = true;
|
||||
recVU0.Shutdown();
|
||||
VU0micro::recShutdown();
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
recVU1.Allocate();
|
||||
VU1micro::recAlloc();
|
||||
}
|
||||
catch( Exception::BaseException& ex )
|
||||
{
|
||||
|
@ -277,7 +277,7 @@ void SysAllocateDynarecs()
|
|||
);
|
||||
|
||||
g_Session.ForceDisableVU1rec = true;
|
||||
recVU1.Shutdown();
|
||||
VU1micro::recShutdown();
|
||||
}
|
||||
|
||||
// If both VUrecs failed, then make sure the SuperVU is totally closed out:
|
||||
|
|
|
@ -79,7 +79,7 @@ void _vu0WaitMicro() {
|
|||
VU0.flags&= ~VUFLAG_MFLAGSET;
|
||||
|
||||
do {
|
||||
CpuVU0->ExecuteBlock();
|
||||
CpuVU0.ExecuteBlock();
|
||||
// knockout kings 2002 loops here
|
||||
if( VU0.cycle-startcycle > 0x1000 ) {
|
||||
Console::Notice("VU0 perma-stall, breaking execution..."); // (email zero if gfx are bad)
|
||||
|
@ -351,7 +351,7 @@ void vu0Finish()
|
|||
int i = 0;
|
||||
|
||||
while(i++ < 32) {
|
||||
CpuVU0->ExecuteBlock();
|
||||
CpuVU0.ExecuteBlock();
|
||||
if(!(VU0.VI[REG_VPU_STAT].UL & 0x1))
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ void vu0ExecMicro(u32 addr) {
|
|||
|
||||
if (addr != -1) VU0.VI[REG_TPC].UL = addr;
|
||||
_vuExecMicroDebug(VU0);
|
||||
CpuVU0->ExecuteBlock();
|
||||
CpuVU0.ExecuteBlock();
|
||||
|
||||
// If the VU0 program didn't finish then we'll want to finish it up
|
||||
// pretty soon. This fixes vmhacks in some games (Naruto Ultimate Ninja 2)
|
||||
|
|
|
@ -193,58 +193,61 @@ void vu0Exec(VURegs* VU)
|
|||
if (VU->VF[0].f.w != 1.0f) DbgCon::Error("VF[0].w != 1.0!!!!\n");
|
||||
}
|
||||
|
||||
static void intAlloc()
|
||||
namespace VU0micro
|
||||
{
|
||||
}
|
||||
|
||||
static void intReset()
|
||||
{
|
||||
}
|
||||
|
||||
static void intStep()
|
||||
{
|
||||
vu0Exec( &VU0 );
|
||||
}
|
||||
|
||||
static void intExecuteBlock()
|
||||
{
|
||||
int i;
|
||||
|
||||
#ifdef _DEBUG
|
||||
int prevbranch;
|
||||
#endif
|
||||
|
||||
for (i = 128; i--;) {
|
||||
|
||||
if ((VU0.VI[REG_VPU_STAT].UL & 0x1) == 0)
|
||||
break;
|
||||
|
||||
#ifdef _DEBUG
|
||||
prevbranch = vu0branch;
|
||||
#endif
|
||||
vu0Exec(&VU0);
|
||||
static void intAlloc()
|
||||
{
|
||||
}
|
||||
|
||||
if( i < 0 && (VU0.branch || VU0.ebit) ) {
|
||||
// execute one more
|
||||
vu0Exec(&VU0);
|
||||
static void intShutdown()
|
||||
{
|
||||
}
|
||||
|
||||
void __fastcall intClear(u32 Addr, u32 Size)
|
||||
{
|
||||
}
|
||||
|
||||
static void intReset()
|
||||
{
|
||||
}
|
||||
|
||||
static void intStep()
|
||||
{
|
||||
vu0Exec( &VU0 );
|
||||
}
|
||||
|
||||
static void intExecuteBlock()
|
||||
{
|
||||
int i;
|
||||
|
||||
#ifdef _DEBUG
|
||||
int prevbranch;
|
||||
#endif
|
||||
|
||||
for (i = 128; i--;) {
|
||||
|
||||
if ((VU0.VI[REG_VPU_STAT].UL & 0x1) == 0)
|
||||
break;
|
||||
|
||||
#ifdef _DEBUG
|
||||
prevbranch = vu0branch;
|
||||
#endif
|
||||
vu0Exec(&VU0);
|
||||
}
|
||||
|
||||
if( i < 0 && (VU0.branch || VU0.ebit) ) {
|
||||
// execute one more
|
||||
vu0Exec(&VU0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void intClear(u32 Addr, u32 Size)
|
||||
{
|
||||
}
|
||||
using namespace VU0micro;
|
||||
|
||||
static void intShutdown()
|
||||
const VUmicroCpu intVU0 =
|
||||
{
|
||||
}
|
||||
|
||||
VUmicroCpu intVU0 =
|
||||
{
|
||||
intAlloc
|
||||
, intReset
|
||||
intReset
|
||||
, intStep
|
||||
, intExecuteBlock
|
||||
, intClear
|
||||
, intShutdown
|
||||
};
|
||||
|
|
|
@ -78,7 +78,7 @@ void vu1ExecMicro(u32 addr)
|
|||
while(VU0.VI[REG_VPU_STAT].UL & 0x100)
|
||||
{
|
||||
VUM_LOG("vu1ExecMicro > Stalling until current microprogram finishes");
|
||||
CpuVU1->ExecuteBlock();
|
||||
CpuVU1.ExecuteBlock();
|
||||
}
|
||||
|
||||
VUM_LOG("vu1ExecMicro %x\n", addr);
|
||||
|
@ -90,7 +90,7 @@ void vu1ExecMicro(u32 addr)
|
|||
if (addr != -1) VU1.VI[REG_TPC].UL = addr;
|
||||
_vuExecMicroDebug(VU1);
|
||||
|
||||
CpuVU1->ExecuteBlock();
|
||||
CpuVU1.ExecuteBlock();
|
||||
}
|
||||
|
||||
_vuRegsTables(VU1, VU1regs);
|
||||
|
|
|
@ -179,57 +179,59 @@ void vu1Exec(VURegs* VU)
|
|||
if (VU->VF[0].f.w != 1.0f) DbgCon::Error("VF[0].w != 1.0!!!!\n");
|
||||
}
|
||||
|
||||
static void intAlloc()
|
||||
namespace VU1micro
|
||||
{
|
||||
}
|
||||
|
||||
static void intReset()
|
||||
{
|
||||
}
|
||||
|
||||
static void intStep()
|
||||
{
|
||||
vu1Exec( &VU1 );
|
||||
}
|
||||
|
||||
static void intExecuteBlock()
|
||||
{
|
||||
int i;
|
||||
#ifdef _DEBUG
|
||||
int prevbranch;
|
||||
#endif
|
||||
|
||||
for (i = 128; i--;) {
|
||||
if ((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0)
|
||||
break;
|
||||
|
||||
#ifdef _DEBUG
|
||||
prevbranch = vu1branch;
|
||||
#endif
|
||||
vu1Exec(&VU1);
|
||||
void intAlloc()
|
||||
{
|
||||
}
|
||||
|
||||
if( i < 0 && (VU1.branch || VU1.ebit) ) {
|
||||
// execute one more
|
||||
vu1Exec(&VU1);
|
||||
void __fastcall intClear(u32 Addr, u32 Size)
|
||||
{
|
||||
}
|
||||
|
||||
void intShutdown()
|
||||
{
|
||||
}
|
||||
|
||||
static void intReset()
|
||||
{
|
||||
}
|
||||
|
||||
static void intStep()
|
||||
{
|
||||
vu1Exec( &VU1 );
|
||||
}
|
||||
|
||||
static void intExecuteBlock()
|
||||
{
|
||||
int i;
|
||||
#ifdef _DEBUG
|
||||
int prevbranch;
|
||||
#endif
|
||||
|
||||
for (i = 128; i--;) {
|
||||
if ((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0)
|
||||
break;
|
||||
|
||||
#ifdef _DEBUG
|
||||
prevbranch = vu1branch;
|
||||
#endif
|
||||
vu1Exec(&VU1);
|
||||
}
|
||||
|
||||
if( i < 0 && (VU1.branch || VU1.ebit) ) {
|
||||
// execute one more
|
||||
vu1Exec(&VU1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
using namespace VU1micro;
|
||||
|
||||
static void intClear(u32 Addr, u32 Size)
|
||||
const VUmicroCpu intVU1 =
|
||||
{
|
||||
}
|
||||
|
||||
static void intShutdown()
|
||||
{
|
||||
}
|
||||
|
||||
VUmicroCpu intVU1 =
|
||||
{
|
||||
intAlloc
|
||||
, intReset
|
||||
intReset
|
||||
, intStep
|
||||
, intExecuteBlock
|
||||
, intClear
|
||||
, intShutdown
|
||||
};
|
||||
|
|
|
@ -23,21 +23,43 @@
|
|||
|
||||
struct VUmicroCpu
|
||||
{
|
||||
void (*Allocate)(); // throws exceptions on failure.
|
||||
void (*Reset)();
|
||||
void (*Step)();
|
||||
void (*ExecuteBlock)(); // VUs should support block-level execution only.
|
||||
void (*Clear)(u32 Addr, u32 Size);
|
||||
void (*Shutdown)(); // deallocates memory reserved by Allocate
|
||||
void (__fastcall *Clear)(u32 Addr, u32 Size);
|
||||
};
|
||||
|
||||
extern VUmicroCpu *CpuVU0;
|
||||
extern VUmicroCpu intVU0;
|
||||
extern VUmicroCpu recVU0;
|
||||
extern VUmicroCpu CpuVU0;
|
||||
extern const VUmicroCpu intVU0;
|
||||
extern const VUmicroCpu recVU0;
|
||||
|
||||
extern VUmicroCpu *CpuVU1;
|
||||
extern VUmicroCpu intVU1;
|
||||
extern VUmicroCpu recVU1;
|
||||
extern VUmicroCpu CpuVU1;
|
||||
extern const VUmicroCpu intVU1;
|
||||
extern const VUmicroCpu recVU1;
|
||||
|
||||
namespace VU0micro
|
||||
{
|
||||
extern void recAlloc();
|
||||
extern void recShutdown();
|
||||
extern void __fastcall recClear(u32 Addr, u32 Size);
|
||||
|
||||
// Note: Interpreter functions are dummies -- they don't actually do anything.
|
||||
extern void intAlloc();
|
||||
extern void intShutdown();
|
||||
extern void __fastcall intClear(u32 Addr, u32 Size);
|
||||
}
|
||||
|
||||
namespace VU1micro
|
||||
{
|
||||
extern void recAlloc();
|
||||
extern void recShutdown();
|
||||
extern void __fastcall recClear(u32 Addr, u32 Size);
|
||||
|
||||
// Note: Interpreter functions are dummies -- they don't actually do anything.
|
||||
extern void intAlloc();
|
||||
extern void intShutdown();
|
||||
extern void __fastcall intClear(u32 Addr, u32 Size);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// These functions initialize memory for both VUs.
|
||||
|
|
|
@ -31,8 +31,11 @@ extern PSMEMORYBLOCK s_psVuMem;
|
|||
extern PSMEMORYMAP *memLUT;
|
||||
#endif
|
||||
|
||||
VUmicroCpu *CpuVU0;
|
||||
VUmicroCpu *CpuVU1;
|
||||
// The following CpuVU objects are value types instead of handles or pointers because they are
|
||||
// modified on the fly to implement VU1 Skip.
|
||||
|
||||
VUmicroCpu CpuVU0; // contains a working copy of the VU0 cpu functions/API
|
||||
VUmicroCpu CpuVU1; // contains a working copy of the VU1 cpu functions/API
|
||||
|
||||
static void DummyExecuteVU1Block(void)
|
||||
{
|
||||
|
@ -40,32 +43,27 @@ static void DummyExecuteVU1Block(void)
|
|||
VU1.vifRegs->stat &= ~4; // also reset the bit (grandia 3 works)
|
||||
}
|
||||
|
||||
void (*recVU1EB)(), (*intVU1EB)();
|
||||
|
||||
void vu1MicroEnableSkip()
|
||||
{
|
||||
CpuVU1->ExecuteBlock = DummyExecuteVU1Block;
|
||||
CpuVU1.ExecuteBlock = DummyExecuteVU1Block;
|
||||
}
|
||||
|
||||
void vu1MicroDisableSkip()
|
||||
{
|
||||
CpuVU1->ExecuteBlock = CHECK_VU1REC ? recVU1EB : intVU1EB;
|
||||
CpuVU1.ExecuteBlock = CHECK_VU1REC ? recVU1.ExecuteBlock : intVU1.ExecuteBlock;
|
||||
}
|
||||
|
||||
bool vu1MicroIsSkipping()
|
||||
{
|
||||
return CpuVU1->ExecuteBlock == DummyExecuteVU1Block;
|
||||
return CpuVU1.ExecuteBlock == DummyExecuteVU1Block;
|
||||
}
|
||||
|
||||
void vuMicroCpuReset()
|
||||
{
|
||||
recVU1EB = recVU1.ExecuteBlock;
|
||||
intVU1EB = intVU1.ExecuteBlock;
|
||||
|
||||
CpuVU0 = CHECK_VU0REC ? &recVU0 : &intVU0;
|
||||
CpuVU1 = CHECK_VU1REC ? &recVU1 : &intVU1;
|
||||
CpuVU0->Reset();
|
||||
CpuVU1->Reset();
|
||||
CpuVU0 = CHECK_VU0REC ? recVU0 : intVU0;
|
||||
CpuVU1 = CHECK_VU1REC ? recVU1 : intVU1;
|
||||
CpuVU0.Reset();
|
||||
CpuVU1.Reset();
|
||||
|
||||
// SuperVUReset will do nothing if none of the recs are initialized.
|
||||
// But it's needed if one or the other is initialized.
|
||||
|
@ -180,6 +178,8 @@ void vuMicroMemReset()
|
|||
jASSUME( VU0.Mem != NULL );
|
||||
jASSUME( VU1.Mem != NULL );
|
||||
|
||||
memMapVUmicro();
|
||||
|
||||
// === VU0 Initialization ===
|
||||
memzero_obj(VU0.ACC);
|
||||
memzero_obj(VU0.VF);
|
||||
|
@ -221,7 +221,6 @@ void vuMicroMemReset()
|
|||
// VU1.VI = (REG_VI*)(VU0.Mem + 0x4200);
|
||||
VU1.vuExec = vu1Exec;
|
||||
VU1.vifRegs = vif1Regs;
|
||||
|
||||
}
|
||||
|
||||
void SaveState::vuMicroFreeze()
|
||||
|
|
|
@ -219,7 +219,7 @@ __forceinline void vif1FLUSH() {
|
|||
|
||||
if( VU0.VI[REG_VPU_STAT].UL & 0x100 ) {
|
||||
do {
|
||||
CpuVU1->ExecuteBlock();
|
||||
CpuVU1.ExecuteBlock();
|
||||
} while(VU0.VI[REG_VPU_STAT].UL & 0x100);
|
||||
|
||||
g_vifCycles+= (VU1.cycle - _cycles)*BIAS;
|
||||
|
@ -813,7 +813,7 @@ static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size) {
|
|||
}*/
|
||||
if (memcmp(VU0.Micro + addr, data, size << 2)) {
|
||||
memcpy_fast(VU0.Micro + addr, data, size << 2);
|
||||
CpuVU0->Clear(addr, size);
|
||||
CpuVU0.Clear(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1487,7 +1487,7 @@ static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size) {
|
|||
assert( VU1.Micro > 0 );
|
||||
if (memcmp(VU1.Micro + addr, data, size << 2)) {
|
||||
memcpy_fast(VU1.Micro + addr, data, size << 2);
|
||||
CpuVU1->Clear(addr, size);
|
||||
CpuVU1.Clear(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
pcsx2/vtlb.cpp | 406
|
@ -1,3 +1,21 @@
|
|||
/* Pcsx2 - Pc Ps2 Emulator
|
||||
* Copyright (C) 2002-2008 Pcsx2 Team
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
/*
|
||||
EE physical map :
|
||||
[0000 0000,1000 0000) -> Ram (mirrored ?)
|
||||
|
@ -20,10 +38,9 @@
|
|||
#include "Common.h"
|
||||
#include "vtlb.h"
|
||||
#include "COP0.h"
|
||||
#include "x86/ix86/ix86.h"
|
||||
#include "iCore.h"
|
||||
|
||||
using namespace R5900;
|
||||
using namespace vtlb_private;
|
||||
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
#define verify(x) {if (!(x)) { (*(u8*)0)=3; }}
|
||||
|
@ -31,21 +48,16 @@ using namespace R5900;
|
|||
#define verify jASSUME
|
||||
#endif
|
||||
|
||||
static const uint VTLB_PAGE_BITS =12;
|
||||
static const uint VTLB_PAGE_MASK=(4095);
|
||||
static const uint VTLB_PAGE_SIZE=(4096);
|
||||
|
||||
static const uint VTLB_PMAP_ITEMS=(0x20000000/VTLB_PAGE_SIZE);
|
||||
static const uint VTLB_PMAP_SZ=0x20000000;
|
||||
static const uint VTLB_VMAP_ITEMS=(0x100000000ULL/VTLB_PAGE_SIZE);
|
||||
static s32 pmap[VTLB_PMAP_ITEMS]; //512KB
|
||||
static s32 vmap[VTLB_VMAP_ITEMS]; //4MB
|
||||
|
||||
// first indexer -- 8/16/32/64/128 bit tables [values 0-4]
|
||||
// second indexer -- read/write [0 or 1]
|
||||
// third indexer -- 128 pages of memory!
|
||||
static void* RWFT[5][2][128];
|
||||
namespace vtlb_private
|
||||
{
|
||||
s32 pmap[VTLB_PMAP_ITEMS]; //512KB
|
||||
s32 vmap[VTLB_VMAP_ITEMS]; //4MB
|
||||
|
||||
// first indexer -- 8/16/32/64/128 bit tables [values 0-4]
|
||||
// second indexer -- read/write [0 or 1]
|
||||
// third indexer -- 128 pages of memory!
|
||||
void* RWFT[5][2][128];
|
||||
}
|
||||
|
||||
vtlbHandler vtlbHandlerCount=0;
|
||||
|
||||
|
@ -81,7 +93,11 @@ callfunction:
|
|||
jmp [readfunctions8-0x800000+eax];
|
||||
}*/
|
||||
|
||||
// For 8, 16, and 32 bit accesses
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Interpreter Implementations of VTLB Memory Operations.
|
||||
// See recVTLB.cpp for the dynarec versions.
|
||||
|
||||
// Interpreted VTLB lookup for 8, 16, and 32 bit accesses
|
||||
template<int DataSize,typename DataType>
|
||||
__forceinline DataType __fastcall MemOp_r0(u32 addr)
|
||||
{
|
||||
|
@ -107,7 +123,7 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr)
|
|||
}
|
||||
}
|
||||
|
||||
// For 64 and 128 bit accesses.
|
||||
// Interpreted VTLB lookup for 64 and 128 bit accesses.
|
||||
template<int DataSize,typename DataType>
|
||||
__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
|
||||
{
|
||||
|
@ -234,26 +250,36 @@ void __fastcall vtlb_memWrite128(u32 mem, const mem128_t *value)
|
|||
}
|
||||
|
||||
// Some functions used by interpreters and stuff...
|
||||
// These maintain a "consistent" API with 64/128 reads.
|
||||
void __fastcall memRead8(u32 mem, u8 *out) { *out = vtlb_memRead8( mem ); }
|
||||
void __fastcall memRead16(u32 mem, u16 *out) { *out = vtlb_memRead16( mem ); }
|
||||
void __fastcall memRead32(u32 mem, u32 *out) { *out = vtlb_memRead32( mem ); }
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Error / TLB Miss Handlers
|
||||
//
|
||||
|
||||
// Generates a VtlbMiss Exception
|
||||
static __forceinline void vtlb_Miss(u32 addr,u32 mode)
|
||||
{
|
||||
SysPrintf("vtlb miss : addr 0x%X, mode %d\n",addr,mode);
|
||||
Console::Error("vtlb miss : addr 0x%X, mode %d", params addr,mode);
|
||||
verify(false);
|
||||
|
||||
if (mode==0)
|
||||
cpuTlbMissR(addr, cpuRegs.branch);
|
||||
else
|
||||
cpuTlbMissW(addr, cpuRegs.branch);
|
||||
}
|
||||
|
||||
// Just dies a horrible death for now.
|
||||
//
|
||||
static __forceinline void vtlb_BusError(u32 addr,u32 mode)
|
||||
{
|
||||
SysPrintf("vtlb bus error : addr 0x%X, mode %d\n",addr,mode);
|
||||
Console::Error("vtlb bus error : addr 0x%X, mode %d\n",params addr,mode);
|
||||
verify(false);
|
||||
}
|
||||
/////
|
||||
|
||||
///// Virtual Mapping Errors (TLB Miss)
|
||||
template<u32 saddr>
|
||||
mem8_t __fastcall vtlbUnmappedVRead8(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; }
|
||||
template<u32 saddr>
|
||||
|
@ -274,7 +300,8 @@ template<u32 saddr>
|
|||
void __fastcall vtlbUnmappedVWrite64(u32 addr,const mem64_t* data) { vtlb_Miss(addr|saddr,1); }
|
||||
template<u32 saddr>
|
||||
void __fastcall vtlbUnmappedVWrite128(u32 addr,const mem128_t* data) { vtlb_Miss(addr|saddr,1); }
|
||||
/////
|
||||
|
||||
///// Physical Mapping Errors (Bus Error)
|
||||
template<u32 saddr>
|
||||
mem8_t __fastcall vtlbUnmappedPRead8(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; }
|
||||
template<u32 saddr>
|
||||
|
@ -295,19 +322,34 @@ template<u32 saddr>
|
|||
void __fastcall vtlbUnmappedPWrite64(u32 addr,const mem64_t* data) { vtlb_BusError(addr|saddr,1); }
|
||||
template<u32 saddr>
|
||||
void __fastcall vtlbUnmappedPWrite128(u32 addr,const mem128_t* data) { vtlb_BusError(addr|saddr,1); }
|
||||
/////
|
||||
mem8_t __fastcall vtlbDefaultPhyRead8(u32 addr) { SysPrintf("vtlbDefaultPhyRead8: 0x%X\n",addr); verify(false); return -1; }
|
||||
mem16_t __fastcall vtlbDefaultPhyRead16(u32 addr) { SysPrintf("vtlbDefaultPhyRead16: 0x%X\n",addr); verify(false); return -1; }
|
||||
mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr) { SysPrintf("vtlbDefaultPhyRead32: 0x%X\n",addr); verify(false); return -1; }
|
||||
void __fastcall vtlbDefaultPhyRead64(u32 addr,mem64_t* data) { SysPrintf("vtlbDefaultPhyRead64: 0x%X\n",addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyRead128(u32 addr,mem128_t* data) { SysPrintf("vtlbDefaultPhyRead128: 0x%X\n",addr); verify(false); }
|
||||
|
||||
void __fastcall vtlbDefaultPhyWrite8(u32 addr,mem8_t data) { SysPrintf("vtlbDefaultPhyWrite8: 0x%X\n",addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyWrite16(u32 addr,mem16_t data) { SysPrintf("vtlbDefaultPhyWrite16: 0x%X\n",addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyWrite32(u32 addr,mem32_t data) { SysPrintf("vtlbDefaultPhyWrite32: 0x%X\n",addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { SysPrintf("vtlbDefaultPhyWrite64: 0x%X\n",addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { SysPrintf("vtlbDefaultPhyWrite128: 0x%X\n",addr); verify(false); }
|
||||
/////
|
||||
///// VTLB mapping errors (unmapped address spaces)
|
||||
mem8_t __fastcall vtlbDefaultPhyRead8(u32 addr) { Console::Error("vtlbDefaultPhyRead8: 0x%X",params addr); verify(false); return -1; }
|
||||
mem16_t __fastcall vtlbDefaultPhyRead16(u32 addr) { Console::Error("vtlbDefaultPhyRead16: 0x%X",params addr); verify(false); return -1; }
|
||||
mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr) { Console::Error("vtlbDefaultPhyRead32: 0x%X",params addr); verify(false); return -1; }
|
||||
void __fastcall vtlbDefaultPhyRead64(u32 addr,mem64_t* data) { Console::Error("vtlbDefaultPhyRead64: 0x%X",params addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyRead128(u32 addr,mem128_t* data) { Console::Error("vtlbDefaultPhyRead128: 0x%X",params addr); verify(false); }
|
||||
|
||||
void __fastcall vtlbDefaultPhyWrite8(u32 addr,mem8_t data) { Console::Error("vtlbDefaultPhyWrite8: 0x%X",params addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyWrite16(u32 addr,mem16_t data) { Console::Error("vtlbDefaultPhyWrite16: 0x%X",params addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyWrite32(u32 addr,mem32_t data) { Console::Error("vtlbDefaultPhyWrite32: 0x%X",params addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { Console::Error("vtlbDefaultPhyWrite64: 0x%X",params addr); verify(false); }
|
||||
void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console::Error("vtlbDefaultPhyWrite128: 0x%X",params addr); verify(false); }
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// VTLB Public API -- Init/Term/RegisterHandler stuff
|
||||
//
|
||||
|
||||
|
||||
// Registers a handler into the VTLB's internal handler array. The handler defines specific behavior
|
||||
// for how memory pages bound to the handler are read from / written to. If any of the handler pointers
|
||||
// are NULL, the memory operations will be mapped to the BusError handler (thus generating BusError
|
||||
// exceptions if the emulated app attempts to access them).
|
||||
//
|
||||
// Note: All handlers persist across calls to vtlb_Reset(), but are wiped/invalidated by calls to vtlb_Init()
|
||||
//
|
||||
// Returns a handle for the newly created handler. See vtlb_MapHandler for use of the return value.
|
||||
vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP* r32,vltbMemR64FP* r64,vltbMemR128FP* r128,
|
||||
vltbMemW8FP* w8,vltbMemW16FP* w16,vltbMemW32FP* w32,vltbMemW64FP* w64,vltbMemW128FP* w128)
|
||||
{
|
||||
|
@ -329,6 +371,13 @@ vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP
|
|||
return rv;
|
||||
}
|
||||
|
||||
// Maps the given handler (created with vtlb_RegisterHandler) to the specified memory region.
|
||||
// New mappings always assume priority over previous mappings, so place "generic" mappings for
|
||||
// large areas of memory first, and then specialize specific small regions of memory afterward.
|
||||
// A single handler can be mapped to many different regions by using multiple calls to this
|
||||
// function.
|
||||
//
|
||||
// The memory region start and size parameters must be pagesize aligned.
|
||||
void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size)
|
||||
{
|
||||
verify(0==(start&VTLB_PAGE_MASK));
|
||||
|
@ -343,6 +392,7 @@ void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size)
|
|||
size-=VTLB_PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize)
|
||||
{
|
||||
s32 baseint=(s32)base;
|
||||
|
@ -370,6 +420,7 @@ void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vtlb_Mirror(u32 new_region,u32 start,u32 size)
|
||||
{
|
||||
verify(0==(new_region&VTLB_PAGE_MASK));
|
||||
|
@ -394,6 +445,7 @@ __forceinline void* vtlb_GetPhyPtr(u32 paddr)
|
|||
return reinterpret_cast<void*>(pmap[paddr>>VTLB_PAGE_BITS]+(paddr&VTLB_PAGE_MASK));
|
||||
|
||||
}
|
||||
|
||||
//virtual mappings
|
||||
//TODO: Add invalid paddr checks
|
||||
void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz)
|
||||
|
@ -459,14 +511,14 @@ void vtlb_VMapUnmap(u32 vaddr,u32 sz)
|
|||
}
|
||||
}
|
||||
|
||||
// Clears vtlb handlers and memory mappings.
|
||||
void vtlb_Init()
|
||||
{
|
||||
//Reset all vars to default values
|
||||
vtlbHandlerCount=0;
|
||||
memzero_obj(RWFT);
|
||||
|
||||
//Register default handlers
|
||||
//Unmapped Virt handlers _MUST_ be registed first.
|
||||
//Unmapped Virt handlers _MUST_ be registered first.
|
||||
//On address translation the top bit cannot be preserved.This is not normaly a problem since
|
||||
//the physical address space can be 'compressed' to just 29 bits.However, to properly handle exceptions
|
||||
//there must be a way to get the full address back.Thats why i use these 2 functions and encode the hi bit directly into em :)
|
||||
|
@ -499,6 +551,8 @@ void vtlb_Init()
|
|||
vtlb_VMapUnmap((VTLB_VMAP_ITEMS-1)*VTLB_PAGE_SIZE,VTLB_PAGE_SIZE);
|
||||
}
|
||||
|
||||
// Performs a COP0-level reset of the PS2's TLB.
|
||||
// This function should probably be part of the COP0 rather than here in VTLB.
|
||||
void vtlb_Reset()
|
||||
{
|
||||
for(int i=0; i<48; i++) UnmapTLB(i);
|
||||
|
@ -509,284 +563,4 @@ void vtlb_Term()
|
|||
//nothing to do for now
|
||||
}
|
||||
|
||||
#include "iR5900.h"
|
||||
|
||||
//ecx = addr
|
||||
//edx = ptr
|
||||
void vtlb_DynGenRead64(u32 bits)
|
||||
{
|
||||
/*
|
||||
u32 vmv=vmap[addr>>VTLB_PAGE_BITS];
|
||||
s32 ppf=addr+vmv;
|
||||
if (!(ppf<0))
|
||||
{
|
||||
data[0]=*reinterpret_cast<DataType*>(ppf);
|
||||
if (DataSize==128)
|
||||
data[1]=*reinterpret_cast<DataType*>(ppf+8);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
//has to: translate, find function, call function
|
||||
u32 hand=(u8)vmv;
|
||||
u32 paddr=ppf-hand+0x80000000;
|
||||
//SysPrintf("Translted 0x%08X to 0x%08X\n",addr,paddr);
|
||||
return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data);
|
||||
}
|
||||
|
||||
mov eax,ecx;
|
||||
shr eax,VTLB_PAGE_BITS;
|
||||
mov eax,[eax*4+vmap];
|
||||
add ecx,eax;
|
||||
js _fullread;
|
||||
|
||||
//these are wrong order, just an example ...
|
||||
mov [eax],ecx;
|
||||
mov ecx,[edx];
|
||||
mov [eax+4],ecx;
|
||||
mov ecx,[edx+4];
|
||||
mov [eax+4+4],ecx;
|
||||
mov ecx,[edx+4+4];
|
||||
mov [eax+4+4+4+4],ecx;
|
||||
mov ecx,[edx+4+4+4+4];
|
||||
///....
|
||||
|
||||
jmp cont;
|
||||
_fullread:
|
||||
movzx eax,al;
|
||||
sub ecx,eax;
|
||||
sub ecx,0x80000000;
|
||||
call [eax+stuff];
|
||||
cont:
|
||||
........
|
||||
|
||||
*/
|
||||
MOV32RtoR(EAX,ECX);
|
||||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _fullread=JS8(0);
|
||||
switch(bits)
|
||||
{
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoROffset(freereg,ECX,0);
|
||||
MOVQRtoRmOffset(EDX,freereg,0);
|
||||
_freeMMXreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,ECX);
|
||||
MOV32RtoRm(EDX,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,4);
|
||||
MOV32RtoRmOffset(EDX,EAX,4);
|
||||
}
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoROffset(freereg,ECX,0);
|
||||
SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
|
||||
_freeXMMreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,ECX);
|
||||
MOV32RtoRm(EDX,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,4);
|
||||
MOV32RtoRmOffset(EDX,EAX,4);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,8);
|
||||
MOV32RtoRmOffset(EDX,EAX,8);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,12);
|
||||
MOV32RtoRmOffset(EDX,EAX,12);
|
||||
}
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
u8* cont=JMP8(0);
|
||||
x86SetJ8(_fullread);
|
||||
int szidx;
|
||||
|
||||
switch(bits)
|
||||
{
|
||||
case 64: szidx=3; break;
|
||||
case 128: szidx=4; break;
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
MOVZX32R8toR(EAX,EAX);
|
||||
SUB32RtoR(ECX,EAX);
|
||||
//eax=[funct+eax]
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2);
|
||||
SUB32ItoR(ECX,0x80000000);
|
||||
CALL32R(EAX);
|
||||
|
||||
x86SetJ8(cont);
|
||||
}
|
||||
|
||||
// ecx - source address to read from
|
||||
// Returns read value in eax.
|
||||
void vtlb_DynGenRead32(u32 bits, bool sign)
|
||||
{
|
||||
jASSUME( bits <= 32 );
|
||||
|
||||
MOV32RtoR(EAX,ECX);
|
||||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _fullread=JS8(0);
|
||||
|
||||
switch(bits)
|
||||
{
|
||||
case 8:
|
||||
if( sign )
|
||||
MOVSX32Rm8toR(EAX,ECX);
|
||||
else
|
||||
MOVZX32Rm8toR(EAX,ECX);
|
||||
break;
|
||||
|
||||
case 16:
|
||||
if( sign )
|
||||
MOVSX32Rm16toR(EAX,ECX);
|
||||
else
|
||||
MOVZX32Rm16toR(EAX,ECX);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
MOV32RmtoR(EAX,ECX);
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
u8* cont=JMP8(0);
|
||||
x86SetJ8(_fullread);
|
||||
int szidx;
|
||||
|
||||
switch(bits)
|
||||
{
|
||||
case 8: szidx=0; break;
|
||||
case 16: szidx=1; break;
|
||||
case 32: szidx=2; break;
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
MOVZX32R8toR(EAX,EAX);
|
||||
SUB32RtoR(ECX,EAX);
|
||||
//eax=[funct+eax]
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2);
|
||||
SUB32ItoR(ECX,0x80000000);
|
||||
CALL32R(EAX);
|
||||
|
||||
// perform sign extension on the result:
|
||||
|
||||
if( bits==8 )
|
||||
{
|
||||
if( sign )
|
||||
MOVSX32R8toR(EAX,EAX);
|
||||
else
|
||||
MOVZX32R8toR(EAX,EAX);
|
||||
}
|
||||
else if( bits==16 )
|
||||
{
|
||||
if( sign )
|
||||
MOVSX32R16toR(EAX,EAX);
|
||||
else
|
||||
MOVZX32R16toR(EAX,EAX);
|
||||
}
|
||||
|
||||
x86SetJ8(cont);
|
||||
}
|
||||
|
||||
void vtlb_DynGenWrite(u32 sz)
|
||||
{
|
||||
MOV32RtoR(EAX,ECX);
|
||||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _full=JS8(0);
|
||||
switch(sz)
|
||||
{
|
||||
//8 , 16, 32 : data on EDX
|
||||
case 8:
|
||||
MOV8RtoRm(ECX,EDX);
|
||||
break;
|
||||
case 16:
|
||||
MOV16RtoRm(ECX,EDX);
|
||||
break;
|
||||
case 32:
|
||||
MOV32RtoRm(ECX,EDX);
|
||||
break;
|
||||
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoROffset(freereg,EDX,0);
|
||||
MOVQRtoRmOffset(ECX,freereg,0);
|
||||
_freeMMXreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,EDX);
|
||||
MOV32RtoRm(ECX,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,EDX,4);
|
||||
MOV32RtoRmOffset(ECX,EAX,4);
|
||||
}
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoROffset(freereg,EDX,0);
|
||||
SSE2_MOVDQARtoRmOffset(ECX,freereg,0);
|
||||
_freeXMMreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,EDX);
|
||||
MOV32RtoRm(ECX,EAX);
|
||||
MOV32RmtoROffset(EAX,EDX,4);
|
||||
MOV32RtoRmOffset(ECX,EAX,4);
|
||||
MOV32RmtoROffset(EAX,EDX,8);
|
||||
MOV32RtoRmOffset(ECX,EAX,8);
|
||||
MOV32RmtoROffset(EAX,EDX,12);
|
||||
MOV32RtoRmOffset(ECX,EAX,12);
|
||||
}
|
||||
break;
|
||||
}
|
||||
u8* cont=JMP8(0);
|
||||
x86SetJ8(_full);
|
||||
int szidx=0;
|
||||
|
||||
switch(sz)
|
||||
{
|
||||
case 8: szidx=0; break;
|
||||
case 16: szidx=1; break;
|
||||
case 32: szidx=2; break;
|
||||
case 64: szidx=3; break;
|
||||
case 128: szidx=4; break;
|
||||
}
|
||||
MOVZX32R8toR(EAX,EAX);
|
||||
SUB32RtoR(ECX,EAX);
|
||||
//eax=[funct+eax]
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][1],2);
|
||||
SUB32ItoR(ECX,0x80000000);
|
||||
CALL32R(EAX);
|
||||
|
||||
x86SetJ8(cont);
|
||||
}
|
||||
|
||||
#endif // PCSX2_VIRTUAL_MEM
|
||||
|
|
pcsx2/vtlb.h | 53
|
@ -27,41 +27,56 @@ typedef void __fastcall vltbMemW128FP(u32 addr,const mem128_t* data);
|
|||
|
||||
typedef u32 vtlbHandler;
|
||||
|
||||
void vtlb_Init();
|
||||
void vtlb_Reset();
|
||||
void vtlb_Term();
|
||||
extern void vtlb_Init();
|
||||
extern void vtlb_Reset();
|
||||
extern void vtlb_Term();
|
||||
|
||||
//physical stuff
|
||||
vtlbHandler vtlb_RegisterHandler( vltbMemR8FP* r8,vltbMemR16FP* r16,vltbMemR32FP* r32,vltbMemR64FP* r64,vltbMemR128FP* r128,
|
||||
vltbMemW8FP* w8,vltbMemW16FP* w16,vltbMemW32FP* w32,vltbMemW64FP* w64,vltbMemW128FP* w128);
|
||||
|
||||
void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size);
|
||||
void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize=0);
|
||||
extern void vtlb_MapHandler(vtlbHandler handler,u32 start,u32 size);
|
||||
extern void vtlb_MapBlock(void* base,u32 start,u32 size,u32 blocksize=0);
|
||||
extern void* vtlb_GetPhyPtr(u32 paddr);
|
||||
//void vtlb_Mirror(u32 new_region,u32 start,u32 size); // -> not working yet :(
|
||||
//extern void vtlb_Mirror(u32 new_region,u32 start,u32 size); // -> not working yet :(
|
||||
|
||||
//virtual mappings
|
||||
void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz);
|
||||
void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz);
|
||||
void vtlb_VMapUnmap(u32 vaddr,u32 sz);
|
||||
extern void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz);
|
||||
extern void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz);
|
||||
extern void vtlb_VMapUnmap(u32 vaddr,u32 sz);
|
||||
|
||||
//Memory functions
|
||||
|
||||
u8 __fastcall vtlb_memRead8(u32 mem);
|
||||
u16 __fastcall vtlb_memRead16(u32 mem);
|
||||
u32 __fastcall vtlb_memRead32(u32 mem);
|
||||
void __fastcall vtlb_memRead64(u32 mem, u64 *out);
|
||||
void __fastcall vtlb_memRead128(u32 mem, u64 *out);
|
||||
void __fastcall vtlb_memWrite8 (u32 mem, u8 value);
|
||||
void __fastcall vtlb_memWrite16(u32 mem, u16 value);
|
||||
void __fastcall vtlb_memWrite32(u32 mem, u32 value);
|
||||
void __fastcall vtlb_memWrite64(u32 mem, const u64* value);
|
||||
void __fastcall vtlb_memWrite128(u32 mem, const u64* value);
|
||||
extern u8 __fastcall vtlb_memRead8(u32 mem);
|
||||
extern u16 __fastcall vtlb_memRead16(u32 mem);
|
||||
extern u32 __fastcall vtlb_memRead32(u32 mem);
|
||||
extern void __fastcall vtlb_memRead64(u32 mem, u64 *out);
|
||||
extern void __fastcall vtlb_memRead128(u32 mem, u64 *out);
|
||||
extern void __fastcall vtlb_memWrite8 (u32 mem, u8 value);
|
||||
extern void __fastcall vtlb_memWrite16(u32 mem, u16 value);
|
||||
extern void __fastcall vtlb_memWrite32(u32 mem, u32 value);
|
||||
extern void __fastcall vtlb_memWrite64(u32 mem, const u64* value);
|
||||
extern void __fastcall vtlb_memWrite128(u32 mem, const u64* value);
|
||||
|
||||
extern void vtlb_DynGenWrite(u32 sz);
|
||||
extern void vtlb_DynGenRead32(u32 bits, bool sign);
|
||||
extern void vtlb_DynGenRead64(u32 sz);
|
||||
|
||||
namespace vtlb_private
|
||||
{
|
||||
static const uint VTLB_PAGE_BITS = 12;
|
||||
static const uint VTLB_PAGE_MASK = 4095;
|
||||
static const uint VTLB_PAGE_SIZE = 4096;
|
||||
|
||||
static const uint VTLB_PMAP_ITEMS = 0x20000000 / VTLB_PAGE_SIZE;
|
||||
static const uint VTLB_PMAP_SZ = 0x20000000;
|
||||
static const uint VTLB_VMAP_ITEMS = 0x100000000ULL / VTLB_PAGE_SIZE;
|
||||
|
||||
extern void* RWFT[5][2][128];
|
||||
extern s32 pmap[VTLB_PMAP_ITEMS]; //512KB
|
||||
extern s32 vmap[VTLB_VMAP_ITEMS]; //4MB
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2696,6 +2696,10 @@
|
|||
RelativePath="..\..\MemoryVM.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\ix86-32\recVTLB.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\vtlb.cpp"
|
||||
>
|
||||
|
|
|
@ -199,7 +199,7 @@ static void recCTC2(s32 info)
|
|||
MOV32ItoM((uptr)&VU0.VI[_Fs_].UL,g_cpuConstRegs[_Rt_].UL[0]);
|
||||
//PUSH32I( -1 );
|
||||
iFlushCall(FLUSH_NOCONST);
|
||||
CALLFunc((uptr)CpuVU0->ExecuteBlock);
|
||||
CALLFunc((uptr)CpuVU0.ExecuteBlock);
|
||||
//CALLFunc((uptr)vu0ExecMicro);
|
||||
//ADD32ItoR( ESP, 4 );
|
||||
break;
|
||||
|
|
|
@ -26,12 +26,21 @@
|
|||
|
||||
namespace VU0micro
|
||||
{
|
||||
|
||||
static void recAlloc()
|
||||
void recAlloc()
|
||||
{
|
||||
SuperVUAlloc(0);
|
||||
}
|
||||
|
||||
void __fastcall recClear(u32 Addr, u32 Size)
|
||||
{
|
||||
SuperVUClear(Addr, Size*4, 0);
|
||||
}
|
||||
|
||||
void recShutdown()
|
||||
{
|
||||
SuperVUDestroy( 0 );
|
||||
}
|
||||
|
||||
static void recReset()
|
||||
{
|
||||
SuperVUReset(0);
|
||||
|
@ -54,26 +63,14 @@ namespace VU0micro
|
|||
SuperVUExecuteProgram(VU0.VI[ REG_TPC ].UL & 0xfff, 0);
|
||||
FreezeXMMRegs(0);
|
||||
}
|
||||
|
||||
static void recClear(u32 Addr, u32 Size)
|
||||
{
|
||||
SuperVUClear(Addr, Size*4, 0);
|
||||
}
|
||||
|
||||
static void recShutdown()
|
||||
{
|
||||
SuperVUDestroy( 0 );
|
||||
}
|
||||
}
|
||||
|
||||
using namespace VU0micro;
|
||||
|
||||
VUmicroCpu recVU0 =
|
||||
const VUmicroCpu recVU0 =
|
||||
{
|
||||
recAlloc
|
||||
, recReset
|
||||
recReset
|
||||
, recStep
|
||||
, recExecuteBlock
|
||||
, recClear
|
||||
, recShutdown
|
||||
};
|
||||
|
|
|
@ -33,6 +33,22 @@ extern u32 vudump;
|
|||
|
||||
namespace VU1micro
|
||||
{
|
||||
void recAlloc()
|
||||
{
|
||||
SuperVUAlloc(1);
|
||||
}
|
||||
|
||||
void __fastcall recClear( u32 Addr, u32 Size )
|
||||
{
|
||||
assert( (Addr&7) == 0 );
|
||||
SuperVUClear(Addr, Size*4, 1);
|
||||
}
|
||||
|
||||
void recShutdown()
|
||||
{
|
||||
SuperVUDestroy( 1 );
|
||||
}
|
||||
|
||||
// commented out because I'm not sure it actually works anymore with SuperVU (air)
|
||||
/*static void iVU1DumpBlock()
|
||||
{
|
||||
|
@ -64,16 +80,11 @@ namespace VU1micro
|
|||
fclose( f );
|
||||
}*/
|
||||
|
||||
static void recAlloc()
|
||||
{
|
||||
SuperVUAlloc(1);
|
||||
}
|
||||
|
||||
static void recReset()
|
||||
{
|
||||
SuperVUReset(1);
|
||||
|
||||
// these shouldn't be needed, but shouldn't hurt anythign either.
|
||||
// these shouldn't be needed, but shouldn't hurt anything either.
|
||||
x86FpuState = FPU_STATE;
|
||||
iCWstate = 0;
|
||||
}
|
||||
|
@ -111,28 +122,14 @@ namespace VU1micro
|
|||
} while( VU0.VI[ REG_VPU_STAT ].UL&0x100 );
|
||||
FreezeXMMRegs(0);
|
||||
}
|
||||
|
||||
static void recClear( u32 Addr, u32 Size )
|
||||
{
|
||||
assert( (Addr&7) == 0 );
|
||||
SuperVUClear(Addr, Size*4, 1);
|
||||
}
|
||||
|
||||
static void recShutdown()
|
||||
{
|
||||
SuperVUDestroy( 1 );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
using namespace VU1micro;
|
||||
|
||||
VUmicroCpu recVU1 =
|
||||
const VUmicroCpu recVU1 =
|
||||
{
|
||||
recAlloc
|
||||
, recReset
|
||||
recReset
|
||||
, recStep
|
||||
, recExecuteBlock
|
||||
, recClear
|
||||
, recShutdown
|
||||
};
|
||||
|
|
|
@ -443,7 +443,7 @@ void SuperVUReset(int vuindex)
|
|||
}
|
||||
|
||||
// clear the block and any joining blocks
|
||||
void SuperVUClear(u32 startpc, u32 size, int vuindex)
|
||||
__forceinline void SuperVUClear(u32 startpc, u32 size, int vuindex)
|
||||
{
|
||||
vector<VuFunctionHeader::RANGE>::iterator itrange;
|
||||
list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
|
||||
|
|
|
@ -31,20 +31,20 @@ extern void SuperVUReset(int vuindex); // if vuindex is -1, resets everything
|
|||
#ifdef __LINUX__
|
||||
extern "C" {
|
||||
#endif
|
||||
void SuperVUExecuteProgram(u32 startpc, int vuindex);
|
||||
void SuperVUEndProgram();
|
||||
void svudispfntemp();
|
||||
extern void SuperVUExecuteProgram(u32 startpc, int vuindex);
|
||||
extern void SuperVUEndProgram();
|
||||
extern void svudispfntemp();
|
||||
#ifdef __LINUX__
|
||||
}
|
||||
#endif
|
||||
void SuperVUClear(u32 startpc, u32 size, int vuindex);
|
||||
extern void SuperVUClear(u32 startpc, u32 size, int vuindex);
|
||||
|
||||
// read = 0, will write to reg
|
||||
// read = 1, will read from reg
|
||||
// read = 2, addr of previously written reg (used for status and clip flags)
|
||||
u32 SuperVUGetVIAddr(int reg, int read);
|
||||
extern u32 SuperVUGetVIAddr(int reg, int read);
|
||||
|
||||
// if p == 0, flush q else flush p; if wait is != 0, waits for p/q
|
||||
void SuperVUFlush(int p, int wait);
|
||||
extern void SuperVUFlush(int p, int wait);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -12,11 +12,16 @@
|
|||
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
|
||||
|
||||
#define BASEBLOCK_SIZE 2 // in dwords
|
||||
#define PCOFFSET 0x2a8
|
||||
#define PCOFFSET 0x2a8 // this must always match what Pcsx2 displays at startup
|
||||
|
||||
#define REG_PC %ecx
|
||||
#define REG_BLOCK %esi
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Recompiles the next block, and links the old block directly to it.
|
||||
// This is a one-shot execution for any block which uses it. Once the block
|
||||
// has been statically linked to the new block, this function will be bypassed
|
||||
//
|
||||
.globl Dispatcher
|
||||
Dispatcher:
|
||||
# EDX contains the jump addr to modify
|
||||
|
@ -24,48 +29,59 @@ Dispatcher:
|
|||
|
||||
# calc PC_GETBLOCK
|
||||
# ((BASEBLOCK*)(recLUT[((u32)(x)) >> 16] + (sizeof(BASEBLOCK)/4)*((x) & 0xffff)))
|
||||
mov %eax, dword ptr [cpuRegs + PCOFFSET]
|
||||
mov REG_BLOCK, %eax
|
||||
mov REG_PC, %eax
|
||||
shr %eax, 16
|
||||
and REG_BLOCK, 0xffff
|
||||
shl %eax, 2
|
||||
add %eax, dword ptr [recLUT]
|
||||
shl REG_BLOCK, 1
|
||||
add REG_BLOCK, dword ptr [%eax]
|
||||
|
||||
#mov %eax, dword ptr [cpuRegs + PCOFFSET]
|
||||
#mov REG_BLOCK, %eax
|
||||
#mov REG_PC, %eax
|
||||
#shr %eax, 16
|
||||
#and REG_BLOCK, 0xffff
|
||||
#shl %eax, 2
|
||||
#add %eax, dword ptr [recLUT]
|
||||
#shl REG_BLOCK, 1
|
||||
#add REG_BLOCK, dword ptr [%eax]
|
||||
|
||||
mov %eax,dword ptr [cpuRegs+PCOFFSET]
|
||||
mov %ecx,%eax
|
||||
mov REG_PC,%eax
|
||||
shr %eax,10h
|
||||
and %ecx,0FFFFh
|
||||
mov %edx,dword ptr [recLUT]
|
||||
mov %eax,dword ptr [edx+eax*4]
|
||||
lea %ecx,[eax+ecx*2]
|
||||
|
||||
// check if startpc == cpuRegs.pc
|
||||
//and %ecx, 0x5fffffff // remove higher bits
|
||||
cmp REG_PC, dword ptr [REG_BLOCK+BLOCKTYPE_STARTPC]
|
||||
cmp REG_PC, dword ptr [%ecx+BLOCKTYPE_STARTPC]
|
||||
je Dispatcher_CheckPtr
|
||||
|
||||
// recompile
|
||||
push REG_BLOCK
|
||||
push REG_PC // pc
|
||||
call recRecompile
|
||||
add %esp, 4 // pop old param
|
||||
pop REG_BLOCK
|
||||
add %esp, 4
|
||||
pop %eax // eax is now the REG_BLOCK
|
||||
Dispatcher_CheckPtr:
|
||||
mov REG_BLOCK, dword ptr [REG_BLOCK]
|
||||
mov %eax, dword ptr [%eax]
|
||||
|
||||
#ifdef _DEBUG
|
||||
test REG_BLOCK, REG_BLOCK
|
||||
test %eax, %eax
|
||||
jnz Dispatcher_CallFn
|
||||
// throw an exception
|
||||
int 10
|
||||
|
||||
Dispatcher_CallFn:
|
||||
#endif
|
||||
|
||||
and REG_BLOCK, 0x0fffffff
|
||||
mov %edx, REG_BLOCK
|
||||
and %eax, 0x0fffffff
|
||||
pop %ecx // x86Ptr to mod
|
||||
mov %edx, %eax
|
||||
sub %edx, %ecx
|
||||
sub %edx, 4
|
||||
mov dword ptr [%ecx], %edx
|
||||
|
||||
jmp REG_BLOCK
|
||||
jmp %eax
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// edx - baseblock->startpc
|
||||
// stack - x86Ptr
|
||||
.globl DispatcherClear
|
||||
DispatcherClear:
|
||||
// EDX contains the current pc
|
||||
|
@ -119,58 +135,47 @@ DispatcherClear_Recompile:
|
|||
|
||||
jmp %eax
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// called when jumping to variable pc address
|
||||
// This is basically the same as Dispatcher but without the part at the end
|
||||
// that modifies the block's jmp instruction. (ie, no static block linking)
|
||||
|
||||
.globl DispatcherReg
|
||||
DispatcherReg:
|
||||
|
||||
//s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc);
|
||||
mov %edx, dword ptr [cpuRegs+PCOFFSET]
|
||||
mov %ecx, %edx
|
||||
|
||||
shr %edx, 14
|
||||
and %edx, 0xfffffffc
|
||||
add %edx, [recLUT]
|
||||
mov %edx, dword ptr [%edx]
|
||||
mov %eax,dword ptr [cpuRegs+PCOFFSET]
|
||||
mov %ecx,%eax
|
||||
mov REG_PC,%eax
|
||||
shr %eax,10h
|
||||
and %ecx,0FFFFh
|
||||
mov %edx,dword ptr [recLUT]
|
||||
mov %eax,dword ptr [edx+eax*4]
|
||||
lea %ecx,[eax+ecx*2]
|
||||
|
||||
mov %eax, %ecx
|
||||
and %eax, 0xfffc
|
||||
// %edx += 2*%eax
|
||||
shl %eax, 1
|
||||
add %edx, %eax
|
||||
|
||||
// check if startpc == cpuRegs.pc
|
||||
mov %eax, %ecx
|
||||
//and %eax, 0x5fffffff // remove higher bits
|
||||
cmp %eax, dword ptr [%edx+BLOCKTYPE_STARTPC]
|
||||
jne DispatcherReg_recomp
|
||||
//and %ecx, 0x5fffffff // remove higher bits
|
||||
cmp REG_PC, dword ptr [%ecx+BLOCKTYPE_STARTPC]
|
||||
je Dispatcher_CheckPtr
|
||||
|
||||
mov %eax, dword ptr [%edx]
|
||||
// recompile
|
||||
push REG_BLOCK
|
||||
push REG_PC // pc
|
||||
call recRecompile
|
||||
add %esp, 4
|
||||
pop %eax // eax is now the REG_BLOCK
|
||||
Dispatcher_CheckPtr:
|
||||
mov %eax, dword ptr [%eax]
|
||||
|
||||
#ifdef _DEBUG
|
||||
test %eax, %eax
|
||||
jnz CallFn2
|
||||
# throw an exception
|
||||
jnz Dispatcher_CallFn
|
||||
// throw an exception
|
||||
int 10
|
||||
|
||||
CallFn2:
|
||||
|
||||
Dispatcher_CallFn:
|
||||
#endif
|
||||
|
||||
and %eax, 0x0fffffff
|
||||
jmp %eax // fnptr
|
||||
|
||||
DispatcherReg_recomp:
|
||||
sub %esp, 8
|
||||
mov dword ptr [%esp+4], %edx
|
||||
mov dword ptr [%esp], %ecx
|
||||
call recRecompile
|
||||
mov %edx, dword ptr [%esp+4]
|
||||
add %esp, 8
|
||||
|
||||
mov %eax, dword ptr [%edx]
|
||||
and %eax, 0x0fffffff
|
||||
jmp %eax // fnptr
|
||||
jmp %eax
|
||||
|
||||
|
||||
.globl _StartPerfCounter
|
||||
|
|
|
@ -0,0 +1,318 @@
|
|||
/* Pcsx2 - Pc Ps2 Emulator
|
||||
* Copyright (C) 2002-2008 Pcsx2 Team
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#include "PrecompiledHeader.h"
|
||||
|
||||
#include "Common.h"
|
||||
#include "vtlb.h"
|
||||
|
||||
#include "x86/ix86/ix86.h"
|
||||
#include "iCore.h"
|
||||
#include "iR5900.h"
|
||||
|
||||
using namespace vtlb_private;
|
||||
|
||||
/*
|
||||
// Pseudo-Code For the following Dynarec Implementations -->
|
||||
|
||||
u32 vmv=vmap[addr>>VTLB_PAGE_BITS];
|
||||
s32 ppf=addr+vmv;
|
||||
if (!(ppf<0))
|
||||
{
|
||||
data[0]=*reinterpret_cast<DataType*>(ppf);
|
||||
if (DataSize==128)
|
||||
data[1]=*reinterpret_cast<DataType*>(ppf+8);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
//has to: translate, find function, call function
|
||||
u32 hand=(u8)vmv;
|
||||
u32 paddr=ppf-hand+0x80000000;
|
||||
//SysPrintf("Translted 0x%08X to 0x%08X\n",addr,paddr);
|
||||
return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data);
|
||||
}
|
||||
|
||||
// And in ASM it looks something like this -->
|
||||
|
||||
mov eax,ecx;
|
||||
shr eax,VTLB_PAGE_BITS;
|
||||
mov eax,[eax*4+vmap];
|
||||
add ecx,eax;
|
||||
js _fullread;
|
||||
|
||||
//these are wrong order, just an example ...
|
||||
mov [eax],ecx;
|
||||
mov ecx,[edx];
|
||||
mov [eax+4],ecx;
|
||||
mov ecx,[edx+4];
|
||||
mov [eax+4+4],ecx;
|
||||
mov ecx,[edx+4+4];
|
||||
mov [eax+4+4+4+4],ecx;
|
||||
mov ecx,[edx+4+4+4+4];
|
||||
///....
|
||||
|
||||
jmp cont;
|
||||
_fullread:
|
||||
movzx eax,al;
|
||||
sub ecx,eax;
|
||||
sub ecx,0x80000000;
|
||||
call [eax+stuff];
|
||||
cont:
|
||||
........
|
||||
|
||||
*/
|
||||
|
||||
|
||||
//ecx = addr
|
||||
//edx = ptr
|
||||
void vtlb_DynGenRead64(u32 bits)
|
||||
{
|
||||
MOV32RtoR(EAX,ECX);
|
||||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _fullread=JS8(0);
|
||||
switch(bits)
|
||||
{
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoROffset(freereg,ECX,0);
|
||||
MOVQRtoRmOffset(EDX,freereg,0);
|
||||
_freeMMXreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,ECX);
|
||||
MOV32RtoRm(EDX,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,4);
|
||||
MOV32RtoRmOffset(EDX,EAX,4);
|
||||
}
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoROffset(freereg,ECX,0);
|
||||
SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
|
||||
_freeXMMreg(freereg);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV32RmtoR(EAX,ECX);
|
||||
MOV32RtoRm(EDX,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,4);
|
||||
MOV32RtoRmOffset(EDX,EAX,4);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,8);
|
||||
MOV32RtoRmOffset(EDX,EAX,8);
|
||||
|
||||
MOV32RmtoROffset(EAX,ECX,12);
|
||||
MOV32RtoRmOffset(EDX,EAX,12);
|
||||
}
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
u8* cont=JMP8(0);
|
||||
x86SetJ8(_fullread);
|
||||
int szidx;
|
||||
|
||||
switch(bits)
|
||||
{
|
||||
case 64: szidx=3; break;
|
||||
case 128: szidx=4; break;
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
MOVZX32R8toR(EAX,EAX);
|
||||
SUB32RtoR(ECX,EAX);
|
||||
//eax=[funct+eax]
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2);
|
||||
SUB32ItoR(ECX,0x80000000);
|
||||
CALL32R(EAX);
|
||||
|
||||
x86SetJ8(cont);
|
||||
}
|
||||
|
||||
// ecx - source address to read from
|
||||
// Returns read value in eax.
|
||||
void vtlb_DynGenRead32(u32 bits, bool sign)
|
||||
{
|
||||
jASSUME( bits <= 32 );
|
||||
|
||||
MOV32RtoR(EAX,ECX);
|
||||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _fullread=JS8(0);
|
||||
|
||||
switch(bits)
|
||||
{
|
||||
case 8:
|
||||
if( sign )
|
||||
MOVSX32Rm8toR(EAX,ECX);
|
||||
else
|
||||
MOVZX32Rm8toR(EAX,ECX);
|
||||
break;
|
||||
|
||||
case 16:
|
||||
if( sign )
|
||||
MOVSX32Rm16toR(EAX,ECX);
|
||||
else
|
||||
MOVZX32Rm16toR(EAX,ECX);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
MOV32RmtoR(EAX,ECX);
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
u8* cont=JMP8(0);
|
||||
x86SetJ8(_fullread);
|
||||
int szidx;
|
||||
|
||||
switch(bits)
|
||||
{
|
||||
case 8: szidx=0; break;
|
||||
case 16: szidx=1; break;
|
||||
case 32: szidx=2; break;
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
MOVZX32R8toR(EAX,EAX);
|
||||
SUB32RtoR(ECX,EAX);
|
||||
//eax=[funct+eax]
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][0],2);
|
||||
SUB32ItoR(ECX,0x80000000);
|
||||
CALL32R(EAX);
|
||||
|
||||
// perform sign extension on the result:
|
||||
|
||||
if( bits==8 )
|
||||
{
|
||||
if( sign )
|
||||
MOVSX32R8toR(EAX,EAX);
|
||||
else
|
||||
MOVZX32R8toR(EAX,EAX);
|
||||
}
|
||||
else if( bits==16 )
|
||||
{
|
||||
if( sign )
|
||||
MOVSX32R16toR(EAX,EAX);
|
||||
else
|
||||
MOVZX32R16toR(EAX,EAX);
|
||||
}
|
||||
|
||||
x86SetJ8(cont);
|
||||
}
|
||||
|
||||
void vtlb_DynGenWrite(u32 sz)
|
||||
{
|
||||
MOV32RtoR(EAX,ECX);
|
||||
SHR32ItoR(EAX,VTLB_PAGE_BITS);
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)vmap,2);
|
||||
ADD32RtoR(ECX,EAX);
|
||||
u8* _full=JS8(0);
|
||||
switch(sz)
|
||||
{
|
||||
//8 , 16, 32 : data on EDX
|
||||
case 8:
|
||||
MOV8RtoRm(ECX,EDX);
|
||||
break;
|
||||
case 16:
|
||||
MOV16RtoRm(ECX,EDX);
|
||||
break;
|
||||
case 32:
|
||||
MOV32RtoRm(ECX,EDX);
|
||||
break;
|
||||
|
||||
case 64:
|
||||
if( _hasFreeMMXreg() )
|
||||
{
|
||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
||||
MOVQRmtoROffset(freereg,EDX,0);
|
||||
MOVQRtoRmOffset(ECX,freereg,0);
|
||||
_freeMMXreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV32RmtoR(EAX,EDX);
|
||||
MOV32RtoRm(ECX,EAX);
|
||||
|
||||
MOV32RmtoROffset(EAX,EDX,4);
|
||||
MOV32RtoRmOffset(ECX,EAX,4);
|
||||
}
|
||||
break;
|
||||
|
||||
case 128:
|
||||
if( _hasFreeXMMreg() )
|
||||
{
|
||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
||||
SSE2_MOVDQARmtoROffset(freereg,EDX,0);
|
||||
SSE2_MOVDQARtoRmOffset(ECX,freereg,0);
|
||||
_freeXMMreg( freereg );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Could put in an MMX optimization here as well, but no point really.
|
||||
// It's almost never used since there's almost always a free XMM reg.
|
||||
|
||||
MOV32RmtoR(EAX,EDX);
|
||||
MOV32RtoRm(ECX,EAX);
|
||||
MOV32RmtoROffset(EAX,EDX,4);
|
||||
MOV32RtoRmOffset(ECX,EAX,4);
|
||||
MOV32RmtoROffset(EAX,EDX,8);
|
||||
MOV32RtoRmOffset(ECX,EAX,8);
|
||||
MOV32RmtoROffset(EAX,EDX,12);
|
||||
MOV32RtoRmOffset(ECX,EAX,12);
|
||||
}
|
||||
break;
|
||||
}
|
||||
u8* cont=JMP8(0);
|
||||
x86SetJ8(_full);
|
||||
int szidx=0;
|
||||
|
||||
switch(sz)
|
||||
{
|
||||
case 8: szidx=0; break;
|
||||
case 16: szidx=1; break;
|
||||
case 32: szidx=2; break;
|
||||
case 64: szidx=3; break;
|
||||
case 128: szidx=4; break;
|
||||
}
|
||||
MOVZX32R8toR(EAX,EAX);
|
||||
SUB32RtoR(ECX,EAX);
|
||||
//eax=[funct+eax]
|
||||
MOV32RmSOffsettoR(EAX,EAX,(int)RWFT[szidx][1],2);
|
||||
SUB32ItoR(ECX,0x80000000);
|
||||
CALL32R(EAX);
|
||||
|
||||
x86SetJ8(cont);
|
||||
}
|