--This breaks linux.

--Basic vtlb code rewrite for full mapping using exceptions
--This is buggy & leaks ram for now


git-svn-id: http://pcsx2.googlecode.com/svn/branches/vtlb-exp@934 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
drkiiraziel 2009-04-09 20:44:26 +00:00
parent c81e012e51
commit 6c88e99cf2
7 changed files with 289 additions and 166 deletions

View File

@ -357,13 +357,15 @@ static void psxCheckEndGate32(int i)
_psxCheckEndGate( i );
}
#include <windows.h>
// Raised at the start of the vertical-blank period: ticks the CDVD vsync
// logic, raises the IOP vblank interrupt bit, and fires any counter start
// gates that are latched to vblank.
void psxVBlankStart()
{
cdvdVsync();
psxHu32(0x1070) |= 1; // set bit 0 in IOP reg 0x1070 — presumably the interrupt status/flag register; verify against IOP docs
if(psxvblankgate & (1 << 1)) psxCheckStartGate16(1); // 16-bit counter 1 is vblank-gated
if(psxvblankgate & (1 << 3)) psxCheckStartGate32(3); // 32-bit counter 3 is vblank-gated
// NOTE(review): debug-only hotkey — holding 'P' resets the CPU every vblank.
// Windows-only (GetAsyncKeyState); remove before merging back to trunk.
if (GetAsyncKeyState('P'))
Cpu->Reset();
}
void psxVBlankEnd()

View File

@ -61,6 +61,12 @@ vtlbHandler UnmappedVirtHandler1;
vtlbHandler UnmappedPhyHandler0;
vtlbHandler UnmappedPhyHandler1;
#define VTLB_ALLOC_SIZE (0x2900000) //this is a bit more than required
u8* vtlb_alloc_base; //base of the memory array
u8* vtlb_alloc_current; //current base
u8 vtlb_alloc_bits[VTLB_ALLOC_SIZE/16/8]; //328 kb
/*
__asm
@ -91,6 +97,13 @@ callfunction:
// Interpreter Implementations of VTLB Memory Operations.
// See recVTLB.cpp for the dynarec versions.
// Mark the 16-byte chunk containing 'ptr' as written in the vtlb dirty
// bitmap (one bit per 16 bytes of the vtlb allocation arena).
void memwritebits(u8* ptr)
{
	const u32 chunk = (u32)(ptr - vtlb_alloc_base) / 16;
	vtlb_alloc_bits[chunk >> 3] |= (u8)(1u << (chunk & 7));
}
// Interpreted VTLB lookup for 8, 16, and 32 bit accesses
template<int DataSize,typename DataType>
__forceinline DataType __fastcall MemOp_r0(u32 addr)
@ -116,7 +129,6 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr)
jNO_DEFAULT;
}
}
// Interpreted VTLB lookup for 64 and 128 bit accesses.
template<int DataSize,typename DataType>
__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
@ -155,6 +167,7 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
s32 ppf=addr+vmv;
if (!(ppf<0))
{
//memwritebits((u8*)ppf);
*reinterpret_cast<DataType*>(ppf)=data;
}
else
@ -182,6 +195,7 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
s32 ppf=addr+vmv;
if (!(ppf<0))
{
//memwritebits((u8*)ppf);
*reinterpret_cast<DataType*>(ppf)=*data;
if (DataSize==128)
*reinterpret_cast<DataType*>(ppf+8)=data[1];
@ -552,6 +566,13 @@ void vtlb_Term()
//nothing to do for now
}
void vtlb_alloc_mem()
{
u32 size=VTLB_ALLOC_SIZE;
vtlb_alloc_base=SysMmapEx( 0, size, 0x80000000, "Vtlb");
vtlb_alloc_current=vtlb_alloc_base;
}
// This function allocates memory blocks which are compatible with the Vtlb's requirements
// for memory locations. The Vtlb requires the topmost bit (Sign bit) of the memory
// pointer to be cleared. Some operating systems and/or implementations of malloc do that,
@ -559,6 +580,17 @@ void vtlb_Term()
// platform.
u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress )
{
if (!vtlb_alloc_base)
vtlb_alloc_mem();
u32 realign=((uptr)vtlb_alloc_current&(align-1));
if (realign)
vtlb_alloc_current+=align-realign;
u8* rv=vtlb_alloc_current;
vtlb_alloc_current+=size;
return rv;
#ifdef __LINUX__
return SysMmapEx( tryBaseAddress, size, 0x80000000, "Vtlb" );
#else
@ -569,6 +601,7 @@ u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress )
void vtlb_free( void* pmem, uint size )
{
return;//whatever
if( pmem == NULL ) return;
#ifdef __LINUX__

View File

@ -202,10 +202,8 @@ void WinRun()
_doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 );
#ifndef _DEBUG
if( Config.Profiler )
ProfilerInit();
#endif
InitCPUTicks();
@ -800,7 +798,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
SaveConfig();
break;
#ifndef _DEBUG
case ID_PROFILER:
Config.Profiler = !Config.Profiler;
if( Config.Profiler )
@ -815,7 +812,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
}
SaveConfig();
break;
#endif
default:
if (LOWORD(wParam) >= ID_LANGS && LOWORD(wParam) <= (ID_LANGS + langsMax))
@ -989,9 +985,7 @@ void CreateMainMenu() {
ADDMENUITEM(0,_("Print cdvd &Info"), ID_CDVDPRINT);
ADDMENUITEM(0,_("Close GS Window on Esc"), ID_CLOSEGS);
ADDSEPARATOR(0);
#ifndef _DEBUG
ADDMENUITEM(0,_("Enable &Profiler"), ID_PROFILER);
#endif
ADDMENUITEM(0,_("Enable &Patches"), ID_PATCHES);
ADDMENUITEM(0,_("Enable &Console"), ID_CONSOLE);
ADDSEPARATOR(0);

View File

@ -49,14 +49,53 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps )
}
// get bad virtual address
u32 offset = (u8*)ExceptionRecord.ExceptionInformation[1]-psM;
uptr addr=ExceptionRecord.ExceptionInformation[1];
if (offset>=Ps2MemSize::Base)
return EXCEPTION_CONTINUE_SEARCH;
//this is a *hackfix* for a bug on x64 windows kernels. They do not report the correct
//address if the error is a misaligned access (they return 0)
if (addr==0)
{
if (eps->ContextRecord->Ecx & 0x80000000)
addr=eps->ContextRecord->Ecx;
}
u32 offset = addr-(uptr)psM;
if (addr&0x80000000)
{
uptr _vtlb_HandleRewrite(uptr code);
u8* pcode=(u8*)ExceptionRecord.ExceptionAddress;
mmap_ClearCpuBlock( offset );
u32 patch_point=1;
//01 C1
while(pcode[-patch_point]!=0x81 || pcode[-patch_point-1]!=0xC1 || pcode[-patch_point-2]!=0x01)
{
patch_point++;
}
assert(pcode[-patch_point]==0x81);
pcode[-patch_point]=0xF;//js32, 0x81 is add32
pcode[-patch_point+1]=0x88;
return EXCEPTION_CONTINUE_EXECUTION;
//resume execution from correct point
eps->ContextRecord->Eax-=*(u32*)&pcode[-patch_point+2];
uptr codeloc=_vtlb_HandleRewrite(*(u32*)&pcode[-patch_point+2]);
eps->ContextRecord->Eip=codeloc;
*(u32*)&pcode[-patch_point+2]=codeloc-(u32)&pcode[-patch_point+6];
SysPrintf("memop patch for full mapping @ %08X : pp %d\n",pcode,patch_point);
return EXCEPTION_CONTINUE_EXECUTION;
}
else
{
if (offset>=Ps2MemSize::Base)
return EXCEPTION_CONTINUE_SEARCH;
mmap_ClearCpuBlock( offset );
return EXCEPTION_CONTINUE_EXECUTION;
}
}

View File

@ -24,23 +24,178 @@
#include "iCore.h"
#include "iR5900.h"
using namespace vtlb_private;
u8* execohax_pos=0;
u8* execohax_start=0;
u32 execohx_sz;
// NOTICE: This function *destroys* EAX!!
// Moves 128 bits of memory from the source register ptr to the dest register ptr.
// (used as an equivalent to movaps, when a free XMM register is unavailable for some reason)
void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
u8* code_pos=0;
u8* code_start=0;
u32 code_sz;
using namespace vtlb_private;
#include <windows.h>
// Switches the x86 emitter into (set=true) or out of (set=false) the 2MB
// side-code arena used for rewritten memory-op stubs.  Allocates a fresh
// arena when the current one is (nearly) exhausted; old arenas are
// intentionally abandoned for now (see commit notes: "leaks ram").
// On set=false it also prints code-size statistics.
void execuCode(bool set)
{
	u32 used=code_pos-code_start;           // bytes consumed in the current arena
	u32 free=2*1024*1024-used;              // bytes remaining in the current arena
	if (code_pos == 0 || free<128)
	{
		SysPrintf("Leaking 2 megabytes of ram\n");
		code_start=code_pos=(u8*)VirtualAlloc(0,2*1024*1024,MEM_COMMIT,PAGE_EXECUTE_READWRITE);
		code_sz+=2*1024*1024;
		// Fill the *new* 2MB block with UD2 (0x0F 0x0B) so the cpu faults
		// instead of running off the end of emitted branches.
		// BUGFIX: this previously looped up to code_sz, which accumulates
		// across allocations — from the second arena onward it wrote past the
		// end of the freshly allocated 2MB block.
		int i=0;
		while(i<2*1024*1024)
		{
			code_start[i]=0xF;i++;
			code_start[i]=0xB;i++;
		}
	}
	static u8* old;                          // emitter position saved across the set/unset pair
	if (set)
	{
		// Redirect the emitter into the side arena, remembering where it was.
		old=x86SetPtr(code_pos);
	}
	else
	{
		// Commit what was emitted and restore the original emitter position.
		code_pos=x86SetPtr(old);
		// Stats: tc = total side-code bytes emitted, tt = potential bytes
		// (rough estimate from the patch-record arena usage).
		u32 tt=execohx_sz-2*1024*1024+(execohax_pos-execohax_start);
		u32 tc=code_sz-free;
		SysPrintf("%d code, %d pot, %.2f%%\n",tc,tt,tc/(float)tt*100);
	}
}
// Patch-point bracket for the full-mapping rewrite.  With set=true it emits
// an "add eax, imm32" marker (0x81 /0) into the main code stream — imm32 is
// the address of a side-buffer record — then redirects the emitter into that
// side buffer and reserves a { size:u8, return_address:u32 } header.  With
// set=false it back-patches the size byte, restores the emitter, and returns
// a pointer to the return-address slot so the caller can fill it in.
// The page-fault handler later scans backwards for this marker pattern and
// rewrites it (see SysPageFaultExceptionFilter / _vtlb_HandleRewrite).
u32* execohaxme(bool set)
{
u32 used=execohax_pos-execohax_start;
u32 free=2*1024*1024-used;
if (execohax_pos == 0 || free<128)
{
// Arena exhausted (or first use): grab a fresh 2MB executable block.
// Old blocks are deliberately never freed for now.
SysPrintf("Leaking 2 megabytes of ram\n");
execohax_start=execohax_pos=(u8*)VirtualAlloc(0,2*1024*1024,MEM_COMMIT,PAGE_EXECUTE_READWRITE);
execohx_sz+=2*1024*1024;
}
static u8* saved; // main-stream emitter position, restored on set=false
static u8* mod;   // start of the current side-buffer record (size byte)
if (set)
{
// Marker in the main stream: 0x81 = add r/m32, imm32; imm32 = record addr.
write8<_EmitterId_>( 0x81 );
ModRM<_EmitterId_>( 3, 0, EAX );
write32<_EmitterId_>( (uptr)execohax_pos );
// Switch the emitter into the side buffer and reserve the header bytes.
saved=x86SetPtr(execohax_pos);
mod=execohax_pos;
write8<_EmitterId_>(0); //size, in bytes
write32<_EmitterId_>(0); //return address
}
else
{
//x86AlignExecutable(4);
//x86Align(64);
// Back-patch the size byte (5 = header length), restore the main emitter,
// and hand back the address of the return-address slot (mod+1).
execohax_pos=x86SetPtr(mod);
write8<_EmitterId_>(execohax_pos-mod-5);
return (u32*)x86SetPtr(saved);
}
return 0;
}
// Called from the page-fault handler.  'block' points at a side-buffer
// record { size:u8, return_address:u32, code bytes... } produced by
// execohaxme().  Replays the recorded code bytes into the executable side
// arena, appends a jump back to the recorded return address, and returns
// the address of the freshly emitted stub (the new resume point).
uptr _vtlb_HandleRewrite(uptr block)
{
u8 size=*(u8*)block;
u32 ra=*(u32*)(block+1);
u8* pcode=(u8*)(block+5); // code bytes follow the 5-byte header
execuCode(true); // redirect the emitter into the code arena
uptr rv=(uptr)code_pos;
// Copy the recorded bytes verbatim.
while(size--)
{
write8<_EmitterId_>(*pcode++);
}
// rel32 jump back to the original return address; 5 = JMP instruction length.
JMP32(ra-(uptr)x86Ptr[_EmitterId_]-5);
execuCode(false);
//do magic
return rv;
}
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] );
// Emits a 128-bit (or 64-bit when half==true) memory-to-memory move through
// a scratch XMM register.  Source/destination are register-indirect unless a
// non-zero absolute address (srcAddr/dstAddr) is given.  If no XMM register
// is free, XMM0 is spilled to g_globalXMMData for the duration and restored
// afterwards.
void MOVx_SSE( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAddr=0,bool half=false )
{
	const bool borrowed = !_hasFreeXMMreg();
	int scratch;
	if (borrowed)
	{
		// No free register: spill XMM0 and borrow it.
		SSE2_MOVDQA_XMM_to_M128((uptr)g_globalXMMData,XMM0);
		scratch = XMM0;
	}
	else
	{
		scratch = _allocTempXMMreg( XMMT_INT, -1 );
	}
	if (half)
	{
		// 64-bit transfer via MOVLPS.
		if (srcAddr)
			SSE_MOVLPS_M64_to_XMM(scratch,srcAddr);
		else
			SSE_MOVLPS_RmOffset_to_XMM(scratch,srcRm,0);
		if (dstAddr)
			SSE_MOVLPS_XMM_to_M64(dstAddr,scratch);
		else
			SSE_MOVLPS_XMM_to_RmOffset(destRm,scratch,0);
	}
	else
	{
		// Full 128-bit transfer via MOVDQA.
		if (srcAddr)
			SSE2_MOVDQA_M128_to_XMM(scratch,srcAddr);
		else
			SSE2_MOVDQARmtoROffset(scratch,srcRm,0);
		if (dstAddr)
			SSE2_MOVDQA_XMM_to_M128(dstAddr,scratch);
		else
			SSE2_MOVDQARtoRmOffset(destRm,scratch,0);
	}
	if (borrowed)
		SSE2_MOVDQA_M128_to_XMM(XMM0,(uptr)g_globalXMMData); // restore the spilled XMM0
	else
		_freeXMMreg(scratch);
}
// Emits a 64-bit memory-to-memory move.  Uses a scratch MMX register when
// that is the cheapest option; otherwise falls back to the SSE path
// (MOVx_SSE with half=true).  srcAddr/dstAddr, when non-zero, select
// absolute addressing instead of register-indirect.
void MOV64_MMX( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAddr=0)
{
	// Prefer SSE when a free XMM reg exists and the FPU is already in SSE
	// state, or when no MMX register is available.
	const bool sse_preferred = _hasFreeXMMreg() && (x86FpuState == FPU_STATE);
	if( sse_preferred || !_hasFreeMMXreg() )
	{
		MOVx_SSE(destRm,srcRm,srcAddr,dstAddr,true);
		return;
	}
	const int scratch = _allocMMXreg(-1, MMX_TEMP, 0);
	if (srcAddr)
		MOVQMtoR(scratch,srcAddr);
	else
		MOVQRmtoROffset(scratch,srcRm,0);
	if (dstAddr)
		MOVQRtoM(dstAddr,scratch);
	else
		MOVQRtoRmOffset(destRm,scratch,0);
	_freeMMXreg(scratch);
}
/*
// Pseudo-Code For the following Dynarec Implementations -->
@ -118,38 +273,11 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
break;
case 64:
if( _hasFreeMMXreg() )
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoROffset(freereg,ECX,0);
MOVQRtoRmOffset(EDX,freereg,0);
_freeMMXreg(freereg);
}
else
{
MOV32RmtoR(EAX,ECX);
MOV32RtoRm(EDX,EAX);
MOV32RmtoROffset(EAX,ECX,4);
MOV32RtoRmOffset(EDX,EAX,4);
}
MOV64_MMX(EDX,ECX);
break;
case 128:
if( _hasFreeXMMreg() )
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoROffset(freereg,ECX,0);
SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
_freeXMMreg(freereg);
}
else
{
// Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg.
MOV128_MtoM( EDX, ECX ); // dest <- src!
}
MOVx_SSE(EDX,ECX);
break;
jNO_DEFAULT
@ -189,15 +317,16 @@ void vtlb_DynGenRead64(u32 bits)
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
ADD32RtoR(ECX,EAX);
u8* _fullread = JS8(0);
//u8* _direct = JMP8(0);
execohaxme(true);
_vtlb_DynGen_DirectRead( bits, false );
u8* cont = JMP8(0);
x86SetJ8(_fullread);
_vtlb_DynGen_IndirectRead( bits );
x86SetJ8(cont);
u32* patch=execohaxme(false);
_vtlb_DynGen_DirectRead( bits, false );
*patch=(uptr)x86Ptr[_EmitterId_];
}
// Recompiled input registers:
@ -211,12 +340,9 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
ADD32RtoR(ECX,EAX);
u8* _fullread = JS8(0);
//u8* _direct = JMP8(0);
execohaxme(true);
_vtlb_DynGen_DirectRead( bits, sign );
u8* cont = JMP8(0);
x86SetJ8(_fullread);
_vtlb_DynGen_IndirectRead( bits );
// perform sign extension on the result:
@ -236,7 +362,11 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
MOVZX32R16toR(EAX,EAX);
}
x86SetJ8(cont);
u32* patch=execohaxme(false);
_vtlb_DynGen_DirectRead( bits, sign );
*patch=(uptr)x86Ptr[_EmitterId_];
}
//
@ -251,39 +381,11 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
switch( bits )
{
case 64:
if( _hasFreeMMXreg() )
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQMtoR(freereg,ppf);
MOVQRtoRmOffset(EDX,freereg,0);
_freeMMXreg(freereg);
}
else
{
MOV32MtoR(EAX,ppf);
MOV32RtoRm(EDX,EAX);
MOV32MtoR(EAX,ppf+4);
MOV32RtoRmOffset(EDX,EAX,4);
}
MOV64_MMX( EDX, ECX,ppf ); // dest <- src!
break;
case 128:
if( _hasFreeXMMreg() )
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQA_M128_to_XMM( freereg, ppf );
SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
_freeXMMreg(freereg);
}
else
{
// Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg.
MOV32ItoR( ECX, ppf );
MOV128_MtoM( EDX, ECX ); // dest <- src!
}
MOVx_SSE( EDX, ECX,ppf ); // dest <- src!
break;
jNO_DEFAULT
@ -403,40 +505,16 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
break;
case 64:
if( _hasFreeMMXreg() )
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoROffset(freereg,EDX,0);
MOVQRtoRmOffset(ECX,freereg,0);
_freeMMXreg( freereg );
}
else
{
MOV32RmtoR(EAX,EDX);
MOV32RtoRm(ECX,EAX);
MOV32RmtoROffset(EAX,EDX,4);
MOV32RtoRmOffset(ECX,EAX,4);
}
MOV64_MMX( ECX, EDX );
break;
case 128:
if( _hasFreeXMMreg() )
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoROffset(freereg,EDX,0);
SSE2_MOVDQARtoRmOffset(ECX,freereg,0);
_freeXMMreg( freereg );
}
else
{
// Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg.
MOV128_MtoM( ECX, EDX ); // dest <- src!
}
MOVx_SSE( ECX, EDX );
break;
}
// SHR32ItoR(ECX,4);// do /16
// BTS_wtf(asdasd,ECX);
}
static void _vtlb_DynGen_IndirectWrite( u32 bits )
@ -464,15 +542,17 @@ void vtlb_DynGenWrite(u32 sz)
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
ADD32RtoR(ECX,EAX);
u8* _full=JS8(0);
_vtlb_DynGen_DirectWrite( sz );
u8* cont = JMP8(0);
//u8* _direct=JMP8(0);
x86SetJ8(_full);
execohaxme(true);
_vtlb_DynGen_IndirectWrite( sz );
x86SetJ8(cont);
u32* patch=execohaxme(false);
_vtlb_DynGen_DirectWrite( sz );
*patch=(uptr)x86Ptr[_EmitterId_];
}
@ -499,39 +579,11 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
break;
case 64:
if( _hasFreeMMXreg() )
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoROffset(freereg,EDX,0);
MOVQRtoM(ppf,freereg);
_freeMMXreg( freereg );
}
else
{
MOV32RmtoR(EAX,EDX);
MOV32RtoM(ppf,EAX);
MOV32RmtoROffset(EAX,EDX,4);
MOV32RtoM(ppf+4,EAX);
}
MOV64_MMX( ECX, EDX,0,ppf); // dest <- src!
break;
case 128:
if( _hasFreeXMMreg() )
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoROffset(freereg,EDX,0);
SSE2_MOVDQA_XMM_to_M128(ppf,freereg);
_freeXMMreg( freereg );
}
else
{
// Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg.
MOV32ItoR( ECX, ppf );
MOV128_MtoM( ECX, EDX ); // dest <- src!
}
MOVx_SSE( ECX, EDX,0,ppf); // dest <- src!
break;
}

View File

@ -79,7 +79,7 @@ emitterT void write64( u64 val ){
//------------------------------------------------------------------
// jump/align functions
//------------------------------------------------------------------
emitterT void ex86SetPtr( u8 *ptr );
emitterT u8* ex86SetPtr( u8 *ptr );
emitterT void ex86SetJ8( u8 *j8 );
emitterT void ex86SetJ8A( u8 *j8 );
emitterT void ex86SetJ16( u16 *j16 );

View File

@ -159,9 +159,12 @@ emitterT void CMOV32MtoR( int cc, int to, uptr from )
}
////////////////////////////////////////////////////
emitterT void ex86SetPtr( u8* ptr )
// Returns the emitter's current write position.  When 'ptr' is non-null the
// emitter is additionally repointed to it; passing 0 makes this a pure "get".
emitterT u8* ex86SetPtr( u8* ptr )
{
	u8* const previous = x86Ptr[I];
	if (ptr != 0)
		x86Ptr[I] = ptr;
	return previous;
}
////////////////////////////////////////////////////