From 6c88e99cf2476d6e248053c572f69fcb7903e1ce Mon Sep 17 00:00:00 2001
From: drkiiraziel
Date: Thu, 9 Apr 2009 20:44:26 +0000
Subject: [PATCH] --This breaks linux.
 --Basic vtlb code rewrite for full mapping using exceptions
 --This is buggy & leaks ram for now

git-svn-id: http://pcsx2.googlecode.com/svn/branches/vtlb-exp@934 96395faa-99c1-11dd-bbfe-3dabce05a288
---
 pcsx2/IopCounters.cpp         |   4 +-
 pcsx2/vtlb.cpp                |  35 +++-
 pcsx2/windows/WinMain.cpp     |   6 -
 pcsx2/windows/WinSysExec.cpp  |  49 ++++-
 pcsx2/x86/ix86-32/recVTLB.cpp | 352 +++++++++++++++++++---------------
 pcsx2/x86/ix86/ix86.h         |   2 +-
 pcsx2/x86/ix86/ix86.inl       |   7 +-
 7 files changed, 289 insertions(+), 166 deletions(-)
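Overview of the approach, for reference: the old dynarec emitted both the
direct (mapped) and the indirect (handler) memory path inline and picked one
with a JS branch. With this patch only the direct path is emitted, preceded by
a placeholder ADD EAX,imm32 whose immediate points at a descriptor of the
not-yet-built indirect code. An access through an unmapped page faults, and
the page-fault filter (WinSysExec.cpp below) patches the placeholder into a JS
to a lazily-built stub. Both paths begin with the same vtlb lookup -- roughly
the following sketch, using stand-in names, not code taken from the patch:

    #include <stdint.h>

    static const int PAGE_BITS = 12;    // stand-in for VTLB_PAGE_BITS
    static int32_t   vmap[1 << 20];     // per-page (host - guest) delta

    uint32_t vtlb_read32(uint32_t vaddr)
    {
        // adding the vmap entry yields a host pointer; entries for pages
        // without a direct mapping force the sign bit of the sum
        int32_t ppf = (int32_t)(vaddr + vmap[vaddr >> PAGE_BITS]);
        if (ppf >= 0)
            return *(uint32_t*)(uintptr_t)ppf;  // direct: plain host load
        return 0; // indirect: would dispatch to the registered handler here
    }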
diff --git a/pcsx2/IopCounters.cpp b/pcsx2/IopCounters.cpp
index 1152854a19..626f857503 100644
--- a/pcsx2/IopCounters.cpp
+++ b/pcsx2/IopCounters.cpp
@@ -357,13 +357,15 @@ static void psxCheckEndGate32(int i)
 	_psxCheckEndGate( i );
 }
 
-
+#include <windows.h>
 void psxVBlankStart()
 {
 	cdvdVsync();
 	psxHu32(0x1070) |= 1;
 	if(psxvblankgate & (1 << 1)) psxCheckStartGate16(1);
 	if(psxvblankgate & (1 << 3)) psxCheckStartGate32(3);
+	if (GetAsyncKeyState('P'))
+		Cpu->Reset();
 }
 
 void psxVBlankEnd()
diff --git a/pcsx2/vtlb.cpp b/pcsx2/vtlb.cpp
index f34df81bf4..810cbc5d41 100644
--- a/pcsx2/vtlb.cpp
+++ b/pcsx2/vtlb.cpp
@@ -61,6 +61,12 @@ vtlbHandler UnmappedVirtHandler1;
 vtlbHandler UnmappedPhyHandler0;
 vtlbHandler UnmappedPhyHandler1;
 
+#define VTLB_ALLOC_SIZE (0x2900000)	//this is a bit more than required
+
+u8* vtlb_alloc_base;	//base of the memory array
+u8* vtlb_alloc_current;	//current base
+u8 vtlb_alloc_bits[VTLB_ALLOC_SIZE/16/8];	//328 kb
+
 /*
 __asm
@@ -91,6 +97,13 @@ callfunction:
 // Interpreter Implementations of VTLB Memory Operations.
 // See recVTLB.cpp for the dynarec versions.
 
+void memwritebits(u8* ptr)
+{
+	u32 offs=ptr-vtlb_alloc_base;
+	offs/=16;
+	vtlb_alloc_bits[offs/8]|=1<<(offs%8);
+}
+
 // Interpreted VTLB lookup for 8, 16, and 32 bit accesses
 template<u32 DataSize,typename DataType>
 __forceinline DataType __fastcall MemOp_r0(u32 addr)
@@ -116,7 +129,6 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr)
 		jNO_DEFAULT;
 	}
 }
-
 // Interpreted VTLB lookup for 64 and 128 bit accesses.
 template<u32 DataSize,typename DataType>
 __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
@@ -155,6 +167,7 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
 	s32 ppf=addr+vmv;
 	if (!(ppf<0))
 	{
+		//memwritebits((u8*)ppf);
 		*reinterpret_cast<DataType*>(ppf)=data;
 	}
 	else
@@ -182,6 +195,7 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
 	s32 ppf=addr+vmv;
 	if (!(ppf<0))
 	{
+		//memwritebits((u8*)ppf);
 		*reinterpret_cast<DataType*>(ppf)=*data;
 		if (DataSize==128)
 			*reinterpret_cast<DataType*>(ppf+8)=data[1];
@@ -552,6 +566,13 @@ void vtlb_Term()
 	//nothing to do for now
 }
 
+
+void vtlb_alloc_mem()
+{
+	u32 size=VTLB_ALLOC_SIZE;
+	vtlb_alloc_base=SysMmapEx( 0, size, 0x80000000, "Vtlb");
+	vtlb_alloc_current=vtlb_alloc_base;
+}
 // This function allocates a memory block that is compatible with the Vtlb's requirements
 // for memory locations. The Vtlb requires the topmost bit (Sign bit) of the memory
 // pointer to be cleared. Some operating systems and/or implementations of malloc do that,
@@ -559,7 +580,18 @@
 // but it is not guaranteed, so we do it ourselves, depending on the
 // platform.
 u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress )
 {
+	if (!vtlb_alloc_base)
+		vtlb_alloc_mem();
+
+	u32 realign=((uptr)vtlb_alloc_current&(align-1));
+	if (realign)
+		vtlb_alloc_current+=align-realign;
+
+	u8* rv=vtlb_alloc_current;
+	vtlb_alloc_current+=size;
+	return rv;
+
 #ifdef __LINUX__
 	return SysMmapEx( tryBaseAddress, size, 0x80000000, "Vtlb" );
 #else
@@ -569,6 +601,7 @@ u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress )
 
 void vtlb_free( void* pmem, uint size )
 {
+	return;	//whatever
 	if( pmem == NULL ) return;
 
 #ifdef __LINUX__
diff --git a/pcsx2/windows/WinMain.cpp b/pcsx2/windows/WinMain.cpp
index de6b3a953b..c74e88157f 100644
--- a/pcsx2/windows/WinMain.cpp
+++ b/pcsx2/windows/WinMain.cpp
@@ -202,10 +202,8 @@ void WinRun()
 
 	_doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 );
 
-#ifndef _DEBUG
 	if( Config.Profiler )
 		ProfilerInit();
-#endif
 
 	InitCPUTicks();
 
@@ -800,7 +798,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
 			SaveConfig();
 			break;
 
-#ifndef _DEBUG
 		case ID_PROFILER:
 			Config.Profiler = !Config.Profiler;
 			if( Config.Profiler )
@@ -815,7 +812,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
 			}
 			SaveConfig();
 			break;
-#endif
 
 		default:
 			if (LOWORD(wParam) >= ID_LANGS && LOWORD(wParam) <= (ID_LANGS + langsMax))
@@ -989,9 +985,7 @@ void CreateMainMenu() {
 	ADDMENUITEM(0,_("Print cdvd &Info"), ID_CDVDPRINT);
 	ADDMENUITEM(0,_("Close GS Window on Esc"), ID_CLOSEGS);
 	ADDSEPARATOR(0);
-#ifndef _DEBUG
 	ADDMENUITEM(0,_("Enable &Profiler"), ID_PROFILER);
-#endif
 	ADDMENUITEM(0,_("Enable &Patches"), ID_PATCHES);
 	ADDMENUITEM(0,_("Enable &Console"), ID_CONSOLE);
 	ADDSEPARATOR(0);
diff --git a/pcsx2/windows/WinSysExec.cpp b/pcsx2/windows/WinSysExec.cpp
index ec62fe33d0..3c7ce85aff 100644
--- a/pcsx2/windows/WinSysExec.cpp
+++ b/pcsx2/windows/WinSysExec.cpp
@@ -49,14 +49,53 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps )
 	}
 
 	// get bad virtual address
-	u32 offset = (u8*)ExceptionRecord.ExceptionInformation[1]-psM;
+	uptr addr=ExceptionRecord.ExceptionInformation[1];
 
-	if (offset>=Ps2MemSize::Base)
-		return EXCEPTION_CONTINUE_SEARCH;
+	//this is a *hackfix* for a bug on x64 windows kernels. They do not report the correct address
+	//if the fault is a misaligned access (they return 0)
+	if (addr==0)
+	{
+		if (eps->ContextRecord->Ecx & 0x80000000)
+			addr=eps->ContextRecord->Ecx;
+	}
 
-	mmap_ClearCpuBlock( offset );
+	u32 offset = addr-(uptr)psM;
 
-	return EXCEPTION_CONTINUE_EXECUTION;
+	if (addr&0x80000000)
+	{
+		uptr _vtlb_HandleRewrite(uptr code);
+		u8* pcode=(u8*)ExceptionRecord.ExceptionAddress;
+
+		u32 patch_point=1;
+		//01 C1
+		while(pcode[-patch_point]!=0x81 || pcode[-patch_point-1]!=0xC1 || pcode[-patch_point-2]!=0x01)
+		{
+			patch_point++;
+		}
+		assert(pcode[-patch_point]==0x81);
+		pcode[-patch_point]=0xF;	//js32, 0x81 is add32
+		pcode[-patch_point+1]=0x88;
+
+		//resume execution from the correct point
+		eps->ContextRecord->Eax-=*(u32*)&pcode[-patch_point+2];
+
+		uptr codeloc=_vtlb_HandleRewrite(*(u32*)&pcode[-patch_point+2]);
+
+		eps->ContextRecord->Eip=codeloc;
+		*(u32*)&pcode[-patch_point+2]=codeloc-(u32)&pcode[-patch_point+6];
+
+		SysPrintf("memop patch for full mapping @ %08X : pp %d\n",pcode,patch_point);
+		return EXCEPTION_CONTINUE_EXECUTION;
+	}
+	else
+	{
+		if (offset>=Ps2MemSize::Base)
+			return EXCEPTION_CONTINUE_SEARCH;
+
+		mmap_ClearCpuBlock( offset );
+
+		return EXCEPTION_CONTINUE_EXECUTION;
+	}
 }
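The byte surgery the filter performs deserves a picture. The recompiler emits
ADD ECX,EAX (01 C1) to form the host pointer, immediately followed by the
placeholder ADD EAX,imm32 (81 C0 imm32). JS rel32 (0F 88 rel32) is also six
bytes, so the rewrite fits in place. A sketch of the same arithmetic
(illustrative only -- build_stub is a hypothetical stand-in for
_vtlb_HandleRewrite):

    #include <stdint.h>

    uintptr_t build_stub(uint32_t descriptor);  // hypothetical; see recVTLB.cpp

    //   before:  01 C1          add ecx,eax    ; also sets SF for unmapped pages
    //            81 C0 imm32    add eax,imm32  ; imm32 = descriptor address
    //   after:   01 C1          add ecx,eax
    //            0F 88 rel32    js  stub       ; same 6-byte footprint
    void backpatch(uint8_t* p)                  // p points at the 0x81 byte
    {
        uint32_t descriptor = *(uint32_t*)(p + 2);
        p[0] = 0x0F;                            // two-byte opcode escape
        p[1] = 0x88;                            // JS rel32
        uintptr_t stub = build_stub(descriptor);
        *(uint32_t*)(p + 2) = (uint32_t)(stub - (uintptr_t)(p + 6)); // rel32 counts from the next insn
    }

The filter additionally subtracts the immediate from the saved EAX (the
placeholder ADD has already executed by the time of the fault) and points EIP
at the stub, so the faulting access is retried through the indirect path.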
diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp
index cbc567c68d..2554ab041f 100644
--- a/pcsx2/x86/ix86-32/recVTLB.cpp
+++ b/pcsx2/x86/ix86-32/recVTLB.cpp
@@ -24,23 +24,178 @@
 #include "iCore.h"
 #include "iR5900.h"
 
-using namespace vtlb_private;
+u8* execohax_pos=0;
+u8* execohax_start=0;
+u32 execohx_sz;
 
-// NOTICE: This function *destroys* EAX!!
-// Moves 128 bits of memory from the source register ptr to the dest register ptr.
-// (used as an equivalent to movaps, when a free XMM register is unavailable for some reason)
-void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
+u8* code_pos=0;
+u8* code_start=0;
+u32 code_sz;
+
+using namespace vtlb_private;
+#include <windows.h>
+
+void execuCode(bool set)
 {
-	MOV32RmtoR(EAX,srcRm);
-	MOV32RtoRm(destRm,EAX);
-	MOV32RmtoROffset(EAX,srcRm,4);
-	MOV32RtoRmOffset(destRm,EAX,4);
-	MOV32RmtoROffset(EAX,srcRm,8);
-	MOV32RtoRmOffset(destRm,EAX,8);
-	MOV32RmtoROffset(EAX,srcRm,12);
-	MOV32RtoRmOffset(destRm,EAX,12);
+	u32 used=code_pos-code_start;
+	u32 free=2*1024*1024-used;
+
+	if (code_pos == 0 || free<128)
+	{
+		SysPrintf("Leaking 2 megabytes of ram\n");
+		code_start=code_pos=(u8*)VirtualAlloc(0,2*1024*1024,MEM_COMMIT,PAGE_EXECUTE_READWRITE);
+		code_sz+=2*1024*1024;
+		int i=0;
+		while(i<2*1024*1024)
+			code_start[i++]=0xCC;	//fill the fresh block with int3
+	}
+
+	static u8* code_saved;
+	if (set)
+		code_saved=x86SetPtr(code_pos);	//redirect the emitter into the stub buffer
+	else
+		code_pos=x86SetPtr(code_saved);	//advance code_pos & restore the emitter
+}
+
+u8* saved=0;
+u8* mod=0;
+
+u32* execohaxme(bool set)
+{
+	if (set)
+	{
+		u32 free=2*1024*1024-(u32)(execohax_pos-execohax_start);
+		if (execohax_pos==0 || free<128)
+		{
+			execohax_start=execohax_pos=(u8*)VirtualAlloc(0,2*1024*1024,MEM_COMMIT,PAGE_READWRITE);
+			execohx_sz+=2*1024*1024;
+		}
+
+		write8<_EmitterId_>( 0x81 );	//add eax,imm32 -- the placeholder the fault handler patches
+		ModRM<_EmitterId_>( 3, 0, EAX );
+		write32<_EmitterId_>( (uptr)execohax_pos );
+
+		saved=x86SetPtr(execohax_pos);
+		mod=execohax_pos;
+		write8<_EmitterId_>(0);		//size, in bytes
+		write32<_EmitterId_>(0);	//return address
+	}
+	else
+	{
+		//x86AlignExecutable(4);
+		//x86Align(64);
+		execohax_pos=x86SetPtr(mod);
+		write8<_EmitterId_>(execohax_pos-mod-5);
+		return (u32*)x86SetPtr(saved);
+	}
+
+	return 0;
+}
+
+uptr _vtlb_HandleRewrite(uptr block)
+{
+	u8 size=*(u8*)block;
+	u32 ra=*(u32*)(block+1);
+	u8* pcode=(u8*)(block+5);
+
+	execuCode(true);
+	uptr rv=(uptr)code_pos;
+
+	while(size--)
+	{
+		write8<_EmitterId_>(*pcode++);
+	}
+	JMP32(ra-(uptr)x86Ptr[_EmitterId_]-5);
+
+	execuCode(false);
+	//do magic
+	return rv;
+}
+
+PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] );
+
+void MOVx_SSE( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAddr=0,bool half=false )
+{
+	int reg;
+	bool free_reg=false;
+	if( _hasFreeXMMreg() )
+	{
+		free_reg=true;
+		reg=_allocTempXMMreg( XMMT_INT, -1 );
+	}
+	else
+	{
+		SSE2_MOVDQA_XMM_to_M128((uptr)g_globalXMMData,XMM0);
+		reg=XMM0;
+	}
+
+	if (half)
+	{
+		if (srcAddr)
+			SSE_MOVLPS_M64_to_XMM(reg,srcAddr);
+		else
+			SSE_MOVLPS_RmOffset_to_XMM(reg,srcRm,0);
+
+		if (dstAddr)
+			SSE_MOVLPS_XMM_to_M64(dstAddr,reg);
+		else
+			SSE_MOVLPS_XMM_to_RmOffset(destRm,reg,0);
+	}
+	else
+	{
+		if (srcAddr)
+			SSE2_MOVDQA_M128_to_XMM(reg,srcAddr);
+		else
+			SSE2_MOVDQARmtoROffset(reg,srcRm,0);
+
+		if (dstAddr)
+			SSE2_MOVDQA_XMM_to_M128(dstAddr,reg);
+		else
+			SSE2_MOVDQARtoRmOffset(destRm,reg,0);
+	}
+
+	if (free_reg)
+		_freeXMMreg(reg);
+	else
+	{
+		SSE2_MOVDQA_M128_to_XMM(XMM0,(uptr)g_globalXMMData);
+	}
+}
+
+void MOV64_MMX( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAddr=0)
+{
+	//if we have a free xmm reg and the fpu is in fpu state then we use the SSE version
+	if( !(_hasFreeXMMreg() && (x86FpuState == FPU_STATE)) && _hasFreeMMXreg() )
+	{
+		const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
+		if (srcAddr)
+			MOVQMtoR(freereg,srcAddr);
+		else
+			MOVQRmtoROffset(freereg,srcRm,0);
+
+		if (dstAddr)
+			MOVQRtoM(dstAddr,freereg);
+		else
+			MOVQRtoRmOffset(destRm,freereg,0);
+
+		_freeMMXreg(freereg);
+	}
+	else
+	{
+		MOVx_SSE(destRm,srcRm,srcAddr,dstAddr,true);
+	}
 }
 
 /*
 	// Pseudo-Code For the following Dynarec Implementations -->
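For reference, the side block that execohaxme() writes and
_vtlb_HandleRewrite() consumes can be pictured as the following (a
hypothetical struct; the code above reads the fields byte-by-byte):

    #pragma pack(push, 1)
    struct RewriteBlock         // lives at the imm32 the placeholder ADD carries
    {
        u8  size;               // length of the captured indirect-path code
        u32 ra;                 // main-stream address the stub jumps back to
        //u8 code[];            // the _vtlb_DynGen_Indirect* emission follows
    };
    #pragma pack(pop)

_vtlb_HandleRewrite() copies size bytes of that code into the executable
buffer and terminates the stub with a JMP32 back to ra.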
@@ -118,38 +273,11 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
 			break;
 
 		case 64:
-			if( _hasFreeMMXreg() )
-			{
-				const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
-				MOVQRmtoROffset(freereg,ECX,0);
-				MOVQRtoRmOffset(EDX,freereg,0);
-				_freeMMXreg(freereg);
-			}
-			else
-			{
-				MOV32RmtoR(EAX,ECX);
-				MOV32RtoRm(EDX,EAX);
-
-				MOV32RmtoROffset(EAX,ECX,4);
-				MOV32RtoRmOffset(EDX,EAX,4);
-			}
+			MOV64_MMX(EDX,ECX);
 			break;
 
 		case 128:
-			if( _hasFreeXMMreg() )
-			{
-				const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
-				SSE2_MOVDQARmtoROffset(freereg,ECX,0);
-				SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
-				_freeXMMreg(freereg);
-			}
-			else
-			{
-				// Could put in an MMX optimization here as well, but no point really.
-				// It's almost never used since there's almost always a free XMM reg.
-
-				MOV128_MtoM( EDX, ECX );	// dest <- src!
-			}
+			MOVx_SSE(EDX,ECX);
 			break;
 
 		jNO_DEFAULT
@@ -189,15 +317,16 @@ void vtlb_DynGenRead64(u32 bits)
 	SHR32ItoR(EAX,VTLB_PAGE_BITS);
 	MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
 	ADD32RtoR(ECX,EAX);
-	u8* _fullread = JS8(0);
+	//u8* _direct = JMP8(0);
+	execohaxme(true);
 
-	_vtlb_DynGen_DirectRead( bits, false );
-	u8* cont = JMP8(0);
-
-	x86SetJ8(_fullread);
 	_vtlb_DynGen_IndirectRead( bits );
-
-	x86SetJ8(cont);
+
+	u32* patch=execohaxme(false);
+
+	_vtlb_DynGen_DirectRead( bits, false );
+
+	*patch=(uptr)x86Ptr[_EmitterId_];
 }
 
 // Recompiled input registers:
@@ -211,12 +340,9 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
 	SHR32ItoR(EAX,VTLB_PAGE_BITS);
 	MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
 	ADD32RtoR(ECX,EAX);
-	u8* _fullread = JS8(0);
+	//u8* _direct = JMP8(0);
+	execohaxme(true);
 
-	_vtlb_DynGen_DirectRead( bits, sign );
-	u8* cont = JMP8(0);
-
-	x86SetJ8(_fullread);
 	_vtlb_DynGen_IndirectRead( bits );
 
 	// perform sign extension on the result:
 
 	if( bits==8 )
 	{
 		if( sign )
 			MOVSX32R8toR(EAX,EAX);
 		else
 			MOVZX32R8toR(EAX,EAX);
 	}
 	else if( bits==16 )
 	{
 		if( sign )
 			MOVSX32R16toR(EAX,EAX);
 		else
 			MOVZX32R16toR(EAX,EAX);
 	}
 
-	x86SetJ8(cont);
+	u32* patch=execohaxme(false);
+
+	_vtlb_DynGen_DirectRead( bits, sign );
+
+	*patch=(uptr)x86Ptr[_EmitterId_];
 }
 
 //
@@ -251,39 +381,11 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
 	switch( bits )
 	{
 		case 64:
-			if( _hasFreeMMXreg() )
-			{
-				const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
-				MOVQMtoR(freereg,ppf);
-				MOVQRtoRmOffset(EDX,freereg,0);
-				_freeMMXreg(freereg);
-			}
-			else
-			{
-				MOV32MtoR(EAX,ppf);
-				MOV32RtoRm(EDX,EAX);
-
-				MOV32MtoR(EAX,ppf+4);
-				MOV32RtoRmOffset(EDX,EAX,4);
-			}
+			MOV64_MMX( EDX, ECX,ppf );	// dest <- src!
 			break;
 
 		case 128:
-			if( _hasFreeXMMreg() )
-			{
-				const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
-				SSE2_MOVDQA_M128_to_XMM( freereg, ppf );
-				SSE2_MOVDQARtoRmOffset(EDX,freereg,0);
-				_freeXMMreg(freereg);
-			}
-			else
-			{
-				// Could put in an MMX optimization here as well, but no point really.
-				// It's almost never used since there's almost always a free XMM reg.
-
-				MOV32ItoR( ECX, ppf );
-				MOV128_MtoM( EDX, ECX );	// dest <- src!
-			}
+			MOVx_SSE( EDX, ECX,ppf );	// dest <- src!
 			break;
 
 		jNO_DEFAULT
 	}
@@ -403,40 +505,16 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
 			break;
 
 		case 64:
-			if( _hasFreeMMXreg() )
-			{
-				const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
-				MOVQRmtoROffset(freereg,EDX,0);
-				MOVQRtoRmOffset(ECX,freereg,0);
-				_freeMMXreg( freereg );
-			}
-			else
-			{
-				MOV32RmtoR(EAX,EDX);
-				MOV32RtoRm(ECX,EAX);
-
-				MOV32RmtoROffset(EAX,EDX,4);
-				MOV32RtoRmOffset(ECX,EAX,4);
-			}
+			MOV64_MMX( ECX, EDX );
 			break;
 
 		case 128:
-			if( _hasFreeXMMreg() )
-			{
-				const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
-				SSE2_MOVDQARmtoROffset(freereg,EDX,0);
-				SSE2_MOVDQARtoRmOffset(ECX,freereg,0);
-				_freeXMMreg( freereg );
-			}
-			else
-			{
-				// Could put in an MMX optimization here as well, but no point really.
-				// It's almost never used since there's almost always a free XMM reg.
-
-				MOV128_MtoM( ECX, EDX );	// dest <- src!
-			}
+			MOVx_SSE( ECX, EDX );
 			break;
 	}
+
+//	SHR32ItoR(ECX,4);	// do /16
+//	BTS_wtf(asdasd,ECX);
 }
 
 static void _vtlb_DynGen_IndirectWrite( u32 bits )
@@ -464,15 +542,17 @@ void vtlb_DynGenWrite(u32 sz)
 	SHR32ItoR(EAX,VTLB_PAGE_BITS);
 	MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
 	ADD32RtoR(ECX,EAX);
-	u8* _full=JS8(0);
-	_vtlb_DynGen_DirectWrite( sz );
-	u8* cont = JMP8(0);
+	//u8* _direct=JMP8(0);
 
-	x86SetJ8(_full);
+	execohaxme(true);
+
 	_vtlb_DynGen_IndirectWrite( sz );
-
-	x86SetJ8(cont);
+
+	u32* patch=execohaxme(false);
+	_vtlb_DynGen_DirectWrite( sz );
+
+	*patch=(uptr)x86Ptr[_EmitterId_];
 }
 
@@ -499,39 +579,11 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
 			break;
 
 		case 64:
-			if( _hasFreeMMXreg() )
-			{
-				const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
-				MOVQRmtoROffset(freereg,EDX,0);
-				MOVQRtoM(ppf,freereg);
-				_freeMMXreg( freereg );
-			}
-			else
-			{
-				MOV32RmtoR(EAX,EDX);
-				MOV32RtoM(ppf,EAX);
-
-				MOV32RmtoROffset(EAX,EDX,4);
-				MOV32RtoM(ppf+4,EAX);
-			}
+			MOV64_MMX( ECX, EDX,0,ppf);	// dest <- src!
 			break;
 
 		case 128:
-			if( _hasFreeXMMreg() )
-			{
-				const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
-				SSE2_MOVDQARmtoROffset(freereg,EDX,0);
-				SSE2_MOVDQA_XMM_to_M128(ppf,freereg);
-				_freeXMMreg( freereg );
-			}
-			else
-			{
-				// Could put in an MMX optimization here as well, but no point really.
-				// It's almost never used since there's almost always a free XMM reg.
-
-				MOV32ItoR( ECX, ppf );
-				MOV128_MtoM( ECX, EDX );	// dest <- src!
-			}
+			MOVx_SSE( ECX, EDX,0,ppf);	// dest <- src!
 			break;
 	}
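Every read/write generator above now follows the same bracket pattern;
isolated here from the Read64 variant for clarity:

    execohaxme(true);                       // placeholder ADD + open the side block
    _vtlb_DynGen_IndirectRead( bits );      // slow path is captured into the side block
    u32* patch = execohaxme(false);         // close it; returns the return-address slot
    _vtlb_DynGen_DirectRead( bits, false ); // fast path lands inline in the main stream
    *patch = (uptr)x86Ptr[_EmitterId_];     // stub resumes right after the fast path

Note that the direct and indirect emissions swapped places relative to the old
code: the indirect code is generated first, into the side buffer, so the main
stream contains only the placeholder and the fast path.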
diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h
index 797dc2eaf7..51a65d712a 100644
--- a/pcsx2/x86/ix86/ix86.h
+++ b/pcsx2/x86/ix86/ix86.h
@@ -79,7 +79,7 @@ emitterT void write64( u64 val ){
 //------------------------------------------------------------------
 // jump/align functions
 //------------------------------------------------------------------
-emitterT void ex86SetPtr( u8 *ptr );
+emitterT u8* ex86SetPtr( u8 *ptr );
 emitterT void ex86SetJ8( u8 *j8 );
 emitterT void ex86SetJ8A( u8 *j8 );
 emitterT void ex86SetJ16( u16 *j16 );
diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl
index ae4f5829af..7394bd6a3a 100644
--- a/pcsx2/x86/ix86/ix86.inl
+++ b/pcsx2/x86/ix86/ix86.inl
@@ -159,9 +159,12 @@ emitterT void CMOV32MtoR( int cc, int to, uptr from )
 }
 
 ////////////////////////////////////////////////////
-emitterT void ex86SetPtr( u8* ptr )
+emitterT u8* ex86SetPtr( u8* ptr )
 {
-	x86Ptr[I] = ptr;
+	u8* rv= x86Ptr[I];
+	if (ptr!=0)
+		x86Ptr[I] = ptr;
+	return rv;
 }
 
 ////////////////////////////////////////////////////
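The emitter change that makes the side-buffer trick work: ex86SetPtr() now
returns the previous write position and treats NULL as a pure query. A usage
sketch (sideBuf is a hypothetical buffer):

    u8* prev = x86SetPtr(sideBuf);     // redirect emission, remember the old position
    write8<_EmitterId_>(0xCC);         // this byte lands in sideBuf
    u8* end  = x86SetPtr(prev);        // restore; end = where sideBuf emission stopped
    u8* cur  = x86SetPtr(NULL);        // NULL leaves the pointer unchanged: query only

execuCode() and execohaxme() in recVTLB.cpp rely on exactly this swap/restore
to bracket their emission into the stub and descriptor buffers.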