diff --git a/common/include/Utilities/PageFaultSource.h b/common/include/Utilities/PageFaultSource.h index fc4e88ce4d..a22b32b66e 100644 --- a/common/include/Utilities/PageFaultSource.h +++ b/common/include/Utilities/PageFaultSource.h @@ -247,6 +247,11 @@ public: m_blocksize = (bytes + __pagesize - 1) / __pagesize; return m_blocksize * __pagesize; } + + virtual void Reset() + { + _parent::Reset(); + } protected: @@ -266,72 +271,6 @@ protected: virtual void CommitBlocks( uptr page, uint blocks ); }; -// -------------------------------------------------------------------------------------- -// SpatialArrayReserve -// -------------------------------------------------------------------------------------- -// A spatial array is one where large areas of the memory reserve will remain unused during -// process execution. Only areas put to use will be committed to virtual memory. -// -// Spatial array efficiency depends heavily on selecting the right parameters for the array's -// primary intended use. Memory in a spatial array is arranged by blocks, with each block -// containing some number of pages (pages are 4096 bytes each on most platforms). When the -// array is accessed, the entire block containing the addressed memory will be committed at -// once. Blocks can be a single page in size (4096 bytes), though this is highly discouraged -// due to overhead and fragmentation penalties. -// -// Balancing block sizes: -// Larger blocks are good for reducing memory fragmentation and block-tracking overhead, but -// can also result in a lot of otherwise unused memory being committed to memory. Smaller -// blocks are good for arrays that will tend toward more sequential behavior, as they reduce -// the amount of unused memory being committed. However, since every block requires a -// tracking entry, assigning small blocks to a very large array can result in quite a bit of -// unwanted overhead. 
Furthermore, if the array is accessed randomly, system physical memory -// will become very fragmented, which will also hurt performance. -// -// By default, the base block size is based on a heuristic that balances the size of the spatial -// array reserve against a best-guess performance profile for the target platform. -// -class SpatialArrayReserve : public BaseVmReserveListener -{ - typedef BaseVmReserveListener _parent; - -protected: - uint m_numblocks; - - // Array of block bits, each bit indicating if the block has been committed to memory - // or not. The array length is typically determined via ((numblocks+7) / 8), though the - // actual array size may be larger in order to accommodate 32-bit or 128-bit accelerated - // operations. - ScopedAlignedAlloc m_blockbits; - -public: - SpatialArrayReserve( const wxString& name ); - - virtual void* Reserve( size_t size = 0, uptr base = 0, uptr upper_bounds = 0 ); - virtual void Reset(); - virtual bool TryResize( uint newsize ); - - void OnCommittedBlock( void* block ); - - SpatialArrayReserve& SetBlockCount( uint blocks ); - SpatialArrayReserve& SetBlockSizeInPages( uint bytes ); - - uptr SetBlockSize( uptr bytes ); - - operator void*() { return m_baseptr; } - operator const void*() const { return m_baseptr; } - - operator u8*() { return (u8*)m_baseptr; } - operator const u8*() const { return (u8*)m_baseptr; } - - using _parent::operator[]; - -protected: - void ReprotectCommittedBlocks( const PageProtectionMode& newmode ); - void DoCommitAndProtect( uptr page ); - uint _calcBlockBitArrayLength() const; -}; - #ifdef __linux__ # define PCSX2_PAGEFAULT_PROTECT diff --git a/common/src/Utilities/VirtualMemory.cpp b/common/src/Utilities/VirtualMemory.cpp index cb7c2dbe6b..edbfbbf853 100644 --- a/common/src/Utilities/VirtualMemory.cpp +++ b/common/src/Utilities/VirtualMemory.cpp @@ -336,154 +336,6 @@ void BaseVmReserveListener::OnPageFaultEvent(const PageFaultInfo& info, bool& ha #endif } - -// 
-------------------------------------------------------------------------------------- -// SpatialArrayReserve (implementations) -// -------------------------------------------------------------------------------------- - -SpatialArrayReserve::SpatialArrayReserve( const wxString& name ) : - _parent( name ), m_numblocks(0) -{ - m_prot_mode = PageAccess_ReadWrite(); -} - -uint SpatialArrayReserve::_calcBlockBitArrayLength() const -{ - // divide by 8 (rounded up) to compress 8 bits into each byte. - // mask off lower bits (rounded up) to allow for 128-bit alignment and SSE operations. - return (((m_numblocks + 7) / 8) + 15) & ~15; -} - -void* SpatialArrayReserve::Reserve( size_t size, uptr base, uptr upper_bounds ) -{ - void* addr = _parent::Reserve( size, base, upper_bounds ); - if (!addr) return NULL; - - if (m_blocksize) SetBlockSizeInPages( m_blocksize ); - m_blockbits.Alloc( _calcBlockBitArrayLength() ); - - return addr; -} - -void SpatialArrayReserve::ReprotectCommittedBlocks( const PageProtectionMode& newmode ) -{ - if (!m_pages_commited) return; - - u8* curptr = GetPtr(); - const uint blockBytes = m_blocksize * __pagesize; - for (uint i=0; iMain; if (rampage >= Ps2MemSize::MainRam) - return -1; //not in ram, no tracking done ... + return ProtMode_NotRequired; //not in ram, no tracking done ... rampage >>= 12; - return ( m_PageProtectInfo[rampage].Mode == ProtMode_Manual ) ? 
1 : 0; + + return m_PageProtectInfo[rampage].Mode; } // paddr - physically mapped PS2 address diff --git a/pcsx2/Memory.h b/pcsx2/Memory.h index 883aebd136..86507e03f3 100644 --- a/pcsx2/Memory.h +++ b/pcsx2/Memory.h @@ -116,7 +116,15 @@ extern void memBindConditionalHandlers(); extern void memMapVUmicro(); -extern int mmap_GetRamPageInfo( u32 paddr ); +enum vtlb_ProtectionMode +{ + ProtMode_None = 0, // page is 'unaccounted' -- neither protected nor unprotected + ProtMode_Write, // page is under write protection (exception handler) + ProtMode_Manual, // page is under manual protection (self-checked at execution) + ProtMode_NotRequired // page doesn't require any protection +}; + +extern vtlb_ProtectionMode mmap_GetRamPageInfo( u32 paddr ); extern void mmap_MarkCountedRamPage( u32 paddr ); extern void mmap_ResetBlockTracking(); diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp index 7b1ba8e18b..96a6df6dea 100644 --- a/pcsx2/System.cpp +++ b/pcsx2/System.cpp @@ -73,9 +73,23 @@ void* RecompiledCodeReserve::Reserve( size_t size, uptr base, uptr upper_bounds { if (!_parent::Reserve(size, base, upper_bounds)) return NULL; _registerProfiler(); + + // Pre-Allocate the first block (to reduce the number of segmentation faults + // in debugger) + DoCommitAndProtect(0); + return m_baseptr; } +void RecompiledCodeReserve::Reset() +{ + _parent::Reset(); + + // Pre-Allocate the first block (to reduce the number of segmentation faults + // in debugger) + DoCommitAndProtect(0); +} + // Sets the abbreviated name used by the profiler. Name should be under 10 characters long. 
// After a name has been set, a profiler source will be automatically registered and cleared diff --git a/pcsx2/System/RecTypes.h b/pcsx2/System/RecTypes.h index 29ff43ef78..73c64af4af 100644 --- a/pcsx2/System/RecTypes.h +++ b/pcsx2/System/RecTypes.h @@ -42,6 +42,7 @@ public: virtual void* Reserve( size_t size, uptr base=0, uptr upper_bounds=0 ); virtual void OnCommittedBlock( void* block ); + virtual void Reset(); virtual RecompiledCodeReserve& SetProfilerName( const wxString& shortname ); virtual RecompiledCodeReserve& SetProfilerName( const char* shortname ) diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 6a5e6034d2..d8c8f116ed 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -757,7 +757,7 @@ static const uint m_recBlockAllocSize = static void recReserveCache() { - if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _1mb * 2); + if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _8mb); recMem->SetProfilerName("IOPrec"); while (!recMem->IsOk()) diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index ae797856b6..db0ca2d7a2 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -63,23 +63,6 @@ __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0}; u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0; bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException; -// -------------------------------------------------------------------------------------- -// R5900LutReserve_RAM -// -------------------------------------------------------------------------------------- -class R5900LutReserve_RAM : public SpatialArrayReserve -{ - typedef SpatialArrayReserve _parent; - -public: - R5900LutReserve_RAM( const wxString& name ) - : _parent( name ) - { - } - -protected: - void OnCommittedBlock( void* block ); -}; - //////////////////////////////////////////////////////////////// // Static Private Variables - R5900 Dynarec 
@@ -88,8 +71,9 @@ protected: static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units static RecompiledCodeReserve* recMem = NULL; -static SpatialArrayReserve* recRAMCopy = NULL; -static R5900LutReserve_RAM* recLutReserve_RAM = NULL; +static u8* recRAMCopy = NULL; +static u8* recLutReserve_RAM = NULL; +static const size_t recLutSize = Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1; static uptr m_ConfiguredCacheReserve = 64; @@ -599,11 +583,6 @@ static __ri void ClearRecLUT(BASEBLOCK* base, int memsize) base[i].SetFnptr((uptr)JITCompile); } -void R5900LutReserve_RAM::OnCommittedBlock( void* block ) -{ - _parent::OnCommittedBlock(block); - ClearRecLUT((BASEBLOCK*)block, __pagesize * m_blocksize); -} static void recThrowHardwareDeficiency( const wxChar* extFail ) { @@ -614,7 +593,7 @@ static void recThrowHardwareDeficiency( const wxChar* extFail ) static void recReserveCache() { - if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _1mb * 4); + if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _16mb); recMem->SetProfilerName("EErec"); while (!recMem->IsOk()) @@ -643,25 +622,19 @@ static void recAlloc() { if (!recRAMCopy) { - recRAMCopy = new SpatialArrayReserve( L"R5900 RAM copy" ); - recRAMCopy->SetBlockSize(_16kb); - recRAMCopy->Reserve(Ps2MemSize::MainRam); + recRAMCopy = (u8*)_aligned_malloc(Ps2MemSize::MainRam, 4096); } if (!recRAM) { - recLutReserve_RAM = new R5900LutReserve_RAM( L"R5900 RAM LUT" ); - recLutReserve_RAM->SetBlockSize(_16kb); - recLutReserve_RAM->Reserve(Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1); + recLutReserve_RAM = (u8*)_aligned_malloc(recLutSize, 4096); } - BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM->GetPtr(); + BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM; recRAM = basepos; basepos += (Ps2MemSize::MainRam / 4); recROM = basepos; basepos += (Ps2MemSize::Rom / 4); recROM1 = basepos; basepos += (Ps2MemSize::Rom1 / 4); - 
pxAssert(recLutReserve_RAM->GetPtrEnd() == (u8*)basepos); - for (int i = 0; i < 0x10000; i++) recLUT_SetPage(recLUT, 0, 0, 0, i, 0); @@ -731,8 +704,8 @@ static void recResetRaw() Console.WriteLn( Color_StrongBlack, "EE/iR5900-32 Recompiler Reset" ); recMem->Reset(); - recRAMCopy->Reset(); - recLutReserve_RAM->Reset(); + ClearRecLUT((BASEBLOCK*)recLutReserve_RAM, recLutSize); + memset(recRAMCopy, 0, Ps2MemSize::MainRam); maxrecmem = 0; @@ -756,8 +729,8 @@ static void recResetRaw() static void recShutdown() { safe_delete( recMem ); - safe_delete( recRAMCopy ); - safe_delete( recLutReserve_RAM ); + safe_aligned_free( recRAMCopy ); + safe_aligned_free( recLutReserve_RAM ); recBlocks.Reset(); @@ -1675,6 +1648,89 @@ void __fastcall dyna_page_reset(u32 start,u32 sz) mmap_MarkCountedRamPage( start ); } +static void memory_protect_recompiled_code(u32 startpc, u32 size) +{ + u32 inpage_ptr = HWADDR(startpc); + u32 inpage_sz = size*4; + + // The kernel context register is stored @ 0x800010C0-0x80001300 + // The EENULL thread context register is stored @ 0x81000-.... + bool contains_thread_stack = ((startpc >> 12) == 0x81) || ((startpc >> 12) == 0x80001); + + // note: blocks are guaranteed to reside within the confines of a single page. + const vtlb_ProtectionMode PageType = contains_thread_stack ? ProtMode_Manual : mmap_GetRamPageInfo( inpage_ptr ); + + switch (PageType) + { + case ProtMode_NotRequired: + break; + + case ProtMode_None: + case ProtMode_Write: + mmap_MarkCountedRamPage( inpage_ptr ); + manual_page[inpage_ptr >> 12] = 0; + break; + + case ProtMode_Manual: + xMOV( ecx, inpage_ptr ); + xMOV( edx, inpage_sz / 4 ); + //xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard + + u32 lpc = inpage_ptr; + u32 stg = inpage_sz; + + while(stg>0) + { + xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) ); + xJNE(DispatchBlockDiscard); + + stg -= 4; + lpc += 4; + } + + // Tweakpoint! 
3 is a 'magic' number representing the number of times a counted block + // is re-protected before the recompiler gives up and sets it up as an uncounted (permanent) + // manual block. Higher thresholds result in more recompilations for blocks that share code + // and data on the same page. Side effects of a lower threshold: over extended gameplay + // with several map changes, a game's overall performance could degrade. + + // (ideally, perhaps, manual_counter should be reset to 0 every few minutes?) + + if (!contains_thread_stack && manual_counter[inpage_ptr >> 12] <= 3) + { + // Counted blocks add a weighted (by block size) value into manual_page each time they're + // run. If the block gets run a lot, it resets and re-protects itself in the hope + // that whatever forced it to be manually-checked before was a 1-time deal. + + // Counted blocks have a secondary threshold check in manual_counter, which forces a block + // to 'uncounted' mode if it's recompiled several times. This protects against excessive + // recompilation of blocks that reside on the same codepage as data. + + // fixme? Currently this algo is kinda dumb and results in the forced recompilation of a + // lot of blocks before it decides to mark a 'busy' page as uncounted. There might be + // a more clever approach that could streamline this process, by doing a first-pass + // test using the vtlb memory protection (without recompilation!) to reprotect a counted + // block. But unless a new algo is relatively simple in implementation, it's probably + // not worth the effort (tests show that we have lots of recompiler memory to spare, and + // that the current amount of recompilation is fairly cheap). + + xADD(ptr16[&manual_page[inpage_ptr >> 12]], size); + xJC(DispatchPageReset); + + // note: clearcnt is measured per-page, not per-block! 
+ ConsoleColorScope cs( Color_Gray ); + eeRecPerfLog.Write( "Manual block @ %08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d clearcnt = %d", + startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] ); + } + else + { + eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d", + startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz ); + } + break; + } +} + // Skip MPEG Game-Fix bool skipMPEG_By_Pattern(u32 sPC) { @@ -2076,84 +2132,8 @@ StartRecomp: if (dumplog & 1) iDumpBlock(startpc, recPtr); #endif - u32 sz = (s_nEndBlock-startpc) >> 2; - u32 inpage_ptr = HWADDR(startpc); - u32 inpage_sz = sz*4; - - // note: blocks are guaranteed to reside within the confines of a single page. - - const int PageType = mmap_GetRamPageInfo( inpage_ptr ); - //const u32 pgsz = std::min(0x1000 - inpage_offs, inpage_sz); - const u32 pgsz = inpage_sz; - - switch (PageType) - { - case -1: - break; - - case 0: - mmap_MarkCountedRamPage( inpage_ptr ); - manual_page[inpage_ptr >> 12] = 0; - break; - - default: - xMOV( ecx, inpage_ptr ); - xMOV( edx, pgsz / 4 ); - //xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard - - u32 lpc = inpage_ptr; - u32 stg = pgsz; - - while(stg>0) - { - xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) ); - xJNE(DispatchBlockDiscard); - - stg -= 4; - lpc += 4; - } - - // Tweakpoint! 3 is a 'magic' number representing the number of times a counted block - // is re-protected before the recompiler gives up and sets it up as an uncounted (permanent) - // manual block. Higher thresholds result in more recompilations for blocks that share code - // and data on the same page. Side effects of a lower threshold: over extended gameplay - // with several map changes, a game's overall performance could degrade. - - // (ideally, perhaps, manual_counter should be reset to 0 every few minutes?) 
- - if (startpc != 0x81fc0 && manual_counter[inpage_ptr >> 12] <= 3) - { - // Counted blocks add a weighted (by block size) value into manual_page each time they're - // run. If the block gets run a lot, it resets and re-protects itself in the hope - // that whatever forced it to be manually-checked before was a 1-time deal. - - // Counted blocks have a secondary threshold check in manual_counter, which forces a block - // to 'uncounted' mode if it's recompiled several times. This protects against excessive - // recompilation of blocks that reside on the same codepage as data. - - // fixme? Currently this algo is kinda dumb and results in the forced recompilation of a - // lot of blocks before it decides to mark a 'busy' page as uncounted. There might be - // be a more clever approach that could streamline this process, by doing a first-pass - // test using the vtlb memory protection (without recompilation!) to reprotect a counted - // block. But unless a new algo is relatively simple in implementation, it's probably - // not worth the effort (tests show that we have lots of recompiler memory to spare, and - // that the current amount of recompilation is fairly cheap). - - xADD(ptr16[&manual_page[inpage_ptr >> 12]], sz); - xJC(DispatchPageReset); - - // note: clearcnt is measured per-page, not per-block! 
- ConsoleColorScope cs( Color_Gray ); - eeRecPerfLog.Write( "Manual block @ %08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d clearcnt = %d", - startpc, sz, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] ); - } - else - { - eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d", - startpc, sz, inpage_ptr>>12, inpage_ptr&0xfff, pgsz, inpage_sz ); - } - break; - } + // Detect and handle self-modified code + memory_protect_recompiled_code(startpc, (s_nEndBlock-startpc) >> 2); // Skip Recompilation if sceMpegIsEnd Pattern detected bool doRecompilation = !skipMPEG_By_Pattern(startpc); @@ -2186,7 +2166,7 @@ StartRecomp: if ((oldBlock->startpc + oldBlock->size * 4) <= HWADDR(startpc)) break; - if (memcmp(&(*recRAMCopy)[oldBlock->startpc / 4], PSM(oldBlock->startpc), + if (memcmp(&recRAMCopy[oldBlock->startpc / 4], PSM(oldBlock->startpc), oldBlock->size * 4)) { recClear(startpc, (pc - startpc) / 4); @@ -2196,7 +2176,7 @@ StartRecomp: } } - memcpy(&(*recRAMCopy)[HWADDR(startpc) / 4], PSM(startpc), pc - startpc); + memcpy(&recRAMCopy[HWADDR(startpc) / 4], PSM(startpc), pc - startpc); } s_pCurBlock->SetFnptr((uptr)recPtr); diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 1aaa3893f8..8b71c5ca1d 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -30,7 +30,7 @@ static __fi void mVUthrowHardwareDeficiency(const wxChar* extFail, int vuIndex) void mVUreserveCache(microVU& mVU) { - mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index)); + mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index), _16mb); mVU.cache_reserve->SetProfilerName(pxsFmt("mVU%urec", mVU.index)); mVU.cache = mVU.index ? 
diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index 0b5ee77b13..2bfd5be970 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -23,7 +23,7 @@ void dVifReserve(int idx) { if(!nVif[idx].recReserve) - nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx)); + nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx), _8mb); nVif[idx].recReserve->Reserve( nVif[idx].recReserveSizeMB * _1mb, idx ? HostMemoryMap::VIF1rec : HostMemoryMap::VIF0rec ); } diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index bd28dba23c..2cfdf907da 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -417,7 +417,7 @@ void VifUnpackSSE_Init() DevCon.WriteLn( "Generating SSE-optimized unpacking functions for VIF interpreters..." ); - nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions"); + nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions", _64kb); nVifUpkExec->SetProfilerName("iVIF-SSE"); nVifUpkExec->SetBlockSize( 1 ); nVifUpkExec->Reserve( _64kb );