Merge pull request #967 from PCSX2/remove-lazy-allocation

Reduce lazy allocation
Gregory Hainaut 2015-11-15 00:12:07 +01:00
commit 21857ec12d
11 changed files with 137 additions and 351 deletions


@@ -247,6 +247,11 @@ public:
m_blocksize = (bytes + __pagesize - 1) / __pagesize;
return m_blocksize * __pagesize;
}
virtual void Reset()
{
_parent::Reset();
}
protected:
@@ -266,72 +271,6 @@ protected:
virtual void CommitBlocks( uptr page, uint blocks );
};
// --------------------------------------------------------------------------------------
// SpatialArrayReserve
// --------------------------------------------------------------------------------------
// A spatial array is one where large areas of the memory reserve will remain unused during
// process execution. Only areas put to use will be committed to virtual memory.
//
// Spatial array efficiency depends heavily on selecting the right parameters for the array's
// primary intended use. Memory in a spatial array is arranged by blocks, with each block
// containing some number of pages (pages are 4096 bytes each on most platforms). When the
// array is accessed, the entire block containing the addressed memory will be committed at
// once. Blocks can be a single page in size (4096 bytes), though this is highly discouraged
// due to overhead and fragmentation penalties.
//
// Balancing block sizes:
// Larger blocks are good for reducing memory fragmentation and block-tracking overhead, but
// can also result in a lot of otherwise unused memory being committed to memory. Smaller
// blocks are good for arrays that will tend toward more sequential behavior, as they reduce
// the amount of unused memory being committed. However, since every block requires a
// tracking entry, assigning small blocks to a very large array can result in quite a bit of
// unwanted overhead. Furthermore, if the array is accessed randomly, system physical memory
// will become very fragmented, which will also hurt performance.
//
// By default, the base block size is based on a heuristic that balances the size of the spatial
// array reserve against a best-guess performance profile for the target platform.
//
class SpatialArrayReserve : public BaseVmReserveListener
{
typedef BaseVmReserveListener _parent;
protected:
uint m_numblocks;
// Array of block bits, each bit indicating if the block has been committed to memory
// or not. The array length is typically determined via ((numblocks+7) / 8), though the
// actual array size may be larger in order to accommodate 32-bit or 128-bit accelerated
// operations.
ScopedAlignedAlloc<u8,16> m_blockbits;
public:
SpatialArrayReserve( const wxString& name );
virtual void* Reserve( size_t size = 0, uptr base = 0, uptr upper_bounds = 0 );
virtual void Reset();
virtual bool TryResize( uint newsize );
void OnCommittedBlock( void* block );
SpatialArrayReserve& SetBlockCount( uint blocks );
SpatialArrayReserve& SetBlockSizeInPages( uint bytes );
uptr SetBlockSize( uptr bytes );
operator void*() { return m_baseptr; }
operator const void*() const { return m_baseptr; }
operator u8*() { return (u8*)m_baseptr; }
operator const u8*() const { return (u8*)m_baseptr; }
using _parent::operator[];
protected:
void ReprotectCommittedBlocks( const PageProtectionMode& newmode );
void DoCommitAndProtect( uptr page );
uint _calcBlockBitArrayLength() const;
};
#ifdef __linux__
# define PCSX2_PAGEFAULT_PROTECT
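
The reserve-then-commit scheme that SpatialArrayReserve (above) implements, and that this PR scales back, reduces to the following pattern. This is a minimal, self-contained Linux sketch for illustration only -- it is not PCSX2 code, and the sizes are made up:

#include <sys/mman.h>
#include <cstddef>
#include <cstdio>

int main()
{
	const size_t reserve_size = 64 * 1024 * 1024;	// illustrative reserve size
	const size_t block_size   = 16 * 1024;		// illustrative block size

	// Reserve: map the range with PROT_NONE so it occupies address space only.
	void* base = mmap(nullptr, reserve_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) { perror("mmap"); return 1; }

	// Commit: grant access to just the first block before touching it.
	// (PCSX2 performs this step lazily, from its page-fault handler.)
	if (mprotect(base, block_size, PROT_READ | PROT_WRITE) != 0) { perror("mprotect"); return 1; }

	static_cast<unsigned char*>(base)[0] = 1;	// safe: this block is committed

	munmap(base, reserve_size);
	return 0;
}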


@@ -336,154 +336,6 @@ void BaseVmReserveListener::OnPageFaultEvent(const PageFaultInfo& info, bool& ha
#endif
}
// --------------------------------------------------------------------------------------
// SpatialArrayReserve (implementations)
// --------------------------------------------------------------------------------------
SpatialArrayReserve::SpatialArrayReserve( const wxString& name ) :
_parent( name ), m_numblocks(0)
{
m_prot_mode = PageAccess_ReadWrite();
}
uint SpatialArrayReserve::_calcBlockBitArrayLength() const
{
// divide by 8 (rounded up) to compress 8 bits into each byte.
// mask off lower bits (rounded up) to allow for 128-bit alignment and SSE operations.
return (((m_numblocks + 7) / 8) + 15) & ~15;
}
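To make the rounding above concrete, a worked example (the figures are illustrative, not taken from this change):
// m_numblocks = 100  ->  (100 + 7) / 8 = 13 bytes of block bits,
// then (13 + 15) & ~15 = 16 bytes, so the bit array stays a multiple of 16 bytes
// and can be cleared/tested with aligned 128-bit (SSE) operations.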
void* SpatialArrayReserve::Reserve( size_t size, uptr base, uptr upper_bounds )
{
void* addr = _parent::Reserve( size, base, upper_bounds );
if (!addr) return NULL;
if (m_blocksize) SetBlockSizeInPages( m_blocksize );
m_blockbits.Alloc( _calcBlockBitArrayLength() );
return addr;
}
void SpatialArrayReserve::ReprotectCommittedBlocks( const PageProtectionMode& newmode )
{
if (!m_pages_commited) return;
u8* curptr = GetPtr();
const uint blockBytes = m_blocksize * __pagesize;
for (uint i=0; i<m_numblocks; ++i, curptr+=blockBytes)
{
uint thisbit = 1 << (i & 7);
if (!(m_blockbits[i/8] & thisbit)) continue;
HostSys::MemProtect(curptr, blockBytes, newmode);
HostSys::MmapResetPtr(curptr, blockBytes);
}
}
// Resets/clears the spatial array, reducing the memory commit pool overhead to zero (0).
void SpatialArrayReserve::Reset()
{
ReprotectCommittedBlocks( PageAccess_None() );
memzero_sse_a(m_blockbits.GetPtr(), _calcBlockBitArrayLength());
}
// Important! The number of blocks of the array will be altered when using this method.
//
bool SpatialArrayReserve::TryResize( uint newsize )
{
uint newpages = (newsize + __pagesize - 1) / __pagesize;
// find the last allocated block -- we cannot be allowed to resize any smaller than that:
uint i;
for (i=m_numblocks-1; i; --i)
{
uint bit = i & 7;
if (m_blockbits[i / 8] & bit) break;
}
uint pages_in_use = i * m_blocksize;
if (newpages < pages_in_use) return false;
if (!_parent::TryResize( newsize )) return false;
// On success, we must re-calibrate the internal blockbits array.
m_blockbits.Resize( (m_numblocks + 7) / 8 );
return true;
}
// This method allows the programmer to specify the block size of the array as a function
// of its reserved size. This function *must* be called *after* the reserve has been made,
// and *before* the array contents have been accessed.
//
// Calls to this function prior to initializing the reserve or after the reserve has been
// accessed (resulting in committed blocks) will be ignored -- and will generate an assertion
// in debug builds.
SpatialArrayReserve& SpatialArrayReserve::SetBlockCount( uint blocks )
{
pxAssumeDev( !m_pages_commited, "Invalid object state: SetBlockCount must be called prior to reserved memory accesses." );
// Calculate such that the last block extends past the end of the array, if necessary.
m_numblocks = blocks;
m_blocksize = (m_pages_reserved + m_numblocks-1) / m_numblocks;
return *this;
}
// Sets the block size via pages (pages are defined by the __pagesize global, which is
// typically 4096).
//
// This method must be called prior to accessing or modifying the array contents. Calls to
// a modified buffer will be ignored (and generate an assertion in dev/debug modes).
SpatialArrayReserve& SpatialArrayReserve::SetBlockSizeInPages( uint pages )
{
if (pxAssertDev(!m_pages_commited, "Invalid object state: Block size can only be changed prior to accessing or modifying the reserved buffer contents."))
{
m_blocksize = pages;
m_numblocks = (m_pages_reserved + m_blocksize - 1) / m_blocksize;
m_blockbits.Alloc( _calcBlockBitArrayLength() );
}
return *this;
}
// SetBlockSize assigns the block size of the spatial array, in bytes. The actual size of
// each block will be rounded up to the nearest page size. The resulting size is returned.
//
// This method must be called prior to accessing or modifying the array contents. Calls to
// a modified buffer will be ignored (and generate an assertion in dev/debug modes).
uptr SpatialArrayReserve::SetBlockSize( uptr bytes )
{
SetBlockSizeInPages((bytes + __pagesize - 1) / __pagesize);
return m_blocksize * __pagesize;
}
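As an illustration of the rounding performed by these setters (the reserve and block sizes are hypothetical, not from this PR):
// SetBlockSize(_16kb) on a 32 MB reserve (8192 pages of 4096 bytes):
//   m_blocksize = (16384 + 4095) / 4096  = 4 pages per block
//   m_numblocks = (8192 + 4 - 1) / 4     = 2048 blocks
//   returned value = 4 * 4096            = 16384 bytes (already page-aligned)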
void SpatialArrayReserve::DoCommitAndProtect( uptr page )
{
// Spatial Arrays work on block granularity only:
// Round the page into a block, and commit the whole block that the page belongs to.
uint block = page / m_blocksize;
CommitBlocks(block*m_blocksize, 1);
}
void SpatialArrayReserve::OnCommittedBlock( void* block )
{
// Determine the block position in the blockbits array, flag it, and be done!
uptr relative = (uptr)block - (uptr)m_baseptr;
relative /= m_blocksize * __pagesize;
//DbgCon.WriteLn("Check me out @ 0x%08x", block);
pxAssert( (m_blockbits[relative/8] & (1 << (relative & 7))) == 0 );
m_blockbits[relative/8] |= 1 << (relative & 7);
}
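For clarity, the bit bookkeeping above with an illustrative address (not taken from this change):
// block size = 4 pages (16 KB), committed block starts at m_baseptr + 0x50000:
//   relative = 0x50000 / (4 * 4096) = 20
//   m_blockbits[20 / 8] |= 1 << (20 & 7)   // sets bit 4 of byte 2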
// --------------------------------------------------------------------------------------
// PageProtectionMode (implementations)
// --------------------------------------------------------------------------------------


@@ -891,13 +891,6 @@ void eeMemoryReserve::Release()
// code below.
//
enum vtlb_ProtectionMode
{
ProtMode_None = 0, // page is 'unaccounted' -- neither protected nor unprotected
ProtMode_Write, // page is under write protection (exception handler)
ProtMode_Manual // page is under manual protection (self-checked at execution)
};
struct vtlb_PageProtectionInfo
{
// Ram De-mapping -- used to convert fully translated/mapped offsets (which reside with
@@ -914,12 +907,10 @@ static __aligned16 vtlb_PageProtectionInfo m_PageProtectInfo[Ps2MemSize::MainRam
// returns:
-// -1 - unchecked block (resides in ROM, thus is integrity is constant)
-// 0 - page is using Write protection
-// 1 - page is using manual protection (recompiler must include execution-time
-// self-checking of block integrity)
+// ProtMode_NotRequired - unchecked block (resides in ROM, thus is integrity is constant)
+// Or the current mode
//
-int mmap_GetRamPageInfo( u32 paddr )
+vtlb_ProtectionMode mmap_GetRamPageInfo( u32 paddr )
{
pxAssert( eeMem );
@@ -929,10 +920,11 @@ int mmap_GetRamPageInfo( u32 paddr )
uptr rampage = ptr - (uptr)eeMem->Main;
if (rampage >= Ps2MemSize::MainRam)
-return -1; //not in ram, no tracking done ...
+return ProtMode_NotRequired; //not in ram, no tracking done ...
rampage >>= 12;
-return ( m_PageProtectInfo[rampage].Mode == ProtMode_Manual ) ? 1 : 0;
+return m_PageProtectInfo[rampage].Mode;
}
// paddr - physically mapped PS2 address


@@ -116,7 +116,15 @@ extern void memBindConditionalHandlers();
extern void memMapVUmicro();
-extern int mmap_GetRamPageInfo( u32 paddr );
+enum vtlb_ProtectionMode
{
ProtMode_None = 0, // page is 'unaccounted' -- neither protected nor unprotected
ProtMode_Write, // page is under write protection (exception handler)
ProtMode_Manual, // page is under manual protection (self-checked at execution)
ProtMode_NotRequired // page doesn't require any protection
};
extern vtlb_ProtectionMode mmap_GetRamPageInfo( u32 paddr );
extern void mmap_MarkCountedRamPage( u32 paddr );
extern void mmap_ResetBlockTracking();


@@ -73,9 +73,23 @@ void* RecompiledCodeReserve::Reserve( size_t size, uptr base, uptr upper_bounds
{
if (!_parent::Reserve(size, base, upper_bounds)) return NULL;
_registerProfiler();
// Pre-Allocate the first block (to reduce the number of segmentation fault
// in debugger)
DoCommitAndProtect(0);
return m_baseptr;
}
void RecompiledCodeReserve::Reset()
{
_parent::Reset();
// Pre-Allocate the first block (to reduce the number of segmentation fault
// in debugger)
DoCommitAndProtect(0);
}
// Sets the abbreviated name used by the profiler. Name should be under 10 characters long.
// After a name has been set, a profiler source will be automatically registered and cleared


@@ -42,6 +42,7 @@ public:
virtual void* Reserve( size_t size, uptr base=0, uptr upper_bounds=0 );
virtual void OnCommittedBlock( void* block );
virtual void Reset();
virtual RecompiledCodeReserve& SetProfilerName( const wxString& shortname );
virtual RecompiledCodeReserve& SetProfilerName( const char* shortname )


@@ -757,7 +757,7 @@ static const uint m_recBlockAllocSize =
static void recReserveCache()
{
-if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _1mb * 2);
+if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _8mb);
recMem->SetProfilerName("IOPrec");
while (!recMem->IsOk())


@@ -63,23 +63,6 @@ __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;
// --------------------------------------------------------------------------------------
// R5900LutReserve_RAM
// --------------------------------------------------------------------------------------
class R5900LutReserve_RAM : public SpatialArrayReserve
{
typedef SpatialArrayReserve _parent;
public:
R5900LutReserve_RAM( const wxString& name )
: _parent( name )
{
}
protected:
void OnCommittedBlock( void* block );
};
////////////////////////////////////////////////////////////////
// Static Private Variables - R5900 Dynarec
@@ -88,8 +71,9 @@ protected:
static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units
static RecompiledCodeReserve* recMem = NULL;
-static SpatialArrayReserve* recRAMCopy = NULL;
-static R5900LutReserve_RAM* recLutReserve_RAM = NULL;
+static u8* recRAMCopy = NULL;
+static u8* recLutReserve_RAM = NULL;
+static const size_t recLutSize = Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1;
static uptr m_ConfiguredCacheReserve = 64;
@@ -599,11 +583,6 @@ static __ri void ClearRecLUT(BASEBLOCK* base, int memsize)
base[i].SetFnptr((uptr)JITCompile);
}
void R5900LutReserve_RAM::OnCommittedBlock( void* block )
{
_parent::OnCommittedBlock(block);
ClearRecLUT((BASEBLOCK*)block, __pagesize * m_blocksize);
}
static void recThrowHardwareDeficiency( const wxChar* extFail )
{
@@ -614,7 +593,7 @@ static void recThrowHardwareDeficiency( const wxChar* extFail )
static void recReserveCache()
{
-if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _1mb * 4);
+if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _16mb);
recMem->SetProfilerName("EErec");
while (!recMem->IsOk())
@@ -643,25 +622,19 @@ static void recAlloc()
{
if (!recRAMCopy)
{
-recRAMCopy = new SpatialArrayReserve( L"R5900 RAM copy" );
-recRAMCopy->SetBlockSize(_16kb);
-recRAMCopy->Reserve(Ps2MemSize::MainRam);
+recRAMCopy = (u8*)_aligned_malloc(Ps2MemSize::MainRam, 4096);
}
if (!recRAM)
{
-recLutReserve_RAM = new R5900LutReserve_RAM( L"R5900 RAM LUT" );
-recLutReserve_RAM->SetBlockSize(_16kb);
-recLutReserve_RAM->Reserve(Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1);
+recLutReserve_RAM = (u8*)_aligned_malloc(recLutSize, 4096);
}
-BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM->GetPtr();
+BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM;
recRAM = basepos; basepos += (Ps2MemSize::MainRam / 4);
recROM = basepos; basepos += (Ps2MemSize::Rom / 4);
recROM1 = basepos; basepos += (Ps2MemSize::Rom1 / 4);
-pxAssert(recLutReserve_RAM->GetPtrEnd() == (u8*)basepos);
for (int i = 0; i < 0x10000; i++)
recLUT_SetPage(recLUT, 0, 0, 0, i, 0);
@@ -731,8 +704,8 @@ static void recResetRaw()
Console.WriteLn( Color_StrongBlack, "EE/iR5900-32 Recompiler Reset" );
recMem->Reset();
-recRAMCopy->Reset();
-recLutReserve_RAM->Reset();
+ClearRecLUT((BASEBLOCK*)recLutReserve_RAM, recLutSize);
+memset(recRAMCopy, 0, Ps2MemSize::MainRam);
maxrecmem = 0;
@@ -756,8 +729,8 @@ static void recResetRaw()
static void recShutdown()
{
safe_delete( recMem );
-safe_delete( recRAMCopy );
-safe_delete( recLutReserve_RAM );
+safe_aligned_free( recRAMCopy );
+safe_aligned_free( recLutReserve_RAM );
recBlocks.Reset();
@@ -1675,6 +1648,89 @@ void __fastcall dyna_page_reset(u32 start,u32 sz)
mmap_MarkCountedRamPage( start );
}
static void memory_protect_recompiled_code(u32 startpc, u32 size)
{
u32 inpage_ptr = HWADDR(startpc);
u32 inpage_sz = size*4;
// The kernel context register is stored @ 0x800010C0-0x80001300
// The EENULL thread context register is stored @ 0x81000-....
bool contains_thread_stack = ((startpc >> 12) == 0x81) || ((startpc >> 12) == 0x80001);
// note: blocks are guaranteed to reside within the confines of a single page.
const vtlb_ProtectionMode PageType = contains_thread_stack ? ProtMode_Manual : mmap_GetRamPageInfo( inpage_ptr );
switch (PageType)
{
case ProtMode_NotRequired:
break;
case ProtMode_None:
case ProtMode_Write:
mmap_MarkCountedRamPage( inpage_ptr );
manual_page[inpage_ptr >> 12] = 0;
break;
case ProtMode_Manual:
xMOV( ecx, inpage_ptr );
xMOV( edx, inpage_sz / 4 );
//xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard
u32 lpc = inpage_ptr;
u32 stg = inpage_sz;
while(stg>0)
{
xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) );
xJNE(DispatchBlockDiscard);
stg -= 4;
lpc += 4;
}
// Tweakpoint! 3 is a 'magic' number representing the number of times a counted block
// is re-protected before the recompiler gives up and sets it up as an uncounted (permanent)
// manual block. Higher thresholds result in more recompilations for blocks that share code
// and data on the same page. Side effects of a lower threshold: over extended gameplay
// with several map changes, a game's overall performance could degrade.
// (ideally, perhaps, manual_counter should be reset to 0 every few minutes?)
if (!contains_thread_stack && manual_counter[inpage_ptr >> 12] <= 3)
{
// Counted blocks add a weighted (by block size) value into manual_page each time they're
// run. If the block gets run a lot, it resets and re-protects itself in the hope
// that whatever forced it to be manually-checked before was a 1-time deal.
// Counted blocks have a secondary threshold check in manual_counter, which forces a block
// to 'uncounted' mode if it's recompiled several times. This protects against excessive
// recompilation of blocks that reside on the same codepage as data.
// fixme? Currently this algo is kinda dumb and results in the forced recompilation of a
// lot of blocks before it decides to mark a 'busy' page as uncounted. There might be
// be a more clever approach that could streamline this process, by doing a first-pass
// test using the vtlb memory protection (without recompilation!) to reprotect a counted
// block. But unless a new algo is relatively simple in implementation, it's probably
// not worth the effort (tests show that we have lots of recompiler memory to spare, and
// that the current amount of recompilation is fairly cheap).
xADD(ptr16[&manual_page[inpage_ptr >> 12]], size);
xJC(DispatchPageReset);
// note: clearcnt is measured per-page, not per-block!
ConsoleColorScope cs( Color_Gray );
eeRecPerfLog.Write( "Manual block @ %08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d clearcnt = %d",
startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] );
}
else
{
eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d",
startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz );
}
break;
}
}
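For readers unfamiliar with the emitter DSL used above, the per-word check that the xCMP/xJNE loop generates is, conceptually, the following plain C++. This is only a sketch: in the generated x86 the recompile-time word values are baked in as immediates, and a mismatch jumps to DispatchBlockDiscard. (u32 is the project's typedef for a 32-bit unsigned integer.)
// Conceptual equivalent of the emitted self-check: compare the block's words as
// recorded at recompile time against PS2 memory as it is at run time.
static bool block_is_unmodified(const u32* recompile_snapshot, const u32* ps2mem_now, u32 size_in_words)
{
	for (u32 i = 0; i < size_in_words; ++i)
		if (recompile_snapshot[i] != ps2mem_now[i])
			return false;	// modified -> discard and recompile the block
	return true;
}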
// Skip MPEG Game-Fix
bool skipMPEG_By_Pattern(u32 sPC) {
@@ -2076,84 +2132,8 @@ StartRecomp:
if (dumplog & 1) iDumpBlock(startpc, recPtr);
#endif
-u32 sz = (s_nEndBlock-startpc) >> 2;
-u32 inpage_ptr = HWADDR(startpc);
-u32 inpage_sz = sz*4;
+// Detect and handle self-modified code
+memory_protect_recompiled_code(startpc, (s_nEndBlock-startpc) >> 2);
// note: blocks are guaranteed to reside within the confines of a single page.
const int PageType = mmap_GetRamPageInfo( inpage_ptr );
//const u32 pgsz = std::min(0x1000 - inpage_offs, inpage_sz);
const u32 pgsz = inpage_sz;
switch (PageType)
{
case -1:
break;
case 0:
mmap_MarkCountedRamPage( inpage_ptr );
manual_page[inpage_ptr >> 12] = 0;
break;
default:
xMOV( ecx, inpage_ptr );
xMOV( edx, pgsz / 4 );
//xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard
u32 lpc = inpage_ptr;
u32 stg = pgsz;
while(stg>0)
{
xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) );
xJNE(DispatchBlockDiscard);
stg -= 4;
lpc += 4;
}
// Tweakpoint! 3 is a 'magic' number representing the number of times a counted block
// is re-protected before the recompiler gives up and sets it up as an uncounted (permanent)
// manual block. Higher thresholds result in more recompilations for blocks that share code
// and data on the same page. Side effects of a lower threshold: over extended gameplay
// with several map changes, a game's overall performance could degrade.
// (ideally, perhaps, manual_counter should be reset to 0 every few minutes?)
if (startpc != 0x81fc0 && manual_counter[inpage_ptr >> 12] <= 3)
{
// Counted blocks add a weighted (by block size) value into manual_page each time they're
// run. If the block gets run a lot, it resets and re-protects itself in the hope
// that whatever forced it to be manually-checked before was a 1-time deal.
// Counted blocks have a secondary threshold check in manual_counter, which forces a block
// to 'uncounted' mode if it's recompiled several times. This protects against excessive
// recompilation of blocks that reside on the same codepage as data.
// fixme? Currently this algo is kinda dumb and results in the forced recompilation of a
// lot of blocks before it decides to mark a 'busy' page as uncounted. There might be
// be a more clever approach that could streamline this process, by doing a first-pass
// test using the vtlb memory protection (without recompilation!) to reprotect a counted
// block. But unless a new algo is relatively simple in implementation, it's probably
// not worth the effort (tests show that we have lots of recompiler memory to spare, and
// that the current amount of recompilation is fairly cheap).
xADD(ptr16[&manual_page[inpage_ptr >> 12]], sz);
xJC(DispatchPageReset);
// note: clearcnt is measured per-page, not per-block!
ConsoleColorScope cs( Color_Gray );
eeRecPerfLog.Write( "Manual block @ %08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d clearcnt = %d",
startpc, sz, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] );
}
else
{
eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d page/offs = 0x%05X/0x%03X inpgsz = %d",
startpc, sz, inpage_ptr>>12, inpage_ptr&0xfff, pgsz, inpage_sz );
}
break;
}
// Skip Recompilation if sceMpegIsEnd Pattern detected
bool doRecompilation = !skipMPEG_By_Pattern(startpc);
@@ -2186,7 +2166,7 @@ StartRecomp:
if ((oldBlock->startpc + oldBlock->size * 4) <= HWADDR(startpc))
break;
-if (memcmp(&(*recRAMCopy)[oldBlock->startpc / 4], PSM(oldBlock->startpc),
+if (memcmp(&recRAMCopy[oldBlock->startpc / 4], PSM(oldBlock->startpc),
oldBlock->size * 4))
{
recClear(startpc, (pc - startpc) / 4);
@@ -2196,7 +2176,7 @@ StartRecomp:
}
}
-memcpy(&(*recRAMCopy)[HWADDR(startpc) / 4], PSM(startpc), pc - startpc);
+memcpy(&recRAMCopy[HWADDR(startpc) / 4], PSM(startpc), pc - startpc);
}
s_pCurBlock->SetFnptr((uptr)recPtr);


@@ -30,7 +30,7 @@ static __fi void mVUthrowHardwareDeficiency(const wxChar* extFail, int vuIndex)
void mVUreserveCache(microVU& mVU) {
-mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index));
+mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index), _16mb);
mVU.cache_reserve->SetProfilerName(pxsFmt("mVU%urec", mVU.index));
mVU.cache = mVU.index ?


@@ -23,7 +23,7 @@
void dVifReserve(int idx) {
if(!nVif[idx].recReserve)
-nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx));
+nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx), _8mb);
nVif[idx].recReserve->Reserve( nVif[idx].recReserveSizeMB * _1mb, idx ? HostMemoryMap::VIF1rec : HostMemoryMap::VIF0rec );
}
} }


@@ -417,7 +417,7 @@ void VifUnpackSSE_Init()
DevCon.WriteLn( "Generating SSE-optimized unpacking functions for VIF interpreters..." );
-nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions");
+nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions", _64kb);
nVifUpkExec->SetProfilerName("iVIF-SSE");
nVifUpkExec->SetBlockSize( 1 );
nVifUpkExec->Reserve( _64kb );