mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #967 from PCSX2/remove-lazy-allocation
Reduce lazy allocation
commit 21857ec12d
@@ -248,6 +248,11 @@ public:
 		return m_blocksize * __pagesize;
 	}

+	virtual void Reset()
+	{
+		_parent::Reset();
+	}
+
 protected:

 	// This function is called from OnPageFaultEvent after the address has been translated
@@ -266,72 +271,6 @@ protected:
 	virtual void CommitBlocks( uptr page, uint blocks );
 };

-// --------------------------------------------------------------------------------------
-//  SpatialArrayReserve
-// --------------------------------------------------------------------------------------
-// A spatial array is one where large areas of the memory reserve will remain unused during
-// process execution.  Only areas put to use will be committed to virtual memory.
-//
-// Spatial array efficiency depends heavily on selecting the right parameters for the array's
-// primary intended use.  Memory in a spatial array is arranged by blocks, with each block
-// containing some number of pages (pages are 4096 bytes each on most platforms).  When the
-// array is accessed, the entire block containing the addressed memory will be committed at
-// once.  Blocks can be a single page in size (4096 bytes), though this is highly discouraged
-// due to overhead and fragmentation penalties.
-//
-// Balancing block sizes:
-//   Larger blocks are good for reducing memory fragmentation and block-tracking overhead, but
-//   can also result in a lot of otherwise unused memory being committed.  Smaller
-//   blocks are good for arrays that will tend toward more sequential behavior, as they reduce
-//   the amount of unused memory being committed.  However, since every block requires a
-//   tracking entry, assigning small blocks to a very large array can result in quite a bit of
-//   unwanted overhead.  Furthermore, if the array is accessed randomly, system physical memory
-//   will become very fragmented, which will also hurt performance.
-//
-// By default, the base block size is based on a heuristic that balances the size of the spatial
-// array reserve against a best-guess performance profile for the target platform.
-//
-class SpatialArrayReserve : public BaseVmReserveListener
-{
-	typedef BaseVmReserveListener _parent;
-
-protected:
-	uint	m_numblocks;
-
-	// Array of block bits, each bit indicating if the block has been committed to memory
-	// or not.  The array length is typically determined via ((numblocks+7) / 8), though the
-	// actual array size may be larger in order to accommodate 32-bit or 128-bit accelerated
-	// operations.
-	ScopedAlignedAlloc<u8,16>	m_blockbits;
-
-public:
-	SpatialArrayReserve( const wxString& name );
-
-	virtual void* Reserve( size_t size = 0, uptr base = 0, uptr upper_bounds = 0 );
-	virtual void Reset();
-	virtual bool TryResize( uint newsize );
-
-	void OnCommittedBlock( void* block );
-
-	SpatialArrayReserve& SetBlockCount( uint blocks );
-	SpatialArrayReserve& SetBlockSizeInPages( uint pages );
-
-	uptr SetBlockSize( uptr bytes );
-
-	operator void*()				{ return m_baseptr; }
-	operator const void*() const	{ return m_baseptr; }
-
-	operator u8*()					{ return (u8*)m_baseptr; }
-	operator const u8*() const		{ return (u8*)m_baseptr; }
-
-	using _parent::operator[];
-
-protected:
-	void ReprotectCommittedBlocks( const PageProtectionMode& newmode );
-	void DoCommitAndProtect( uptr page );
-	uint _calcBlockBitArrayLength() const;
-};
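For orientation, the class deleted above was used in roughly the following pattern. This is an illustrative sketch with a hypothetical reserve name, mirroring the recAlloc() call sites removed further down in this diff:

// Sketch only -- mirrors the usage removed from recAlloc() below.
SpatialArrayReserve* copy = new SpatialArrayReserve( L"Example reserve" );  // hypothetical name
copy->SetBlockSize( _16kb );              // commit granularity; rounded up to whole pages
copy->Reserve( Ps2MemSize::MainRam );     // reserves address space only; commits nothing yet
u8* buf = copy->GetPtr();
buf[0] = 1;   // first touch page-faults; the listener commits the whole 16kb block around it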
-
 #ifdef __linux__

 #	define PCSX2_PAGEFAULT_PROTECT
@@ -336,154 +336,6 @@ void BaseVmReserveListener::OnPageFaultEvent(const PageFaultInfo& info, bool& handled)
 #endif
 }


-// --------------------------------------------------------------------------------------
-//  SpatialArrayReserve  (implementations)
-// --------------------------------------------------------------------------------------
-
-SpatialArrayReserve::SpatialArrayReserve( const wxString& name ) :
-	_parent( name ), m_numblocks(0)
-{
-	m_prot_mode = PageAccess_ReadWrite();
-}
-
-uint SpatialArrayReserve::_calcBlockBitArrayLength() const
-{
-	// divide by 8 (rounded up) to compress 8 bits into each byte.
-	// mask off lower bits (rounded up) to allow for 128-bit alignment and SSE operations.
-	return (((m_numblocks + 7) / 8) + 15) & ~15;
-}
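A quick worked example of the length formula above (illustrative numbers, not from the diff; the static_assert assumes C++11): with m_numblocks = 100,

// (100 + 7) / 8 = 13 raw flag bytes;  (13 + 15) & ~15 = 16 bytes after padding,
// so the buffer handed to memzero_sse_a() in Reset() is always a multiple of 16.
static_assert(((((100 + 7) / 8) + 15) & ~15) == 16, "example only");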
-
-void* SpatialArrayReserve::Reserve( size_t size, uptr base, uptr upper_bounds )
-{
-	void* addr = _parent::Reserve( size, base, upper_bounds );
-	if (!addr) return NULL;
-
-	if (m_blocksize) SetBlockSizeInPages( m_blocksize );
-	m_blockbits.Alloc( _calcBlockBitArrayLength() );
-
-	return addr;
-}
-
-void SpatialArrayReserve::ReprotectCommittedBlocks( const PageProtectionMode& newmode )
-{
-	if (!m_pages_commited) return;
-
-	u8* curptr = GetPtr();
-	const uint blockBytes = m_blocksize * __pagesize;
-	for (uint i=0; i<m_numblocks; ++i, curptr+=blockBytes)
-	{
-		uint thisbit = 1 << (i & 7);
-		if (!(m_blockbits[i/8] & thisbit)) continue;
-
-		HostSys::MemProtect(curptr, blockBytes, newmode);
-		HostSys::MmapResetPtr(curptr, blockBytes);
-	}
-}
-
-// Resets/clears the spatial array, reducing the memory commit pool overhead to zero (0).
-void SpatialArrayReserve::Reset()
-{
-	ReprotectCommittedBlocks( PageAccess_None() );
-	memzero_sse_a(m_blockbits.GetPtr(), _calcBlockBitArrayLength());
-}
-
-// Important!  The number of blocks of the array will be altered when using this method.
-//
-bool SpatialArrayReserve::TryResize( uint newsize )
-{
-	uint newpages = (newsize + __pagesize - 1) / __pagesize;
-
-	// find the last allocated block -- we cannot be allowed to resize any smaller than that:
-
-	uint i;
-	for (i=m_numblocks-1; i; --i)
-	{
-		uint bit = i & 7;
-		if (m_blockbits[i / 8] & bit) break;
-	}
-
-	uint pages_in_use = i * m_blocksize;
-	if (newpages < pages_in_use) return false;

-	if (!_parent::TryResize( newsize )) return false;
-
-	// On success, we must re-calibrate the internal blockbits array.
-
-	m_blockbits.Resize( (m_numblocks + 7) / 8 );
-
-	return true;
-}
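One thing worth flagging in the deleted TryResize() above: `bit` holds a bit index (0..7), but the loop masks `m_blockbits[i / 8]` with the index itself rather than with `1 << bit`, so the backward scan for the last committed block can stop in the wrong place. Moot now that the code is being deleted, but the corrected test would presumably read:

uint bit = i & 7;
if (m_blockbits[i / 8] & (1u << bit)) break;   // mask form, matching ReprotectCommittedBlocks()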
-
-// This method allows the programmer to specify the block size of the array as a function
-// of its reserved size.  This function *must* be called *after* the reserve has been made,
-// and *before* the array contents have been accessed.
-//
-// Calls to this function prior to initializing the reserve or after the reserve has been
-// accessed (resulting in committed blocks) will be ignored -- and will generate an assertion
-// in debug builds.
-SpatialArrayReserve& SpatialArrayReserve::SetBlockCount( uint blocks )
-{
-	pxAssumeDev( !m_pages_commited, "Invalid object state: SetBlockCount must be called prior to reserved memory accesses." );
-
-	// Calculate such that the last block extends past the end of the array, if necessary.
-
-	m_numblocks = blocks;
-	m_blocksize = (m_pages_reserved + m_numblocks-1) / m_numblocks;
-
-	return *this;
-}
-
-// Sets the block size via pages (pages are defined by the __pagesize global, which is
-// typically 4096).
-//
-// This method must be called prior to accessing or modifying the array contents.  Calls to
-// a modified buffer will be ignored (and generate an assertion in dev/debug modes).
-SpatialArrayReserve& SpatialArrayReserve::SetBlockSizeInPages( uint pages )
-{
-	if (pxAssertDev(!m_pages_commited, "Invalid object state: Block size can only be changed prior to accessing or modifying the reserved buffer contents."))
-	{
-		m_blocksize = pages;
-		m_numblocks = (m_pages_reserved + m_blocksize - 1) / m_blocksize;
-		m_blockbits.Alloc( _calcBlockBitArrayLength() );
-	}
-	return *this;
-}
-
-// SetBlockSize assigns the block size of the spatial array, in bytes.  The actual size of
-// each block will be rounded up to the nearest page size.  The resulting size is returned.
-//
-// This method must be called prior to accessing or modifying the array contents.  Calls to
-// a modified buffer will be ignored (and generate an assertion in dev/debug modes).
-uptr SpatialArrayReserve::SetBlockSize( uptr bytes )
-{
-	SetBlockSizeInPages((bytes + __pagesize - 1) / __pagesize);
-	return m_blocksize * __pagesize;
-}
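The byte-to-page conversion in the deleted SetBlockSize() is plain ceiling division. For instance, assuming __pagesize is 4096:

uint pages = (10000 + __pagesize - 1) / __pagesize;   // 14095 / 4096 == 3 pages
uptr bytes = pages * __pagesize;                      // 12288 bytes actually used per block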
-
-void SpatialArrayReserve::DoCommitAndProtect( uptr page )
-{
-	// Spatial Arrays work on block granularity only:
-	// Round the page into a block, and commit the whole block that the page belongs to.
-
-	uint block = page / m_blocksize;
-	CommitBlocks(block*m_blocksize, 1);
-}
-
-void SpatialArrayReserve::OnCommittedBlock( void* block )
-{
-	// Determine the block position in the blockbits array, flag it, and be done!
-
-	uptr relative = (uptr)block - (uptr)m_baseptr;
-	relative /= m_blocksize * __pagesize;
-
-	//DbgCon.WriteLn("Check me out @ 0x%08x", block);
-
-	pxAssert( (m_blockbits[relative/8] & (1 << (relative & 7))) == 0 );
-	m_blockbits[relative/8] |= 1 << (relative & 7);
-}
-

 // --------------------------------------------------------------------------------------
 //  PageProtectionMode  (implementations)
 // --------------------------------------------------------------------------------------
@@ -891,13 +891,6 @@ void eeMemoryReserve::Release()
 // code below.
 //

-enum vtlb_ProtectionMode
-{
-	ProtMode_None = 0,	// page is 'unaccounted' -- neither protected nor unprotected
-	ProtMode_Write,		// page is under write protection (exception handler)
-	ProtMode_Manual		// page is under manual protection (self-checked at execution)
-};
-
 struct vtlb_PageProtectionInfo
 {
 	// Ram De-mapping -- used to convert fully translated/mapped offsets (which reside with
@@ -914,12 +907,10 @@ static __aligned16 vtlb_PageProtectionInfo m_PageProtectInfo[Ps2MemSize::MainRam >> 12];


 // returns:
-//   -1 - unchecked block (resides in ROM, thus its integrity is constant)
-//    0 - page is using Write protection
-//    1 - page is using manual protection (recompiler must include execution-time
-//        self-checking of block integrity)
+//   ProtMode_NotRequired - unchecked block (resides in ROM, thus its integrity is constant)
+//   otherwise            - the page's current protection mode
 //
-int mmap_GetRamPageInfo( u32 paddr )
+vtlb_ProtectionMode mmap_GetRamPageInfo( u32 paddr )
 {
 	pxAssert( eeMem );

@@ -929,10 +920,11 @@ int mmap_GetRamPageInfo( u32 paddr )
 	uptr rampage = ptr - (uptr)eeMem->Main;

 	if (rampage >= Ps2MemSize::MainRam)
-		return -1; //not in ram, no tracking done ...
+		return ProtMode_NotRequired; //not in ram, no tracking done ...

 	rampage >>= 12;
-	return ( m_PageProtectInfo[rampage].Mode == ProtMode_Manual ) ? 1 : 0;
+
+	return m_PageProtectInfo[rampage].Mode;
 }

 // paddr - physically mapped PS2 address
@@ -116,7 +116,15 @@ extern void memBindConditionalHandlers();

 extern void memMapVUmicro();

-extern int mmap_GetRamPageInfo( u32 paddr );
+enum vtlb_ProtectionMode
+{
+	ProtMode_None = 0,		// page is 'unaccounted' -- neither protected nor unprotected
+	ProtMode_Write,			// page is under write protection (exception handler)
+	ProtMode_Manual,		// page is under manual protection (self-checked at execution)
+	ProtMode_NotRequired	// page doesn't require any protection
+};
+
+extern vtlb_ProtectionMode mmap_GetRamPageInfo( u32 paddr );
 extern void mmap_MarkCountedRamPage( u32 paddr );
 extern void mmap_ResetBlockTracking();
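With the return type widened from a bare int to vtlb_ProtectionMode, callers switch on named states instead of comparing against -1/0/1. The new memory_protect_recompiled_code() later in this diff is the in-tree example; the shape of the migration is roughly as follows (a sketch, with a hypothetical paddr variable):

switch (mmap_GetRamPageInfo( paddr ))
{
	case ProtMode_NotRequired:	// was -1: ROM page, integrity never changes
		break;

	case ProtMode_None:
	case ProtMode_Write:		// was 0: (re)establish write protection
		mmap_MarkCountedRamPage( paddr );
		break;

	case ProtMode_Manual:		// was 1: emit execution-time self-checks
		break;
}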
@@ -73,9 +73,23 @@ void* RecompiledCodeReserve::Reserve( size_t size, uptr base, uptr upper_bounds )
 {
 	if (!_parent::Reserve(size, base, upper_bounds)) return NULL;
 	_registerProfiler();

+	// Pre-allocate the first block (reduces the number of segmentation faults
+	// taken while running under a debugger)
+	DoCommitAndProtect(0);
+
 	return m_baseptr;
 }

+void RecompiledCodeReserve::Reset()
+{
+	_parent::Reset();
+
+	// Pre-allocate the first block (reduces the number of segmentation faults
+	// taken while running under a debugger)
+	DoCommitAndProtect(0);
+}
+

 // Sets the abbreviated name used by the profiler.  Name should be under 10 characters long.
 // After a name has been set, a profiler source will be automatically registered and cleared
@@ -42,6 +42,7 @@ public:

 	virtual void* Reserve( size_t size, uptr base=0, uptr upper_bounds=0 );
 	virtual void OnCommittedBlock( void* block );
+	virtual void Reset();

 	virtual RecompiledCodeReserve& SetProfilerName( const wxString& shortname );
 	virtual RecompiledCodeReserve& SetProfilerName( const char* shortname )
@@ -757,7 +757,7 @@ static const uint m_recBlockAllocSize =

 static void recReserveCache()
 {
-	if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _1mb * 2);
+	if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _8mb);
 	recMem->SetProfilerName("IOPrec");

 	while (!recMem->IsOk())
@@ -63,23 +63,6 @@ __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
 u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
 bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;

-// --------------------------------------------------------------------------------------
-//  R5900LutReserve_RAM
-// --------------------------------------------------------------------------------------
-class R5900LutReserve_RAM : public SpatialArrayReserve
-{
-	typedef SpatialArrayReserve _parent;
-
-public:
-	R5900LutReserve_RAM( const wxString& name )
-		: _parent( name )
-	{
-	}
-
-protected:
-	void OnCommittedBlock( void* block );
-};
-

 ////////////////////////////////////////////////////////////////
 // Static Private Variables - R5900 Dynarec
@@ -88,8 +71,9 @@ protected:
 static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units

 static RecompiledCodeReserve* recMem = NULL;
-static SpatialArrayReserve* recRAMCopy = NULL;
-static R5900LutReserve_RAM* recLutReserve_RAM = NULL;
+static u8* recRAMCopy = NULL;
+static u8* recLutReserve_RAM = NULL;
+static const size_t recLutSize = Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1;

 static uptr m_ConfiguredCacheReserve = 64;
@@ -599,11 +583,6 @@ static __ri void ClearRecLUT(BASEBLOCK* base, int memsize)
 		base[i].SetFnptr((uptr)JITCompile);
 }

-void R5900LutReserve_RAM::OnCommittedBlock( void* block )
-{
-	_parent::OnCommittedBlock(block);
-	ClearRecLUT((BASEBLOCK*)block, __pagesize * m_blocksize);
-}
-
 static void recThrowHardwareDeficiency( const wxChar* extFail )
 {
@@ -614,7 +593,7 @@ static void recThrowHardwareDeficiency( const wxChar* extFail )

 static void recReserveCache()
 {
-	if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _1mb * 4);
+	if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _16mb);
 	recMem->SetProfilerName("EErec");

 	while (!recMem->IsOk())
@@ -643,25 +622,19 @@ static void recAlloc()
 {
 	if (!recRAMCopy)
 	{
-		recRAMCopy = new SpatialArrayReserve( L"R5900 RAM copy" );
-		recRAMCopy->SetBlockSize(_16kb);
-		recRAMCopy->Reserve(Ps2MemSize::MainRam);
+		recRAMCopy = (u8*)_aligned_malloc(Ps2MemSize::MainRam, 4096);
 	}

 	if (!recRAM)
 	{
-		recLutReserve_RAM = new R5900LutReserve_RAM( L"R5900 RAM LUT" );
-		recLutReserve_RAM->SetBlockSize(_16kb);
-		recLutReserve_RAM->Reserve(Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1);
+		recLutReserve_RAM = (u8*)_aligned_malloc(recLutSize, 4096);
 	}

-	BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM->GetPtr();
+	BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM;
 	recRAM	= basepos; basepos += (Ps2MemSize::MainRam / 4);
 	recROM	= basepos; basepos += (Ps2MemSize::Rom / 4);
 	recROM1	= basepos; basepos += (Ps2MemSize::Rom1 / 4);

-	pxAssert(recLutReserve_RAM->GetPtrEnd() == (u8*)basepos);
-
 	for (int i = 0; i < 0x10000; i++)
 		recLUT_SetPage(recLUT, 0, 0, 0, i, 0);
@@ -731,8 +704,8 @@ static void recResetRaw()
 	Console.WriteLn( Color_StrongBlack, "EE/iR5900-32 Recompiler Reset" );

 	recMem->Reset();
-	recRAMCopy->Reset();
-	recLutReserve_RAM->Reset();
+	ClearRecLUT((BASEBLOCK*)recLutReserve_RAM, recLutSize);
+	memset(recRAMCopy, 0, Ps2MemSize::MainRam);

 	maxrecmem = 0;
@@ -756,8 +729,8 @@ static void recResetRaw()
 static void recShutdown()
 {
 	safe_delete( recMem );
-	safe_delete( recRAMCopy );
-	safe_delete( recLutReserve_RAM );
+	safe_aligned_free( recRAMCopy );
+	safe_aligned_free( recLutReserve_RAM );

 	recBlocks.Reset();
@@ -1675,6 +1648,89 @@ void __fastcall dyna_page_reset(u32 start,u32 sz)
 	mmap_MarkCountedRamPage( start );
 }

+static void memory_protect_recompiled_code(u32 startpc, u32 size)
+{
+	u32 inpage_ptr = HWADDR(startpc);
+	u32 inpage_sz  = size*4;
+
+	// The kernel context register is stored @ 0x800010C0-0x80001300
+	// The EENULL thread context register is stored @ 0x81000-....
+	bool contains_thread_stack = ((startpc >> 12) == 0x81) || ((startpc >> 12) == 0x80001);
+
+	// note: blocks are guaranteed to reside within the confines of a single page.
+	const vtlb_ProtectionMode PageType = contains_thread_stack ? ProtMode_Manual : mmap_GetRamPageInfo( inpage_ptr );
+
+	switch (PageType)
+	{
+		case ProtMode_NotRequired:
+			break;
+
+		case ProtMode_None:
+		case ProtMode_Write:
+			mmap_MarkCountedRamPage( inpage_ptr );
+			manual_page[inpage_ptr >> 12] = 0;
+			break;
+
+		case ProtMode_Manual:
+			xMOV( ecx, inpage_ptr );
+			xMOV( edx, inpage_sz / 4 );
+			//xMOV( eax, startpc );		// uncomment this to access startpc (as eax) in dyna_block_discard
+
+			u32 lpc = inpage_ptr;
+			u32 stg = inpage_sz;
+
+			while (stg > 0)
+			{
+				xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) );
+				xJNE( DispatchBlockDiscard );
+
+				stg -= 4;
+				lpc += 4;
+			}
+
+			// Tweakpoint!  3 is a 'magic' number representing the number of times a counted block
+			// is re-protected before the recompiler gives up and sets it up as an uncounted (permanent)
+			// manual block.  Higher thresholds result in more recompilations for blocks that share code
+			// and data on the same page.  Side effects of a lower threshold: over extended gameplay
+			// with several map changes, a game's overall performance could degrade.
+
+			// (ideally, perhaps, manual_counter should be reset to 0 every few minutes?)
+
+			if (!contains_thread_stack && manual_counter[inpage_ptr >> 12] <= 3)
+			{
+				// Counted blocks add a weighted (by block size) value into manual_page each time they're
+				// run.  If the block gets run a lot, it resets and re-protects itself in the hope
+				// that whatever forced it to be manually-checked before was a 1-time deal.
+
+				// Counted blocks have a secondary threshold check in manual_counter, which forces a block
+				// to 'uncounted' mode if it's recompiled several times.  This protects against excessive
+				// recompilation of blocks that reside on the same codepage as data.
+
+				// fixme? Currently this algo is kinda dumb and results in the forced recompilation of a
+				// lot of blocks before it decides to mark a 'busy' page as uncounted.  There might
+				// be a more clever approach that could streamline this process, by doing a first-pass
+				// test using the vtlb memory protection (without recompilation!) to reprotect a counted
+				// block.  But unless a new algo is relatively simple in implementation, it's probably
+				// not worth the effort (tests show that we have lots of recompiler memory to spare, and
+				// that the current amount of recompilation is fairly cheap).
+
+				xADD(ptr16[&manual_page[inpage_ptr >> 12]], size);
+				xJC(DispatchPageReset);
+
+				// note: clearcnt is measured per-page, not per-block!
+				ConsoleColorScope cs( Color_Gray );
+				eeRecPerfLog.Write( "Manual block @ %08X : size =%3d  page/offs = 0x%05X/0x%03X  inpgsz = %d  clearcnt = %d",
+					startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] );
+			}
+			else
+			{
+				eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d  page/offs = 0x%05X/0x%03X  inpgsz = %d",
+					startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz );
+			}
+			break;
+	}
+}
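For readers unfamiliar with the emitter calls: the xCMP/xJNE pairs above are generated at recompile time, one per instruction word, with each word's current value baked into the generated code as an immediate. At run time the block therefore re-reads guest RAM and bails out to the discard dispatcher if anything changed. In plain C++ the emitted check behaves roughly like this sketch (hypothetical helper; `snapshot` stands for the immediates captured at recompile time):

// Equivalent of the generated self-check prologue for a manual block.
static bool block_unmodified(u32 start, u32 size_in_words, const u32* snapshot)
{
	for (u32 i = 0; i < size_in_words; ++i)
		if (*(u32*)PSM(start + i * 4) != snapshot[i])
			return false;   // => DispatchBlockDiscard: throw the block away and recompile
	return true;
}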

 // Skip MPEG Game-Fix
 bool skipMPEG_By_Pattern(u32 sPC) {
@@ -2076,84 +2132,8 @@ StartRecomp:
 	if (dumplog & 1) iDumpBlock(startpc, recPtr);
 #endif

-	u32 sz = (s_nEndBlock-startpc) >> 2;
-	u32 inpage_ptr = HWADDR(startpc);
-	u32 inpage_sz = sz*4;
-
-	// note: blocks are guaranteed to reside within the confines of a single page.
-
-	const int PageType = mmap_GetRamPageInfo( inpage_ptr );
-	//const u32 pgsz = std::min(0x1000 - inpage_offs, inpage_sz);
-	const u32 pgsz = inpage_sz;
-
-	switch (PageType)
-	{
-		case -1:
-			break;
-
-		case 0:
-			mmap_MarkCountedRamPage( inpage_ptr );
-			manual_page[inpage_ptr >> 12] = 0;
-			break;
-
-		default:
-			xMOV( ecx, inpage_ptr );
-			xMOV( edx, pgsz / 4 );
-			//xMOV( eax, startpc );		// uncomment this to access startpc (as eax) in dyna_block_discard
-
-			u32 lpc = inpage_ptr;
-			u32 stg = pgsz;
-
-			while(stg>0)
-			{
-				xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) );
-				xJNE(DispatchBlockDiscard);
-
-				stg -= 4;
-				lpc += 4;
-			}
-
-			// Tweakpoint!  3 is a 'magic' number representing the number of times a counted block
-			// is re-protected before the recompiler gives up and sets it up as an uncounted (permanent)
-			// manual block.  Higher thresholds result in more recompilations for blocks that share code
-			// and data on the same page.  Side effects of a lower threshold: over extended gameplay
-			// with several map changes, a game's overall performance could degrade.
-
-			// (ideally, perhaps, manual_counter should be reset to 0 every few minutes?)
-
-			if (startpc != 0x81fc0 && manual_counter[inpage_ptr >> 12] <= 3)
-			{
-				// Counted blocks add a weighted (by block size) value into manual_page each time they're
-				// run.  If the block gets run a lot, it resets and re-protects itself in the hope
-				// that whatever forced it to be manually-checked before was a 1-time deal.
-
-				// Counted blocks have a secondary threshold check in manual_counter, which forces a block
-				// to 'uncounted' mode if it's recompiled several times.  This protects against excessive
-				// recompilation of blocks that reside on the same codepage as data.
-
-				// fixme? Currently this algo is kinda dumb and results in the forced recompilation of a
-				// lot of blocks before it decides to mark a 'busy' page as uncounted.  There might be
-				// be a more clever approach that could streamline this process, by doing a first-pass
-				// test using the vtlb memory protection (without recompilation!) to reprotect a counted
-				// block.  But unless a new algo is relatively simple in implementation, it's probably
-				// not worth the effort (tests show that we have lots of recompiler memory to spare, and
-				// that the current amount of recompilation is fairly cheap).
-
-				xADD(ptr16[&manual_page[inpage_ptr >> 12]], sz);
-				xJC(DispatchPageReset);
-
-				// note: clearcnt is measured per-page, not per-block!
-				ConsoleColorScope cs( Color_Gray );
-				eeRecPerfLog.Write( "Manual block @ %08X : size =%3d  page/offs = 0x%05X/0x%03X  inpgsz = %d  clearcnt = %d",
-					startpc, sz, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] );
-			}
-			else
-			{
-				eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d  page/offs = 0x%05X/0x%03X  inpgsz = %d",
-					startpc, sz, inpage_ptr>>12, inpage_ptr&0xfff, pgsz, inpage_sz );
-			}
-			break;
-	}
+	// Detect and handle self-modified code
+	memory_protect_recompiled_code(startpc, (s_nEndBlock-startpc) >> 2);

 	// Skip Recompilation if sceMpegIsEnd Pattern detected
 	bool doRecompilation = !skipMPEG_By_Pattern(startpc);
@@ -2186,7 +2166,7 @@ StartRecomp:
 		if ((oldBlock->startpc + oldBlock->size * 4) <= HWADDR(startpc))
 			break;

-		if (memcmp(&(*recRAMCopy)[oldBlock->startpc / 4], PSM(oldBlock->startpc),
+		if (memcmp(&recRAMCopy[oldBlock->startpc / 4], PSM(oldBlock->startpc),
				oldBlock->size * 4))
 		{
 			recClear(startpc, (pc - startpc) / 4);
@@ -2196,7 +2176,7 @@ StartRecomp:
 		}
 	}

-	memcpy(&(*recRAMCopy)[HWADDR(startpc) / 4], PSM(startpc), pc - startpc);
+	memcpy(&recRAMCopy[HWADDR(startpc) / 4], PSM(startpc), pc - startpc);
 }

 s_pCurBlock->SetFnptr((uptr)recPtr);
@@ -30,7 +30,7 @@ static __fi void mVUthrowHardwareDeficiency(const wxChar* extFail, int vuIndex)

 void mVUreserveCache(microVU& mVU) {

-	mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index));
+	mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index), _16mb);
 	mVU.cache_reserve->SetProfilerName(pxsFmt("mVU%urec", mVU.index));

 	mVU.cache = mVU.index ?
@@ -23,7 +23,7 @@

 void dVifReserve(int idx) {
 	if (!nVif[idx].recReserve)
-		nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx));
+		nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx), _8mb);

 	nVif[idx].recReserve->Reserve( nVif[idx].recReserveSizeMB * _1mb, idx ? HostMemoryMap::VIF1rec : HostMemoryMap::VIF0rec );
 }
@@ -417,7 +417,7 @@ void VifUnpackSSE_Init()

 	DevCon.WriteLn( "Generating SSE-optimized unpacking functions for VIF interpreters..." );

-	nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions");
+	nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions", _64kb);
 	nVifUpkExec->SetProfilerName("iVIF-SSE");
 	nVifUpkExec->SetBlockSize( 1 );
 	nVifUpkExec->Reserve( _64kb );