mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #967 from PCSX2/remove-lazy-allocation
Reduce lazy allocation
commit 21857ec12d
@@ -247,6 +247,11 @@ public:
 		m_blocksize = (bytes + __pagesize - 1) / __pagesize;
 		return m_blocksize * __pagesize;
 	}
 
+	virtual void Reset()
+	{
+		_parent::Reset();
+	}
+
 protected:
 
@@ -266,72 +271,6 @@ protected:
 	virtual void CommitBlocks( uptr page, uint blocks );
 };
 
-// --------------------------------------------------------------------------------------
-//  SpatialArrayReserve
-// --------------------------------------------------------------------------------------
-// A spatial array is one where large areas of the memory reserve will remain unused during
-// process execution.  Only areas put to use will be committed to virtual memory.
-//
-// Spatial array efficiency depends heavily on selecting the right parameters for the array's
-// primary intended use.  Memory in a spatial array is arranged by blocks, with each block
-// containing some number of pages (pages are 4096 bytes each on most platforms).  When the
-// array is accessed, the entire block containing the addressed memory will be committed at
-// once.  Blocks can be a single page in size (4096 bytes), though this is highly discouraged
-// due to overhead and fragmentation penalties.
-//
-// Balancing block sizes:
-// Larger blocks are good for reducing memory fragmentation and block-tracking overhead, but
-// can also result in a lot of otherwise unused memory being committed to memory.  Smaller
-// blocks are good for arrays that will tend toward more sequential behavior, as they reduce
-// the amount of unused memory being committed.  However, since every block requires a
-// tracking entry, assigning small blocks to a very large array can result in quite a bit of
-// unwanted overhead.  Furthermore, if the array is accessed randomly, system physical memory
-// will become very fragmented, which will also hurt performance.
-//
-// By default, the base block size is based on a heuristic that balances the size of the spatial
-// array reserve against a best-guess performance profile for the target platform.
-//
-class SpatialArrayReserve : public BaseVmReserveListener
-{
-	typedef BaseVmReserveListener _parent;
-
-protected:
-	uint	m_numblocks;
-
-	// Array of block bits, each bit indicating if the block has been committed to memory
-	// or not.  The array length is typically determined via ((numblocks+7) / 8), though the
-	// actual array size may be larger in order to accommodate 32-bit or 128-bit accelerated
-	// operations.
-	ScopedAlignedAlloc<u8,16>	m_blockbits;
-
-public:
-	SpatialArrayReserve( const wxString& name );
-
-	virtual void* Reserve( size_t size = 0, uptr base = 0, uptr upper_bounds = 0 );
-	virtual void Reset();
-	virtual bool TryResize( uint newsize );
-
-	void OnCommittedBlock( void* block );
-
-	SpatialArrayReserve& SetBlockCount( uint blocks );
-	SpatialArrayReserve& SetBlockSizeInPages( uint bytes );
-
-	uptr SetBlockSize( uptr bytes );
-
-	operator void*()				{ return m_baseptr; }
-	operator const void*() const	{ return m_baseptr; }
-
-	operator u8*()				{ return (u8*)m_baseptr; }
-	operator const u8*() const	{ return (u8*)m_baseptr; }
-
-	using _parent::operator[];
-
-protected:
-	void ReprotectCommittedBlocks( const PageProtectionMode& newmode );
-	void DoCommitAndProtect( uptr page );
-	uint _calcBlockBitArrayLength() const;
-};
-
 #ifdef __linux__
 
 #	define PCSX2_PAGEFAULT_PROTECT
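The comment block removed above is the clearest surviving description of how SpatialArrayReserve balanced block size against tracking overhead. As a minimal standalone sketch of that sizing arithmetic (illustrative constants only, not code from this commit):

    // Sketch: the rounding used by SetBlockCount / SetBlockSizeInPages.
    #include <cstdio>

    int main()
    {
        const unsigned pagesize       = 4096;  // matches __pagesize on most platforms
        const unsigned pages_reserved = 8192;  // a hypothetical 32MB reserve
        const unsigned blocks         = 100;   // requested block count

        // SetBlockCount: round up so the last block may extend past the array's end.
        unsigned blocksize = (pages_reserved + blocks - 1) / blocks;
        // SetBlockSizeInPages: derive the tracked block count from a block size.
        unsigned numblocks = (pages_reserved + blocksize - 1) / blocksize;

        printf("block = %u pages (%u bytes), %u blocks tracked\n",
            blocksize, blocksize * pagesize, numblocks);
        return 0;
    }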
@@ -336,154 +336,6 @@ void BaseVmReserveListener::OnPageFaultEvent(const PageFaultInfo& info, bool& ha
 #endif
 }
 
-// --------------------------------------------------------------------------------------
-//  SpatialArrayReserve  (implementations)
-// --------------------------------------------------------------------------------------
-
-SpatialArrayReserve::SpatialArrayReserve( const wxString& name ) :
-	_parent( name ), m_numblocks(0)
-{
-	m_prot_mode = PageAccess_ReadWrite();
-}
-
-uint SpatialArrayReserve::_calcBlockBitArrayLength() const
-{
-	// divide by 8 (rounded up) to compress 8 bits into each byte.
-	// mask off lower bits (rounded up) to allow for 128-bit alignment and SSE operations.
-	return (((m_numblocks + 7) / 8) + 15) & ~15;
-}
-
-void* SpatialArrayReserve::Reserve( size_t size, uptr base, uptr upper_bounds )
-{
-	void* addr = _parent::Reserve( size, base, upper_bounds );
-	if (!addr) return NULL;
-
-	if (m_blocksize) SetBlockSizeInPages( m_blocksize );
-	m_blockbits.Alloc( _calcBlockBitArrayLength() );
-
-	return addr;
-}
-
-void SpatialArrayReserve::ReprotectCommittedBlocks( const PageProtectionMode& newmode )
-{
-	if (!m_pages_commited) return;
-
-	u8* curptr = GetPtr();
-	const uint blockBytes = m_blocksize * __pagesize;
-	for (uint i=0; i<m_numblocks; ++i, curptr+=blockBytes)
-	{
-		uint thisbit = 1 << (i & 7);
-		if (!(m_blockbits[i/8] & thisbit)) continue;
-
-		HostSys::MemProtect(curptr, blockBytes, newmode);
-		HostSys::MmapResetPtr(curptr, blockBytes);
-	}
-}
-
-// Resets/clears the spatial array, reducing the memory commit pool overhead to zero (0).
-void SpatialArrayReserve::Reset()
-{
-	ReprotectCommittedBlocks( PageAccess_None() );
-	memzero_sse_a(m_blockbits.GetPtr(), _calcBlockBitArrayLength());
-}
-
-// Important!  The number of blocks of the array will be altered when using this method.
-//
-bool SpatialArrayReserve::TryResize( uint newsize )
-{
-	uint newpages = (newsize + __pagesize - 1) / __pagesize;
-
-	// find the last allocated block -- we cannot be allowed to resize any smaller than that:
-
-	uint i;
-	for (i=m_numblocks-1; i; --i)
-	{
-		uint bit = i & 7;
-		if (m_blockbits[i / 8] & bit) break;
-	}
-
-	uint pages_in_use = i * m_blocksize;
-	if (newpages < pages_in_use) return false;
-
-	if (!_parent::TryResize( newsize )) return false;
-
-	// On success, we must re-calibrate the internal blockbits array.
-
-	m_blockbits.Resize( (m_numblocks + 7) / 8 );
-
-	return true;
-}
-
-// This method allows the programmer to specify the block size of the array as a function
-// of its reserved size.  This function *must* be called *after* the reserve has been made,
-// and *before* the array contents have been accessed.
-//
-// Calls to this function prior to initializing the reserve or after the reserve has been
-// accessed (resulting in committed blocks) will be ignored -- and will generate an assertion
-// in debug builds.
-SpatialArrayReserve& SpatialArrayReserve::SetBlockCount( uint blocks )
-{
-	pxAssumeDev( !m_pages_commited, "Invalid object state: SetBlockCount must be called prior to reserved memory accesses." );
-
-	// Calculate such that the last block extends past the end of the array, if necessary.
-
-	m_numblocks = blocks;
-	m_blocksize = (m_pages_reserved + m_numblocks-1) / m_numblocks;
-
-	return *this;
-}
-
-// Sets the block size via pages (pages are defined by the __pagesize global, which is
-// typically 4096).
-//
-// This method must be called prior to accessing or modifying the array contents.  Calls to
-// a modified buffer will be ignored (and generate an assertion in dev/debug modes).
-SpatialArrayReserve& SpatialArrayReserve::SetBlockSizeInPages( uint pages )
-{
-	if (pxAssertDev(!m_pages_commited, "Invalid object state: Block size can only be changed prior to accessing or modifying the reserved buffer contents."))
-	{
-		m_blocksize = pages;
-		m_numblocks = (m_pages_reserved + m_blocksize - 1) / m_blocksize;
-		m_blockbits.Alloc( _calcBlockBitArrayLength() );
-	}
-	return *this;
-}
-
-// SetBlockSize assigns the block size of the spatial array, in bytes.  The actual size of
-// each block will be rounded up to the nearest page size.  The resulting size is returned.
-//
-// This method must be called prior to accessing or modifying the array contents.  Calls to
-// a modified buffer will be ignored (and generate an assertion in dev/debug modes).
-uptr SpatialArrayReserve::SetBlockSize( uptr bytes )
-{
-	SetBlockSizeInPages((bytes + __pagesize - 1) / __pagesize);
-	return m_blocksize * __pagesize;
-}
-
-void SpatialArrayReserve::DoCommitAndProtect( uptr page )
-{
-	// Spatial Arrays work on block granularity only:
-	// Round the page into a block, and commit the whole block that the page belongs to.
-
-	uint block = page / m_blocksize;
-	CommitBlocks(block*m_blocksize, 1);
-}
-
-void SpatialArrayReserve::OnCommittedBlock( void* block )
-{
-	// Determine the block position in the blockbits array, flag it, and be done!
-
-	uptr relative = (uptr)block - (uptr)m_baseptr;
-	relative /= m_blocksize * __pagesize;
-
-	//DbgCon.WriteLn("Check me out @ 0x%08x", block);
-
-	pxAssert( (m_blockbits[relative/8] & (1 << (relative & 7))) == 0 );
-	m_blockbits[relative/8] |= 1 << (relative & 7);
-}
-
-
 // --------------------------------------------------------------------------------------
 //  PageProtectionMode  (implementations)
 // --------------------------------------------------------------------------------------
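Two notes on the implementation removed above. First, its commit tracking reduces to one bit per block, padded to a 16-byte multiple so the whole array can be cleared with aligned SSE stores; a self-contained sketch (the BlockBits type is hypothetical, not part of PCSX2):

    #include <cstdint>
    #include <cstring>

    struct BlockBits
    {
        static const unsigned numblocks = 4096;             // hypothetical block count
        uint8_t bits[(((numblocks + 7) / 8) + 15) & ~15u];  // _calcBlockBitArrayLength

        bool test(unsigned i) const { return (bits[i / 8] >> (i & 7)) & 1; }
        void set(unsigned i)        { bits[i / 8] |= 1u << (i & 7); }
        void reset()                { std::memset(bits, 0, sizeof(bits)); }
    };

Second, the removed TryResize loop tests m_blockbits[i / 8] & bit with bit = i & 7, where 1 << (i & 7) was almost certainly intended; the bug is moot since the class is deleted wholesale here.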
@@ -891,13 +891,6 @@ void eeMemoryReserve::Release()
 // code below.
 //
 
-enum vtlb_ProtectionMode
-{
-	ProtMode_None = 0,	// page is 'unaccounted' -- neither protected nor unprotected
-	ProtMode_Write,		// page is under write protection (exception handler)
-	ProtMode_Manual		// page is under manual protection (self-checked at execution)
-};
-
 struct vtlb_PageProtectionInfo
 {
 	// Ram De-mapping -- used to convert fully translated/mapped offsets (which reside with
@@ -914,12 +907,10 @@ static __aligned16 vtlb_PageProtectionInfo m_PageProtectInfo[Ps2MemSize::MainRam
 
 
 // returns:
-//   -1 - unchecked block (resides in ROM, thus is integrity is constant)
-//    0 - page is using Write protection
-//    1 - page is using manual protection (recompiler must include execution-time
-//        self-checking of block integrity)
+//   ProtMode_NotRequired - unchecked block (resides in ROM, thus its integrity is constant),
+//   or the current protection mode of the page.
 //
-int mmap_GetRamPageInfo( u32 paddr )
+vtlb_ProtectionMode mmap_GetRamPageInfo( u32 paddr )
 {
 	pxAssert( eeMem );
 
@@ -929,10 +920,11 @@ int mmap_GetRamPageInfo( u32 paddr )
 	uptr rampage = ptr - (uptr)eeMem->Main;
 
 	if (rampage >= Ps2MemSize::MainRam)
-		return -1; //not in ram, no tracking done ...
+		return ProtMode_NotRequired; //not in ram, no tracking done ...
 
 	rampage >>= 12;
-	return ( m_PageProtectInfo[rampage].Mode == ProtMode_Manual ) ? 1 : 0;
+	return m_PageProtectInfo[rampage].Mode;
 }
 
 // paddr - physically mapped PS2 address
@@ -116,7 +116,15 @@ extern void memBindConditionalHandlers();
 
 extern void memMapVUmicro();
 
-extern int mmap_GetRamPageInfo( u32 paddr );
+enum vtlb_ProtectionMode
+{
+	ProtMode_None = 0,	// page is 'unaccounted' -- neither protected nor unprotected
+	ProtMode_Write,		// page is under write protection (exception handler)
+	ProtMode_Manual,	// page is under manual protection (self-checked at execution)
+	ProtMode_NotRequired	// page doesn't require any protection
+};
+
+extern vtlb_ProtectionMode mmap_GetRamPageInfo( u32 paddr );
 extern void mmap_MarkCountedRamPage( u32 paddr );
 extern void mmap_ResetBlockTracking();
 
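Returning the enum instead of the old -1/0/1 lets call sites name all four states rather than decode magic integers. A compilable toy showing the caller's side; GetRamPageInfo below is a stand-in, not the real vtlb lookup:

    #include <cstdio>

    enum vtlb_ProtectionMode
    {
        ProtMode_None = 0, ProtMode_Write, ProtMode_Manual, ProtMode_NotRequired
    };

    // Stand-in for mmap_GetRamPageInfo(): anything past the end of RAM is untracked.
    static vtlb_ProtectionMode GetRamPageInfo(unsigned rampage, unsigned ram_pages)
    {
        return (rampage >= ram_pages) ? ProtMode_NotRequired : ProtMode_Write;
    }

    int main()
    {
        // The old int API folded ProtMode_None and ProtMode_Write into a single
        // return value (0); with the enum, the caller sees the exact mode.
        if (GetRamPageInfo(0x5000, 0x2000) == ProtMode_NotRequired)
            printf("page is outside RAM: no integrity tracking needed\n");
        return 0;
    }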
@@ -73,9 +73,23 @@ void* RecompiledCodeReserve::Reserve( size_t size, uptr base, uptr upper_bounds
 {
 	if (!_parent::Reserve(size, base, upper_bounds)) return NULL;
 	_registerProfiler();
 
+	// Pre-allocate the first block (reduces the number of segmentation faults
+	// seen when running under a debugger).
+	DoCommitAndProtect(0);
+
 	return m_baseptr;
 }
 
+void RecompiledCodeReserve::Reset()
+{
+	_parent::Reset();
+
+	// Pre-allocate the first block (reduces the number of segmentation faults
+	// seen when running under a debugger).
+	DoCommitAndProtect(0);
+}
+
 // Sets the abbreviated name used by the profiler.  Name should be under 10 characters long.
 // After a name has been set, a profiler source will be automatically registered and cleared
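The added DoCommitAndProtect(0) calls commit the first block eagerly, so the first write to a freshly reserved (or reset) cache no longer trips the intentional access violation that the lazy-commit handler fields, and which debuggers otherwise stop on every time. A rough POSIX-only sketch of the same idea, using raw mmap/mprotect in place of PCSX2's HostSys wrappers:

    #include <sys/mman.h>
    #include <cstddef>

    // Reserve address space, then commit just the first block up front.
    void* reserve_with_first_block(size_t reserve_bytes, size_t block_bytes)
    {
        void* base = mmap(nullptr, reserve_bytes, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        if (base == MAP_FAILED)
            return nullptr;

        // Without this, the very first code emission would fault into the
        // lazy-commit page-fault listener; with it, emission starts clean.
        if (mprotect(base, block_bytes, PROT_READ | PROT_WRITE) != 0)
        {
            munmap(base, reserve_bytes);
            return nullptr;
        }
        return base;
    }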
@@ -42,6 +42,7 @@ public:
 
 	virtual void* Reserve( size_t size, uptr base=0, uptr upper_bounds=0 );
 	virtual void OnCommittedBlock( void* block );
+	virtual void Reset();
 
 	virtual RecompiledCodeReserve& SetProfilerName( const wxString& shortname );
 	virtual RecompiledCodeReserve& SetProfilerName( const char* shortname )
@@ -757,7 +757,7 @@ static const uint m_recBlockAllocSize =
 
 static void recReserveCache()
 {
-	if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _1mb * 2);
+	if (!recMem) recMem = new RecompiledCodeReserve(L"R3000A Recompiler Cache", _8mb);
 	recMem->SetProfilerName("IOPrec");
 
 	while (!recMem->IsOk())
@@ -63,23 +63,6 @@ __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
 u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
 bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;
 
-// --------------------------------------------------------------------------------------
-//  R5900LutReserve_RAM
-// --------------------------------------------------------------------------------------
-class R5900LutReserve_RAM : public SpatialArrayReserve
-{
-	typedef SpatialArrayReserve _parent;
-
-public:
-	R5900LutReserve_RAM( const wxString& name )
-		: _parent( name )
-	{
-	}
-
-protected:
-	void OnCommittedBlock( void* block );
-};
-
 
 ////////////////////////////////////////////////////////////////
 // Static Private Variables - R5900 Dynarec
@@ -88,8 +71,9 @@ protected:
 static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units
 
 static RecompiledCodeReserve* recMem = NULL;
-static SpatialArrayReserve* recRAMCopy = NULL;
-static R5900LutReserve_RAM* recLutReserve_RAM = NULL;
+static u8* recRAMCopy = NULL;
+static u8* recLutReserve_RAM = NULL;
+static const size_t recLutSize = Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1;
 
 static uptr m_ConfiguredCacheReserve = 64;
 
@@ -599,11 +583,6 @@ static __ri void ClearRecLUT(BASEBLOCK* base, int memsize)
 		base[i].SetFnptr((uptr)JITCompile);
 }
 
-void R5900LutReserve_RAM::OnCommittedBlock( void* block )
-{
-	_parent::OnCommittedBlock(block);
-	ClearRecLUT((BASEBLOCK*)block, __pagesize * m_blocksize);
-}
-
 static void recThrowHardwareDeficiency( const wxChar* extFail )
 {
@@ -614,7 +593,7 @@ static void recThrowHardwareDeficiency( const wxChar* extFail )
 
 static void recReserveCache()
 {
-	if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _1mb * 4);
+	if (!recMem) recMem = new RecompiledCodeReserve(L"R5900-32 Recompiler Cache", _16mb);
 	recMem->SetProfilerName("EErec");
 
 	while (!recMem->IsOk())
@@ -643,25 +622,19 @@ static void recAlloc()
 {
 	if (!recRAMCopy)
 	{
-		recRAMCopy = new SpatialArrayReserve( L"R5900 RAM copy" );
-		recRAMCopy->SetBlockSize(_16kb);
-		recRAMCopy->Reserve(Ps2MemSize::MainRam);
+		recRAMCopy = (u8*)_aligned_malloc(Ps2MemSize::MainRam, 4096);
 	}
 
 	if (!recRAM)
 	{
-		recLutReserve_RAM = new R5900LutReserve_RAM( L"R5900 RAM LUT" );
-		recLutReserve_RAM->SetBlockSize(_16kb);
-		recLutReserve_RAM->Reserve(Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1);
+		recLutReserve_RAM = (u8*)_aligned_malloc(recLutSize, 4096);
 	}
 
-	BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM->GetPtr();
+	BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM;
 	recRAM = basepos; basepos += (Ps2MemSize::MainRam / 4);
 	recROM = basepos; basepos += (Ps2MemSize::Rom / 4);
 	recROM1 = basepos; basepos += (Ps2MemSize::Rom1 / 4);
 
-	pxAssert(recLutReserve_RAM->GetPtrEnd() == (u8*)basepos);
-
 	for (int i = 0; i < 0x10000; i++)
 		recLUT_SetPage(recLUT, 0, 0, 0, i, 0);
 
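This hunk is the heart of the "remove lazy allocation" change: the fault-driven SpatialArrayReserve LUTs become plain, fully committed allocations. A sketch of the new shape, assuming the MSVC CRT's _aligned_malloc (PCSX2 carries portable equivalents on other platforms); alloc_lut_eager is an illustrative name:

    #include <malloc.h>   // MSVC CRT: _aligned_malloc / _aligned_free
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Eager allocation: every physical page is committed immediately, and
    // initialization is a plain memset instead of per-block commit callbacks.
    static uint8_t* alloc_lut_eager(size_t lut_size)
    {
        uint8_t* p = (uint8_t*)_aligned_malloc(lut_size, 4096);  // page-aligned
        if (p)
            std::memset(p, 0, lut_size);
        return p;
    }

The trade is simple: physical memory for the whole LUT is paid up front, but the page faults, commit callbacks, and per-block ClearRecLUT-on-commit path all disappear (recResetRaw below now clears the whole LUT in one call).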
@@ -731,8 +704,8 @@ static void recResetRaw()
 	Console.WriteLn( Color_StrongBlack, "EE/iR5900-32 Recompiler Reset" );
 
 	recMem->Reset();
-	recRAMCopy->Reset();
-	recLutReserve_RAM->Reset();
+	ClearRecLUT((BASEBLOCK*)recLutReserve_RAM, recLutSize);
+	memset(recRAMCopy, 0, Ps2MemSize::MainRam);
 
 	maxrecmem = 0;
 
@@ -756,8 +729,8 @@ static void recResetRaw()
 static void recShutdown()
 {
 	safe_delete( recMem );
-	safe_delete( recRAMCopy );
-	safe_delete( recLutReserve_RAM );
+	safe_aligned_free( recRAMCopy );
+	safe_aligned_free( recLutReserve_RAM );
 
 	recBlocks.Reset();
 
@@ -1675,6 +1648,89 @@ void __fastcall dyna_page_reset(u32 start,u32 sz)
 	mmap_MarkCountedRamPage( start );
 }
 
+static void memory_protect_recompiled_code(u32 startpc, u32 size)
+{
+	u32 inpage_ptr = HWADDR(startpc);
+	u32 inpage_sz  = size*4;
+
+	// The kernel context register is stored @ 0x800010C0-0x80001300
+	// The EENULL thread context register is stored @ 0x81000-....
+	bool contains_thread_stack = ((startpc >> 12) == 0x81) || ((startpc >> 12) == 0x80001);
+
+	// note: blocks are guaranteed to reside within the confines of a single page.
+	const vtlb_ProtectionMode PageType = contains_thread_stack ? ProtMode_Manual : mmap_GetRamPageInfo( inpage_ptr );
+
+	switch (PageType)
+	{
+		case ProtMode_NotRequired:
+			break;
+
+		case ProtMode_None:
+		case ProtMode_Write:
+			mmap_MarkCountedRamPage( inpage_ptr );
+			manual_page[inpage_ptr >> 12] = 0;
+			break;
+
+		case ProtMode_Manual:
+			xMOV( ecx, inpage_ptr );
+			xMOV( edx, inpage_sz / 4 );
+			//xMOV( eax, startpc );	// uncomment this to access startpc (as eax) in dyna_block_discard
+
+			u32 lpc = inpage_ptr;
+			u32 stg = inpage_sz;
+
+			while(stg>0)
+			{
+				xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) );
+				xJNE(DispatchBlockDiscard);
+
+				stg -= 4;
+				lpc += 4;
+			}
+
+			// Tweakpoint!  3 is a 'magic' number representing the number of times a counted block
+			// is re-protected before the recompiler gives up and sets it up as an uncounted (permanent)
+			// manual block.  Higher thresholds result in more recompilations for blocks that share code
+			// and data on the same page.  Side effects of a lower threshold: over extended gameplay
+			// with several map changes, a game's overall performance could degrade.
+
+			// (ideally, perhaps, manual_counter should be reset to 0 every few minutes?)
+
+			if (!contains_thread_stack && manual_counter[inpage_ptr >> 12] <= 3)
+			{
+				// Counted blocks add a weighted (by block size) value into manual_page each time they're
+				// run.  If the block gets run a lot, it resets and re-protects itself in the hope
+				// that whatever forced it to be manually-checked before was a 1-time deal.
+
+				// Counted blocks have a secondary threshold check in manual_counter, which forces a block
+				// to 'uncounted' mode if it's recompiled several times.  This protects against excessive
+				// recompilation of blocks that reside on the same codepage as data.
+
+				// fixme? Currently this algo is kinda dumb and results in the forced recompilation of a
+				// lot of blocks before it decides to mark a 'busy' page as uncounted.  There might be
+				// a more clever approach that could streamline this process, by doing a first-pass
+				// test using the vtlb memory protection (without recompilation!) to reprotect a counted
+				// block.  But unless a new algo is relatively simple in implementation, it's probably
+				// not worth the effort (tests show that we have lots of recompiler memory to spare, and
+				// that the current amount of recompilation is fairly cheap).
+
+				xADD(ptr16[&manual_page[inpage_ptr >> 12]], size);
+				xJC(DispatchPageReset);
+
+				// note: clearcnt is measured per-page, not per-block!
+				ConsoleColorScope cs( Color_Gray );
+				eeRecPerfLog.Write( "Manual block @ %08X : size =%3d  page/offs = 0x%05X/0x%03X  inpgsz = %d  clearcnt = %d",
+					startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] );
+			}
+			else
+			{
+				eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d page/offs = 0x%05X/0x%03X  inpgsz = %d",
+					startpc, size, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz );
+			}
+			break;
+	}
+}
+
 // Skip MPEG Game-Fix
 bool skipMPEG_By_Pattern(u32 sPC) {
 
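The counted/uncounted decision this new function emits as x86 is easier to audit in plain C++. A behavioral sketch (manual_page and manual_counter mirror the recompiler's arrays; counted_block_ran is a hypothetical name for what the emitted xADD/xJC pair does at run time):

    #include <cstdint>

    static uint16_t manual_page[0x10000];     // weighted run counter, one per 4KB page
    static uint8_t  manual_counter[0x10000];  // times the page has been re-protected

    // Models the emitted xADD/xJC pair: returns true when the 16-bit counter
    // carries, i.e. when the running block should jump to DispatchPageReset.
    static bool counted_block_ran(uint32_t inpage_ptr, uint32_t block_size)
    {
        const uint32_t page = inpage_ptr >> 12;

        // Past the 'magic' threshold of 3 re-protections, the block is left
        // permanently manual (uncounted) and no counter code is emitted at all.
        if (manual_counter[page] > 3)
            return false;

        const uint32_t sum = manual_page[page] + block_size;
        manual_page[page] = (uint16_t)sum;
        return sum > 0xFFFF;                  // carry out of 16 bits
    }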
@@ -2076,84 +2132,8 @@ StartRecomp:
 	if (dumplog & 1) iDumpBlock(startpc, recPtr);
 #endif
 
-	u32 sz = (s_nEndBlock-startpc) >> 2;
-	u32 inpage_ptr = HWADDR(startpc);
-	u32 inpage_sz  = sz*4;
-
-	// note: blocks are guaranteed to reside within the confines of a single page.
-
-	const int PageType = mmap_GetRamPageInfo( inpage_ptr );
-	//const u32 pgsz = std::min(0x1000 - inpage_offs, inpage_sz);
-	const u32 pgsz = inpage_sz;
-
-	switch (PageType)
-	{
-		case -1:
-			break;
-
-		case 0:
-			mmap_MarkCountedRamPage( inpage_ptr );
-			manual_page[inpage_ptr >> 12] = 0;
-			break;
-
-		default:
-			xMOV( ecx, inpage_ptr );
-			xMOV( edx, pgsz / 4 );
-			//xMOV( eax, startpc );	// uncomment this to access startpc (as eax) in dyna_block_discard
-
-			u32 lpc = inpage_ptr;
-			u32 stg = pgsz;
-
-			while(stg>0)
-			{
-				xCMP( ptr32[PSM(lpc)], *(u32*)PSM(lpc) );
-				xJNE(DispatchBlockDiscard);
-
-				stg -= 4;
-				lpc += 4;
-			}
-
-			// Tweakpoint!  3 is a 'magic' number representing the number of times a counted block
-			// is re-protected before the recompiler gives up and sets it up as an uncounted (permanent)
-			// manual block.  Higher thresholds result in more recompilations for blocks that share code
-			// and data on the same page.  Side effects of a lower threshold: over extended gameplay
-			// with several map changes, a game's overall performance could degrade.
-
-			// (ideally, perhaps, manual_counter should be reset to 0 every few minutes?)
-
-			if (startpc != 0x81fc0 && manual_counter[inpage_ptr >> 12] <= 3)
-			{
-				// Counted blocks add a weighted (by block size) value into manual_page each time they're
-				// run.  If the block gets run a lot, it resets and re-protects itself in the hope
-				// that whatever forced it to be manually-checked before was a 1-time deal.
-
-				// Counted blocks have a secondary threshold check in manual_counter, which forces a block
-				// to 'uncounted' mode if it's recompiled several times.  This protects against excessive
-				// recompilation of blocks that reside on the same codepage as data.
-
-				// fixme? Currently this algo is kinda dumb and results in the forced recompilation of a
-				// lot of blocks before it decides to mark a 'busy' page as uncounted.  There might be
-				// be a more clever approach that could streamline this process, by doing a first-pass
-				// test using the vtlb memory protection (without recompilation!) to reprotect a counted
-				// block.  But unless a new algo is relatively simple in implementation, it's probably
-				// not worth the effort (tests show that we have lots of recompiler memory to spare, and
-				// that the current amount of recompilation is fairly cheap).
-
-				xADD(ptr16[&manual_page[inpage_ptr >> 12]], sz);
-				xJC(DispatchPageReset);
-
-				// note: clearcnt is measured per-page, not per-block!
-				ConsoleColorScope cs( Color_Gray );
-				eeRecPerfLog.Write( "Manual block @ %08X : size =%3d  page/offs = 0x%05X/0x%03X  inpgsz = %d  clearcnt = %d",
-					startpc, sz, inpage_ptr>>12, inpage_ptr&0xfff, inpage_sz, manual_counter[inpage_ptr >> 12] );
-			}
-			else
-			{
-				eeRecPerfLog.Write( "Uncounted Manual block @ 0x%08X : size =%3d page/offs = 0x%05X/0x%03X  inpgsz = %d",
-					startpc, sz, inpage_ptr>>12, inpage_ptr&0xfff, pgsz, inpage_sz );
-			}
-			break;
-	}
+	// Detect and handle self-modified code
+	memory_protect_recompiled_code(startpc, (s_nEndBlock-startpc) >> 2);
 
 	// Skip Recompilation if sceMpegIsEnd Pattern detected
 	bool doRecompilation = !skipMPEG_By_Pattern(startpc);
@@ -2186,7 +2166,7 @@ StartRecomp:
 		if ((oldBlock->startpc + oldBlock->size * 4) <= HWADDR(startpc))
 			break;
 
-		if (memcmp(&(*recRAMCopy)[oldBlock->startpc / 4], PSM(oldBlock->startpc),
+		if (memcmp(&recRAMCopy[oldBlock->startpc / 4], PSM(oldBlock->startpc),
 			oldBlock->size * 4))
 		{
 			recClear(startpc, (pc - startpc) / 4);
@@ -2196,7 +2176,7 @@ StartRecomp:
 		}
 	}
 
-	memcpy(&(*recRAMCopy)[HWADDR(startpc) / 4], PSM(startpc), pc - startpc);
+	memcpy(&recRAMCopy[HWADDR(startpc) / 4], PSM(startpc), pc - startpc);
 }
 
 s_pCurBlock->SetFnptr((uptr)recPtr);
@@ -30,7 +30,7 @@ static __fi void mVUthrowHardwareDeficiency(const wxChar* extFail, int vuIndex)
 
 void mVUreserveCache(microVU& mVU) {
 
-	mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index));
+	mVU.cache_reserve = new RecompiledCodeReserve(pxsFmt("Micro VU%u Recompiler Cache", mVU.index), _16mb);
 	mVU.cache_reserve->SetProfilerName(pxsFmt("mVU%urec", mVU.index));
 
 	mVU.cache = mVU.index ?
@@ -23,7 +23,7 @@
 
 void dVifReserve(int idx) {
 	if(!nVif[idx].recReserve)
-		nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx));
+		nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx), _8mb);
 
 	nVif[idx].recReserve->Reserve( nVif[idx].recReserveSizeMB * _1mb, idx ? HostMemoryMap::VIF1rec : HostMemoryMap::VIF0rec );
 }
@@ -417,7 +417,7 @@ void VifUnpackSSE_Init()
 
 	DevCon.WriteLn( "Generating SSE-optimized unpacking functions for VIF interpreters..." );
 
-	nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions");
+	nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions", _64kb);
 	nVifUpkExec->SetProfilerName("iVIF-SSE");
 	nVifUpkExec->SetBlockSize( 1 );
 	nVifUpkExec->Reserve( _64kb );