newHostVM: Made use of the new SpatialArrayReserve for the EE/R5900 recompiler. Reduces the recompiler's lookup tables from ~40 MB to a mere 1-3 MB.

git-svn-id: http://pcsx2.googlecode.com/svn/branches/newHostVM@4003 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-11-06 16:25:40 +00:00
parent 7f51e8ee66
commit d6de2e3942
5 changed files with 525 additions and 468 deletions

View File

@ -148,6 +148,18 @@ public:
operator u8*() { return (u8*)m_baseptr; }
operator const u8*() const { return (u8*)m_baseptr; }
// Byte-wise access into the reserve: yields a mutable reference to the byte located
// `idx` bytes past m_baseptr. The index is checked (via pxAssume) to be below
// m_reserved * __pagesize.
u8& operator[](uint idx)
{
	u8* const base = (u8*)m_baseptr;
	pxAssume((m_reserved * __pagesize) > idx);
	return base[idx];
}
// Read-only byte-wise access into the reserve: yields a const reference to the byte
// located `idx` bytes past m_baseptr. The index is checked (via pxAssume) to be below
// m_reserved * __pagesize.
const u8& operator[](uint idx) const
{
	const u8* const base = (u8*)m_baseptr;
	pxAssume((m_reserved * __pagesize) > idx);
	return base[idx];
}
protected:
void OnPageFaultEvent( const PageFaultInfo& info, bool& handled );
@ -204,7 +216,7 @@ protected:
ScopedAlignedAlloc<u8,16> m_blockbits;
public:
SpatialArrayReserve( const wxString& name, uint defCommit = 0 );
SpatialArrayReserve( const wxString& name );
virtual void* Reserve( uint size, uptr base = 0, uptr upper_bounds = 0 );
virtual void Reset();
@ -216,14 +228,16 @@ public:
SpatialArrayReserve& SetBlockSizeInPages( uint bytes );
uint SetBlockSize( uint bytes );
operator void*() { return m_baseptr; }
operator const void*() const { return m_baseptr; }
operator u8*() { return (u8*)m_baseptr; }
operator const u8*() const { return (u8*)m_baseptr; }
using __parent::operator[];
protected:
void DoCommitAndProtect( uptr page );
uint _calcBlockBitArrayLength() const;
};

View File

@ -237,14 +237,28 @@ void BaseVirtualMemoryReserve::OnPageFaultEvent(const PageFaultInfo& info, bool&
// SpatialArrayReserve (implementations)
// --------------------------------------------------------------------------------------
// Constructs a spatial array reserve. The name is forwarded unchanged to the parent
// class constructor, and the page protection mode is set to read/write.
SpatialArrayReserve::SpatialArrayReserve( const wxString& name ) :
__parent( name )
{
// m_prot_mode is assigned in the body rather than the initializer list.
// NOTE(review): presumably because it is a member inherited from the parent class -- confirm.
m_prot_mode = PageAccess_ReadWrite();
}
// Returns the length, in bytes, required for a bit array holding one bit per block
// (m_numblocks bits), padded up to a multiple of 16 bytes.
uint SpatialArrayReserve::_calcBlockBitArrayLength() const
{
	// divide by 8 (rounded up) to compress 8 bits into each byte.
	// mask off lower bits (rounded up) to allow for 128-bit alignment and SSE operations.
	return (((m_numblocks + 7) / 8) + 15) & ~15;
}
// Reserves the address range through the parent class, then allocates the block
// bit array (m_blockbits) sized by _calcBlockBitArrayLength().
//
// Parameters are passed through to __parent::Reserve unchanged.
// Returns the address returned by the parent, or NULL if the parent reservation
// failed (in which case the bit array is left untouched).
void* SpatialArrayReserve::Reserve( uint size, uptr base, uptr upper_bounds )
{
	void* addr = __parent::Reserve( size, base, upper_bounds );
	if (!addr) return NULL;

	// A block size configured before the reserve was made is re-applied here;
	// SetBlockSizeInPages recalculates m_numblocks from m_reserved.
	if (m_blocksize) SetBlockSizeInPages( m_blocksize );
	m_blockbits.Alloc( _calcBlockBitArrayLength() );

	return addr;
}
// Resets/clears the spatial array, reducing the memory commit pool overhead to zero (0).
@ -307,7 +321,7 @@ SpatialArrayReserve& SpatialArrayReserve::SetBlockCount( uint blocks )
// a modified buffer will be ignored (and generate an assertion in dev/debug modes).
SpatialArrayReserve& SpatialArrayReserve::SetBlockSizeInPages( uint pages )
{
if (pxAssertDev(m_commited, "Invalid object state: Block size can only be changed prior to accessing or modifying the reserved buffer contents."))
if (pxAssertDev(!m_commited, "Invalid object state: Block size can only be changed prior to accessing or modifying the reserved buffer contents."))
{
m_blocksize = pages;
m_numblocks = (m_reserved + m_blocksize - 1) / m_blocksize;
@ -327,16 +341,23 @@ uint SpatialArrayReserve::SetBlockSize( uint bytes )
return m_blocksize * __pagesize;
}
// Commits the block that contains the given page. Spatial arrays operate on whole
// blocks only, so the page index is first rounded down to a block boundary.
void SpatialArrayReserve::DoCommitAndProtect( uptr page )
{
	// Integer division discards the page's offset within its block.
	const uint owningBlock = page / m_blocksize;

	// Multiplying back by the block size yields the first page of that block;
	// a single block is committed from there.
	CommitBlocks(owningBlock * m_blocksize, 1);
}
// Flags a block in the m_blockbits bit array and adds the block's page count
// (m_blocksize) to m_commited.
//   block - address of the block; its index is derived from its byte offset
//           relative to m_baseptr.
void SpatialArrayReserve::OnCommittedBlock( void* block )
{
	// Determine the block position in the blockbits array, flag it, and be done!
	uptr relative = (uptr)block - (uptr)m_baseptr;
	relative /= m_blocksize * __pagesize;

	// m_blockbits holds u8 elements (8 bits each), so the element index is
	// relative/8 and the bit within it is relative&7. Indexing by 32-bit words
	// would drop any bit shifted past bit 7 when stored into a byte.
	m_blockbits[relative / 8] |= 1 << (relative & 7);
	m_commited += m_blocksize;
}

View File

@ -70,6 +70,7 @@
Name="VCLinkerTool"
AdditionalDependencies="zlib.lib rpcrt4.lib"
OutputFile="$(OutDir)\$(ProjectName)-dbg.exe"
RandomizedBaseAddress="1"
/>
<Tool
Name="VCALinkTool"

View File

@ -81,9 +81,6 @@ public:
__fi int Index (u32 startpc) const
{
int idx = LastIndex(startpc);
// fixme: I changed the parenthesis to be unambiguous, but this needs to be checked to see if ((x or y or z) and w)
// is correct, or ((x or y) or (z and w)), or some other variation. --arcum42
// Mixing &&'s and ||'s is not actually ambiguous; &&'s take precedence. Reverted to old behavior -- ChickenLiver.
if ((idx == -1) || (startpc < blocks[idx].startpc) ||
((blocks[idx].size) && (startpc >= blocks[idx].startpc + blocks[idx].size * 4)))
return -1;
@ -139,9 +136,10 @@ public:
static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000],
BASEBLOCK *mapbase, uint pagebase, uint pageidx, uint mappage)
{
// this value is in 64k pages!
uint page = pagebase + pageidx;
jASSUME( page < 0x10000 );
pxAssume( page < 0x10000 );
reclut[page] = (uptr)&mapbase[(mappage - page) << 14];
if (hwlut)
hwlut[page] = 0u - (pagebase << 16);

View File

@ -36,14 +36,18 @@
# include <csetjmp>
#endif
#include "Utilities/MemsetFast.inl"
using namespace x86Emitter;
using namespace R5900;
#define PC_GETBLOCK(x) PC_GETBLOCK_(x, recLUT)
u32 maxrecmem = 0;
static __aligned16 uptr recLUT[0x10000];
static __aligned16 uptr hwLUT[0x10000];
static __aligned16 uptr recLUT[_64kb];
static __aligned16 uptr hwLUT[_64kb];
// Adds the hwLUT entry selected by the upper bits of the address (mem >> 16) to the
// address itself. The argument is parenthesized at every use so that expression
// arguments (e.g. `a | b`) expand with the intended precedence. Note that `mem` is
// evaluated twice, so arguments with side effects should be avoided.
#define HWADDR(mem) (hwLUT[(mem) >> 16] + (mem))
@ -56,6 +60,24 @@ __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;
// --------------------------------------------------------------------------------------
// R5900LutReserve_RAM
// --------------------------------------------------------------------------------------
// A SpatialArrayReserve specialization for the R5900 recompiler's lookup table.
// It adds no data members; it only supplies its own OnCommittedBlock handler.
class R5900LutReserve_RAM : public SpatialArrayReserve
{
	typedef SpatialArrayReserve __parent;

public:
	// The name is handed straight to the SpatialArrayReserve constructor.
	R5900LutReserve_RAM( const wxString& name ) : __parent( name ) {}

protected:
	// NOTE(review): presumably overrides a virtual hook of the parent class -- confirm
	// against the SpatialArrayReserve declaration.
	void OnCommittedBlock( void* block );
};
////////////////////////////////////////////////////////////////
// Static Private Variables - R5900 Dynarec
@ -63,13 +85,16 @@ bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalExcept
static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units
static RecompiledCodeReserve* recMem = NULL;
static SpatialArrayReserve* recRAMCopy = NULL;
static R5900LutReserve_RAM* recLutReserve_RAM = NULL;
static uptr m_ConfiguredCacheReserve = 64;
static u32* recConstBuf = NULL; // 64-bit pseudo-immediates
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static u32 *recRAMCopy = NULL;
static BaseBlocks recBlocks;
static u8* recPtr = NULL;
static u32 *recConstBufPtr = NULL;
@ -534,12 +559,17 @@ static void _DynGen_Dispatchers()
//
static void __fastcall dyna_block_discard(u32 start,u32 sz);
// memory allocation handle for the entire BASEBLOCK and stack allocations.
static u8* m_recBlockAlloc = NULL;
// Points a run of BASEBLOCK entries at JITCompile.
//   base    - first entry to reset.
//   memsize - size value covering the run; one entry is written per 4 units of
//             memsize (memsize/4 entries in total). Values below 4 write nothing.
static __ri void ClearRecLUT(BASEBLOCK* base, int memsize)
{
	const uptr jitEntry = (uptr)JITCompile;

	BASEBLOCK* cur = base;
	int remaining = memsize / 4;
	while (remaining-- > 0)
	{
		cur->SetFnptr(jitEntry);
		++cur;
	}
}
static const uint m_recBlockAllocSize =
(((Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4) * sizeof(BASEBLOCK))
+ RECCONSTBUF_SIZE * sizeof(u32) + Ps2MemSize::MainRam;
void R5900LutReserve_RAM::OnCommittedBlock( void* block )
{
__parent::OnCommittedBlock(block);
ClearRecLUT((BASEBLOCK*)block, __pagesize * m_blocksize);
}
static void recThrowHardwareDeficiency( const wxChar* extFail )
{
@ -599,83 +629,26 @@ static void recReserve()
static void recAlloc()
{
// Goal: Allocate BASEBLOCKs for every possible branch target in PS2 memory.
// Any 4-byte aligned address makes a valid branch target as per MIPS design (all
// instructions are always 4 bytes long).
if( m_recBlockAlloc == NULL )
m_recBlockAlloc = (u8*) _aligned_malloc( m_recBlockAllocSize, 4096 );
if( m_recBlockAlloc == NULL )
throw Exception::OutOfMemory( L"R5900-32 BASEBLOCK tables" );
u8* curpos = m_recBlockAlloc;
recRAM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::MainRam / 4) * sizeof(BASEBLOCK);
recROM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK);
recROM1 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK);
recConstBuf = (u32*)curpos; curpos += RECCONSTBUF_SIZE * sizeof(u32);
recRAMCopy = (u32*)curpos;
if( s_pInstCache == NULL )
if (!recRAMCopy)
{
s_nInstCacheSize = 128;
s_pInstCache = (EEINST*)malloc( sizeof(EEINST) * s_nInstCacheSize );
recRAMCopy = new SpatialArrayReserve( L"R5900 RAM copy" );
recRAMCopy->SetBlockSize(_16kb);
recRAMCopy->Reserve(Ps2MemSize::MainRam);
}
if( s_pInstCache == NULL )
throw Exception::OutOfMemory( L"R5900-32 InstCache" );
if (!recRAM)
{
recLutReserve_RAM = new R5900LutReserve_RAM( L"R5900 RAM LUT" );
recLutReserve_RAM->SetBlockSize(_16kb);
recLutReserve_RAM->Reserve(Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1);
}
// No errors.. Proceed with initialization:
BASEBLOCK* basepos = (BASEBLOCK*)recLutReserve_RAM->GetPtr();
recRAM = basepos; basepos += (Ps2MemSize::MainRam / 4);
recROM = basepos; basepos += (Ps2MemSize::Rom / 4);
recROM1 = basepos; basepos += (Ps2MemSize::Rom1 / 4);
_DynGen_Dispatchers();
x86FpuState = FPU_STATE;
}
struct ManualPageTracking
{
u16 page;
u8 counter;
};
static __aligned16 u16 manual_page[Ps2MemSize::MainRam >> 12];
static __aligned16 u8 manual_counter[Ps2MemSize::MainRam >> 12];
static u32 eeRecIsReset = false;
static u32 eeRecNeedsReset = false;
static bool eeRecIsActive = false;
static bool eeCpuExecuting = false;
////////////////////////////////////////////////////
static void recResetRaw()
{
recAlloc();
if( AtomicExchange( eeRecIsReset, true ) ) return;
AtomicExchange( eeRecNeedsReset, false );
Console.WriteLn( Color_StrongBlack, "EE/iR5900-32 Recompiler Reset" );
recMem->Reset();
maxrecmem = 0;
memzero_ptr<m_recBlockAllocSize - Ps2MemSize::MainRam>( m_recBlockAlloc ); // Excluding the 32mb ram copy
memzero_ptr<RECCONSTBUF_SIZE * sizeof(u32)>(recConstBuf);
ClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
(((Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));
if( s_pInstCache )
memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize );
recBlocks.Reset();
mmap_ResetBlockTracking();
#ifdef _MSC_VER
__asm emms;
#else
__asm__("emms");
#endif
pxAssert(recLutReserve_RAM->GetPtrEnd() == (u8*)basepos);
for (int i = 0; i < 0x10000; i++)
recLUT_SetPage(recLUT, 0, 0, 0, i, 0);
@ -706,6 +679,65 @@ static void recResetRaw()
recLUT_SetPage(recLUT, hwLUT, recROM1, 0xa000, i, i - 0x1e00);
}
if( recConstBuf == NULL )
recConstBuf = (u32*) _aligned_malloc( RECCONSTBUF_SIZE * sizeof(*recConstBuf), 16 );
if( recConstBuf == NULL )
throw Exception::OutOfMemory( L"R5900-32 SIMD Constants Buffer" );
if( s_pInstCache == NULL )
{
s_nInstCacheSize = 128;
s_pInstCache = (EEINST*)malloc( sizeof(EEINST) * s_nInstCacheSize );
}
if( s_pInstCache == NULL )
throw Exception::OutOfMemory( L"R5900-32 InstCache" );
// No errors.. Proceed with initialization:
_DynGen_Dispatchers();
x86FpuState = FPU_STATE;
}
// Pairs a 16-bit page value with an 8-bit counter.
// NOTE(review): no use of this struct is visible in this part of the file; the
// manual_page / manual_counter arrays carry the same two field types as separate
// arrays -- confirm whether this type is still needed.
struct ManualPageTracking
{
u16 page;
u8 counter;
};
static __aligned16 u16 manual_page[Ps2MemSize::MainRam >> 12];
static __aligned16 u8 manual_counter[Ps2MemSize::MainRam >> 12];
static u32 eeRecIsReset = false;
static u32 eeRecNeedsReset = false;
static bool eeCpuExecuting = false;
////////////////////////////////////////////////////
static void recResetRaw()
{
recAlloc();
if( AtomicExchange( eeRecIsReset, true ) ) return;
AtomicExchange( eeRecNeedsReset, false );
Console.WriteLn( Color_StrongBlack, "EE/iR5900-32 Recompiler Reset" );
recMem->Reset();
recRAMCopy->Reset();
recLutReserve_RAM->Reset();
maxrecmem = 0;
memzero_ptr<RECCONSTBUF_SIZE * sizeof(recConstBuf)>(recConstBuf);
if( s_pInstCache )
memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize );
recBlocks.Reset();
mmap_ResetBlockTracking();
x86SetPtr(*recMem);
recPtr = *recMem;
@ -718,20 +750,21 @@ static void recResetRaw()
// Releases the resources held by the EE recompiler: the recompiled-code reserve, the
// RAM copy reserve, the LUT reserve, the constants buffer and the instruction cache.
// The block list is reset as well.
static void recShutdown()
{
	safe_delete( recMem );
	safe_delete( recRAMCopy );
	safe_delete( recLutReserve_RAM );

	recBlocks.Reset();

	// These only point into the LUT reserve released above, so they are just cleared.
	recRAM = recROM = recROM1 = NULL;

	// The constants buffer must be freed while the pointer is still valid; clearing the
	// pointer beforehand would leak the allocation.
	// NOTE(review): relies on the safe_* macros resetting their argument to NULL after
	// releasing it -- confirm against their definitions.
	safe_aligned_free( recConstBuf );
	safe_free( s_pInstCache );
	s_nInstCacheSize = 0;
}
static void recResetEE()
{
if (eeRecIsActive || eeCpuExecuting)
if (eeCpuExecuting)
{
AtomicExchange( eeRecNeedsReset, true );
return;
@ -848,18 +881,8 @@ void R5900::Dynarec::OpcodeImpl::recBREAK( void )
//branch = 2;
}
// Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default.
static __ri void ClearRecLUT(BASEBLOCK* base, int count)
{
for (int i = 0; i < count; i++)
base[i].SetFnptr((uptr)JITCompile);
}
void recClear(u32 addr, u32 size)
{
BASEBLOCKEX* pexblock;
BASEBLOCK* pblock;
// necessary since recompiler doesn't call femms/emms
#ifdef _MSC_VER
__asm emms;
@ -878,14 +901,14 @@ void recClear(u32 addr, u32 size)
u32 lowerextent = (u32)-1, upperextent = 0, ceiling = (u32)-1;
pexblock = recBlocks[blockidx + 1];
BASEBLOCKEX* pexblock = recBlocks[blockidx + 1];
if (pexblock)
ceiling = pexblock->startpc;
while (pexblock = recBlocks[blockidx]) {
u32 blockstart = pexblock->startpc;
u32 blockend = pexblock->startpc + pexblock->size * 4;
pblock = PC_GETBLOCK(blockstart);
BASEBLOCK* pblock = PC_GETBLOCK(blockstart);
if (pblock == s_pCurBlock) {
blockidx--;
@ -921,7 +944,7 @@ void recClear(u32 addr, u32 size)
}
if (upperextent > lowerextent)
ClearRecLUT(PC_GETBLOCK(lowerextent), (upperextent - lowerextent) / 4);
ClearRecLUT(PC_GETBLOCK(lowerextent), upperextent - lowerextent);
}
@ -1396,14 +1419,12 @@ static void __fastcall recRecompile( const u32 startpc )
if (eeRecNeedsReset) recResetRaw();
// From here on we need to have EE recompile resets disabled, since to reset
// the rec while we're writing to it typically leads to GPF.
//ScopedBool active_scope(eeRecIsActive);
xSetPtr( recPtr );
recPtr = xGetAlignedCallTarget();
if (0x8000d618 == startpc)
DbgCon.WriteLn("Compiling block @ 0x%08x", startpc);
s_pCurBlock = PC_GETBLOCK(startpc);
pxAssert(s_pCurBlock->GetFnptr() == (uptr)JITCompile
@ -1813,10 +1834,12 @@ StartRecomp:
continue;
if (oldBlock->startpc >= HWADDR(pc))
continue;
if (oldBlock->startpc + oldBlock->size * 4 <= HWADDR(startpc))
if ((oldBlock->startpc + oldBlock->size * 4) <= HWADDR(startpc))
break;
if (memcmp(&recRAMCopy[oldBlock->startpc / 4], PSM(oldBlock->startpc),
oldBlock->size * 4)) {
if (memcmp(&(*recRAMCopy)[oldBlock->startpc / 4], PSM(oldBlock->startpc),
oldBlock->size * 4))
{
recClear(startpc, (pc - startpc) / 4);
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
pxAssert(s_pCurBlockEx->startpc == HWADDR(startpc));
@ -1824,7 +1847,7 @@ StartRecomp:
}
}
memcpy_fast(&recRAMCopy[HWADDR(startpc) / 4], PSM(startpc), pc - startpc);
memcpy_fast(&(*recRAMCopy)[HWADDR(startpc) / 4], PSM(startpc), pc - startpc);
}
s_pCurBlock->SetFnptr((uptr)recPtr);