Fixes for the new block manager optimizations by Pseudonym.

It's even a bit faster :)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@700 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
ramapcsx2 2009-03-06 17:14:30 +00:00
parent 3880bf5a14
commit 801d71f7f0
6 changed files with 300 additions and 354 deletions

View File

@ -19,143 +19,51 @@
#include "PrecompiledHeader.h"
#include "BaseblockEx.h"
#include <vector>
using namespace std;
struct BASEBLOCKS
BASEBLOCKEX* BaseBlocks::New(u32 startpc)
{
// 0 - ee, 1 - iop
void Add(BASEBLOCKEX*);
void Remove(BASEBLOCKEX*);
int Get(u32 startpc);
void Reset();
if (blocks.size() == size)
return 0;
BASEBLOCKEX** GetAll(int* pnum);
BASEBLOCKEX newblock;
std::vector<BASEBLOCKEX>::iterator iter;
memset(&newblock, 0, sizeof newblock);
newblock.startpc = startpc;
vector<BASEBLOCKEX*> blocks;
};
void BASEBLOCKS::Add(BASEBLOCKEX* pex)
{
assert( pex != NULL );
switch(blocks.size()) {
case 0:
blocks.push_back(pex);
return;
case 1:
assert( blocks.front()->startpc != pex->startpc );
if( blocks.front()->startpc < pex->startpc ) {
blocks.push_back(pex);
}
else blocks.insert(blocks.begin(), pex);
return;
default:
{
int imin = 0, imax = blocks.size(), imid;
while (imin < imax) {
imid = (imin+imax)>>1;
if( blocks[imid]->startpc > pex->startpc ) imax = imid;
else imin = imid+1;
}
assert( imin == blocks.size() || blocks[imin]->startpc > pex->startpc );
blocks.insert(blocks.begin()+imin, pex);
return;
}
}
}
int BASEBLOCKS::Get(u32 startpc)
{
switch(blocks.size()) {
case 0:
return -1;
case 1:
if (blocks.front()->startpc + blocks.front()->size*4 <= startpc)
return -1;
if (blocks[imid].startpc > startpc)
imax = imid;
else
return 0;
/*case 2:
return blocks.front()->startpc < startpc;*/
imin = imid + 1;
}
default:
assert(imin == blocks.size() || blocks[imin].startpc > startpc);
iter = blocks.insert(blocks.begin() + imin, newblock);
return &*iter;
}
int BaseBlocks::Index(u32 startpc) const
{
if (0 == blocks.size())
return -1;
int imin = 0, imax = blocks.size() - 1, imid;
while(imin < imax) {
imid = (imin+imax)>>1;
while(imin != imax) {
imid = (imin+imax+1)>>1;
if( blocks[imid]->startpc > startpc ) imax = imid;
else if( blocks[imid]->startpc == startpc ) return imid;
else imin = imid+1;
if (blocks[imid].startpc > startpc)
imax = imid - 1;
else
imin = imid;
}
//assert( blocks[imin]->startpc == startpc );
if (startpc < blocks[imin]->startpc ||
startpc >= blocks[imin]->startpc + blocks[imin]->size*4)
if (startpc < blocks[imin].startpc ||
startpc >= blocks[imin].startpc + blocks[imin].size*4)
return -1;
else
return imin;
}
}
}
void BASEBLOCKS::Remove(BASEBLOCKEX* pex)
{
assert( pex != NULL );
int i = Get(pex->startpc);
assert( blocks[i] == pex );
blocks.erase(blocks.begin()+i);
}
void BASEBLOCKS::Reset()
{
blocks.resize(0);
blocks.reserve(512);
}
BASEBLOCKEX** BASEBLOCKS::GetAll(int* pnum)
{
assert( pnum != NULL );
*pnum = blocks.size();
return &blocks[0];
}
static BASEBLOCKS s_vecBaseBlocksEx[2];
void AddBaseBlockEx(BASEBLOCKEX* pex, int cpu)
{
s_vecBaseBlocksEx[cpu].Add(pex);
}
BASEBLOCKEX* GetBaseBlockEx(u32 startpc, int cpu)
{
int i = s_vecBaseBlocksEx[cpu].Get(startpc);
if (i < 0)
return 0;
else
return s_vecBaseBlocksEx[cpu].blocks[i];
}
void RemoveBaseBlockEx(BASEBLOCKEX* pex, int cpu)
{
s_vecBaseBlocksEx[cpu].Remove(pex);
}
void ResetBaseBlockEx(int cpu)
{
s_vecBaseBlocksEx[cpu].Reset();
}
BASEBLOCKEX** GetAllBaseBlocks(int* pnum, int cpu)
{
return s_vecBaseBlocksEx[cpu].GetAll(pnum);
}

View File

@ -16,8 +16,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef _BASEBLOCKEX_H_
#define _BASEBLOCKEX_H_
#pragma once
#include "PrecompiledHeader.h"
#include <vector>
// used to keep block information
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
@ -51,28 +53,58 @@ struct BASEBLOCKEX
};
// This is an asinine macro that bases indexing on sizeof(BASEBLOCK) for no reason. (air)
#define GET_BLOCKTYPE(b) ((b)->Type)
// x * (sizeof(BASEBLOCK) / 4) sacrifices safety for speed compared to
// x / 4 * sizeof(BASEBLOCK) or a higher level approach.
#define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
static void recLUT_SetPage( uptr reclut[0x10000], uint page, void* mapping )
class BaseBlocks
{
jASSUME( page < 0x10000 );
reclut[page] = ((uptr)mapping) - ((page << 14) * sizeof(BASEBLOCK));
private:
std::vector<BASEBLOCKEX> blocks;
unsigned long size;
public:
// Creates an empty block list limited to `max` entries and reserves
// storage for that many elements up front.
// The initializers are listed in member declaration order (blocks, then
// size), which is the order in which C++ actually runs them.
BaseBlocks(unsigned long max) :
	blocks(),
	size(max)
{
	blocks.reserve(size);
}
BASEBLOCKEX* New(u32 startpc);
int Index (u32 startpc) const;
// Bounds-checked element access: returns a pointer to the entry at idx,
// or null when idx is negative or not below the current number of entries.
inline BASEBLOCKEX* operator[](int idx)
{
	const bool inRange = (idx >= 0) && (idx < (int)blocks.size());
	if (inRange)
		return &blocks[idx];
	return 0;
}
// Looks up startpc with Index() and returns the matching entry through the
// bounds-checked operator[], so a failed lookup yields null.
inline BASEBLOCKEX* Get(u32 startpc)
{
	const int idx = Index(startpc);
	return (*this)[idx];
}
inline void Remove(int idx)
{
blocks.erase(blocks.begin() + idx);
}
// Drops every entry, leaving the list empty.
inline void Reset()
{
	blocks.erase(blocks.begin(), blocks.end());
}
};
#define GET_BLOCKTYPE(b) ((b)->Type)
#define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
// Fills in the lookup-table entries for one page.
//
//   reclut   - table that receives the block-array address for the page
//   hwlut    - optional table (may be null) that receives the address offset
//   mapbase  - start of the BASEBLOCK array backing this region
//   pagebase - first page of the segment being mapped
//   pageidx  - page offset inside that segment
//   mappage  - page index inside mapbase that the page should resolve to
//
// The page written is pagebase + pageidx. The reclut entry is the address of
// mapbase[(mappage - page) << 14]; PC_GETBLOCK_ later adds
// x * (sizeof(BASEBLOCK) / 4) to the entry, so the page term is subtracted
// here in advance. When hwlut is given, its entry is set to the negated
// segment base (pagebase << 16).
static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000],
	BASEBLOCK *mapbase, uint pagebase, uint pageidx, uint mappage)
{
	uint page = pagebase + pageidx;
	jASSUME( page < 0x10000 );

	BASEBLOCK* biased = &mapbase[(mappage - page) << 14];
	reclut[page] = (uptr)biased;

	if (!hwlut)
		return;
	hwlut[page] = 0u - (pagebase << 16);
}
// This is needed because the PC_GETBLOCK_ macro above bakes sizeof(BASEBLOCK)/4 into its index arithmetic.
C_ASSERT( sizeof(BASEBLOCK) == 8 );
// 0 - ee, 1 - iop
extern void AddBaseBlockEx(BASEBLOCKEX*, int cpu);
extern void RemoveBaseBlockEx(BASEBLOCKEX*, int cpu);
extern BASEBLOCKEX* GetBaseBlockEx(u32 startpc, int cpu);
extern void ResetBaseBlockEx(int cpu);
extern BASEBLOCKEX** GetAllBaseBlocks(int* pnum, int cpu);
#endif

View File

@ -45,6 +45,9 @@ u32 g_psxMaxRecMem = 0;
u32 s_psxrecblocks[] = {0};
uptr psxRecLUT[0x10000];
uptr psxhwLUT[0x10000];
// Adds the psxhwLUT entry selected by the upper 16 bits of mem to mem.
// The argument is parenthesized at every use so that a compound expression
// (for example `a | b`) is shifted as a whole rather than piecewise.
#define HWADDR(mem) (psxhwLUT[(mem) >> 16] + (mem))
#define PSX_NUMBLOCKS (1<<12)
#define MAPBASE 0x48000000
@ -59,7 +62,7 @@ static u8 *recMem = NULL; // the recompiled blocks will be here
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static BASEBLOCKEX *recBlocks = NULL;
static BaseBlocks recBlocks(PSX_NUMBLOCKS);
static u8 *recPtr = NULL;
u32 psxpc; // recompiler psxpc
int psxbranch; // set for branch
@ -73,8 +76,6 @@ static BASEBLOCKEX* s_pCurBlockEx = NULL;
static u32 s_nEndBlock = 0; // what psxpc the current block ends
static u32 s_nNextBlock = 0; // next free block in recBlocks
static u32 s_ConstGPRreg;
static u32 s_saveConstGPRreg = 0, s_saveHasConstReg = 0, s_saveFlushedConstReg = 0;
static EEINST* s_psaveInstInfo = NULL;
@ -88,7 +89,7 @@ void psxRecompileNextInstruction(int delayslot);
extern void (*rpsxBSC[64])();
void rpsxpropBSC(EEINST* prev, EEINST* pinst);
static void iopInitRecLUT(BASEBLOCK* base, int count);
static void iopClearRecLUT(BASEBLOCK* base, int count);
#ifdef _DEBUG
u32 psxdump = 0;
@ -98,21 +99,9 @@ u32 psxdump = 0;
#define PSX_GETBLOCK(x) PC_GETBLOCK_(x, psxRecLUT)
#define PSXREC_CLEARM(mem) { \
if ((mem) < g_psxMaxRecMem && (psxRecLUT[(mem) >> 16] + mem)) { \
psxRecClearMem(mem); \
} \
} \
BASEBLOCKEX* PSX_GETBLOCKEX(BASEBLOCK* p)
{
// BASEBLOCKEX* pex = *(BASEBLOCKEX**)(p+1);
// if( pex >= recBlocks && pex < recBlocks+PSX_NUMBLOCKS )
// return pex;
// otherwise, use the sorted list
return GetBaseBlockEx(p->GetStartPC(), 1);
}
// Expression macro: when mem is below g_psxMaxRecMem and
// psxRecLUT[mem >> 16] + mem is non-zero, evaluates to psxRecClearMem(mem);
// otherwise evaluates to 4. recClear adds the result to its pc cursor.
// Note: mem is expanded several times, so pass an expression without side effects.
#define PSXREC_CLEARM(mem) \
(((mem) < g_psxMaxRecMem && (psxRecLUT[(mem) >> 16] + (mem))) ? \
psxRecClearMem(mem) : 4)
////////////////////////////////////////////////////
#ifdef _DEBUG
@ -528,8 +517,7 @@ void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode,
static u8* m_recBlockAlloc = NULL;
static const uint m_recBlockAllocSize =
(((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4) * sizeof(BASEBLOCK))
+ (PSX_NUMBLOCKS*sizeof(BASEBLOCKEX)); // recBlocks
(((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4) * sizeof(BASEBLOCK));
static void recAlloc()
{
@ -557,7 +545,6 @@ static void recAlloc()
recRAM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::IopRam / 4) * sizeof(BASEBLOCK);
recROM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK);
recROM1 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK);
recBlocks = (BASEBLOCKEX*)curpos; // curpos += sizeof(BASEBLOCKEX)*EE_NUMBLOCKS;
if( s_pInstCache == NULL )
{
@ -574,45 +561,45 @@ static void recAlloc()
void recResetIOP()
{
// calling recResetIOP without first calling recInit is bad mojo.
jASSUME( psxRecLUT != NULL );
jASSUME( recMem != NULL );
jASSUME( m_recBlockAlloc != NULL );
DevCon::Status( "iR3000A Resetting recompiler memory and structures" );
memzero_ptr<sizeof(psxRecLUT)>( psxRecLUT );
memset_8<0xcd,RECMEM_SIZE>( recMem );
memzero_ptr<PSX_NUMBLOCKS*sizeof(BASEBLOCKEX)>(recBlocks);
iopInitRecLUT((BASEBLOCK*)m_recBlockAlloc,
iopClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
(((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));
for (int i = 0; i < 0x10000; i++)
recLUT_SetPage(psxRecLUT, 0, 0, 0, i, 0);
// We're only mapping 20 pages here in 4 places.
// 0x80 comes from : (Ps2MemSize::IopRam / 0x10000) * 4
for (int i=0; i<0x80; i++)
{
recLUT_SetPage(psxRecLUT, i + 0x0000, &recRAM[(i & 0x1f) << 14]);
recLUT_SetPage(psxRecLUT, i + 0x8000, &recRAM[(i & 0x1f) << 14]);
recLUT_SetPage(psxRecLUT, i + 0xa000, &recRAM[(i & 0x1f) << 14]);
recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0x0000, i, i & 0x1f);
recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0x8000, i, i & 0x1f);
recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0xa000, i, i & 0x1f);
}
for (int i=0; i<(Ps2MemSize::Rom / 0x10000); i++)
for (int i=0x1fc0; i<0x2000; i++)
{
recLUT_SetPage(psxRecLUT, i + 0x1fc0, &recROM[i << 14]);
recLUT_SetPage(psxRecLUT, i + 0x9fc0, &recROM[i << 14]);
recLUT_SetPage(psxRecLUT, i + 0xbfc0, &recROM[i << 14]);
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0x0000, i, i - 0x1fc0);
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0x8000, i, i - 0x1fc0);
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0xa000, i, i - 0x1fc0);
}
for (int i=0; i<(Ps2MemSize::Rom1 / 0x10000); i++)
for (int i=0x1e00; i<0x1e04; i++)
{
recLUT_SetPage(psxRecLUT, i + 0x1e00, &recROM1[i << 14]);
recLUT_SetPage(psxRecLUT, i + 0x9e00, &recROM1[i << 14]);
recLUT_SetPage(psxRecLUT, i + 0xbe00, &recROM1[i << 14]);
// ROM1 pages start at 0x1e00, so the page index into recROM1 is i - 0x1e00.
// Subtracting the ROM base (0x1fc0) here would wrap around and point the
// lookup table outside the recROM1 array.
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM1, 0x0000, i, i - 0x1e00);
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM1, 0x8000, i, i - 0x1e00);
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM1, 0xa000, i, i - 0x1e00);
}
if( s_pInstCache )
memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize );
ResetBaseBlockEx(1);
recBlocks.Reset();
g_psxMaxRecMem = 0;
recPtr = recMem;
@ -782,7 +769,7 @@ static __declspec(naked) void iopDispatcherReg()
}
#endif // _MSC_VER
static void iopInitRecLUT(BASEBLOCK* base, int count)
static void iopClearRecLUT(BASEBLOCK* base, int count)
{
for (int i = 0; i < count; i++) {
base[i].SetFnptr((uptr)iopJITCompile);
@ -845,59 +832,95 @@ static s32 recExecuteBlock( s32 eeCycles )
static void recClear(u32 Addr, u32 Size)
{
u32 i;
for(i = 0; i < Size; ++i, Addr+=4) {
PSXREC_CLEARM(Addr);
}
u32 pc = Addr;
while (pc < Addr + Size*4)
pc += PSXREC_CLEARM(pc);
}
#define IOP_MIN_BLOCK_BYTES 15
// not used and not right for now
#if 0
void rpsxMemConstClear(u32 mem)
{
// NOTE! This assumes recLUT never changes its mapping
if( !(psxRecLUT[mem>>16] + mem) )
return;
CMP32ItoM((uptr)PSX_GETBLOCK(mem), 0);
CMP32ItoM((uptr)PSX_GETBLOCK(mem), iopJITCompile);
j8Ptr[6] = JE8(0);
_callFunctionArg1((uptr)psxRecClearMem, MEM_CONSTTAG, mem);
x86SetJ8(j8Ptr[6]);
}
#endif
void psxRecClearMem(u32 pc)
// Returns the offset to the next instruction after any cleared memory
u32 psxRecClearMem(u32 pc)
{
BASEBLOCKEX* pexblock;
BASEBLOCK* pstart;
BASEBLOCK* p;
BASEBLOCK* pblock;
p= PSX_GETBLOCK(pc);
pc = p->GetStartPC();
if (!pc)
return;
pexblock = GetBaseBlockEx(pc, 1);
if (!pexblock)
return;
pstart = PSX_GETBLOCK(pexblock->startpc);
pblock = PSX_GETBLOCK(pc);
// if ((u8*)iopJITCompile == pblock->GetFnptr())
if (!pblock->GetStartPC())
return 4;
for (int i = 0; i < pexblock->size; i++) {
x86Ptr[0] = (u8*)pstart[i].GetFnptr();
if (x86Ptr[0] == (u8*)iopJITCompile)
continue;
pc = HWADDR(pc);
// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
MOV32ItoR(EDX, pexblock->startpc + i*4);
assert( (uptr)x86Ptr[0] <= 0xffffffff );
PUSH32I((uptr)x86Ptr[0]); // will be replaced by JMP32
JMP32((uptr)iopDispatcherClear - ( (uptr)x86Ptr[0] + 5 ));
u32 lowerextent = pc, upperextent = pc + 4;
int blockidx = recBlocks.Index(pc);
jASSUME(blockidx != -1);
while (pexblock = recBlocks[blockidx - 1]) {
if (pexblock->startpc + pexblock->size * 4 <= lowerextent)
break;
lowerextent = min(lowerextent, pexblock->startpc);
blockidx--;
}
iopInitRecLUT(pstart, pexblock->size);
while (pexblock = recBlocks[blockidx]) {
if (pexblock->startpc >= upperextent)
break;
RemoveBaseBlockEx(pexblock, 1);
pexblock->size = 0;
pexblock->startpc = 0;
pblock = PSX_GETBLOCK(pexblock->startpc);
x86Ptr[_EmitterId_] = (u8*)pblock->GetFnptr();
jASSUME((u8*)iopJITCompile != x86Ptr[_EmitterId_]);
// jASSUME((u8*)iopJITCompileInside != x86Ptr[_EmitterId_]);
// This is breaking things currently, rather than figure it out
// I'm just using DispatcherReg, it's fast enough now.
// Actually, if we want to do this at all maybe keeping a hash
// table of const jumps and modifying the jumps straight from
// here is the way to go.
#if 0
// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
MOV32ItoR(EDX, pexblock->startpc);
assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff);
PUSH32I((uptr)x86Ptr[_EmitterId_]); // will be replaced by JMP32
JMP32((uptr)iopDispatcherClear - ((uptr)x86Ptr[_EmitterId_] + 5));
#else
MOV32ItoM((uptr)&psxRegs.pc, pexblock->startpc);
JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr[_EmitterId_] + 5));
#endif
lowerextent = min(lowerextent, pexblock->startpc);
upperextent = max(upperextent, pexblock->startpc + pexblock->size * 4);
recBlocks.Remove(blockidx);
}
#ifdef PCSX2_DEVBUILD
for (int i = 0; pexblock = recBlocks[i]; i++)
if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4) {
Console::Error("Impossible block clearing failure");
jASSUME(0);
}
#endif
iopClearRecLUT(PSX_GETBLOCK(lowerextent), (upperextent - lowerextent) / 4);
return upperextent - pc;
}
void psxSetBranchReg(u32 reg)
@ -1067,7 +1090,7 @@ void psxRecompileNextInstruction(int delayslot)
if( !delayslot && psxpc == pblock->GetStartPC() )
{
// code already in place, so jump to it and exit recomp
assert( PSX_GETBLOCKEX(pblock)->startpc == pblock->GetStartPC() );
assert( recBlocks.Get(HWADDR(psxpc))->startpc == HWADDR(psxpc) );
_psxFlushCall(FLUSH_EVERYTHING);
MOV32ItoM((uptr)&psxRegs.pc, psxpc);
@ -1197,10 +1220,7 @@ void iopRecRecompile(u32 startpc)
// if recPtr reached the mem limit reset whole mem
if (((uptr)recPtr - (uptr)recMem) >= (RECMEM_SIZE - 0x10000))
{
// This is getting called pretty often in Linux. (21 times in the course of getting to the starting screen of KH1) --arcum42
recResetIOP();
}
s_pCurBlock = PSX_GETBLOCK(startpc);
@ -1211,27 +1231,17 @@ void iopRecRecompile(u32 startpc)
}
if( s_pCurBlock->GetStartPC() == startpc ) {
s_pCurBlockEx = PSX_GETBLOCKEX(s_pCurBlock);
assert( s_pCurBlockEx->startpc == startpc );
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
assert( s_pCurBlockEx->startpc == HWADDR(startpc) );
}
else {
s_pCurBlockEx = NULL;
for(i = 0; i < PSX_NUMBLOCKS; ++i) {
if( recBlocks[(i+s_nNextBlock)%PSX_NUMBLOCKS].size == 0 ) {
s_pCurBlockEx = recBlocks+(i+s_nNextBlock)%PSX_NUMBLOCKS;
s_nNextBlock = (i+s_nNextBlock+1)%PSX_NUMBLOCKS;
break;
}
}
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
if( s_pCurBlockEx == NULL ) {
DevCon::WriteLn("IOP Recompiler data reset");
recResetIOP();
s_nNextBlock = 0;
s_pCurBlockEx = recBlocks;
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
}
s_pCurBlockEx->startpc = startpc;
}
x86SetPtr( recPtr );
@ -1385,9 +1395,6 @@ StartRecomp:
}
#endif
// set the block ptr
AddBaseBlockEx(s_pCurBlockEx, 1);
if( !(psxpc&0x10000000) )
g_psxMaxRecMem = std::max( (psxpc&~0xa0000000), g_psxMaxRecMem );
@ -1426,7 +1433,6 @@ StartRecomp:
}
}
assert( x86Ptr[0] >= (u8*)s_pCurBlock->GetFnptr() + IOP_MIN_BLOCK_BYTES );
assert( x86Ptr[0] < recMem+RECMEM_SIZE );
recPtr = x86Ptr[0];

View File

@ -70,7 +70,7 @@ void psxLoadBranchState();
void psxSetBranchReg(u32 reg);
void psxSetBranchImm( u32 imm );
void psxRecompileNextInstruction(int delayslot);
void psxRecClearMem(u32 p);
u32 psxRecClearMem(u32 p);
////////////////////////////////////////////////////////////////////
// IOP Constant Propagation Defines, Vars, and API - From here down!

View File

@ -92,8 +92,8 @@ extern GPR_reg64 s_ConstGPRreg;
// Used to clear recompiled code blocks during memory/dma write operations.
void recClearMem(u32 pc);
void REC_CLEARM( u32 mem );
u32 recClearMem(u32 pc);
u32 REC_CLEARM( u32 mem );
// used when processing branches
void SaveBranchState();

View File

@ -53,6 +53,9 @@ bool g_EEFreezeRegs = false;
u32 maxrecmem = 0;
uptr recLUT[0x10000];
uptr hwLUT[0x10000];
// Adds the hwLUT entry selected by the upper 16 bits of mem to mem.
// The argument is parenthesized at every use so that a compound expression
// (for example `a | b`) is shifted as a whole rather than piecewise.
#define HWADDR(mem) (hwLUT[(mem) >> 16] + (mem))
u32 s_nBlockCycles = 0; // cycles of current block recompiling
//u8* dyna_block_discard_recmem=0;
@ -77,7 +80,7 @@ static u8* recStack = NULL; // stack mem
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static BASEBLOCKEX *recBlocks = NULL;
static BaseBlocks recBlocks(EE_NUMBLOCKS);
static u8* recPtr = NULL, *recStackPtr = NULL;
static EEINST* s_pInstCache = NULL;
static u32 s_nInstCacheSize = 0;
@ -87,8 +90,6 @@ static BASEBLOCKEX* s_pCurBlockEx = NULL;
static u32 s_nEndBlock = 0; // what pc the current block ends
static u32 s_nHasDelay = 0;
static u32 s_nNextBlock = 0; // next free block in recBlocks
// save states for branches
static u16 s_savex86FpuState, s_saveiCWstate;
static u32 s_saveHasConstReg = 0, s_saveFlushedConstReg = 0, s_saveRegHasLive1 = 0, s_saveRegHasSignExt = 0;
@ -111,17 +112,7 @@ static u32 dumplog = 0;
#endif
static void iBranchTest(u32 newpc, bool noDispatch=false);
static void InitRecLUT(BASEBLOCK* base, int count);
BASEBLOCKEX* PC_GETBLOCKEX(u32 pc)
{
// BASEBLOCKEX* pex = *(BASEBLOCKEX**)(p+1);
// if( pex >= recBlocks && pex < recBlocks+EE_NUMBLOCKS )
// return pex;
// otherwise, use the sorted list
return GetBaseBlockEx(pc, 0);
}
static void ClearRecLUT(BASEBLOCK* base, int count);
////////////////////////////////////////////////////
static void iDumpBlock( int startpc, u8 * ptr )
@ -464,7 +455,6 @@ static u8* m_recBlockAlloc = NULL;
static const uint m_recBlockAllocSize =
(((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4) * sizeof(BASEBLOCK))
+ (EE_NUMBLOCKS*sizeof(BASEBLOCKEX)) // recBlocks
+ RECSTACK_SIZE; // recStack
static void recAlloc()
@ -507,7 +497,6 @@ static void recAlloc()
recRAM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Base / 4) * sizeof(BASEBLOCK);
recROM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK);
recROM1 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK);
recBlocks = (BASEBLOCKEX*)curpos; curpos += sizeof(BASEBLOCKEX)*EE_NUMBLOCKS;
recStack = (u8*)curpos;
if( s_pInstCache == NULL )
@ -531,19 +520,17 @@ void recResetEE( void )
{
DbgCon::Status( "iR5900-32 > Resetting recompiler memory and structures." );
s_nNextBlock = 0;
maxrecmem = 0;
memset_8<0xcd, REC_CACHEMEM>(recMem);
memzero_ptr<m_recBlockAllocSize>( m_recBlockAlloc );
memzero_ptr<EE_NUMBLOCKS*sizeof(BASEBLOCKEX)>(recBlocks);
InitRecLUT((BASEBLOCK*)m_recBlockAlloc,
ClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
(((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));
if( s_pInstCache )
memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize );
ResetBaseBlockEx(0);
recBlocks.Reset();
mmap_ResetBlockTracking();
#ifdef _MSC_VER
@ -552,32 +539,35 @@ void recResetEE( void )
__asm__("emms");
#endif
memzero_ptr<sizeof recLUT>( recLUT );
#define GET_HWADDR(mem)
for (int i = 0; i < 0x10000; i++)
recLUT_SetPage(recLUT, 0, 0, 0, i, 0);
for ( int i = 0x0000; i < 0x0200; i++ )
{
recLUT_SetPage(recLUT, i + 0x0000, &recRAM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0x2000, &recRAM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0x3000, &recRAM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0x8000, &recRAM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0xa000, &recRAM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0xb000, &recRAM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0xc000, &recRAM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0xd000, &recRAM[ i << 14 ]);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0x0000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0x2000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0x3000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0x8000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0xa000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0xb000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0xc000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0xd000, i, i);
}
for ( int i = 0x0000; i < 0x0040; i++ )
for ( int i = 0x1fc0; i < 0x2000; i++ )
{
recLUT_SetPage(recLUT, i + 0x1fc0, &recROM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0x9fc0, &recROM[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0xbfc0, &recROM[ i << 14 ]);
recLUT_SetPage(recLUT, hwLUT, recROM, 0x0000, i, i - 0x1fc0);
recLUT_SetPage(recLUT, hwLUT, recROM, 0x8000, i, i - 0x1fc0);
recLUT_SetPage(recLUT, hwLUT, recROM, 0xa000, i, i - 0x1fc0);
}
for ( int i = 0x0000; i < 0x0004; i++ )
for ( int i = 0x1e00; i < 0x1e04; i++ )
{
recLUT_SetPage(recLUT, i + 0x1e00, &recROM1[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0x9e00, &recROM1[ i << 14 ]);
recLUT_SetPage(recLUT, i + 0xbe00, &recROM1[ i << 14 ]);
recLUT_SetPage(recLUT, hwLUT, recROM1, 0x0000, i, i - 0x1e00);
recLUT_SetPage(recLUT, hwLUT, recROM1, 0x8000, i, i - 0x1e00);
recLUT_SetPage(recLUT, hwLUT, recROM1, 0xa000, i, i - 0x1e00);
}
// drk||Raziel says this is useful but I'm not sure why. Something to do with forward jumps.
@ -603,12 +593,11 @@ void recResetEE( void )
static void recShutdown( void )
{
ProfilerTerminateSource( "EERec" );
ResetBaseBlockEx(0);
recBlocks.Reset();
SafeSysMunmap( recMem, REC_CACHEMEM );
safe_aligned_free( m_recBlockAlloc );
recRAM = recROM = recROM1 = NULL;
recBlocks = NULL;
recStack = NULL;
safe_free( s_pInstCache );
@ -880,21 +869,23 @@ void recBREAK( void ) {
} } } // end namespace R5900::Dynarec::OpcodeImpl
////////////////////////////////////////////////////
static void REC_CLEARM( u32 mem )
static u32 REC_CLEARM( u32 mem )
{
if ((mem) < maxrecmem && (recLUT[(mem) >> 16] + mem))
recClearMem(mem);
return recClearMem(mem);
else
return 4;
}
void recClear( u32 Addr, u32 Size )
{
u32 i;
for(i = 0; i < Size; ++i, Addr+=4)
REC_CLEARM(Addr);
u32 pc = Addr;
while (pc < Addr + Size*4)
pc += REC_CLEARM(pc);
}
// Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default.
static void InitRecLUT(BASEBLOCK* base, int count)
static void ClearRecLUT(BASEBLOCK* base, int count)
{
for (int i = 0; i < count; i++)
{
@ -904,52 +895,74 @@ static void InitRecLUT(BASEBLOCK* base, int count)
}
}
void recClearMem(u32 pc)
// Returns the offset to the next instruction after any cleared memory
u32 recClearMem(u32 pc)
{
BASEBLOCKEX* pexblock;
BASEBLOCK* pstart;
BASEBLOCK* p;
BASEBLOCK* pblock;
p= PC_GETBLOCK(pc);
pc = p->GetStartPC();
if (!pc)
return;
pexblock = PC_GETBLOCKEX(pc);
if (!pexblock)
return;
pstart = PC_GETBLOCK(pexblock->startpc);
pblock = PC_GETBLOCK(pc);
// if ((u8*)JITCompile == pblock->GetFnptr())
if (!pblock->GetStartPC())
return 4;
// necessary since recompiler doesn't call femms/emms
#ifdef __INTEL_COMPILER
__asm__("emms");
#else
#ifdef _MSC_VER
if (cpucaps.has3DNOWInstructionExtensions) __asm femms;
else __asm emms;
#else
if( cpucaps.has3DNOWInstructionExtensions )__asm__("femms");
else
__asm__("emms");
#endif
#endif
pc = HWADDR(pc);
for (int i = 0; i < pexblock->size; i++)
{
x86Ptr[0] = (u8*)pstart[i].GetFnptr();
if (x86Ptr[0] == (u8*)JITCompile)
continue;
u32 lowerextent = pc, upperextent = pc + 4;
int blockidx = recBlocks.Index(pc);
// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
MOV32ItoR(EDX, pexblock->startpc + i*4);
PUSH32I((u32)x86Ptr[0]); // will be replaced by JMP32
JMP32((u32)DispatcherClear - ( (u32)x86Ptr[0] + 5 ));
jASSUME(blockidx != -1);
while (pexblock = recBlocks[blockidx - 1]) {
if (pexblock->startpc + pexblock->size*4 <= lowerextent)
break;
lowerextent = min(lowerextent, pexblock->startpc);
blockidx--;
}
InitRecLUT(pstart, pexblock->size);
while (pexblock = recBlocks[blockidx]) {
if (pexblock->startpc >= upperextent)
break;
RemoveBaseBlockEx(pexblock, 0);
pexblock->size = 0;
pexblock->startpc = 0;
pblock = PC_GETBLOCK(pexblock->startpc);
x86Ptr[_EmitterId_] = (u8*)pblock->GetFnptr();
jASSUME((u8*)JITCompile != x86Ptr[_EmitterId_]);
// jASSUME((u8*)JITCompileInside != x86Ptr[_EmitterId_]);
// This is breaking things currently, rather than figure it out
// I'm just using DispatcherReg, it's fast enough now.
// Actually, if we want to do this at all maybe keeping a hash
// table of const jumps and modifying the jumps straight from
// here is the way to go.
#if 0
// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
MOV32ItoR(EDX, pexblock->startpc);
assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff);
PUSH32I((uptr)x86Ptr[_EmitterId_]); // will be replaced by JMP32
JMP32((uptr)DispatcherClear - ((uptr)x86Ptr[_EmitterId_] + 5));
#else
MOV32ItoM((uptr)&cpuRegs.pc, pexblock->startpc);
JMP32((uptr)DispatcherReg - ((uptr)x86Ptr[_EmitterId_] + 5));
#endif
lowerextent = min(lowerextent, pexblock->startpc);
upperextent = max(upperextent, pexblock->startpc + pexblock->size * 4);
recBlocks.Remove(blockidx);
}
#ifdef PCSX2_DEVBUILD
for (int i = 0; pexblock = recBlocks[i]; i++)
if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4) {
Console::Error("Impossible block clearing failure");
jASSUME(0);
}
#endif
ClearRecLUT(PC_GETBLOCK(lowerextent), (upperextent - lowerextent) / 4);
return upperextent - pc;
}
// check for end of bios
@ -1269,7 +1282,7 @@ void recompileNextInstruction(int delayslot)
if( !delayslot && pc == pblock->GetStartPC() )
{
// code already in place, so jump to it and exit recomp
assert( PC_GETBLOCKEX(pc)->startpc == pblock->GetStartPC() );
assert( recBlocks.Get(HWADDR(pc))->startpc == HWADDR(pc) );
iFlushCall(FLUSH_EVERYTHING);
MOV32ItoM((uptr)&cpuRegs.pc, pc);
@ -1501,27 +1514,17 @@ void recRecompile( const u32 startpc )
}
if( s_pCurBlock->GetStartPC() == startpc ) {
s_pCurBlockEx = PC_GETBLOCKEX(startpc);
assert( s_pCurBlockEx->startpc == startpc );
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
assert( s_pCurBlockEx->startpc == HWADDR(startpc) );
}
else {
s_pCurBlockEx = NULL;
for(i = 0; i < EE_NUMBLOCKS; ++i) {
if( recBlocks[(i+s_nNextBlock)%EE_NUMBLOCKS].size == 0 ) {
s_pCurBlockEx = recBlocks+(i+s_nNextBlock)%EE_NUMBLOCKS;
s_nNextBlock = (i+s_nNextBlock+1)%EE_NUMBLOCKS;
break;
}
}
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
if( s_pCurBlockEx == NULL ) {
//SysPrintf("ee reset (blocks)\n");
recResetEE();
s_nNextBlock = 0;
s_pCurBlockEx = recBlocks;
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
}
s_pCurBlockEx->startpc = startpc;
}
x86SetPtr( recPtr );
@ -1896,7 +1899,7 @@ StartRecomp:
assert( (pc-startpc)>>2 <= 0xffff );
s_pCurBlockEx->size = (pc-startpc)>>2;
for(i = 1; i < (u32)s_pCurBlockEx->size; ++i) {
for(i = 1; i < (u32)s_pCurBlockEx->size; i++) {
if (!s_pCurBlock[i].GetStartPC())
s_pCurBlock[i].SetStartPC(startpc);
}
@ -1920,9 +1923,6 @@ StartRecomp:
}
#endif
// set the block ptr
AddBaseBlockEx(s_pCurBlockEx, 0);
if( !(pc&0x10000000) )
maxrecmem = std::max( (pc&~0xa0000000), maxrecmem );