More block manager fixes/optimizations by Pseudonym.

This should be the last one (but you never know :p ).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@703 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
ramapcsx2 2009-03-06 21:03:39 +00:00
parent 954b7a5255
commit 4b3d1ccab3
3 changed files with 103 additions and 207 deletions

View File

@ -30,13 +30,9 @@
// Entry type of the arrays that ClearRecLUT / iopClearRecLUT reset (see those functions).
// As shown here it packs a 32-bit function address together with a 30-bit start PC
// and a 2-bit type field.
// NOTE(review): the assembly dispatchers address these entries by raw byte offset and a
// C_ASSERT pins sizeof(BASEBLOCK), so field order and widths must not change casually.
struct BASEBLOCK
{
u32 m_pFnptr; // address written by SetFnptr() and returned by GetFnptr()
u32 startpc : 30; // start PC stored shifted right by 2 (see SetStartPC/GetStartPC)
u32 uType : 2; // 2-bit type field; other code stores 0 or BLOCKTYPE_DELAYSLOT here
// Returns the stored function address.
const __inline uptr GetFnptr() const { return m_pFnptr; }
// Stores ptr as the function address for this entry.
void __inline SetFnptr( uptr ptr ) { m_pFnptr = ptr; }
// Returns the start PC, rebuilt by shifting the 30-bit field left by 2.
const __inline uptr GetStartPC() const { return startpc << 2; }
// Stores pc >> 2; the two low bits of pc are discarded.
void __inline SetStartPC( uptr pc ) { startpc = pc >> 2; }
};
// extra block info (only valid for start of fn)
@ -107,4 +103,4 @@ static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000],
hwlut[page] = 0u - (pagebase << 16);
}
C_ASSERT( sizeof(BASEBLOCK) == 8 );
C_ASSERT( sizeof(BASEBLOCK) == 4 );

View File

@ -639,7 +639,14 @@ static __declspec(naked) void iopJITCompile()
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [psxRecLUT+esi*4]
jmp dword ptr [ecx+ebx*2]
jmp dword ptr [ecx+ebx]
}
}
// Naked stub whose only instruction is a jump to iopJITCompile.
// Its address serves as a second "not compiled" marker alongside iopJITCompile:
// iopRecRecompile writes it into the entries after a block's first entry when they
// still hold iopJITCompile, and the dispatchers compare against both addresses
// before treating an entry as compiled code.
static __declspec(naked) void iopJITCompileInBlock()
{
__asm {
jmp iopJITCompile
}
}
@ -653,9 +660,11 @@ static __declspec(naked) void iopDispatcher()
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [psxRecLUT+eax*4]
mov eax, dword ptr [ecx+ebx*2]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset iopJITCompile
je notcompiled
cmp eax, offset iopJITCompileInBlock
je notcompiled
lea ebx, [eax-4]
sub ebx, edx
mov dword ptr [edx], ebx
@ -664,7 +673,7 @@ static __declspec(naked) void iopDispatcher()
align 16
notcompiled:
mov esi, edx
lea edi, [ecx+ebx*2]
lea edi, [ecx+ebx]
push ebx
call iopRecRecompile
add esp, 4
@ -677,7 +686,7 @@ notcompiled:
}
}
// edx - baseblock->GetStartPC()
// edx - baseblock start pc
// stack - x86Ptr[0]
static __declspec(naked) void iopDispatcherClear()
{
@ -686,15 +695,17 @@ static __declspec(naked) void iopDispatcherClear()
mov ebx, edx
shr edx, 16
mov ecx, dword ptr [psxRecLUT+edx*4]
mov eax, dword ptr [ecx+ebx*2]
cmp eax, iopJITCompile
mov eax, dword ptr [ecx+ebx]
cmp eax, offset iopJITCompile
je notcompiled
cmp eax, offset iopJITCompileInBlock
je notcompiled
add esp, 4
jmp eax
align 16
notcompiled:
lea edi, [ecx+ebx*2]
lea edi, [ecx+ebx]
push ebx
call iopRecRecompile
add esp, 4
@ -718,18 +729,15 @@ static __declspec(naked) void iopDispatcherReg()
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [psxRecLUT+eax*4]
jmp dword ptr [ecx+ebx*2]
jmp dword ptr [ecx+ebx]
}
}
#endif // _MSC_VER
static void iopClearRecLUT(BASEBLOCK* base, int count)
{
for (int i = 0; i < count; i++) {
for (int i = 0; i < count; i++)
base[i].SetFnptr((uptr)iopJITCompile);
base[i].SetStartPC(0);
base[i].uType = 0;
}
}
static void recExecute()
@ -815,7 +823,7 @@ u32 psxRecClearMem(u32 pc)
pblock = PSX_GETBLOCK(pc);
// if ((u8*)iopJITCompile == pblock->GetFnptr())
if (!pblock->GetStartPC())
if (pblock->GetFnptr() == (uptr)iopJITCompile)
return 4;
pc = HWADDR(pc);
@ -848,7 +856,7 @@ u32 psxRecClearMem(u32 pc)
// Actually, if we want to do this at all maybe keeping a hash
// table of const jumps and modifying the jumps straight from
// here is the way to go.
#if 0
#if 1
// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
MOV32ItoR(EDX, pexblock->startpc);
assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff);
@ -1020,18 +1028,6 @@ void rpsxBREAK()
//if (!psxbranch) psxbranch = 2;
}
// Returns 0 when all of the following hold for the lookup entry of temppc, and 1 otherwise:
//   - its function pointer is something other than iopJITCompile,
//   - its start PC differs from the start PC of s_pCurBlock,
//   - its start PC equals the global psxpc (note: psxpc, not temppc).
u32 psxRecompileCodeSafe(u32 temppc)
{
	BASEBLOCK* const entry = PSX_GETBLOCK(temppc);

	// Evaluated left to right with short-circuiting, so s_pCurBlock is only read
	// once the entry is known to hold something other than iopJITCompile.
	const bool startsForeignBlock =
		entry->GetFnptr() != (uptr)iopJITCompile
		&& entry->GetStartPC() != s_pCurBlock->GetStartPC()
		&& psxpc == entry->GetStartPC();

	return startsForeignBlock ? 0 : 1;
}
void psxRecompileNextInstruction(int delayslot)
{
static u8 s_bFlushReg = 1;
@ -1039,9 +1035,11 @@ void psxRecompileNextInstruction(int delayslot)
BASEBLOCK* pblock = PSX_GETBLOCK(psxpc);
// need *ppblock != s_pCurBlock because of branches
if( pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() )
if (HWADDR(psxpc) != s_pCurBlockEx->startpc
&& pblock->GetFnptr() != (uptr)iopJITCompile
&& pblock->GetFnptr() != (uptr)iopJITCompileInBlock )
{
if( !delayslot && psxpc == pblock->GetStartPC() )
if(!delayslot)
{
// code already in place, so jump to it and exit recomp
assert( recBlocks.Get(HWADDR(psxpc))->startpc == HWADDR(psxpc) );
@ -1052,22 +1050,7 @@ void psxRecompileNextInstruction(int delayslot)
psxbranch = 3;
return;
}
else
{
if( !(delayslot && pblock->GetStartPC() == psxpc) )
{
u8* oldX86 = x86Ptr[0];
//__Log("clear block %x\n", pblock->GetStartPC());
psxRecClearMem(psxpc);
x86Ptr[0] = oldX86;
if( delayslot )
SysPrintf("delay slot %x\n", psxpc);
}
}
}
if( delayslot )
pblock->uType = BLOCKTYPE_DELAYSLOT;
#ifdef _DEBUG
MOV32ItoR(EAX, psxpc);
@ -1178,17 +1161,11 @@ void iopRecRecompile(u32 startpc)
s_pCurBlock = PSX_GETBLOCK(startpc);
if( s_pCurBlock->GetFnptr() != (uptr)iopJITCompile ) {
// clear if already taken
assert( s_pCurBlock->GetStartPC() < startpc );
psxRecClearMem(startpc);
}
assert(s_pCurBlock->GetFnptr() == (uptr)iopJITCompile
|| s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock);
if( s_pCurBlock->GetStartPC() == startpc ) {
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
assert( s_pCurBlockEx->startpc == HWADDR(startpc) );
}
else {
if(!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)) {
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
if( s_pCurBlockEx == NULL ) {
@ -1204,7 +1181,6 @@ void iopRecRecompile(u32 startpc)
psxbranch = 0;
s_pCurBlock->SetStartPC(startpc);
s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] );
s_psxBlockCycles = 0;
@ -1227,15 +1203,14 @@ void iopRecRecompile(u32 startpc)
while(1) {
BASEBLOCK* pblock = PSX_GETBLOCK(i);
if( pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() ) {
if( i == pblock->GetStartPC() ) {
if (i != startpc
&& pblock->GetFnptr() != (uptr)iopJITCompile
&& pblock->GetFnptr() != (uptr)iopJITCompileInBlock) {
// branch = 3
willbranch3 = 1;
s_nEndBlock = i;
break;
}
}
psxRegs.code = iopMemRead32(i);
@ -1330,25 +1305,10 @@ StartRecomp:
s_pCurBlockEx->size = (psxpc-startpc)>>2;
for(i = 1; i < (u32)s_pCurBlockEx->size; ++i) {
if (!s_pCurBlock[i].GetStartPC())
s_pCurBlock[i].SetStartPC( startpc );
if (s_pCurBlock[i].GetFnptr() == (uptr)iopJITCompile)
s_pCurBlock[i].SetFnptr((uptr)iopJITCompileInBlock);
}
// This is just wrong, right? How can setting a jump to any point in this block
// to jump to the beginning of the block possibly be right?
#ifdef ZERO_TOLERANCE
for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) {
s_pCurBlock[i].SetFnptr( s_pCurBlock->GetFnptr() );
s_pCurBlock[i].SetStartPC( p_CurBlock->startpc );
}
// don't overwrite if delay slot
if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) {
s_pCurBlock[i].SetFnptr(0);
s_pCurBlock[i].SetStartPC(0);
}
#endif
if( !(psxpc&0x10000000) )
g_psxMaxRecMem = std::max( (psxpc&~0xa0000000), g_psxMaxRecMem );
@ -1394,28 +1354,16 @@ StartRecomp:
assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg );
if( !psxbranch ) {
BASEBLOCK* pcurblock = s_pCurBlock;
u32 nEndBlock = s_nEndBlock;
s_pCurBlock = PSX_GETBLOCK(psxpc);
assert( ptr != NULL );
s_pCurBlock = PSX_GETBLOCK(psxpc);
if( s_pCurBlock->GetStartPC() != psxpc ){
if (s_pCurBlock->GetFnptr() == (uptr)iopJITCompile
|| s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock){
iopRecRecompile(psxpc);
}
// could have reset
if( pcurblock->GetStartPC() == startpc ) {
assert( pcurblock->GetFnptr() != (uptr)iopJITCompile );
assert( s_pCurBlock->GetStartPC() == nEndBlock );
*ptr = (u32)((uptr)s_pCurBlock->GetFnptr() - ( (uptr)ptr + 4 ));
*ptr = s_pCurBlock->GetFnptr() - ((u32)ptr + 4);
}
else {
iopRecRecompile(startpc);
assert( pcurblock->GetFnptr() != (uptr)iopJITCompile );
}
}
else
assert( s_pCurBlock->GetFnptr() != (uptr)iopJITCompile );
}
R3000Acpu psxRec = {

View File

@ -647,7 +647,14 @@ static __declspec(naked) void JITCompile()
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [recLUT+esi*4]
jmp dword ptr [ecx+ebx*2]
jmp dword ptr [ecx+ebx]
}
}
// Naked stub whose only instruction is a jump to JITCompile.
// Its address serves as a second "not compiled" marker alongside JITCompile:
// recRecompile writes it into the entries after a block's first entry when they
// still hold JITCompile, and Dispatcher / DispatcherClear compare against both
// addresses before treating an entry as compiled code.
static __declspec(naked) void JITCompileInBlock()
{
__asm {
jmp JITCompile
}
}
@ -661,10 +668,12 @@ static __naked void Dispatcher()
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [recLUT+eax*4]
mov eax, dword ptr [ecx+ebx*2]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset JITCompile
je notcompiled
cmp eax, offset JITCompileInBlock
je notcompiled
lea ebx, [eax-4]
sub ebx, edx
mov dword ptr [edx], ebx
@ -673,7 +682,7 @@ static __naked void Dispatcher()
align 16
notcompiled:
mov esi, edx
lea edi, [ecx+ebx*2]
lea edi, [ecx+ebx]
push ebx
call recRecompile
add esp, 4
@ -686,7 +695,7 @@ notcompiled:
}
}
// edx - baseblock->GetStartPC()
// edx - block start pc
// stack - x86Ptr[0]
static __naked void DispatcherClear()
{
@ -695,16 +704,18 @@ static __naked void DispatcherClear()
mov ebx, edx
shr edx, 16
mov ecx, dword ptr [recLUT+edx*4]
mov eax, dword ptr [ecx+ebx*2]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset JITCompile
je notcompiled
cmp eax, offset JITCompileInBlock
je notcompiled
add esp, 4
jmp eax
align 16
notcompiled:
lea edi, [ecx+ebx*2]
lea edi, [ecx+ebx]
push ebx
call recRecompile
add esp, 4
@ -728,7 +739,7 @@ static void __naked DispatcherReg()
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [recLUT+eax*4]
jmp dword ptr [ecx+ebx*2]
jmp dword ptr [ecx+ebx]
}
}
@ -888,11 +899,7 @@ void recClear( u32 Addr, u32 Size )
// Resets `count` consecutive entries beginning at `base`: each one gets JITCompile
// as its function pointer, a start PC of 0 and a type of 0.
// A count of zero or less leaves the table untouched.
static void ClearRecLUT(BASEBLOCK* base, int count)
{
	BASEBLOCK* entry = base;
	for (int remaining = count; remaining > 0; --remaining, ++entry)
	{
		entry->SetFnptr((uptr)JITCompile);
		entry->SetStartPC(0);
		entry->uType = 0;
	}
}
// Returns the offset to the next instruction after any cleared memory
@ -901,9 +908,25 @@ u32 recClearMem(u32 pc)
BASEBLOCKEX* pexblock;
BASEBLOCK* pblock;
//why the hell?
#if 1
// necessary since recompiler doesn't call femms/emms
#ifdef __INTEL_COMPILER
__asm__("emms");
#else
#ifdef _MSC_VER
if (cpucaps.has3DNOWInstructionExtensions) __asm femms;
else __asm emms;
#else
if( cpucaps.has3DNOWInstructionExtensions )__asm__("femms");
else
__asm__("emms");
#endif
#endif
#endif
pblock = PC_GETBLOCK(pc);
// if ((u8*)JITCompile == pblock->GetFnptr())
if (!pblock->GetStartPC())
if (pblock->GetFnptr() == (uptr)JITCompile)
return 4;
pc = HWADDR(pc);
@ -929,23 +952,17 @@ u32 recClearMem(u32 pc)
x86Ptr[_EmitterId_] = (u8*)pblock->GetFnptr();
jASSUME((u8*)JITCompile != x86Ptr[_EmitterId_]);
// jASSUME((u8*)JITCompileInside != x86Ptr[_EmitterId_]);
jASSUME((u8*)JITCompileInBlock != x86Ptr[_EmitterId_]);
// This is breaking things currently, rather than figure it out
// I'm just using DispatcherReg, it's fast enough now.
// Actually, if we want to do this at all maybe keeping a hash
// table of const jumps and modifying the jumps straight from
// here is the way to go.
#if 0
// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
MOV32ItoR(EDX, pexblock->startpc);
assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff);
PUSH32I((uptr)x86Ptr[_EmitterId_]); // will be replaced by JMP32
JMP32((uptr)DispatcherClear - ((uptr)x86Ptr[_EmitterId_] + 5));
#else
MOV32ItoM((uptr)&cpuRegs.pc, pexblock->startpc);
JMP32((uptr)DispatcherReg - ((uptr)x86Ptr[_EmitterId_] + 5));
#endif
lowerextent = min(lowerextent, pexblock->startpc);
upperextent = max(upperextent, pexblock->startpc + pexblock->size * 4);
@ -1257,18 +1274,6 @@ static void checkcodefn()
assert(0);
}
// Returns 0 when all of the following hold for the lookup entry of temppc, and 1 otherwise:
//   - its function pointer is something other than JITCompile,
//   - its start PC differs from the start PC of s_pCurBlock,
//   - its start PC equals the global pc (note: pc, not temppc).
u32 recompileCodeSafe(u32 temppc)
{
	BASEBLOCK* const entry = PC_GETBLOCK(temppc);

	// Evaluated left to right with short-circuiting, so s_pCurBlock is only read
	// once the entry is known to hold something other than JITCompile.
	const bool startsForeignBlock =
		entry->GetFnptr() != (uptr)JITCompile
		&& entry->GetStartPC() != s_pCurBlock->GetStartPC()
		&& pc == entry->GetStartPC();

	return startsForeignBlock ? 0 : 1;
}
void recompileNextInstruction(int delayslot)
{
static u8 s_bFlushReg = 1;
@ -1277,9 +1282,9 @@ void recompileNextInstruction(int delayslot)
BASEBLOCK* pblock = PC_GETBLOCK(pc);
// need *ppblock != s_pCurBlock because of branches
if( pblock->GetFnptr() != (uptr)JITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() )
if (HWADDR(pc) != s_pCurBlockEx->startpc && pblock->GetFnptr() != (uptr)JITCompile && pblock->GetFnptr() != (uptr)JITCompileInBlock)
{
if( !delayslot && pc == pblock->GetStartPC() )
if (!delayslot)
{
// code already in place, so jump to it and exit recomp
assert( recBlocks.Get(HWADDR(pc))->startpc == HWADDR(pc) );
@ -1290,24 +1295,7 @@ void recompileNextInstruction(int delayslot)
branch = 3;
return;
}
else
{
if( !(delayslot && pblock->GetStartPC() == pc) )
{
u8* oldX86 = x86Ptr[0];
//__Log("clear block %x\n", pblock->GetStartPC());
recClearMem(pc);
x86Ptr[0] = oldX86;
if( delayslot )
Console::Notice("delay slot %x", params pc);
}
}
}
#if 1
if( delayslot )
pblock->uType = BLOCKTYPE_DELAYSLOT;
#endif
s_pCode = (int *)PSM( pc );
assert(s_pCode);
@ -1507,17 +1495,11 @@ void recRecompile( const u32 startpc )
s_pCurBlock = PC_GETBLOCK(startpc);
if( s_pCurBlock->GetFnptr() != (uptr)JITCompile ) {
// clear if already taken
assert( s_pCurBlock->GetStartPC() < startpc );
recClearMem(startpc);
}
assert(s_pCurBlock->GetFnptr() == (uptr)JITCompile
|| s_pCurBlock->GetFnptr() == (uptr)JITCompileInBlock);
if( s_pCurBlock->GetStartPC() == startpc ) {
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
assert( s_pCurBlockEx->startpc == HWADDR(startpc) );
}
else {
if (!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)) {
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
if( s_pCurBlockEx == NULL ) {
@ -1527,11 +1509,12 @@ void recRecompile( const u32 startpc )
}
}
assert(s_pCurBlockEx);
x86SetPtr( recPtr );
x86Align(16);
recPtr = x86Ptr[0];
s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] );
s_pCurBlock->SetStartPC(startpc);
branch = 0;
@ -1568,15 +1551,12 @@ void recRecompile( const u32 startpc )
while(1) {
BASEBLOCK* pblock = PC_GETBLOCK(i);
if( pblock->GetFnptr() != (uptr)JITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() ) {
if( i == pblock->GetStartPC() ) {
if (i != startpc && pblock->GetFnptr() != (uptr)JITCompile && pblock->GetFnptr() != (uptr)JITCompileInBlock) {
// branch = 3
willbranch3 = 1;
s_nEndBlock = i;
break;
}
}
//HUH ? PSM ? whut ? THIS IS VIRTUAL ACCESS GOD DAMMIT
cpuRegs.code = *(int *)PSM(i);
@ -1900,29 +1880,10 @@ StartRecomp:
s_pCurBlockEx->size = (pc-startpc)>>2;
for(i = 1; i < (u32)s_pCurBlockEx->size; i++) {
if (!s_pCurBlock[i].GetStartPC())
s_pCurBlock[i].SetStartPC(startpc);
if ((uptr)JITCompile == s_pCurBlock[i].GetFnptr())
s_pCurBlock[i].SetFnptr((uptr)JITCompileInBlock);
}
// This is just wrong, right? How can setting a jump to any point in this block
// to jump to the beginning of the block possibly be right? -pseudonym
// - Jumping to the beginning of the block will work fine so long as the registers
// are flushed first before the jump is made. Of course that's how all static
// links work so I still don't see the point of any complication for it -air
#ifdef ZERO_TOLERANCE
for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) {
s_pCurBlock[i].SetFnptr( s_pCurBlock->GetFnptr() );
s_pCurBlock[i].SetStartPC( p_CurBlock->startpc );
}
// don't overwrite if delay slot
if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) {
s_pCurBlock[i].SetFnptr(0);
s_pCurBlock[i].SetStartPC(0);
}
#endif
if( !(pc&0x10000000) )
maxrecmem = std::max( (pc&~0xa0000000), maxrecmem );
@ -1972,23 +1933,14 @@ StartRecomp:
assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg );
if( !branch ) {
BASEBLOCK* pcurblock = s_pCurBlock;
u32 nEndBlock = s_nEndBlock;
s_pCurBlock = PC_GETBLOCK(pc);
assert( ptr != NULL );
s_pCurBlock = PC_GETBLOCK(pc);
if( s_pCurBlock->GetStartPC() != pc )
if (s_pCurBlock->GetFnptr() == (uptr)JITCompile
|| s_pCurBlock->GetFnptr() == (uptr)JITCompileInBlock)
recRecompile(pc);
if( pcurblock->GetStartPC() == startpc ) {
assert( pcurblock->GetFnptr() != (uptr)JITCompile );
assert( s_pCurBlock->GetStartPC() == nEndBlock );
*ptr = s_pCurBlock->GetFnptr() - ( (u32)ptr + 4 );
}
else {
recRecompile(startpc);
assert( pcurblock->GetFnptr() != (uptr)JITCompile );
}
*ptr = s_pCurBlock->GetFnptr() - ((u32)ptr + 4);
}
}