From 4b3d1ccab3d9a8724d1d3b2d82e81acd25679850 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Fri, 6 Mar 2009 21:03:39 +0000 Subject: [PATCH] More block manager fixes/optimizations by Pseudonym. This should be the last one (but you never know :p ). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@703 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/BaseblockEx.h | 6 +- pcsx2/x86/iR3000A.cpp | 140 +++++++++------------------ pcsx2/x86/ix86-32/iR5900-32.cpp | 164 +++++++++++--------------------- 3 files changed, 103 insertions(+), 207 deletions(-) diff --git a/pcsx2/x86/BaseblockEx.h b/pcsx2/x86/BaseblockEx.h index 7c00110589..7d94533af7 100644 --- a/pcsx2/x86/BaseblockEx.h +++ b/pcsx2/x86/BaseblockEx.h @@ -30,13 +30,9 @@ struct BASEBLOCK { u32 m_pFnptr; - u32 startpc : 30; - u32 uType : 2; const __inline uptr GetFnptr() const { return m_pFnptr; } void __inline SetFnptr( uptr ptr ) { m_pFnptr = ptr; } - const __inline uptr GetStartPC() const { return startpc << 2; } - void __inline SetStartPC( uptr pc ) { startpc = pc >> 2; } }; // extra block info (only valid for start of fn) @@ -107,4 +103,4 @@ static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000], hwlut[page] = 0u - (pagebase << 16); } -C_ASSERT( sizeof(BASEBLOCK) == 8 ); \ No newline at end of file +C_ASSERT( sizeof(BASEBLOCK) == 4 ); \ No newline at end of file diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index aed618e8e8..ca4fdad790 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -639,7 +639,14 @@ static __declspec(naked) void iopJITCompile() mov ebx, esi shr esi, 16 mov ecx, dword ptr [psxRecLUT+esi*4] - jmp dword ptr [ecx+ebx*2] + jmp dword ptr [ecx+ebx] + } +} + +static __declspec(naked) void iopJITCompileInBlock() +{ + __asm { + jmp iopJITCompile } } @@ -653,9 +660,11 @@ static __declspec(naked) void iopDispatcher() mov ebx, eax shr eax, 16 mov ecx, dword ptr [psxRecLUT+eax*4] - mov eax, dword ptr [ecx+ebx*2] + mov eax, dword ptr [ecx+ebx] cmp eax, offset iopJITCompile je notcompiled + cmp eax, offset iopJITCompileInBlock + je notcompiled lea ebx, [eax-4] sub ebx, edx mov dword ptr [edx], ebx @@ -664,7 +673,7 @@ static __declspec(naked) void iopDispatcher() align 16 notcompiled: mov esi, edx - lea edi, [ecx+ebx*2] + lea edi, [ecx+ebx] push ebx call iopRecRecompile add esp, 4 @@ -677,7 +686,7 @@ notcompiled: } } -// edx - baseblock->GetStartPC() +// edx - baseblock start pc // stack - x86Ptr[0] static __declspec(naked) void iopDispatcherClear() { @@ -686,15 +695,17 @@ static __declspec(naked) void iopDispatcherClear() mov ebx, edx shr edx, 16 mov ecx, dword ptr [psxRecLUT+edx*4] - mov eax, dword ptr [ecx+ebx*2] - cmp eax, iopJITCompile + mov eax, dword ptr [ecx+ebx] + cmp eax, offset iopJITCompile + je notcompiled + cmp eax, offset iopJITCompileInBlock je notcompiled add esp, 4 jmp eax align 16 notcompiled: - lea edi, [ecx+ebx*2] + lea edi, [ecx+ebx] push ebx call iopRecRecompile add esp, 4 @@ -718,18 +729,15 @@ static __declspec(naked) void iopDispatcherReg() mov ebx, eax shr eax, 16 mov ecx, dword ptr [psxRecLUT+eax*4] - jmp dword ptr [ecx+ebx*2] + jmp dword ptr [ecx+ebx] } } #endif // _MSC_VER static void iopClearRecLUT(BASEBLOCK* base, int count) { - for (int i = 0; i < count; i++) { + for (int i = 0; i < count; i++) base[i].SetFnptr((uptr)iopJITCompile); - base[i].SetStartPC(0); - base[i].uType = 0; - } } static void recExecute() @@ -815,7 +823,7 @@ u32 psxRecClearMem(u32 pc) pblock = PSX_GETBLOCK(pc); // if ((u8*)iopJITCompile == pblock->GetFnptr()) - if (!pblock->GetStartPC()) + if (pblock->GetFnptr() == (uptr)iopJITCompile) return 4; pc = HWADDR(pc); @@ -848,7 +856,7 @@ u32 psxRecClearMem(u32 pc) // Actually, if we want to do this at all maybe keeping a hash // table of const jumps and modifying the jumps straight from // here is the way to go. -#if 0 +#if 1 // there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which MOV32ItoR(EDX, pexblock->startpc); assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff); @@ -1020,18 +1028,6 @@ void rpsxBREAK() //if (!psxbranch) psxbranch = 2; } -u32 psxRecompileCodeSafe(u32 temppc) -{ - BASEBLOCK* pblock = PSX_GETBLOCK(temppc); - - if( pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() ) { - if( psxpc == pblock->GetStartPC() ) - return 0; - } - - return 1; -} - void psxRecompileNextInstruction(int delayslot) { static u8 s_bFlushReg = 1; @@ -1039,9 +1035,11 @@ void psxRecompileNextInstruction(int delayslot) BASEBLOCK* pblock = PSX_GETBLOCK(psxpc); // need *ppblock != s_pCurBlock because of branches - if( pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() ) + if (HWADDR(psxpc) != s_pCurBlockEx->startpc + && pblock->GetFnptr() != (uptr)iopJITCompile + && pblock->GetFnptr() != (uptr)iopJITCompileInBlock ) { - if( !delayslot && psxpc == pblock->GetStartPC() ) + if(!delayslot) { // code already in place, so jump to it and exit recomp assert( recBlocks.Get(HWADDR(psxpc))->startpc == HWADDR(psxpc) ); @@ -1052,23 +1050,8 @@ void psxRecompileNextInstruction(int delayslot) psxbranch = 3; return; } - else - { - if( !(delayslot && pblock->GetStartPC() == psxpc) ) - { - u8* oldX86 = x86Ptr[0]; - //__Log("clear block %x\n", pblock->GetStartPC()); - psxRecClearMem(psxpc); - x86Ptr[0] = oldX86; - if( delayslot ) - SysPrintf("delay slot %x\n", psxpc); - } - } } - if( delayslot ) - pblock->uType = BLOCKTYPE_DELAYSLOT; - #ifdef _DEBUG MOV32ItoR(EAX, psxpc); #endif @@ -1178,17 +1161,11 @@ void iopRecRecompile(u32 startpc) s_pCurBlock = PSX_GETBLOCK(startpc); - if( s_pCurBlock->GetFnptr() != (uptr)iopJITCompile ) { - // clear if already taken - assert( s_pCurBlock->GetStartPC() < startpc ); - psxRecClearMem(startpc); - } + assert(s_pCurBlock->GetFnptr() == (uptr)iopJITCompile + || s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock); - if( s_pCurBlock->GetStartPC() == startpc ) { - s_pCurBlockEx = recBlocks.Get(HWADDR(startpc)); - assert( s_pCurBlockEx->startpc == HWADDR(startpc) ); - } - else { + s_pCurBlockEx = recBlocks.Get(HWADDR(startpc)); + if(!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)) { s_pCurBlockEx = recBlocks.New(HWADDR(startpc)); if( s_pCurBlockEx == NULL ) { @@ -1204,7 +1181,6 @@ void iopRecRecompile(u32 startpc) psxbranch = 0; - s_pCurBlock->SetStartPC(startpc); s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] ); s_psxBlockCycles = 0; @@ -1227,14 +1203,13 @@ void iopRecRecompile(u32 startpc) while(1) { BASEBLOCK* pblock = PSX_GETBLOCK(i); - if( pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() ) { - - if( i == pblock->GetStartPC() ) { - // branch = 3 - willbranch3 = 1; - s_nEndBlock = i; - break; - } + if (i != startpc + && pblock->GetFnptr() != (uptr)iopJITCompile + && pblock->GetFnptr() != (uptr)iopJITCompileInBlock) { + // branch = 3 + willbranch3 = 1; + s_nEndBlock = i; + break; } psxRegs.code = iopMemRead32(i); @@ -1330,25 +1305,10 @@ StartRecomp: s_pCurBlockEx->size = (psxpc-startpc)>>2; for(i = 1; i < (u32)s_pCurBlockEx->size; ++i) { - if (!s_pCurBlock[i].GetStartPC()) - s_pCurBlock[i].SetStartPC( startpc ); + if (s_pCurBlock[i].GetFnptr() == (uptr)iopJITCompile) + s_pCurBlock[i].SetFnptr((uptr)iopJITCompileInBlock); } -// This is just wrong, right? How can setting a jump to any point in this block -// to jump to the beginning of the block possibly be right? -#ifdef ZERO_TOLERANCE - for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) { - s_pCurBlock[i].SetFnptr( s_pCurBlock->GetFnptr() ); - s_pCurBlock[i].SetStartPC( p_CurBlock->startpc ); - } - - // don't overwrite if delay slot - if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) { - s_pCurBlock[i].SetFnptr(0); - s_pCurBlock[i].SetStartPC(0); - } -#endif - if( !(psxpc&0x10000000) ) g_psxMaxRecMem = std::max( (psxpc&~0xa0000000), g_psxMaxRecMem ); @@ -1394,28 +1354,16 @@ StartRecomp: assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg ); if( !psxbranch ) { - BASEBLOCK* pcurblock = s_pCurBlock; - u32 nEndBlock = s_nEndBlock; - s_pCurBlock = PSX_GETBLOCK(psxpc); assert( ptr != NULL ); - - if( s_pCurBlock->GetStartPC() != psxpc ){ + s_pCurBlock = PSX_GETBLOCK(psxpc); + + if (s_pCurBlock->GetFnptr() == (uptr)iopJITCompile + || s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock){ iopRecRecompile(psxpc); } - // could have reset - if( pcurblock->GetStartPC() == startpc ) { - assert( pcurblock->GetFnptr() != (uptr)iopJITCompile ); - assert( s_pCurBlock->GetStartPC() == nEndBlock ); - *ptr = (u32)((uptr)s_pCurBlock->GetFnptr() - ( (uptr)ptr + 4 )); - } - else { - iopRecRecompile(startpc); - assert( pcurblock->GetFnptr() != (uptr)iopJITCompile ); - } + *ptr = s_pCurBlock->GetFnptr() - ((u32)ptr + 4); } - else - assert( s_pCurBlock->GetFnptr() != (uptr)iopJITCompile ); } R3000Acpu psxRec = { diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 1b13c9b094..89db8a8f41 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -647,7 +647,14 @@ static __declspec(naked) void JITCompile() mov ebx, esi shr esi, 16 mov ecx, dword ptr [recLUT+esi*4] - jmp dword ptr [ecx+ebx*2] + jmp dword ptr [ecx+ebx] + } +} + +static __declspec(naked) void JITCompileInBlock() +{ + __asm { + jmp JITCompile } } @@ -661,10 +668,12 @@ static __naked void Dispatcher() mov ebx, eax shr eax, 16 mov ecx, dword ptr [recLUT+eax*4] - mov eax, dword ptr [ecx+ebx*2] + mov eax, dword ptr [ecx+ebx] cmp eax, offset JITCompile je notcompiled + cmp eax, offset JITCompileInBlock + je notcompiled lea ebx, [eax-4] sub ebx, edx mov dword ptr [edx], ebx @@ -673,7 +682,7 @@ static __naked void Dispatcher() align 16 notcompiled: mov esi, edx - lea edi, [ecx+ebx*2] + lea edi, [ecx+ebx] push ebx call recRecompile add esp, 4 @@ -686,7 +695,7 @@ notcompiled: } } -// edx - baseblock->GetStartPC() +// edx - block start pc // stack - x86Ptr[0] static __naked void DispatcherClear() { @@ -695,16 +704,18 @@ static __naked void DispatcherClear() mov ebx, edx shr edx, 16 mov ecx, dword ptr [recLUT+edx*4] - mov eax, dword ptr [ecx+ebx*2] + mov eax, dword ptr [ecx+ebx] cmp eax, offset JITCompile je notcompiled + cmp eax, offset JITCompileInBlock + je notcompiled add esp, 4 jmp eax align 16 notcompiled: - lea edi, [ecx+ebx*2] + lea edi, [ecx+ebx] push ebx call recRecompile add esp, 4 @@ -728,7 +739,7 @@ static void __naked DispatcherReg() mov ebx, eax shr eax, 16 mov ecx, dword ptr [recLUT+eax*4] - jmp dword ptr [ecx+ebx*2] + jmp dword ptr [ecx+ebx] } } @@ -888,11 +899,7 @@ void recClear( u32 Addr, u32 Size ) static void ClearRecLUT(BASEBLOCK* base, int count) { for (int i = 0; i < count; i++) - { base[i].SetFnptr((uptr)JITCompile); - base[i].SetStartPC(0); - base[i].uType = 0; - } } // Returns the offset to the next instruction after any cleared memory @@ -901,9 +908,25 @@ u32 recClearMem(u32 pc) BASEBLOCKEX* pexblock; BASEBLOCK* pblock; + //why the hell? +#if 1 + // necessary since recompiler doesn't call femms/emms +#ifdef __INTEL_COMPILER + __asm__("emms"); +#else + #ifdef _MSC_VER + if (cpucaps.has3DNOWInstructionExtensions) __asm femms; + else __asm emms; + #else + if( cpucaps.has3DNOWInstructionExtensions )__asm__("femms"); + else + __asm__("emms"); + #endif +#endif +#endif + pblock = PC_GETBLOCK(pc); - // if ((u8*)JITCompile == pblock->GetFnptr()) - if (!pblock->GetStartPC()) + if (pblock->GetFnptr() == (uptr)JITCompile) return 4; pc = HWADDR(pc); @@ -929,23 +952,17 @@ u32 recClearMem(u32 pc) x86Ptr[_EmitterId_] = (u8*)pblock->GetFnptr(); jASSUME((u8*)JITCompile != x86Ptr[_EmitterId_]); - // jASSUME((u8*)JITCompileInside != x86Ptr[_EmitterId_]); + jASSUME((u8*)JITCompileInBlock != x86Ptr[_EmitterId_]); - // This is breaking things currently, rather than figure it out - // I'm just using DispatcherReg, it's fast enough now. // Actually, if we want to do this at all maybe keeping a hash // table of const jumps and modifying the jumps straight from // here is the way to go. -#if 0 + // there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which MOV32ItoR(EDX, pexblock->startpc); assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff); PUSH32I((uptr)x86Ptr[_EmitterId_]); // will be replaced by JMP32 JMP32((uptr)DispatcherClear - ((uptr)x86Ptr[_EmitterId_] + 5)); -#else - MOV32ItoM((uptr)&cpuRegs.pc, pexblock->startpc); - JMP32((uptr)DispatcherReg - ((uptr)x86Ptr[_EmitterId_] + 5)); -#endif lowerextent = min(lowerextent, pexblock->startpc); upperextent = max(upperextent, pexblock->startpc + pexblock->size * 4); @@ -1257,18 +1274,6 @@ static void checkcodefn() assert(0); } -u32 recompileCodeSafe(u32 temppc) -{ - BASEBLOCK* pblock = PC_GETBLOCK(temppc); - - if( pblock->GetFnptr() != (uptr)JITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() ) { - if( pc == pblock->GetStartPC() ) - return 0; - } - - return 1; -} - void recompileNextInstruction(int delayslot) { static u8 s_bFlushReg = 1; @@ -1277,9 +1282,9 @@ void recompileNextInstruction(int delayslot) BASEBLOCK* pblock = PC_GETBLOCK(pc); // need *ppblock != s_pCurBlock because of branches - if( pblock->GetFnptr() != (uptr)JITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() ) + if (HWADDR(pc) != s_pCurBlockEx->startpc && pblock->GetFnptr() != (uptr)JITCompile && pblock->GetFnptr() != (uptr)JITCompileInBlock) { - if( !delayslot && pc == pblock->GetStartPC() ) + if (!delayslot) { // code already in place, so jump to it and exit recomp assert( recBlocks.Get(HWADDR(pc))->startpc == HWADDR(pc) ); @@ -1290,25 +1295,8 @@ void recompileNextInstruction(int delayslot) branch = 3; return; } - else - { - if( !(delayslot && pblock->GetStartPC() == pc) ) - { - u8* oldX86 = x86Ptr[0]; - //__Log("clear block %x\n", pblock->GetStartPC()); - recClearMem(pc); - x86Ptr[0] = oldX86; - if( delayslot ) - Console::Notice("delay slot %x", params pc); - } - } } -#if 1 - if( delayslot ) - pblock->uType = BLOCKTYPE_DELAYSLOT; -#endif - s_pCode = (int *)PSM( pc ); assert(s_pCode); @@ -1506,18 +1494,12 @@ void recRecompile( const u32 startpc ) } s_pCurBlock = PC_GETBLOCK(startpc); - - if( s_pCurBlock->GetFnptr() != (uptr)JITCompile ) { - // clear if already taken - assert( s_pCurBlock->GetStartPC() < startpc ); - recClearMem(startpc); - } - if( s_pCurBlock->GetStartPC() == startpc ) { - s_pCurBlockEx = recBlocks.Get(HWADDR(startpc)); - assert( s_pCurBlockEx->startpc == HWADDR(startpc) ); - } - else { + assert(s_pCurBlock->GetFnptr() == (uptr)JITCompile + || s_pCurBlock->GetFnptr() == (uptr)JITCompileInBlock); + + s_pCurBlockEx = recBlocks.Get(HWADDR(startpc)); + if (!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)) { s_pCurBlockEx = recBlocks.New(HWADDR(startpc)); if( s_pCurBlockEx == NULL ) { @@ -1527,11 +1509,12 @@ void recRecompile( const u32 startpc ) } } + assert(s_pCurBlockEx); + x86SetPtr( recPtr ); x86Align(16); recPtr = x86Ptr[0]; s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] ); - s_pCurBlock->SetStartPC(startpc); branch = 0; @@ -1568,14 +1551,11 @@ void recRecompile( const u32 startpc ) while(1) { BASEBLOCK* pblock = PC_GETBLOCK(i); - if( pblock->GetFnptr() != (uptr)JITCompile && pblock->GetStartPC() != s_pCurBlock->GetStartPC() ) { - - if( i == pblock->GetStartPC() ) { - // branch = 3 - willbranch3 = 1; - s_nEndBlock = i; - break; - } + if (i != startpc && pblock->GetFnptr() != (uptr)JITCompile && pblock->GetFnptr() != (uptr)JITCompileInBlock) { + // branch = 3 + willbranch3 = 1; + s_nEndBlock = i; + break; } //HUH ? PSM ? whut ? THIS IS VIRTUAL ACCESS GOD DAMMIT cpuRegs.code = *(int *)PSM(i); @@ -1900,29 +1880,10 @@ StartRecomp: s_pCurBlockEx->size = (pc-startpc)>>2; for(i = 1; i < (u32)s_pCurBlockEx->size; i++) { - if (!s_pCurBlock[i].GetStartPC()) - s_pCurBlock[i].SetStartPC(startpc); + if ((uptr)JITCompile == s_pCurBlock[i].GetFnptr()) + s_pCurBlock[i].SetFnptr((uptr)JITCompileInBlock); } - -// This is just wrong, right? How can setting a jump to any point in this block -// to jump to the beginning of the block possibly be right? -pseudonym -// - Jumping to the beginning of the block will work fine so long as the registers -// are flushed first before the jump is made. Of course that's how all static -// links work so I still don't see the point of any complication for it -air -#ifdef ZERO_TOLERANCE - for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) { - s_pCurBlock[i].SetFnptr( s_pCurBlock->GetFnptr() ); - s_pCurBlock[i].SetStartPC( p_CurBlock->startpc ); - } - - // don't overwrite if delay slot - if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) { - s_pCurBlock[i].SetFnptr(0); - s_pCurBlock[i].SetStartPC(0); - } -#endif - if( !(pc&0x10000000) ) maxrecmem = std::max( (pc&~0xa0000000), maxrecmem ); @@ -1972,23 +1933,14 @@ StartRecomp: assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg ); if( !branch ) { - BASEBLOCK* pcurblock = s_pCurBlock; - u32 nEndBlock = s_nEndBlock; - s_pCurBlock = PC_GETBLOCK(pc); assert( ptr != NULL ); + s_pCurBlock = PC_GETBLOCK(pc); - if( s_pCurBlock->GetStartPC() != pc ) + if (s_pCurBlock->GetFnptr() == (uptr)JITCompile + || s_pCurBlock->GetFnptr() == (uptr)JITCompileInBlock) recRecompile(pc); - if( pcurblock->GetStartPC() == startpc ) { - assert( pcurblock->GetFnptr() != (uptr)JITCompile ); - assert( s_pCurBlock->GetStartPC() == nEndBlock ); - *ptr = s_pCurBlock->GetFnptr() - ( (u32)ptr + 4 ); - } - else { - recRecompile(startpc); - assert( pcurblock->GetFnptr() != (uptr)JITCompile ); - } + *ptr = s_pCurBlock->GetFnptr() - ((u32)ptr + 4); } }