From 91a4ee79cc03c6faf61673cb77092ff57c688f43 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 7 May 2009 08:21:57 +0000 Subject: [PATCH] Made some tweaks to the EErec block manager in hopes of resolving Issue 208 [Soul Calibur 3 missing gfx], involving the willbranch3 logic path. Emitter: Added xJcc8 / xJcc32 functions for doing modified jump targets. Also: minor cleanup to recent counters fixes (nothing relevant, just reduced out some copy-paste jobs for clarity). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1143 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Counters.cpp | 15 +---- pcsx2/IopCounters.cpp | 12 +--- pcsx2/NakedAsm.h | 52 ++++++++-------- pcsx2/x86/BaseblockEx.cpp | 11 ++-- pcsx2/x86/BaseblockEx.h | 2 +- pcsx2/x86/iR3000A.cpp | 97 ++++++++++++++---------------- pcsx2/x86/ix86-32/iR5900-32.cpp | 69 +++++++++------------ pcsx2/x86/ix86/ix86_instructions.h | 2 + pcsx2/x86/ix86/ix86_jmp.cpp | 46 +++++++++----- 9 files changed, 143 insertions(+), 163 deletions(-) diff --git a/pcsx2/Counters.cpp b/pcsx2/Counters.cpp index c75d5f7e15..5dc192f0ec 100644 --- a/pcsx2/Counters.cpp +++ b/pcsx2/Counters.cpp @@ -85,16 +85,12 @@ static __forceinline void _rcntSet( int cntidx ) if (c < nextCounter) { nextCounter = c; - - if((g_nextBranchCycle - nextsCounter) > (u32)nextCounter) //Need to update on counter resets/target changes - { - g_nextBranchCycle = nextsCounter + nextCounter; - } + cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes } // Ignore target diff if target is currently disabled. // (the overflow is all we care about since it goes first, and then the - // target will be turned on afterward). + // target will be turned on afterward, and handled in the next event test). 
if( counter.target & EECNT_FUTURE_TARGET ) { @@ -107,14 +103,9 @@ static __forceinline void _rcntSet( int cntidx ) if (c < nextCounter) { nextCounter = c; - - if((g_nextBranchCycle - nextsCounter) > (u32)nextCounter) //Need to update on counter resets/target changes - { - g_nextBranchCycle = nextsCounter + nextCounter; - } + cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes } } - //cpuSetNextBranch( nextsCounter, nextCounter ); } diff --git a/pcsx2/IopCounters.cpp b/pcsx2/IopCounters.cpp index c5f5034908..fb38ff3a94 100644 --- a/pcsx2/IopCounters.cpp +++ b/pcsx2/IopCounters.cpp @@ -95,11 +95,7 @@ static void _rcntSet( int cntidx ) if(c < (u64)psxNextCounter) { psxNextCounter = (u32)c; - - if((g_psxNextBranchCycle - psxNextsCounter) > (u32)psxNextCounter) //Need to update on counter resets/target changes - { - g_psxNextBranchCycle = psxNextsCounter + psxNextCounter; - } + psxSetNextBranch( psxNextsCounter, psxNextCounter ); //Need to update on counter resets/target changes } //if((counter.mode & 0x10) == 0 || psxCounters[i].target > 0xffff) continue; @@ -111,11 +107,7 @@ static void _rcntSet( int cntidx ) if(c < (u64)psxNextCounter) { psxNextCounter = (u32)c; - - if((g_psxNextBranchCycle - psxNextsCounter) > (u32)psxNextCounter) //Need to update on counter resets/target changes - { - g_psxNextBranchCycle = psxNextsCounter + psxNextCounter; - } + psxSetNextBranch( psxNextsCounter, psxNextCounter ); //Need to update on counter resets/target changes } } diff --git a/pcsx2/NakedAsm.h b/pcsx2/NakedAsm.h index 589bdc2957..e9685e9fa6 100644 --- a/pcsx2/NakedAsm.h +++ b/pcsx2/NakedAsm.h @@ -25,43 +25,39 @@ // Common to Windows and Linux extern "C" { -// acoroutine.S -void so_call(coroutine_t coro); -void so_resume(void); -void so_exit(void); + // acoroutine.S + void so_call(coroutine_t coro); + void so_resume(void); + void so_exit(void); -// I can't find where the Linux recRecompile is defined. Is it used anymore? 
-// If so, namespacing might break it. :/ (air) -void recRecompile( u32 startpc ); + void recRecompile( u32 startpc ); -// aR3000A.S -void iopRecRecompile(u32 startpc); + // aR3000A.S + void iopRecRecompile(u32 startpc); } -// Linux specific #ifdef __LINUX__ -PCSX2_ALIGNED16( u8 _xmm_backup[16*2] ); -PCSX2_ALIGNED16( u8 _mmx_backup[8*4] ); +PCSX2_ALIGNED16_EXTERN( u8 _xmm_backup[16*2] ); +PCSX2_ALIGNED16_EXTERN( u8 _mmx_backup[8*4] ); extern "C" { -// aVUzerorec.S -void* SuperVUGetProgram(u32 startpc, int vuindex); -void SuperVUCleanupProgram(u32 startpc, int vuindex); -void svudispfn(); - -// aR3000A.S -void iopJITCompile(); -void iopJITCompileInBlock(); -void iopDispatcherReg(); - -// aR5900-32.S -void JITCompile(); -void JITCompileInBlock(); -void DispatcherReg(); - + // aVUzerorec.S + void* SuperVUGetProgram(u32 startpc, int vuindex); + void SuperVUCleanupProgram(u32 startpc, int vuindex); + void svudispfn(); + + // aR3000A.S + void iopJITCompile(); + void iopJITCompileInBlock(); + void iopDispatcherReg(); + + // aR5900-32.S + void JITCompile(); + void JITCompileInBlock(); + void DispatcherReg(); } #endif -#endif +#endif diff --git a/pcsx2/x86/BaseblockEx.cpp b/pcsx2/x86/BaseblockEx.cpp index 7d8e5730e2..f660e19944 100644 --- a/pcsx2/x86/BaseblockEx.cpp +++ b/pcsx2/x86/BaseblockEx.cpp @@ -90,12 +90,13 @@ BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) return &blocks[imin]; } -void BaseBlocks::Link(u32 pc, uptr jumpptr) +void BaseBlocks::Link(u32 pc, s32* jumpptr) { BASEBLOCKEX *targetblock = Get(pc); if (targetblock && targetblock->startpc == pc) - *(u32*)jumpptr = targetblock->fnptr - (jumpptr + 4); + *jumpptr = (s32)(targetblock->fnptr - (sptr)(jumpptr + 1)); else - *(u32*)jumpptr = recompiler - (jumpptr + 4); - links.insert(std::pair(pc, jumpptr)); -} \ No newline at end of file + *jumpptr = (s32)(recompiler - (sptr)(jumpptr + 1)); + links.insert(std::pair(pc, (uptr)jumpptr)); +} + diff --git a/pcsx2/x86/BaseblockEx.h b/pcsx2/x86/BaseblockEx.h index 
38f97954c5..db8466b44b 100644 --- a/pcsx2/x86/BaseblockEx.h +++ b/pcsx2/x86/BaseblockEx.h @@ -115,7 +115,7 @@ public: blocks.erase(blocks.begin() + idx); } - void Link(u32 pc, uptr jumpptr); + void Link(u32 pc, s32* jumpptr); __forceinline void Reset() { diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 2aa7351333..074222ea11 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -39,6 +39,8 @@ #include "SamplProf.h" #include "NakedAsm.h" +using namespace x86Emitter; + extern u32 g_psxNextBranchCycle; extern void psxBREAK(); extern void zeroEx(); @@ -57,11 +59,51 @@ uptr psxhwLUT[0x10000]; // R3000A statics int psxreclog = 0; +#ifdef _MSC_VER + +static u32 g_temp; + +// The address for all cleared blocks. It recompiles the current pc and then +// dispatches to the recompiled block address. +static __declspec(naked) void iopJITCompile() +{ + __asm { + mov esi, dword ptr [psxRegs.pc] + push esi + call iopRecRecompile + add esp, 4 + mov ebx, esi + shr esi, 16 + mov ecx, dword ptr [psxRecLUT+esi*4] + jmp dword ptr [ecx+ebx] + } +} + +static __declspec(naked) void iopJITCompileInBlock() +{ + __asm { + jmp iopJITCompile + } +} + +// called when jumping to variable psxpc address +static __declspec(naked) void iopDispatcherReg() +{ + __asm { + mov eax, dword ptr [psxRegs.pc] + mov ebx, eax + shr eax, 16 + mov ecx, dword ptr [psxRecLUT+eax*4] + jmp dword ptr [ecx+ebx] + } +} +#endif // _MSC_VER + + static u8 *recMem = NULL; // the recompiled blocks will be here static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here static BASEBLOCK *recROM = NULL; // and here static BASEBLOCK *recROM1 = NULL; // also here -void iopJITCompile(); static BaseBlocks recBlocks((uptr)iopJITCompile); static u8 *recPtr = NULL; u32 psxpc; // recompiler psxpc @@ -596,46 +638,6 @@ static void recShutdown() u32 g_psxlastpc = 0; -#ifdef _MSC_VER - -static u32 g_temp; - -// The address for all cleared blocks. 
It recompiles the current pc and then -// dispatches to the recompiled block address. -static __declspec(naked) void iopJITCompile() -{ - __asm { - mov esi, dword ptr [psxRegs.pc] - push esi - call iopRecRecompile - add esp, 4 - mov ebx, esi - shr esi, 16 - mov ecx, dword ptr [psxRecLUT+esi*4] - jmp dword ptr [ecx+ebx] - } -} - -static __declspec(naked) void iopJITCompileInBlock() -{ - __asm { - jmp iopJITCompile - } -} - -// called when jumping to variable psxpc address -static __declspec(naked) void iopDispatcherReg() -{ - __asm { - mov eax, dword ptr [psxRegs.pc] - mov ebx, eax - shr eax, 16 - mov ecx, dword ptr [psxRecLUT+eax*4] - jmp dword ptr [ecx+ebx] - } -} -#endif // _MSC_VER - static void iopClearRecLUT(BASEBLOCK* base, int count) { for (int i = 0; i < count; i++) @@ -778,7 +780,6 @@ void psxSetBranchReg(u32 reg) void psxSetBranchImm( u32 imm ) { - u32* ptr; psxbranch = 1; assert( imm ); @@ -787,16 +788,9 @@ void psxSetBranchImm( u32 imm ) _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(imm, imm <= psxpc); - ptr = JMP32(0); - recBlocks.Link(HWADDR(imm), (uptr)ptr); + recBlocks.Link(HWADDR(imm), xJcc32()); } -//fixme : this is all a huge hack, we base the counter advancements on the average an opcode should take (wtf?) -// If that wasn't bad enough we have default values like 9/8 which will get cast to int later -// (yeah, that means all sync code couldn't have worked to begin with) -// So for now these are new settings that work. -// (rama) - static __forceinline u32 psxScaleBlockCycles() { return s_psxBlockCycles * (Config.Hacks.IOPCycleDouble ? 
2 : 1); @@ -1139,8 +1133,7 @@ StartRecomp: assert( psxpc == s_nEndBlock ); _psxFlushCall(FLUSH_EVERYTHING); MOV32ItoM((uptr)&psxRegs.pc, psxpc); - u32 *ptr = JMP32(0); - recBlocks.Link(HWADDR(s_nEndBlock), (uptr)ptr); + recBlocks.Link(HWADDR(s_nEndBlock), xJcc32() ); psxbranch = 3; } } diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 8f717c7fc1..629198bff4 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -484,15 +484,6 @@ void recResetEE( void ) recLUT_SetPage(recLUT, hwLUT, recROM1, 0xa000, i, i - 0x1e00); } - // drk||Raziel says this is useful but I'm not sure why. Something to do with forward jumps. - // Anyways, it causes random crashing for some reasom, possibly because of memory - // corrupition elsewhere in the recs. I can't reproduce the problem here though, - // so a fix will have to wait until later. -_- (air) - - //x86SetPtr(recMem+REC_CACHEMEM); - //dyna_block_discard_recmem=(u8*)x86Ptr; - //JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr + 5 )); - x86SetPtr(recMem); recPtr = recMem; @@ -725,7 +716,7 @@ void recBREAK( void ) { } } } // end namespace R5900::Dynarec::OpcodeImpl // Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default. -static void ClearRecLUT(BASEBLOCK* base, int count) +static __releaseinline void ClearRecLUT(BASEBLOCK* base, int count) { for (int i = 0; i < count; i++) base[i].SetFnptr((uptr)JITCompile); @@ -967,7 +958,7 @@ void iFlushCall(int flushtype) //} -u32 eeScaleBlockCycles() +static u32 scaleBlockCycles_helper() { // Note: s_nBlockCycles is 3 bit fixed point. Divide by 8 when done! 
@@ -999,12 +990,6 @@ u32 eeScaleBlockCycles() scalarHigh = 7; break; - case 3: // Sync hack x3 - scalarLow = 10; - scalarMid = 19; - scalarHigh = 10; - break; - jNO_DEFAULT } @@ -1016,19 +1001,14 @@ u32 eeScaleBlockCycles() return temp >> (3+2); } -static void iBranch(u32 newpc, int type) +static u32 eeScaleBlockCycles() { - u32* ptr; - - MOV32ItoM((uptr)&cpuRegs.pc, newpc); - if (type == 0) - ptr = JMP32(0); - else if (type == 1) - ptr = JS32(0); - - recBlocks.Link(HWADDR(newpc), (uptr)ptr); + // Ensures block cycles count is never less than 1: + u32 retval = scaleBlockCycles_helper(); + return (retval < 1) ? 1 : retval; } + // Generates dynarec code for Event tests followed by a block dispatch (branch). // Parameters: // newpc - address to jump to at the end of the block. If newpc == 0xffffffff then @@ -1058,20 +1038,23 @@ static void iBranchTest(u32 newpc, bool noDispatch) xCMP(eax, ptr32[&cpuRegs.cycle]); xCMOVL(eax, ptr32[&cpuRegs.cycle]); xMOV(ptr32[&cpuRegs.cycle], eax); - RET(); } else { - MOV32MtoR(EAX, (uptr)&cpuRegs.cycle); - ADD32ItoR(EAX, eeScaleBlockCycles()); - MOV32RtoM((uptr)&cpuRegs.cycle, EAX); // update cycles - SUB32MtoR(EAX, (uptr)&g_nextBranchCycle); - if (!noDispatch) { + xMOV(eax, &cpuRegs.cycle); + xADD(eax, eeScaleBlockCycles()); + xMOV(&cpuRegs.cycle, eax); // update cycles + xSUB(eax, &g_nextBranchCycle); + if (!noDispatch) + { if (newpc == 0xffffffff) - JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 )); + xJS( DispatcherReg ); else - iBranch(newpc, 1); + { + xMOV( ptr32[&cpuRegs.pc], newpc ); + recBlocks.Link( HWADDR(newpc), xJcc32( Jcc_Signed ) ); + } } - RET(); } + xRET(); } static void checkcodefn() @@ -1170,7 +1153,7 @@ void recompileNextInstruction(int delayslot) return; } } - //If thh COP0 DIE bit is disabled, double the cycles. Happens rarely. + //If the COP0 DIE bit is disabled, double the cycles. Happens rarely. 
s_nBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1)); opcode.recompile(); @@ -1235,14 +1218,14 @@ void badespfn() { void __fastcall dyna_block_discard(u32 start,u32 sz) { DevCon::WriteLn("dyna_block_discard .. start: %08X count=%d", params start,sz); - Cpu->Clear(start, sz); + recClear(start, sz); } void __fastcall dyna_page_reset(u32 start,u32 sz) { DevCon::WriteLn("dyna_page_reset .. start=%08X size=%d", params start,sz*4); - Cpu->Clear(start & ~0xfffUL, 0x400); + recClear(start & ~0xfffUL, 0x400); manual_counter[start >> 12]++; mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL); } @@ -1657,10 +1640,14 @@ StartRecomp: // performance reasons. int numinsts = (pc - startpc) / 4; - if( numinsts > 12 ) + if( numinsts > 6 ) iBranchTest(pc); else - iBranch(pc,0); // unconditional static link + { + xMOV( ptr32[&cpuRegs.pc], pc ); + xADD( ptr32[&cpuRegs.cycle], eeScaleBlockCycles() ); + recBlocks.Link( HWADDR(pc), xJcc32() ); + } } } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 230ca93a5e..889848d4f8 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -212,6 +212,8 @@ namespace x86Emitter // JMP / Jcc Instructions! extern void xJcc( JccComparisonType comparison, const void* target ); + extern s8* xJcc8( JccComparisonType comparison=Jcc_Unconditional, s8 displacement=0 ); + extern s32* xJcc32( JccComparisonType comparison=Jcc_Unconditional, s32 displacement=0 ); // ------------------------------------------------------------------------ // Conditional jumps to fixed targets. 
diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp index ab5611ce9e..fb304af18b 100644 --- a/pcsx2/x86/ix86/ix86_jmp.cpp +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -73,6 +73,34 @@ xSmartJump::~xSmartJump() m_baseptr = NULL; // just in case (sometimes helps in debugging too) } +// ------------------------------------------------------------------------ +// Emits a 32 bit jump, and returns a pointer to the 32 bit displacement. +// (displacements should be assigned relative to the end of the jump instruction, +// or in other words the address (retval+1) ) +__emitinline s32* xJcc32( JccComparisonType comparison, s32 displacement ) +{ + if( comparison == Jcc_Unconditional ) + xWrite8( 0xe9 ); + else + { + xWrite8( 0x0f ); + xWrite8( 0x80 | comparison ); + } + xWrite( displacement ); + + return ((s32*)xGetPtr()) - 1; +} + +// ------------------------------------------------------------------------ +// Emits an 8 bit jump, and returns a pointer to the 8 bit displacement. +// (displacements should be assigned relative to the end of the jump instruction, +// or in other words the address (retval+1) ) +__emitinline s8* xJcc8( JccComparisonType comparison, s8 displacement ) +{ + xWrite8( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) ); + xWrite( displacement ); + return (s8*)xGetPtr() - 1; +} // ------------------------------------------------------------------------ // Writes a jump at the current x86Ptr, which targets a pre-established target address. @@ -84,7 +112,7 @@ xSmartJump::~xSmartJump() __emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward ) { // Calculate the potential j8 displacement first, assuming an instruction length of 2: - sptr displacement8 = (sptr)target - ((sptr)xGetPtr() + 2); + sptr displacement8 = (sptr)target - (sptr)(xGetPtr() + 2); const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 
3 : 4) : 0; displacement8 -= slideVal; @@ -94,22 +122,12 @@ __emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const if( slideForward ) jASSUME( displacement8 >= 0 ); if( is_s8( displacement8 ) ) - { - xWrite8( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) ); - xWrite( displacement8 ); - } + xJcc8( comparison, displacement8 ); else { // Perform a 32 bit jump instead. :( - - if( comparison == Jcc_Unconditional ) - xWrite8( 0xe9 ); - else - { - xWrite8( 0x0f ); - xWrite8( 0x80 | comparison ); - } - xWrite( (sptr)target - ((sptr)xGetPtr() + 4) ); + s32* bah = xJcc32( comparison ); + *bah = (s32)target - (s32)xGetPtr(); } }