Made some tweaks to the EErec block manager in hopes of resolving Issue 208 [Soul Calibur 3 missing gfx], involving the willbranch3 logic path.

Emitter: Added xJcc8 / xJcc32 functions, which emit a jump and return a pointer to its displacement so that the jump target can be modified afterward.

Also: minor cleanup to the recent counters fixes (nothing functionally relevant; just factored out some copy-pasted code for clarity).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1143 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-05-07 08:21:57 +00:00
parent 3202a42578
commit 91a4ee79cc
9 changed files with 143 additions and 163 deletions

View File

@ -85,16 +85,12 @@ static __forceinline void _rcntSet( int cntidx )
if (c < nextCounter) if (c < nextCounter)
{ {
nextCounter = c; nextCounter = c;
cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
if((g_nextBranchCycle - nextsCounter) > (u32)nextCounter) //Need to update on counter resets/target changes
{
g_nextBranchCycle = nextsCounter + nextCounter;
}
} }
// Ignore target diff if target is currently disabled. // Ignore target diff if target is currently disabled.
// (the overflow is all we care about since it goes first, and then the // (the overflow is all we care about since it goes first, and then the
// target will be turned on afterward). // target will be turned on afterward, and handled in the next event test).
if( counter.target & EECNT_FUTURE_TARGET ) if( counter.target & EECNT_FUTURE_TARGET )
{ {
@ -107,14 +103,9 @@ static __forceinline void _rcntSet( int cntidx )
if (c < nextCounter) if (c < nextCounter)
{ {
nextCounter = c; nextCounter = c;
cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
if((g_nextBranchCycle - nextsCounter) > (u32)nextCounter) //Need to update on counter resets/target changes
{
g_nextBranchCycle = nextsCounter + nextCounter;
}
} }
} }
//cpuSetNextBranch( nextsCounter, nextCounter );
} }

View File

@ -95,11 +95,7 @@ static void _rcntSet( int cntidx )
if(c < (u64)psxNextCounter) if(c < (u64)psxNextCounter)
{ {
psxNextCounter = (u32)c; psxNextCounter = (u32)c;
psxSetNextBranch( psxNextsCounter, psxNextCounter ); //Need to update on counter resets/target changes
if((g_psxNextBranchCycle - psxNextsCounter) > (u32)psxNextCounter) //Need to update on counter resets/target changes
{
g_psxNextBranchCycle = psxNextsCounter + psxNextCounter;
}
} }
//if((counter.mode & 0x10) == 0 || psxCounters[i].target > 0xffff) continue; //if((counter.mode & 0x10) == 0 || psxCounters[i].target > 0xffff) continue;
@ -111,11 +107,7 @@ static void _rcntSet( int cntidx )
if(c < (u64)psxNextCounter) if(c < (u64)psxNextCounter)
{ {
psxNextCounter = (u32)c; psxNextCounter = (u32)c;
psxSetNextBranch( psxNextsCounter, psxNextCounter ); //Need to update on counter resets/target changes
if((g_psxNextBranchCycle - psxNextsCounter) > (u32)psxNextCounter) //Need to update on counter resets/target changes
{
g_psxNextBranchCycle = psxNextsCounter + psxNextCounter;
}
} }
} }

View File

@ -25,43 +25,39 @@
// Common to Windows and Linux // Common to Windows and Linux
extern "C" extern "C"
{ {
// acoroutine.S // acoroutine.S
void so_call(coroutine_t coro); void so_call(coroutine_t coro);
void so_resume(void); void so_resume(void);
void so_exit(void); void so_exit(void);
// I can't find where the Linux recRecompile is defined. Is it used anymore? void recRecompile( u32 startpc );
// If so, namespacing might break it. :/ (air)
void recRecompile( u32 startpc );
// aR3000A.S // aR3000A.S
void iopRecRecompile(u32 startpc); void iopRecRecompile(u32 startpc);
} }
// Linux specific
#ifdef __LINUX__ #ifdef __LINUX__
PCSX2_ALIGNED16( u8 _xmm_backup[16*2] ); PCSX2_ALIGNED16_EXTERN( u8 _xmm_backup[16*2] );
PCSX2_ALIGNED16( u8 _mmx_backup[8*4] ); PCSX2_ALIGNED16_EXTERN( u8 _mmx_backup[8*4] );
extern "C" extern "C"
{ {
// aVUzerorec.S // aVUzerorec.S
void* SuperVUGetProgram(u32 startpc, int vuindex); void* SuperVUGetProgram(u32 startpc, int vuindex);
void SuperVUCleanupProgram(u32 startpc, int vuindex); void SuperVUCleanupProgram(u32 startpc, int vuindex);
void svudispfn(); void svudispfn();
// aR3000A.S // aR3000A.S
void iopJITCompile(); void iopJITCompile();
void iopJITCompileInBlock(); void iopJITCompileInBlock();
void iopDispatcherReg(); void iopDispatcherReg();
// aR5900-32.S // aR5900-32.S
void JITCompile(); void JITCompile();
void JITCompileInBlock(); void JITCompileInBlock();
void DispatcherReg(); void DispatcherReg();
} }
#endif #endif
#endif
#endif

View File

@ -90,12 +90,13 @@ BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip)
return &blocks[imin]; return &blocks[imin];
} }
void BaseBlocks::Link(u32 pc, uptr jumpptr) void BaseBlocks::Link(u32 pc, s32* jumpptr)
{ {
BASEBLOCKEX *targetblock = Get(pc); BASEBLOCKEX *targetblock = Get(pc);
if (targetblock && targetblock->startpc == pc) if (targetblock && targetblock->startpc == pc)
*(u32*)jumpptr = targetblock->fnptr - (jumpptr + 4); *jumpptr = (s32)(targetblock->fnptr - (sptr)(jumpptr + 1));
else else
*(u32*)jumpptr = recompiler - (jumpptr + 4); *jumpptr = (s32)(recompiler - (sptr)(jumpptr + 1));
links.insert(std::pair<u32, uptr>(pc, jumpptr)); links.insert(std::pair<u32, uptr>(pc, (uptr)jumpptr));
} }

View File

@ -115,7 +115,7 @@ public:
blocks.erase(blocks.begin() + idx); blocks.erase(blocks.begin() + idx);
} }
void Link(u32 pc, uptr jumpptr); void Link(u32 pc, s32* jumpptr);
__forceinline void Reset() __forceinline void Reset()
{ {

View File

@ -39,6 +39,8 @@
#include "SamplProf.h" #include "SamplProf.h"
#include "NakedAsm.h" #include "NakedAsm.h"
using namespace x86Emitter;
extern u32 g_psxNextBranchCycle; extern u32 g_psxNextBranchCycle;
extern void psxBREAK(); extern void psxBREAK();
extern void zeroEx(); extern void zeroEx();
@ -57,11 +59,51 @@ uptr psxhwLUT[0x10000];
// R3000A statics // R3000A statics
int psxreclog = 0; int psxreclog = 0;
#ifdef _MSC_VER
static u32 g_temp;
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
// Naked (no compiler-generated prologue/epilogue) entry point: recompiles the block at
// psxRegs.pc via iopRecRecompile, then jumps through the psxRecLUT entry for that pc.
// Control leaves through the final jmp, so this function never returns to its caller directly.
static __declspec(naked) void iopJITCompile()
{
__asm {
// esi = current IOP pc; it is also the single stack argument passed to iopRecRecompile.
mov esi, dword ptr [psxRegs.pc]
push esi
call iopRecRecompile
// Caller pops the 4-byte argument.
add esp, 4
// NOTE(review): esi is read again after the call, so this relies on iopRecRecompile
// preserving esi (callee-saved under the usual 32-bit x86 conventions) -- confirm.
mov ebx, esi
// Upper 16 bits of the pc select the psxRecLUT entry (4 bytes per entry).
shr esi, 16
mov ecx, dword ptr [psxRecLUT+esi*4]
// Jump through the pointer stored at (LUT entry value + full pc).
jmp dword ptr [ecx+ebx]
}
}
// Naked trampoline: does nothing except tail-jump to iopJITCompile, giving callers a
// second, distinct address that has identical behavior.
static __declspec(naked) void iopJITCompileInBlock()
{
__asm {
jmp iopJITCompile
}
}
// called when jumping to variable psxpc address
// Naked dispatcher: reads psxRegs.pc and jumps through the psxRecLUT entry for it.
// Unlike iopJITCompile, no recompile call is made here first.
static __declspec(naked) void iopDispatcherReg()
{
__asm {
// eax = current IOP pc; ebx keeps the full pc for the final address computation.
mov eax, dword ptr [psxRegs.pc]
mov ebx, eax
// Upper 16 bits of the pc select the psxRecLUT entry (4 bytes per entry).
shr eax, 16
mov ecx, dword ptr [psxRecLUT+eax*4]
// Jump through the pointer stored at (LUT entry value + full pc).
jmp dword ptr [ecx+ebx]
}
}
#endif // _MSC_VER
static u8 *recMem = NULL; // the recompiled blocks will be here static u8 *recMem = NULL; // the recompiled blocks will be here
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here static BASEBLOCK *recROM1 = NULL; // also here
void iopJITCompile();
static BaseBlocks recBlocks((uptr)iopJITCompile); static BaseBlocks recBlocks((uptr)iopJITCompile);
static u8 *recPtr = NULL; static u8 *recPtr = NULL;
u32 psxpc; // recompiler psxpc u32 psxpc; // recompiler psxpc
@ -596,46 +638,6 @@ static void recShutdown()
u32 g_psxlastpc = 0; u32 g_psxlastpc = 0;
#ifdef _MSC_VER
static u32 g_temp;
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static __declspec(naked) void iopJITCompile()
{
__asm {
mov esi, dword ptr [psxRegs.pc]
push esi
call iopRecRecompile
add esp, 4
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [psxRecLUT+esi*4]
jmp dword ptr [ecx+ebx]
}
}
static __declspec(naked) void iopJITCompileInBlock()
{
__asm {
jmp iopJITCompile
}
}
// called when jumping to variable psxpc address
static __declspec(naked) void iopDispatcherReg()
{
__asm {
mov eax, dword ptr [psxRegs.pc]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [psxRecLUT+eax*4]
jmp dword ptr [ecx+ebx]
}
}
#endif // _MSC_VER
static void iopClearRecLUT(BASEBLOCK* base, int count) static void iopClearRecLUT(BASEBLOCK* base, int count)
{ {
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
@ -778,7 +780,6 @@ void psxSetBranchReg(u32 reg)
void psxSetBranchImm( u32 imm ) void psxSetBranchImm( u32 imm )
{ {
u32* ptr;
psxbranch = 1; psxbranch = 1;
assert( imm ); assert( imm );
@ -787,16 +788,9 @@ void psxSetBranchImm( u32 imm )
_psxFlushCall(FLUSH_EVERYTHING); _psxFlushCall(FLUSH_EVERYTHING);
iPsxBranchTest(imm, imm <= psxpc); iPsxBranchTest(imm, imm <= psxpc);
ptr = JMP32(0); recBlocks.Link(HWADDR(imm), xJcc32());
recBlocks.Link(HWADDR(imm), (uptr)ptr);
} }
//fixme : this is all a huge hack, we base the counter advancements on the average an opcode should take (wtf?)
// If that wasn't bad enough we have default values like 9/8 which will get cast to int later
// (yeah, that means all sync code couldn't have worked to begin with)
// So for now these are new settings that work.
// (rama)
static __forceinline u32 psxScaleBlockCycles() static __forceinline u32 psxScaleBlockCycles()
{ {
return s_psxBlockCycles * (Config.Hacks.IOPCycleDouble ? 2 : 1); return s_psxBlockCycles * (Config.Hacks.IOPCycleDouble ? 2 : 1);
@ -1139,8 +1133,7 @@ StartRecomp:
assert( psxpc == s_nEndBlock ); assert( psxpc == s_nEndBlock );
_psxFlushCall(FLUSH_EVERYTHING); _psxFlushCall(FLUSH_EVERYTHING);
MOV32ItoM((uptr)&psxRegs.pc, psxpc); MOV32ItoM((uptr)&psxRegs.pc, psxpc);
u32 *ptr = JMP32(0); recBlocks.Link(HWADDR(s_nEndBlock), xJcc32() );
recBlocks.Link(HWADDR(s_nEndBlock), (uptr)ptr);
psxbranch = 3; psxbranch = 3;
} }
} }

View File

@ -484,15 +484,6 @@ void recResetEE( void )
recLUT_SetPage(recLUT, hwLUT, recROM1, 0xa000, i, i - 0x1e00); recLUT_SetPage(recLUT, hwLUT, recROM1, 0xa000, i, i - 0x1e00);
} }
// drk||Raziel says this is useful but I'm not sure why. Something to do with forward jumps.
// Anyways, it causes random crashing for some reasom, possibly because of memory
// corrupition elsewhere in the recs. I can't reproduce the problem here though,
// so a fix will have to wait until later. -_- (air)
//x86SetPtr(recMem+REC_CACHEMEM);
//dyna_block_discard_recmem=(u8*)x86Ptr;
//JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr + 5 ));
x86SetPtr(recMem); x86SetPtr(recMem);
recPtr = recMem; recPtr = recMem;
@ -725,7 +716,7 @@ void recBREAK( void ) {
} } } // end namespace R5900::Dynarec::OpcodeImpl } } } // end namespace R5900::Dynarec::OpcodeImpl
// Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default. // Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default.
static void ClearRecLUT(BASEBLOCK* base, int count) static __releaseinline void ClearRecLUT(BASEBLOCK* base, int count)
{ {
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
base[i].SetFnptr((uptr)JITCompile); base[i].SetFnptr((uptr)JITCompile);
@ -967,7 +958,7 @@ void iFlushCall(int flushtype)
//} //}
u32 eeScaleBlockCycles() static u32 scaleBlockCycles_helper()
{ {
// Note: s_nBlockCycles is 3 bit fixed point. Divide by 8 when done! // Note: s_nBlockCycles is 3 bit fixed point. Divide by 8 when done!
@ -999,12 +990,6 @@ u32 eeScaleBlockCycles()
scalarHigh = 7; scalarHigh = 7;
break; break;
case 3: // Sync hack x3
scalarLow = 10;
scalarMid = 19;
scalarHigh = 10;
break;
jNO_DEFAULT jNO_DEFAULT
} }
@ -1016,19 +1001,14 @@ u32 eeScaleBlockCycles()
return temp >> (3+2); return temp >> (3+2);
} }
static void iBranch(u32 newpc, int type) static u32 eeScaleBlockCycles()
{ {
u32* ptr; // Ensures block cycles count is never less than 1:
u32 retval = scaleBlockCycles_helper();
MOV32ItoM((uptr)&cpuRegs.pc, newpc); return (retval < 1) ? 1 : retval;
if (type == 0)
ptr = JMP32(0);
else if (type == 1)
ptr = JS32(0);
recBlocks.Link(HWADDR(newpc), (uptr)ptr);
} }
// Generates dynarec code for Event tests followed by a block dispatch (branch). // Generates dynarec code for Event tests followed by a block dispatch (branch).
// Parameters: // Parameters:
// newpc - address to jump to at the end of the block. If newpc == 0xffffffff then // newpc - address to jump to at the end of the block. If newpc == 0xffffffff then
@ -1058,20 +1038,23 @@ static void iBranchTest(u32 newpc, bool noDispatch)
xCMP(eax, ptr32[&cpuRegs.cycle]); xCMP(eax, ptr32[&cpuRegs.cycle]);
xCMOVL(eax, ptr32[&cpuRegs.cycle]); xCMOVL(eax, ptr32[&cpuRegs.cycle]);
xMOV(ptr32[&cpuRegs.cycle], eax); xMOV(ptr32[&cpuRegs.cycle], eax);
RET();
} else { } else {
MOV32MtoR(EAX, (uptr)&cpuRegs.cycle); xMOV(eax, &cpuRegs.cycle);
ADD32ItoR(EAX, eeScaleBlockCycles()); xADD(eax, eeScaleBlockCycles());
MOV32RtoM((uptr)&cpuRegs.cycle, EAX); // update cycles xMOV(&cpuRegs.cycle, eax); // update cycles
SUB32MtoR(EAX, (uptr)&g_nextBranchCycle); xSUB(eax, &g_nextBranchCycle);
if (!noDispatch) { if (!noDispatch)
{
if (newpc == 0xffffffff) if (newpc == 0xffffffff)
JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 )); xJS( DispatcherReg );
else else
iBranch(newpc, 1); {
xMOV( ptr32[&cpuRegs.pc], newpc );
recBlocks.Link( HWADDR(newpc), xJcc32( Jcc_Signed ) );
}
} }
RET();
} }
xRET();
} }
static void checkcodefn() static void checkcodefn()
@ -1170,7 +1153,7 @@ void recompileNextInstruction(int delayslot)
return; return;
} }
} }
//If thh COP0 DIE bit is disabled, double the cycles. Happens rarely. //If the COP0 DIE bit is disabled, double the cycles. Happens rarely.
s_nBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1)); s_nBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1));
opcode.recompile(); opcode.recompile();
@ -1235,14 +1218,14 @@ void badespfn() {
void __fastcall dyna_block_discard(u32 start,u32 sz) void __fastcall dyna_block_discard(u32 start,u32 sz)
{ {
DevCon::WriteLn("dyna_block_discard .. start: %08X count=%d", params start,sz); DevCon::WriteLn("dyna_block_discard .. start: %08X count=%d", params start,sz);
Cpu->Clear(start, sz); recClear(start, sz);
} }
void __fastcall dyna_page_reset(u32 start,u32 sz) void __fastcall dyna_page_reset(u32 start,u32 sz)
{ {
DevCon::WriteLn("dyna_page_reset .. start=%08X size=%d", params start,sz*4); DevCon::WriteLn("dyna_page_reset .. start=%08X size=%d", params start,sz*4);
Cpu->Clear(start & ~0xfffUL, 0x400); recClear(start & ~0xfffUL, 0x400);
manual_counter[start >> 12]++; manual_counter[start >> 12]++;
mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL); mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL);
} }
@ -1657,10 +1640,14 @@ StartRecomp:
// performance reasons. // performance reasons.
int numinsts = (pc - startpc) / 4; int numinsts = (pc - startpc) / 4;
if( numinsts > 12 ) if( numinsts > 6 )
iBranchTest(pc); iBranchTest(pc);
else else
iBranch(pc,0); // unconditional static link {
xMOV( ptr32[&cpuRegs.pc], pc );
xADD( ptr32[&cpuRegs.cycle], eeScaleBlockCycles() );
recBlocks.Link( HWADDR(pc), xJcc32() );
}
} }
} }

View File

@ -212,6 +212,8 @@ namespace x86Emitter
// JMP / Jcc Instructions! // JMP / Jcc Instructions!
extern void xJcc( JccComparisonType comparison, const void* target ); extern void xJcc( JccComparisonType comparison, const void* target );
extern s8* xJcc8( JccComparisonType comparison=Jcc_Unconditional, s8 displacement=0 );
extern s32* xJcc32( JccComparisonType comparison=Jcc_Unconditional, s32 displacement=0 );
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// Conditional jumps to fixed targets. // Conditional jumps to fixed targets.

View File

@ -73,6 +73,34 @@ xSmartJump::~xSmartJump()
m_baseptr = NULL; // just in case (sometimes helps in debugging too) m_baseptr = NULL; // just in case (sometimes helps in debugging too)
} }
// ------------------------------------------------------------------------
// Emits a 32 bit jump, and returns a pointer to the 32 bit displacement.
// (displacements should be assigned relative to the end of the jump instruction,
// or in other words relative to the address (retval+1) )
__emitinline s32* xJcc32( JccComparisonType comparison, s32 displacement )
{
	// Conditional form is the two-byte opcode 0x0f, (0x80 | condition);
	// the unconditional form is the single-byte opcode 0xe9.
	if( comparison != Jcc_Unconditional )
	{
		xWrite8( 0x0f );
		xWrite8( 0x80 | comparison );
	}
	else
		xWrite8( 0xe9 );

	xWrite<s32>( displacement );

	// The displacement was the last thing written, so it occupies the s32 slot
	// immediately before the emitter's current write position.
	s32* const instructionEnd = (s32*)xGetPtr();
	return instructionEnd - 1;
}
// ------------------------------------------------------------------------
// Emits an 8 bit jump, and returns a pointer to the 8 bit displacement.
// (displacements should be assigned relative to the end of the jump instruction,
// or in other words relative to the address (retval+1) )
__emitinline s8* xJcc8( JccComparisonType comparison, s8 displacement )
{
	// Single opcode byte: 0xeb when unconditional, otherwise (0x70 | condition).
	if( comparison == Jcc_Unconditional )
		xWrite8( 0xeb );
	else
		xWrite8( 0x70 | comparison );

	xWrite<s8>( displacement );

	// The displacement byte is the last byte written, directly before the
	// emitter's current write position.
	s8* const instructionEnd = (s8*)xGetPtr();
	return instructionEnd - 1;
}
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// Writes a jump at the current x86Ptr, which targets a pre-established target address. // Writes a jump at the current x86Ptr, which targets a pre-established target address.
@ -84,7 +112,7 @@ xSmartJump::~xSmartJump()
__emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward ) __emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward )
{ {
// Calculate the potential j8 displacement first, assuming an instruction length of 2: // Calculate the potential j8 displacement first, assuming an instruction length of 2:
sptr displacement8 = (sptr)target - ((sptr)xGetPtr() + 2); sptr displacement8 = (sptr)target - (sptr)(xGetPtr() + 2);
const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0; const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0;
displacement8 -= slideVal; displacement8 -= slideVal;
@ -94,22 +122,12 @@ __emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const
if( slideForward ) jASSUME( displacement8 >= 0 ); if( slideForward ) jASSUME( displacement8 >= 0 );
if( is_s8( displacement8 ) ) if( is_s8( displacement8 ) )
{ xJcc8( comparison, displacement8 );
xWrite8( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) );
xWrite<s8>( displacement8 );
}
else else
{ {
// Perform a 32 bit jump instead. :( // Perform a 32 bit jump instead. :(
s32* bah = xJcc32( comparison );
if( comparison == Jcc_Unconditional ) *bah = (s32)target - (s32)xGetPtr();
xWrite8( 0xe9 );
else
{
xWrite8( 0x0f );
xWrite8( 0x80 | comparison );
}
xWrite<s32>( (sptr)target - ((sptr)xGetPtr() + 4) );
} }
} }