Made some tweaks to the EErec block manager in hopes of resolving Issue 208 [Soul Calibur 3 missing gfx], involving the willbranch3 logic path.

Emitter: Added xJcc8 / xJcc32 functions for doing modified jump targets.

Also: minor cleanup of the recent counters fixes (nothing functionally relevant; just factored out some copy-pasted code for clarity).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1143 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-05-07 08:21:57 +00:00
parent 3202a42578
commit 91a4ee79cc
9 changed files with 143 additions and 163 deletions

View File

@ -85,16 +85,12 @@ static __forceinline void _rcntSet( int cntidx )
if (c < nextCounter)
{
nextCounter = c;
if((g_nextBranchCycle - nextsCounter) > (u32)nextCounter) //Need to update on counter resets/target changes
{
g_nextBranchCycle = nextsCounter + nextCounter;
}
cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
}
// Ignore target diff if target is currently disabled.
// (the overflow is all we care about since it goes first, and then the
// target will be turned on afterward).
// target will be turned on afterward, and handled in the next event test).
if( counter.target & EECNT_FUTURE_TARGET )
{
@ -107,14 +103,9 @@ static __forceinline void _rcntSet( int cntidx )
if (c < nextCounter)
{
nextCounter = c;
if((g_nextBranchCycle - nextsCounter) > (u32)nextCounter) //Need to update on counter resets/target changes
{
g_nextBranchCycle = nextsCounter + nextCounter;
}
cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
}
}
//cpuSetNextBranch( nextsCounter, nextCounter );
}

View File

@ -95,11 +95,7 @@ static void _rcntSet( int cntidx )
if(c < (u64)psxNextCounter)
{
psxNextCounter = (u32)c;
if((g_psxNextBranchCycle - psxNextsCounter) > (u32)psxNextCounter) //Need to update on counter resets/target changes
{
g_psxNextBranchCycle = psxNextsCounter + psxNextCounter;
}
psxSetNextBranch( psxNextsCounter, psxNextCounter ); //Need to update on counter resets/target changes
}
//if((counter.mode & 0x10) == 0 || psxCounters[i].target > 0xffff) continue;
@ -111,11 +107,7 @@ static void _rcntSet( int cntidx )
if(c < (u64)psxNextCounter)
{
psxNextCounter = (u32)c;
if((g_psxNextBranchCycle - psxNextsCounter) > (u32)psxNextCounter) //Need to update on counter resets/target changes
{
g_psxNextBranchCycle = psxNextsCounter + psxNextCounter;
}
psxSetNextBranch( psxNextsCounter, psxNextCounter ); //Need to update on counter resets/target changes
}
}

View File

@ -25,43 +25,39 @@
// Common to Windows and Linux
extern "C"
{
// acoroutine.S
void so_call(coroutine_t coro);
void so_resume(void);
void so_exit(void);
// acoroutine.S
void so_call(coroutine_t coro);
void so_resume(void);
void so_exit(void);
// I can't find where the Linux recRecompile is defined. Is it used anymore?
// If so, namespacing might break it. :/ (air)
void recRecompile( u32 startpc );
void recRecompile( u32 startpc );
// aR3000A.S
void iopRecRecompile(u32 startpc);
// aR3000A.S
void iopRecRecompile(u32 startpc);
}
// Linux specific
#ifdef __LINUX__
PCSX2_ALIGNED16( u8 _xmm_backup[16*2] );
PCSX2_ALIGNED16( u8 _mmx_backup[8*4] );
PCSX2_ALIGNED16_EXTERN( u8 _xmm_backup[16*2] );
PCSX2_ALIGNED16_EXTERN( u8 _mmx_backup[8*4] );
extern "C"
{
// aVUzerorec.S
void* SuperVUGetProgram(u32 startpc, int vuindex);
void SuperVUCleanupProgram(u32 startpc, int vuindex);
void svudispfn();
// aR3000A.S
void iopJITCompile();
void iopJITCompileInBlock();
void iopDispatcherReg();
// aR5900-32.S
void JITCompile();
void JITCompileInBlock();
void DispatcherReg();
// aVUzerorec.S
void* SuperVUGetProgram(u32 startpc, int vuindex);
void SuperVUCleanupProgram(u32 startpc, int vuindex);
void svudispfn();
// aR3000A.S
void iopJITCompile();
void iopJITCompileInBlock();
void iopDispatcherReg();
// aR5900-32.S
void JITCompile();
void JITCompileInBlock();
void DispatcherReg();
}
#endif
#endif
#endif

View File

@ -90,12 +90,13 @@ BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip)
return &blocks[imin];
}
void BaseBlocks::Link(u32 pc, uptr jumpptr)
void BaseBlocks::Link(u32 pc, s32* jumpptr)
{
BASEBLOCKEX *targetblock = Get(pc);
if (targetblock && targetblock->startpc == pc)
*(u32*)jumpptr = targetblock->fnptr - (jumpptr + 4);
*jumpptr = (s32)(targetblock->fnptr - (sptr)(jumpptr + 1));
else
*(u32*)jumpptr = recompiler - (jumpptr + 4);
links.insert(std::pair<u32, uptr>(pc, jumpptr));
}
*jumpptr = (s32)(recompiler - (sptr)(jumpptr + 1));
links.insert(std::pair<u32, uptr>(pc, (uptr)jumpptr));
}

View File

@ -115,7 +115,7 @@ public:
blocks.erase(blocks.begin() + idx);
}
void Link(u32 pc, uptr jumpptr);
void Link(u32 pc, s32* jumpptr);
__forceinline void Reset()
{

View File

@ -39,6 +39,8 @@
#include "SamplProf.h"
#include "NakedAsm.h"
using namespace x86Emitter;
extern u32 g_psxNextBranchCycle;
extern void psxBREAK();
extern void zeroEx();
@ -57,11 +59,51 @@ uptr psxhwLUT[0x10000];
// R3000A statics
int psxreclog = 0;
#ifdef _MSC_VER
static u32 g_temp;
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
//
// Naked function: there is no compiler-generated prologue/epilogue, and it
// never returns to its caller -- it always leaves through the final indirect jmp.
static __declspec(naked) void iopJITCompile()
{
__asm {
// esi = current IOP program counter; pushed as the single stack argument.
mov esi, dword ptr [psxRegs.pc]
push esi
call iopRecRecompile
// Caller pops the argument pushed above.
add esp, 4
// esi is reused after the call, so this relies on iopRecRecompile preserving it.
mov ebx, esi
shr esi, 16
// ecx = 32-bit table entry at psxRecLUT + (pc >> 16) * 4
mov ecx, dword ptr [psxRecLUT+esi*4]
// Jump through the pointer stored at (ecx + pc).
jmp dword ptr [ecx+ebx]
}
}
// Naked trampoline: transfers control straight to iopJITCompile with a plain jmp
// (no call, so nothing is pushed and control does not come back here).
static __declspec(naked) void iopJITCompileInBlock()
{
__asm {
jmp iopJITCompile
}
}
// called when jumping to variable psxpc address
//
// Performs the same table lookup as the tail of iopJITCompile, but without
// recompiling first: reads psxRegs.pc and jumps through the looked-up pointer.
// Naked function; exits only via the indirect jmp.
static __declspec(naked) void iopDispatcherReg()
{
__asm {
// eax = current IOP program counter; ebx keeps the full pc for the final index.
mov eax, dword ptr [psxRegs.pc]
mov ebx, eax
shr eax, 16
// ecx = 32-bit table entry at psxRecLUT + (pc >> 16) * 4
mov ecx, dword ptr [psxRecLUT+eax*4]
// Jump through the pointer stored at (ecx + pc).
jmp dword ptr [ecx+ebx]
}
}
#endif // _MSC_VER
static u8 *recMem = NULL; // the recompiled blocks will be here
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
void iopJITCompile();
static BaseBlocks recBlocks((uptr)iopJITCompile);
static u8 *recPtr = NULL;
u32 psxpc; // recompiler psxpc
@ -596,46 +638,6 @@ static void recShutdown()
u32 g_psxlastpc = 0;
#ifdef _MSC_VER
static u32 g_temp;
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static __declspec(naked) void iopJITCompile()
{
__asm {
mov esi, dword ptr [psxRegs.pc]
push esi
call iopRecRecompile
add esp, 4
mov ebx, esi
shr esi, 16
mov ecx, dword ptr [psxRecLUT+esi*4]
jmp dword ptr [ecx+ebx]
}
}
static __declspec(naked) void iopJITCompileInBlock()
{
__asm {
jmp iopJITCompile
}
}
// called when jumping to variable psxpc address
static __declspec(naked) void iopDispatcherReg()
{
__asm {
mov eax, dword ptr [psxRegs.pc]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [psxRecLUT+eax*4]
jmp dword ptr [ecx+ebx]
}
}
#endif // _MSC_VER
static void iopClearRecLUT(BASEBLOCK* base, int count)
{
for (int i = 0; i < count; i++)
@ -778,7 +780,6 @@ void psxSetBranchReg(u32 reg)
void psxSetBranchImm( u32 imm )
{
u32* ptr;
psxbranch = 1;
assert( imm );
@ -787,16 +788,9 @@ void psxSetBranchImm( u32 imm )
_psxFlushCall(FLUSH_EVERYTHING);
iPsxBranchTest(imm, imm <= psxpc);
ptr = JMP32(0);
recBlocks.Link(HWADDR(imm), (uptr)ptr);
recBlocks.Link(HWADDR(imm), xJcc32());
}
//fixme : this is all a huge hack, we base the counter advancements on the average an opcode should take (wtf?)
// If that wasn't bad enough we have default values like 9/8 which will get cast to int later
// (yeah, that means all sync code couldn't have worked to begin with)
// So for now these are new settings that work.
// (rama)
static __forceinline u32 psxScaleBlockCycles()
{
return s_psxBlockCycles * (Config.Hacks.IOPCycleDouble ? 2 : 1);
@ -1139,8 +1133,7 @@ StartRecomp:
assert( psxpc == s_nEndBlock );
_psxFlushCall(FLUSH_EVERYTHING);
MOV32ItoM((uptr)&psxRegs.pc, psxpc);
u32 *ptr = JMP32(0);
recBlocks.Link(HWADDR(s_nEndBlock), (uptr)ptr);
recBlocks.Link(HWADDR(s_nEndBlock), xJcc32() );
psxbranch = 3;
}
}

View File

@ -484,15 +484,6 @@ void recResetEE( void )
recLUT_SetPage(recLUT, hwLUT, recROM1, 0xa000, i, i - 0x1e00);
}
// drk||Raziel says this is useful but I'm not sure why. Something to do with forward jumps.
// Anyways, it causes random crashing for some reasom, possibly because of memory
// corrupition elsewhere in the recs. I can't reproduce the problem here though,
// so a fix will have to wait until later. -_- (air)
//x86SetPtr(recMem+REC_CACHEMEM);
//dyna_block_discard_recmem=(u8*)x86Ptr;
//JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr + 5 ));
x86SetPtr(recMem);
recPtr = recMem;
@ -725,7 +716,7 @@ void recBREAK( void ) {
} } } // end namespace R5900::Dynarec::OpcodeImpl
// Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default.
static void ClearRecLUT(BASEBLOCK* base, int count)
static __releaseinline void ClearRecLUT(BASEBLOCK* base, int count)
{
for (int i = 0; i < count; i++)
base[i].SetFnptr((uptr)JITCompile);
@ -967,7 +958,7 @@ void iFlushCall(int flushtype)
//}
u32 eeScaleBlockCycles()
static u32 scaleBlockCycles_helper()
{
// Note: s_nBlockCycles is 3 bit fixed point. Divide by 8 when done!
@ -999,12 +990,6 @@ u32 eeScaleBlockCycles()
scalarHigh = 7;
break;
case 3: // Sync hack x3
scalarLow = 10;
scalarMid = 19;
scalarHigh = 10;
break;
jNO_DEFAULT
}
@ -1016,19 +1001,14 @@ u32 eeScaleBlockCycles()
return temp >> (3+2);
}
static void iBranch(u32 newpc, int type)
static u32 eeScaleBlockCycles()
{
u32* ptr;
MOV32ItoM((uptr)&cpuRegs.pc, newpc);
if (type == 0)
ptr = JMP32(0);
else if (type == 1)
ptr = JS32(0);
recBlocks.Link(HWADDR(newpc), (uptr)ptr);
// Ensures block cycles count is never less than 1:
u32 retval = scaleBlockCycles_helper();
return (retval < 1) ? 1 : retval;
}
// Generates dynarec code for Event tests followed by a block dispatch (branch).
// Parameters:
// newpc - address to jump to at the end of the block. If newpc == 0xffffffff then
@ -1058,20 +1038,23 @@ static void iBranchTest(u32 newpc, bool noDispatch)
xCMP(eax, ptr32[&cpuRegs.cycle]);
xCMOVL(eax, ptr32[&cpuRegs.cycle]);
xMOV(ptr32[&cpuRegs.cycle], eax);
RET();
} else {
MOV32MtoR(EAX, (uptr)&cpuRegs.cycle);
ADD32ItoR(EAX, eeScaleBlockCycles());
MOV32RtoM((uptr)&cpuRegs.cycle, EAX); // update cycles
SUB32MtoR(EAX, (uptr)&g_nextBranchCycle);
if (!noDispatch) {
xMOV(eax, &cpuRegs.cycle);
xADD(eax, eeScaleBlockCycles());
xMOV(&cpuRegs.cycle, eax); // update cycles
xSUB(eax, &g_nextBranchCycle);
if (!noDispatch)
{
if (newpc == 0xffffffff)
JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 ));
xJS( DispatcherReg );
else
iBranch(newpc, 1);
{
xMOV( ptr32[&cpuRegs.pc], newpc );
recBlocks.Link( HWADDR(newpc), xJcc32( Jcc_Signed ) );
}
}
RET();
}
xRET();
}
static void checkcodefn()
@ -1170,7 +1153,7 @@ void recompileNextInstruction(int delayslot)
return;
}
}
//If thh COP0 DIE bit is disabled, double the cycles. Happens rarely.
//If the COP0 DIE bit is disabled, double the cycles. Happens rarely.
s_nBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1));
opcode.recompile();
@ -1235,14 +1218,14 @@ void badespfn() {
void __fastcall dyna_block_discard(u32 start,u32 sz)
{
DevCon::WriteLn("dyna_block_discard .. start: %08X count=%d", params start,sz);
Cpu->Clear(start, sz);
recClear(start, sz);
}
void __fastcall dyna_page_reset(u32 start,u32 sz)
{
DevCon::WriteLn("dyna_page_reset .. start=%08X size=%d", params start,sz*4);
Cpu->Clear(start & ~0xfffUL, 0x400);
recClear(start & ~0xfffUL, 0x400);
manual_counter[start >> 12]++;
mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL);
}
@ -1657,10 +1640,14 @@ StartRecomp:
// performance reasons.
int numinsts = (pc - startpc) / 4;
if( numinsts > 12 )
if( numinsts > 6 )
iBranchTest(pc);
else
iBranch(pc,0); // unconditional static link
{
xMOV( ptr32[&cpuRegs.pc], pc );
xADD( ptr32[&cpuRegs.cycle], eeScaleBlockCycles() );
recBlocks.Link( HWADDR(pc), xJcc32() );
}
}
}

View File

@ -212,6 +212,8 @@ namespace x86Emitter
// JMP / Jcc Instructions!
extern void xJcc( JccComparisonType comparison, const void* target );
extern s8* xJcc8( JccComparisonType comparison=Jcc_Unconditional, s8 displacement=0 );
extern s32* xJcc32( JccComparisonType comparison=Jcc_Unconditional, s32 displacement=0 );
// ------------------------------------------------------------------------
// Conditional jumps to fixed targets.

View File

@ -73,6 +73,34 @@ xSmartJump::~xSmartJump()
m_baseptr = NULL; // just in case (sometimes helps in debugging too)
}
// ------------------------------------------------------------------------
// Writes a jump with a 32 bit displacement at the current emitter position and
// hands back a pointer to that displacement field so the caller can patch it later.
//
//   comparison   - Jcc_Unconditional emits JMP rel32 (opcode E9); any other value
//                  emits the two-byte Jcc rel32 form (0F 8x).
//   displacement - initial value written into the displacement field.
//
// The displacement field is the last four bytes of the instruction, so a patched
// value must be relative to the end of the instruction, which is the address
// (retval + 1).
__emitinline s32* xJcc32( JccComparisonType comparison, s32 displacement )
{
	if( comparison != Jcc_Unconditional )
	{
		// Conditional near jump: 0x0f escape byte, then 0x80 merged with the condition.
		xWrite8( 0x0f );
		xWrite8( 0x80 | comparison );
	}
	else
	{
		xWrite8( 0xe9 );
	}

	xWrite<s32>( displacement );

	// The emitter pointer now sits just past the displacement; step back one s32.
	s32* const dispSlot = ((s32*)xGetPtr()) - 1;
	return dispSlot;
}
// ------------------------------------------------------------------------
// Writes a short jump (8 bit displacement) at the current emitter position and
// hands back a pointer to that displacement byte so the caller can patch it later.
//
//   comparison   - Jcc_Unconditional emits JMP rel8 (opcode EB); any other value
//                  emits Jcc rel8 (0x70 merged with the condition).
//   displacement - initial value written into the displacement byte.
//
// The displacement is the last byte of the instruction, so a patched value must be
// relative to the end of the instruction, which is the address (retval + 1).
__emitinline s8* xJcc8( JccComparisonType comparison, s8 displacement )
{
	if( comparison == Jcc_Unconditional )
		xWrite8( 0xeb );
	else
		xWrite8( 0x70 | comparison );

	xWrite<s8>( displacement );

	// The emitter pointer now sits just past the displacement byte; step back one.
	s8* const dispSlot = (s8*)xGetPtr();
	return dispSlot - 1;
}
// ------------------------------------------------------------------------
// Writes a jump at the current x86Ptr, which targets a pre-established target address.
@ -84,7 +112,7 @@ xSmartJump::~xSmartJump()
__emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward )
{
// Calculate the potential j8 displacement first, assuming an instruction length of 2:
sptr displacement8 = (sptr)target - ((sptr)xGetPtr() + 2);
sptr displacement8 = (sptr)target - (sptr)(xGetPtr() + 2);
const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0;
displacement8 -= slideVal;
@ -94,22 +122,12 @@ __emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const
if( slideForward ) jASSUME( displacement8 >= 0 );
if( is_s8( displacement8 ) )
{
xWrite8( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) );
xWrite<s8>( displacement8 );
}
xJcc8( comparison, displacement8 );
else
{
// Perform a 32 bit jump instead. :(
if( comparison == Jcc_Unconditional )
xWrite8( 0xe9 );
else
{
xWrite8( 0x0f );
xWrite8( 0x80 | comparison );
}
xWrite<s32>( (sptr)target - ((sptr)xGetPtr() + 4) );
s32* bah = xJcc32( comparison );
*bah = (s32)target - (s32)xGetPtr();
}
}