Rewrite immediate jumps from the block manager instead of having the dispatchers do this at execution time.

Should be a tad faster.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@782 96395faa-99c1-11dd-bbfe-3dabce05a288
sudonim1 2009-03-14 16:30:35 +00:00
parent d793d84860
commit c483f17331
7 changed files with 102 additions and 481 deletions
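The core of the change: jump patching moves from run time to compile time. Each block-to-block jump site is now recorded by the block manager, which writes the x86 rel32 displacement itself whenever a target block is created or removed. As a minimal sketch of the arithmetic involved (illustrative names and types, not the commit's code):

#include <cstdint>

// 'jumpsite' points at the 4-byte displacement operand of a JMP/Jcc;
// x86 encodes the displacement relative to the first byte after the
// operand, hence the '+ 4'.
static void PatchRel32(uintptr_t jumpsite, uintptr_t target)
{
    *(uint32_t*)jumpsite = (uint32_t)(target - (jumpsite + 4));
}

The same expression appears below as fnptr - (i->second + 4) in BaseBlocks::New and BaseBlocks::Link.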

View File

@@ -54,15 +54,11 @@ void svudispfn();
// aR3000A.S
void iopJITCompile();
void iopJITCompileInBlock();
void iopDispatcher();
void iopDispatcherClear();
void iopDispatcherReg();
// aR5900-32.S
void JITCompile();
void JITCompileInBlock();
void Dispatcher();
void DispatcherClear();
void DispatcherReg();
}

View File

@@ -19,7 +19,7 @@
#include "PrecompiledHeader.h"
#include "BaseblockEx.h"
BASEBLOCKEX* BaseBlocks::New(u32 startpc)
BASEBLOCKEX* BaseBlocks::New(u32 startpc, uptr fnptr)
{
if (blocks.size() == size)
return 0;
@@ -28,6 +28,7 @@ BASEBLOCKEX* BaseBlocks::New(u32 startpc)
std::vector<BASEBLOCKEX>::iterator iter;
memset(&newblock, 0, sizeof newblock);
newblock.startpc = startpc;
newblock.fnptr = fnptr;
int imin = 0, imax = blocks.size(), imid;
@@ -42,6 +43,11 @@ BASEBLOCKEX* BaseBlocks::New(u32 startpc)
assert(imin == blocks.size() || blocks[imin].startpc > startpc);
iter = blocks.insert(blocks.begin() + imin, newblock);
std::pair<linkiter_t, linkiter_t> range = links.equal_range(startpc);
for (linkiter_t i = range.first; i != range.second; ++i)
*(u32*)i->second = fnptr - (i->second + 4);
return &*iter;
}
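The loop over equal_range is the compile-time half of the scheme: 'links' is a multimap from target startpc to jump-site address, so every jump emitted before this block existed gets its displacement rewritten to the fresh fnptr here. A hypothetical snapshot, with invented addresses:

// links: startpc -> address of a rel32 operand waiting on that pc
//   { 0x00001200 -> 0x09A01001,
//     0x00001200 -> 0x09A0204A }  // both sites re-patched by New(0x1200, fnptr)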
@@ -63,3 +69,19 @@ int BaseBlocks::LastIndex(u32 startpc) const
return imin;
}
BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) const
{
// TODO
return 0;
}
void BaseBlocks::Link(u32 pc, uptr jumpptr)
{
BASEBLOCKEX *targetblock = Get(pc);
if (targetblock && targetblock->startpc == pc)
*(u32*)jumpptr = targetblock->fnptr - (jumpptr + 4);
else
*(u32*)jumpptr = recompiler - (jumpptr + 4);
links.insert(std::pair<u32, uptr>(pc, jumpptr));
}
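To make the jumpptr + 4 term concrete, a worked byte-level example with made-up addresses:

// A JMP32 emitted at 0x1000 encodes as:  E9 xx xx xx xx
// The emitter returns 0x1001, the address of the rel32 operand.
// Linking it to a block whose fnptr is 0x2000:
//   rel32 = 0x2000 - (0x1001 + 4) = 0x0FFB
// With no target block yet, rel32 is aimed at 'recompiler' (the JIT stub)
// instead, and the entry kept in 'links' lets New() re-patch the site
// once the block is actually compiled.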

View File

@@ -20,6 +20,8 @@
#include "PrecompiledHeader.h"
#include <vector>
#include <map>
#include <utility>
// used to keep block information
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
@@ -38,9 +40,10 @@ struct BASEBLOCK
// extra block info (only valid for start of fn)
struct BASEBLOCKEX
{
u16 size; // size in dwords
u16 dummy;
u32 startpc;
uptr fnptr;
u16 size; // size in dwords
u16 x86size;
#ifdef PCSX2_DEVBUILD
u32 visited; // number of times called
@@ -53,24 +56,30 @@ class BaseBlocks
{
private:
std::vector<BASEBLOCKEX> blocks;
// switch to a hash map later?
std::multimap<u32, uptr> links;
typedef std::multimap<u32, uptr>::iterator linkiter_t;
unsigned long size;
uptr recompiler;
public:
BaseBlocks(unsigned long max) :
size(max),
BaseBlocks(unsigned long size_, uptr recompiler_) :
size(size_),
recompiler(recompiler_),
blocks(0)
{
blocks.reserve(size);
}
BASEBLOCKEX* New(u32 startpc);
BASEBLOCKEX* New(u32 startpc, uptr fnptr);
int LastIndex (u32 startpc) const;
BASEBLOCKEX* GetByX86(uptr ip) const;
inline int Index (u32 startpc) const
{
int idx = LastIndex(startpc);
if (idx == -1 || startpc < blocks[idx].startpc ||
startpc >= blocks[idx].startpc + blocks[idx].size * 4)
blocks[idx].size && (startpc >= blocks[idx].startpc + blocks[idx].size * 4))
return -1;
else
return idx;
@@ -90,12 +99,20 @@ public:
inline void Remove(int idx)
{
u32 startpc = blocks[idx].startpc;
std::pair<linkiter_t, linkiter_t> range = links.equal_range(blocks[idx].startpc);
for (linkiter_t i = range.first; i != range.second; ++i)
*(u32*)i->second = recompiler - (i->second + 4);
// TODO: remove links from this block?
blocks.erase(blocks.begin() + idx);
}
void Link(u32 pc, uptr jumpptr);
inline void Reset()
{
blocks.clear();
links.clear();
}
};
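Taken together, a link's life cycle is: Link() patches a site either to the already-compiled target or to the recompiler stub; New() re-patches every pending site once the block exists; Remove() points them back at the stub (entries stay in the multimap, so a later recompile re-targets them again). A condensed, self-contained sketch of that behavior, with simplified types rather than the real class:

#include <cstdint>
#include <map>

struct LinkTable {
    std::multimap<uint32_t, uintptr_t> links; // startpc -> rel32 operand addr
    uintptr_t stub;                           // the JIT-compile entry point

    static void Patch(uintptr_t site, uintptr_t target) {
        *(uint32_t*)site = (uint32_t)(target - (site + 4));
    }
    void Link(uint32_t pc, uintptr_t site, uintptr_t fnptr) {
        Patch(site, fnptr ? fnptr : stub);    // direct jump, or stub fallback
        links.insert({pc, site});
    }
    void OnNew(uint32_t pc, uintptr_t fnptr) {   // block compiled: bypass stub
        auto r = links.equal_range(pc);
        for (auto i = r.first; i != r.second; ++i) Patch(i->second, fnptr);
    }
    void OnRemove(uint32_t pc) {                 // block cleared: back to stub
        auto r = links.equal_range(pc);
        for (auto i = r.first; i != r.second; ++i) Patch(i->second, stub);
    }
};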

View File

@@ -18,11 +18,6 @@
.extern RECLUT
.extern iopRecRecompile
#define BLOCKTYPE_STARTPC 4 // startpc offset
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
#define BASEBLOCK_SIZE 2 // in dwords
//////////////////////////////////////////////////////////////////////////
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
@@ -42,87 +37,6 @@ iopJITCompile:
iopJITCompileInBlock:
jmp iopJITCompile
//////////////////////////////////////////////////////////////////////////
// Recompiles the next block, and links the old block directly to it.
// This is a one-shot execution for any block which uses it. Once the block
// has been statically linked to the new block, this function will be bypassed
//
// edx - jump address to modify
.globl iopDispatcher
iopDispatcher:
# calc PC_GETBLOCK
# ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
mov eax, dword ptr [REGINFO + PCOFFSET]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [RECLUT+eax*4]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset iopJITCompile
je Dispatch_notcompiled
cmp eax, offset iopJITCompileInBlock
je Dispatch_notcompiled
lea ebx, [eax-4]
sub ebx, edx
mov dword ptr [edx], ebx
jmp eax
.align 16
Dispatch_notcompiled:
mov esi, edx
lea edi, [ecx+ebx]
push ebx
call iopRecRecompile
add esp, 4
mov eax, dword ptr [edi]
lea ebx, [eax-4]
sub ebx, esi
mov dword ptr [esi], ebx
jmp eax
//////////////////////////////////////////////////////////////////////////
// edx - baseblock->startpc
// stack - x86Ptr
.globl iopDispatcherClear
iopDispatcherClear:
mov [REGINFO + PCOFFSET], edx
# calc PC_GETBLOCK
# ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
mov ebx, edx
shr edx, 16
mov ecx, dword ptr [RECLUT+edx*4]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset iopJITCompile
je Clear_notcompiled
cmp eax, offset iopJITCompileInBlock
je Clear_notcompiled
add esp, 4
jmp eax
.align 16
Clear_notcompiled:
lea edi, [ecx+ebx]
push ebx
call iopRecRecompile
add esp, 4
mov eax, dword ptr [edi]
pop ecx
mov byte ptr [ecx], 0xe9 // jmp32
lea ebx, [eax-5]
sub ebx, ecx
mov dword ptr [ecx+1], ebx
jmp eax
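Both routines deleted above did this linking lazily at execution time: iopDispatcher arrived with EDX holding the caller's jump site, compiled the target if necessary, rewrote the rel32, and tail-jumped on; iopDispatcherClear additionally planted a fresh jmp over the cleared block's stale code. A rough C++ approximation of the one-shot dispatch (every name here is an illustrative stand-in for the asm, not real code from the tree):

#include <cstdint>

using BlockFn = void (*)();

BlockFn LookupBlock(uint32_t pc);   // stands in for the RECLUT walk
BlockFn CompileBlock(uint32_t pc);  // stands in for iopRecRecompile

BlockFn DispatchOnce(uint32_t pc, uintptr_t jumpsite)
{
    BlockFn fn = LookupBlock(pc);
    if (fn == nullptr)              // i.e. still the JIT-compile stub
        fn = CompileBlock(pc);
    // Rewrite the caller's rel32 so this dispatcher is bypassed next time:
    *(uint32_t*)jumpsite = (uint32_t)((uintptr_t)fn - (jumpsite + 4));
    return fn;                      // the asm version tail-jumped to fn
}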
//////////////////////////////////////////////////////////////////////////
// called when jumping to variable pc address.

View File

@@ -60,7 +60,8 @@ static u8 *recMem = NULL; // the recompiled blocks will be here
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static BaseBlocks recBlocks(PSX_NUMBLOCKS);
void iopJITCompile();
static BaseBlocks recBlocks(PSX_NUMBLOCKS, (uptr)iopJITCompile);
static u8 *recPtr = NULL;
u32 psxpc; // recompiler psxpc
int psxbranch; // set for branch
@@ -621,77 +622,6 @@ static __declspec(naked) void iopJITCompileInBlock()
}
}
// jumped to when an immediate branch (IOP side) hasn't been statically linked yet.
// Block is compiled if needed, and the link is made.
// EDX contains the jump addr to modify
static __declspec(naked) void iopDispatcher()
{
__asm {
mov eax, dword ptr [psxRegs.pc]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [psxRecLUT+eax*4]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset iopJITCompile
je notcompiled
cmp eax, offset iopJITCompileInBlock
je notcompiled
lea ebx, [eax-4]
sub ebx, edx
mov dword ptr [edx], ebx
jmp eax
align 16
notcompiled:
mov esi, edx
lea edi, [ecx+ebx]
push ebx
call iopRecRecompile
add esp, 4
mov eax, dword ptr [edi]
lea ebx, [eax-4]
sub ebx, esi
mov dword ptr [esi], ebx
jmp eax
}
}
// edx - baseblock start pc
// stack - x86Ptr[0]
static __declspec(naked) void iopDispatcherClear()
{
__asm {
mov [psxRegs.pc], edx
mov ebx, edx
shr edx, 16
mov ecx, dword ptr [psxRecLUT+edx*4]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset iopJITCompile
je notcompiled
cmp eax, offset iopJITCompileInBlock
je notcompiled
add esp, 4
jmp eax
align 16
notcompiled:
lea edi, [ecx+ebx]
push ebx
call iopRecRecompile
add esp, 4
mov eax, dword ptr [edi]
pop ecx
mov byte ptr [ecx], 0xe9 // jmp32
lea ebx, [eax-5]
sub ebx, ecx
mov dword ptr [ecx+1], ebx
jmp eax
}
}
// called when jumping to variable psxpc address
static __declspec(naked) void iopDispatcherReg()
{
@@ -793,28 +723,6 @@ static __forceinline u32 psxRecClearMem(u32 pc)
if (pexblock->startpc >= upperextent)
break;
pblock = PSX_GETBLOCK(pexblock->startpc);
x86Ptr[_EmitterId_] = (u8*)pblock->GetFnptr();
jASSUME((u8*)iopJITCompile != x86Ptr[_EmitterId_]);
// jASSUME((u8*)iopJITCompileInside != x86Ptr[_EmitterId_]);
// This is breaking things currently; rather than figure it out,
// I'm just using DispatcherReg, it's fast enough now.
// Actually, if we want to do this at all maybe keeping a hash
// table of const jumps and modifying the jumps straight from
// here is the way to go.
#if 1
// there is a small problem: mem can be ORed with 0xa<<28 or 0x8<<28, and we don't know which
MOV32ItoR(EDX, pexblock->startpc);
assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff);
PUSH32I((uptr)x86Ptr[_EmitterId_]); // will be replaced by JMP32
JMP32((uptr)iopDispatcherClear - ((uptr)x86Ptr[_EmitterId_] + 5));
#else
MOV32ItoM((uptr)&psxRegs.pc, pexblock->startpc);
JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr[_EmitterId_] + 5));
#endif
lowerextent = min(lowerextent, pexblock->startpc);
upperextent = max(upperextent, pexblock->startpc + pexblock->size * 4);
recBlocks.Remove(blockidx);
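Note what replaces the deleted trampoline (the MOV EDX / PUSH32I / JMP32 to iopDispatcherClear above): recBlocks.Remove(), shown in BaseblockEx.h, re-points every incoming link at the recompiler stub, and since the link entries stay in the multimap, the next New() for that pc re-targets the same sites. Schematically:

//   before clear:    caller's JMP -> old block body
//   after clear:     caller's JMP -> iopJITCompile stub (compiles on entry)
//   after recompile: New() re-patches -> fresh block body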
@@ -878,9 +786,8 @@ void psxSetBranchImm( u32 imm )
_psxFlushCall(FLUSH_EVERYTHING);
iPsxBranchTest(imm, imm <= psxpc);
MOV32ItoR(EDX, 0);
ptr = (u32*)(x86Ptr[0]-4);
*ptr = (uptr)JMP32((uptr)iopDispatcher - ( (uptr)x86Ptr[0] + 5 ));
ptr = JMP32(0);
recBlocks.Link(imm, (uptr)ptr);
}
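This hunk is the commit in miniature. Previously the emitter loaded EDX with the patch site and jumped to iopDispatcher, which rewrote the jump the first time it ran; now the jump is emitted as a placeholder and resolved on the spot:

// old: MOV EDX, <site>; JMP iopDispatcher  // patched at first execution
// new: ptr = JMP32(0);                     // E9 00 00 00 00; ptr -> rel32
//      recBlocks.Link(imm, (uptr)ptr);     // direct to block, or JIT stub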
//fixme : this is all a huge hack, we base the counter advancements on the average an opcode should take (wtf?)
@@ -989,24 +896,6 @@ void psxRecompileNextInstruction(int delayslot)
BASEBLOCK* pblock = PSX_GETBLOCK(psxpc);
// need *ppblock != s_pCurBlock because of branches
if (HWADDR(psxpc) != s_pCurBlockEx->startpc
&& pblock->GetFnptr() != (uptr)iopJITCompile
&& pblock->GetFnptr() != (uptr)iopJITCompileInBlock )
{
if(!delayslot)
{
// code already in place, so jump to it and exit recomp
assert( recBlocks.Get(HWADDR(psxpc))->startpc == HWADDR(psxpc) );
_psxFlushCall(FLUSH_EVERYTHING);
MOV32ItoM((uptr)&psxRegs.pc, psxpc);
JMP32((uptr)pblock->GetFnptr() - ((uptr)x86Ptr[0] + 5));
psxbranch = 3;
return;
}
}
#ifdef _DEBUG
MOV32ItoR(EAX, psxpc);
#endif
@@ -1101,7 +990,6 @@ void iopRecRecompile(u32 startpc)
u32 i;
u32 branchTo;
u32 willbranch3 = 0;
u32* ptr;
#ifdef _DEBUG
if( psxdump & 4 )
@@ -1114,6 +1002,10 @@ void iopRecRecompile(u32 startpc)
if (((uptr)recPtr - (uptr)recMem) >= (RECMEM_SIZE - 0x10000))
recResetIOP();
x86SetPtr( recPtr );
x86Align(16);
recPtr = x86Ptr[_EmitterId_];
s_pCurBlock = PSX_GETBLOCK(startpc);
assert(s_pCurBlock->GetFnptr() == (uptr)iopJITCompile
@@ -1121,19 +1013,16 @@
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
if(!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)) {
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
if( s_pCurBlockEx == NULL ) {
DevCon::WriteLn("IOP Recompiler data reset");
recResetIOP();
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
x86SetPtr( recPtr );
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
}
}
x86SetPtr( recPtr );
x86Align(16);
recPtr = x86Ptr[0];
psxbranch = 0;
s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] );
@@ -1274,7 +1163,6 @@ StartRecomp:
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
}
else {
assert( psxbranch != 3 );
if( psxbranch ) assert( !willbranch3 );
else
{
@@ -1282,42 +1170,27 @@
SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 );
}
if( willbranch3 ) {
BASEBLOCK* pblock = PSX_GETBLOCK(s_nEndBlock);
if (willbranch3 || !psxbranch) {
assert( psxpc == s_nEndBlock );
_psxFlushCall(FLUSH_EVERYTHING);
MOV32ItoM((uptr)&psxRegs.pc, psxpc);
JMP32((uptr)pblock->GetFnptr() - ((uptr)x86Ptr[0] + 5));
MOV32ItoM((uptr)&psxRegs.pc, psxpc);
u32 *ptr = JMP32(0);
recBlocks.Link(s_nEndBlock, (uptr)ptr);
psxbranch = 3;
}
else if( !psxbranch ) {
// didn't branch, but had to stop
MOV32ItoM( (uptr)&psxRegs.pc, psxpc );
_psxFlushCall(FLUSH_EVERYTHING);
ptr = JMP32(0);
//JMP32((uptr)psxDispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
}
}
assert( x86Ptr[0] < recMem+RECMEM_SIZE );
assert(x86Ptr[_EmitterId_] - recPtr < 0x10000);
s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr;
recPtr = x86Ptr[0];
assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg );
if( !psxbranch ) {
assert( ptr != NULL );
s_pCurBlock = PSX_GETBLOCK(psxpc);
if (s_pCurBlock->GetFnptr() == (uptr)iopJITCompile
|| s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock){
iopRecRecompile(psxpc);
}
*ptr = s_pCurBlock->GetFnptr() - ((u32)ptr + 4);
}
s_pCurBlock = NULL;
s_pCurBlockEx = NULL;
}
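The removed !psxbranch tail used to recursively call iopRecRecompile() on the fall-through block and patch ptr by hand; with the link table, the placeholder resolves to the JIT stub when the target is uncompiled, and the target compiles itself on first entry, so no recursion out of the middle of a compile is needed:

// old: if fall-through uncompiled { iopRecRecompile(psxpc); } *ptr = fnptr - (ptr + 4);
// new: recBlocks.Link(s_nEndBlock, (uptr)ptr);  // stub for now, re-patched by New()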
R3000Acpu psxRec = {

View File

@@ -18,11 +18,6 @@
.extern RECLUT
.extern recRecompile
#define BLOCKTYPE_STARTPC 4 // startpc offset
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
#define BASEBLOCK_SIZE 2 // in dwords
//////////////////////////////////////////////////////////////////////////
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
@@ -41,87 +36,6 @@ JITCompile:
.global JITCompileInBlock
JITCompileInBlock:
jmp JITCompile
//////////////////////////////////////////////////////////////////////////
// Recompiles the next block, and links the old block directly to it.
// This is a one-shot execution for any block which uses it. Once the block
// has been statically linked to the new block, this function will be bypassed
//
// edx - jump address to modify
.globl Dispatcher
Dispatcher:
# calc PC_GETBLOCK
# ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
mov eax, dword ptr [REGINFO + PCOFFSET]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [RECLUT+eax*4]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset JITCompile
je Dispatch_notcompiled
cmp eax, offset JITCompileInBlock
je Dispatch_notcompiled
lea ebx, [eax-4]
sub ebx, edx
mov dword ptr [edx], ebx
jmp eax
.align 16
Dispatch_notcompiled:
mov esi, edx
lea edi, [ecx+ebx]
push ebx
call recRecompile
add esp, 4
mov eax, dword ptr [edi]
lea ebx, [eax-4]
sub ebx, esi
mov dword ptr [esi], ebx
jmp eax
//////////////////////////////////////////////////////////////////////////
// edx - baseblock->startpc
// stack - x86Ptr
.globl DispatcherClear
DispatcherClear:
mov [REGINFO + PCOFFSET], edx
# calc PC_GETBLOCK
# ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
mov ebx, edx
shr edx, 16
mov ecx, dword ptr [RECLUT+edx*4]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset JITCompile
je Clear_notcompiled
cmp eax, offset JITCompileInBlock
je Clear_notcompiled
add esp, 4
jmp eax
.align 16
Clear_notcompiled:
lea edi, [ecx+ebx]
push ebx
call recRecompile
add esp, 4
mov eax, dword ptr [edi]
pop ecx
mov byte ptr [ecx], 0xe9 // jmp32
lea ebx, [eax-5]
sub ebx, ecx
mov dword ptr [ecx+1], ebx
jmp eax
//////////////////////////////////////////////////////////////////////////
// called when jumping to variable pc address.

View File

@@ -79,7 +79,8 @@ static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static u32 *recRAMCopy = NULL;
static BaseBlocks recBlocks(EE_NUMBLOCKS);
void JITCompile();
static BaseBlocks recBlocks(EE_NUMBLOCKS, (uptr)JITCompile);
static u8* recPtr = NULL, *recStackPtr = NULL;
static EEINST* s_pInstCache = NULL;
static u32 s_nInstCacheSize = 0;
@@ -111,7 +112,7 @@ static u32 dumplog = 0;
//static const char *txt2 = "M32 = %x\n";
#endif
static void iBranchTest(u32 newpc, bool noDispatch=false);
static void iBranchTest(u32 newpc = 0xffffffff, bool noDispatch=false);
static void ClearRecLUT(BASEBLOCK* base, int count);
////////////////////////////////////////////////////
@@ -446,7 +447,6 @@ u32* recAllocStackMem(int size, int align)
return (u32*)(recStackPtr-size);
}
static const int REC_CACHEMEM = 0x01000000;
static void __fastcall dyna_block_discard(u32 start,u32 sz);
@@ -600,6 +600,7 @@ static void recShutdown( void )
safe_aligned_free( m_recBlockAlloc );
recRAM = recROM = recROM1 = NULL;
recStack = NULL;
recRAMCopy = NULL;
safe_free( s_pInstCache );
s_nInstCacheSize = 0;
@@ -662,79 +663,6 @@ static __declspec(naked) void JITCompileInBlock()
}
}
// jumped to when an immediate branch (EE side) hasn't been statically linked yet.
// Block is compiled if needed, and the link is made.
// EDX contains the jump addr to modify
static __naked void Dispatcher()
{
__asm {
mov eax, dword ptr [cpuRegs.pc]
mov ebx, eax
shr eax, 16
mov ecx, dword ptr [recLUT+eax*4]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset JITCompile
je notcompiled
cmp eax, offset JITCompileInBlock
je notcompiled
lea ebx, [eax-4]
sub ebx, edx
mov dword ptr [edx], ebx
jmp eax
align 16
notcompiled:
mov esi, edx
lea edi, [ecx+ebx]
push ebx
call recRecompile
add esp, 4
mov eax, dword ptr [edi]
lea ebx, [eax-4]
sub ebx, esi
mov dword ptr [esi], ebx
jmp eax
}
}
// edx - block start pc
// stack - x86Ptr[0]
static __naked void DispatcherClear()
{
__asm {
mov [cpuRegs.pc], edx
mov ebx, edx
shr edx, 16
mov ecx, dword ptr [recLUT+edx*4]
mov eax, dword ptr [ecx+ebx]
cmp eax, offset JITCompile
je notcompiled
cmp eax, offset JITCompileInBlock
je notcompiled
add esp, 4
jmp eax
align 16
notcompiled:
lea edi, [ecx+ebx]
push ebx
call recRecompile
add esp, 4
mov eax, dword ptr [edi]
pop ecx
mov byte ptr [ecx], 0xe9 // jmp32
lea ebx, [eax-5]
sub ebx, ecx
mov dword ptr [ecx+1], ebx
jmp eax
}
}
// called when jumping to variable pc address
static void __naked DispatcherReg()
{
@@ -943,21 +871,6 @@ void recClear(u32 addr, u32 size)
break;
}
x86Ptr[_EmitterId_] = (u8*)pblock->GetFnptr();
jASSUME((u8*)JITCompile != x86Ptr[_EmitterId_]);
jASSUME((u8*)JITCompileInBlock != x86Ptr[_EmitterId_]);
// Actually, if we want to do this at all maybe keeping a hash
// table of const jumps and modifying the jumps straight from
// here is the way to go.
// there is a small problem: mem can be ORed with 0xa<<28 or 0x8<<28, and we don't know which
MOV32ItoR(EDX, blockstart);
assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff);
PUSH32I((uptr)x86Ptr[_EmitterId_]); // will be replaced by JMP32
JMP32((uptr)DispatcherClear - ((uptr)x86Ptr[_EmitterId_] + 5));
lowerextent = min(lowerextent, blockstart);
upperextent = max(upperextent, blockend);
// This might end up inside a block that doesn't contain the clearing range,
@@ -1055,7 +968,7 @@ void SetBranchReg( u32 reg )
iFlushCall(FLUSH_EVERYTHING);
iBranchTest(0xffffffff);
iBranchTest();
}
void SetBranchImm( u32 imm )
@@ -1065,9 +978,7 @@ void SetBranchImm( u32 imm )
assert( imm );
// end the current block
MOV32ItoM( (uptr)&cpuRegs.pc, imm );
iFlushCall(FLUSH_EVERYTHING);
iBranchTest(imm);
}
@@ -1197,6 +1108,19 @@ u32 eeScaleBlockCycles()
return temp >> (3+2);
}
static void iBranch(u32 newpc, int type)
{
u32* ptr;
MOV32ItoM((uptr)&cpuRegs.pc, newpc);
if (type == 0)
ptr = JMP32(0);
else if (type == 1)
ptr = JS32(0);
recBlocks.Link(HWADDR(newpc), (uptr)ptr);
}
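iBranch's two emit forms differ in length, but both return a pointer to the trailing rel32 operand, which is why Link()'s target - (site + 4) works unchanged for either. With made-up addresses:

//   JMP32: E9 rel32     (5 bytes) -> returned site = instruction addr + 1
//   JS32:  0F 88 rel32  (6 bytes) -> returned site = instruction addr + 2
// A JS32 at 0x1000 linked to a block at 0x2000:
//   rel32 = 0x2000 - (0x1002 + 4) = 0x0FFA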
// Generates dynarec code for Event tests followed by a block dispatch (branch).
// Parameters:
// newpc - address to jump to at the end of the block. If newpc == 0xffffffff then
@@ -1212,41 +1136,23 @@ static void iBranchTest(u32 newpc, bool noDispatch)
#ifdef _DEBUG
//CALLFunc((uptr)testfpu);
#endif
u32* ptr;
if( bExecBIOS ) CheckForBIOSEnd();
MOV32MtoR(EAX, (uptr)&cpuRegs.cycle);
if( !noDispatch && newpc != 0xffffffff )
{
// Optimization note: instructions are ordered to pair EDX with EAX's load above.
// Load EDX with the address of the JS32 jump below.
// We do this because the Dispatcher will use this info to modify
// the JS instruction later on with the address of the block it's jumping
// to; creating a static link of blocks that doesn't require the overhead
// of a dispatcher.
MOV32ItoR(EDX, 0);
ptr = (u32*)(x86Ptr[0]-4);
}
// Check the Event scheduler if our "cycle target" has been reached.
// Equiv code to:
// cpuRegs.cycle += blockcycles;
// if( cpuRegs.cycle > g_nextBranchCycle ) { DoEvents(); }
MOV32MtoR(EAX, (uptr)&cpuRegs.cycle);
ADD32ItoR(EAX, eeScaleBlockCycles());
MOV32RtoM((uptr)&cpuRegs.cycle, EAX); // update cycles
SUB32MtoR(EAX, (uptr)&g_nextBranchCycle);
if( newpc != 0xffffffff )
{
// This is the jump instruction which gets modified by Dispatcher.
*ptr = (u32)JS32((u32)Dispatcher - ( (u32)x86Ptr[0] + 6 ));
}
else if( !noDispatch )
{
// This instruction is a dynamic link, so it's never modified.
JS32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 6 ));
if (!noDispatch) {
if (newpc == 0xffffffff)
JS32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 6 ));
else
iBranch(newpc, 1);
}
RET();
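The emitted block epilogue thus reduces to a cycle check plus either a static link or the register dispatcher. An equivalent control flow in C-like comment form (the signed cast mirrors the JS sign test):

// cpuRegs.cycle += eeScaleBlockCycles();
// if ((s32)(cpuRegs.cycle - g_nextBranchCycle) < 0) // JS32: target not reached
//     goto next_block;  // statically linked via recBlocks.Link, or
//                       // DispatcherReg when newpc is unknown
// return;               // RET: back to the run loop to service events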
@@ -1449,7 +1355,6 @@ void recRecompile( const u32 startpc )
u32 i = 0;
u32 branchTo;
u32 willbranch3 = 0;
u32* ptr;
u32 usecop2;
#ifdef _DEBUG
@@ -1470,6 +1375,10 @@ void recRecompile( const u32 startpc )
recResetEE();
}
x86SetPtr( recPtr );
x86Align(16);
recPtr = x86Ptr[_EmitterId_];
s_pCurBlock = PC_GETBLOCK(startpc);
assert(s_pCurBlock->GetFnptr() == (uptr)JITCompile
@@ -1478,20 +1387,17 @@
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
assert(!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc));
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
if( s_pCurBlockEx == NULL ) {
//SysPrintf("ee reset (blocks)\n");
recResetEE();
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
x86SetPtr( recPtr );
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
}
assert(s_pCurBlockEx);
x86SetPtr( recPtr );
x86Align(16);
recPtr = x86Ptr[0];
branch = 0;
// reset recomp state variables
@@ -1852,9 +1758,7 @@ StartRecomp:
break;
if (memcmp(&recRAMCopy[oldBlock->startpc / 4], PSM(oldBlock->startpc),
oldBlock->size * 4)) {
u8* oldX86 = x86Ptr[_EmitterId_];
recClear(startpc, (pc - startpc) / 4);
x86Ptr[_EmitterId_] = oldX86;
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
assert(s_pCurBlockEx->startpc == HWADDR(startpc));
break;
@@ -1893,21 +1797,9 @@ StartRecomp:
else
ADD32ItoM((int)&cpuRegs.cycle, eeScaleBlockCycles() );
if( willbranch3 ) {
BASEBLOCK* pblock = PC_GETBLOCK(s_nEndBlock);
assert( pc == s_nEndBlock );
if( willbranch3 || !branch) {
iFlushCall(FLUSH_EVERYTHING);
MOV32ItoM((uptr)&cpuRegs.pc, pc);
JMP32((uptr)pblock->GetFnptr() - ((uptr)x86Ptr[0] + 5));
branch = 3;
}
else if( !branch ) {
// didn't branch, but had to stop
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_EVERYTHING);
ptr = JMP32(0);
iBranch(pc, 0);
}
}
@@ -1915,20 +1807,13 @@ StartRecomp:
assert( recStackPtr < recStack+RECSTACK_SIZE );
assert( x86FpuState == 0 );
assert(x86Ptr[_EmitterId_] - recPtr < 0x10000);
s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr;
recPtr = x86Ptr[0];
assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg );
if( !branch ) {
assert( ptr != NULL );
BASEBLOCK *pblock = PC_GETBLOCK(pc);
if (pblock->GetFnptr() == (uptr)JITCompile
|| pblock->GetFnptr() == (uptr)JITCompileInBlock)
recRecompile(pc);
*ptr = pblock->GetFnptr() - ((u32)ptr + 4);
}
s_pCurBlock = NULL;
s_pCurBlockEx = NULL;
}