mirror of https://github.com/PCSX2/pcsx2.git
Rewrite immediate jumps from the block manager instead of having the dispatchers do this at execution time.
Should be a tad faster. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@782 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
d793d84860
commit
c483f17331
|
@ -54,15 +54,11 @@ void svudispfn();
|
|||
// aR3000A.S
|
||||
void iopJITCompile();
|
||||
void iopJITCompileInBlock();
|
||||
void iopDispatcher();
|
||||
void iopDispatcherClear();
|
||||
void iopDispatcherReg();
|
||||
|
||||
// aR5900-32.S
|
||||
void JITCompile();
|
||||
void JITCompileInBlock();
|
||||
void Dispatcher();
|
||||
void DispatcherClear();
|
||||
void DispatcherReg();
|
||||
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "PrecompiledHeader.h"
|
||||
#include "BaseblockEx.h"
|
||||
|
||||
BASEBLOCKEX* BaseBlocks::New(u32 startpc)
|
||||
BASEBLOCKEX* BaseBlocks::New(u32 startpc, uptr fnptr)
|
||||
{
|
||||
if (blocks.size() == size)
|
||||
return 0;
|
||||
|
@ -28,6 +28,7 @@ BASEBLOCKEX* BaseBlocks::New(u32 startpc)
|
|||
std::vector<BASEBLOCKEX>::iterator iter;
|
||||
memset(&newblock, 0, sizeof newblock);
|
||||
newblock.startpc = startpc;
|
||||
newblock.fnptr = fnptr;
|
||||
|
||||
int imin = 0, imax = blocks.size(), imid;
|
||||
|
||||
|
@ -42,6 +43,11 @@ BASEBLOCKEX* BaseBlocks::New(u32 startpc)
|
|||
|
||||
assert(imin == blocks.size() || blocks[imin].startpc > startpc);
|
||||
iter = blocks.insert(blocks.begin() + imin, newblock);
|
||||
|
||||
std::pair<linkiter_t, linkiter_t> range = links.equal_range(startpc);
|
||||
for (linkiter_t i = range.first; i != range.second; ++i)
|
||||
*(u32*)i->second = fnptr - (i->second + 4);
|
||||
|
||||
return &*iter;
|
||||
}
|
||||
|
||||
|
@ -63,3 +69,19 @@ int BaseBlocks::LastIndex(u32 startpc) const
|
|||
|
||||
return imin;
|
||||
}
|
||||
|
||||
// Intended to map a host (x86) instruction address back to the block record
// that owns it. Not implemented yet: every lookup currently reports "not found"
// by returning a null pointer, and the ip argument is ignored.
BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) const
{
	// TODO
	return 0;
}
|
||||
|
||||
// Records that the 32-bit displacement stored at jumpptr belongs to a jump
// whose guest target is pc, and patches that displacement right away.
//
//   pc      - guest address the jump should reach
//   jumpptr - host address of the 4-byte displacement field to patch
//
// If Get(pc) yields a block that starts exactly at pc, the jump is pointed at
// that block's fnptr; otherwise it is pointed at the recompiler entry supplied
// to the constructor. The displacement is written relative to jumpptr + 4,
// i.e. the address just past the displacement field. The (pc, jumpptr) pair is
// always added to the link table so New() and Remove() can re-patch it later.
void BaseBlocks::Link(u32 pc, uptr jumpptr)
{
	BASEBLOCKEX* const hit = Get(pc);
	const bool exactStart = hit && hit->startpc == pc;
	const uptr dest = exactStart ? hit->fnptr : recompiler;

	*(u32*)jumpptr = dest - (jumpptr + 4);
	links.insert(std::pair<u32, uptr>(pc, jumpptr));
}
|
|
@ -20,6 +20,8 @@
|
|||
|
||||
#include "PrecompiledHeader.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
|
||||
// used to keep block information
|
||||
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
|
||||
|
@ -38,9 +40,10 @@ struct BASEBLOCK
|
|||
// extra block info (only valid for start of fn)
|
||||
struct BASEBLOCKEX
|
||||
{
|
||||
u16 size; // size in dwords
|
||||
u16 dummy;
|
||||
u32 startpc;
|
||||
uptr fnptr;
|
||||
u16 size; // size in dwords
|
||||
u16 x86size;
|
||||
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
u32 visited; // number of times called
|
||||
|
@ -53,24 +56,30 @@ class BaseBlocks
|
|||
{
|
||||
private:
|
||||
std::vector<BASEBLOCKEX> blocks;
|
||||
// switch to a hash map later?
|
||||
std::multimap<u32, uptr> links;
|
||||
typedef std::multimap<u32, uptr>::iterator linkiter_t;
|
||||
unsigned long size;
|
||||
uptr recompiler;
|
||||
|
||||
public:
|
||||
BaseBlocks(unsigned long max) :
|
||||
size(max),
|
||||
BaseBlocks(unsigned long size_, uptr recompiler_) :
|
||||
size(size_),
|
||||
recompiler(recompiler_),
|
||||
blocks(0)
|
||||
{
|
||||
blocks.reserve(size);
|
||||
}
|
||||
|
||||
BASEBLOCKEX* New(u32 startpc);
|
||||
BASEBLOCKEX* New(u32 startpc, uptr fnptr);
|
||||
int LastIndex (u32 startpc) const;
|
||||
BASEBLOCKEX* GetByX86(uptr ip) const;
|
||||
|
||||
inline int Index (u32 startpc) const
|
||||
{
|
||||
int idx = LastIndex(startpc);
|
||||
if (idx == -1 || startpc < blocks[idx].startpc ||
|
||||
startpc >= blocks[idx].startpc + blocks[idx].size * 4)
|
||||
blocks[idx].size && (startpc >= blocks[idx].startpc + blocks[idx].size * 4))
|
||||
return -1;
|
||||
else
|
||||
return idx;
|
||||
|
@ -90,12 +99,20 @@ public:
|
|||
|
||||
inline void Remove(int idx)
|
||||
{
|
||||
u32 startpc = blocks[idx].startpc;
|
||||
std::pair<linkiter_t, linkiter_t> range = links.equal_range(blocks[idx].startpc);
|
||||
for (linkiter_t i = range.first; i != range.second; ++i)
|
||||
*(u32*)i->second = recompiler - (i->second + 4);
|
||||
// TODO: remove links from this block?
|
||||
blocks.erase(blocks.begin() + idx);
|
||||
}
|
||||
|
||||
void Link(u32 pc, uptr jumpptr);
|
||||
|
||||
inline void Reset()
|
||||
{
|
||||
blocks.clear();
|
||||
links.clear();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -18,11 +18,6 @@
|
|||
.extern RECLUT
|
||||
.extern iopRecRecompile
|
||||
|
||||
#define BLOCKTYPE_STARTPC 4 // startpc offset
|
||||
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
|
||||
|
||||
#define BASEBLOCK_SIZE 2 // in dwords
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// The address for all cleared blocks. It recompiles the current pc and then
|
||||
// dispatches to the recompiled block address.
|
||||
|
@ -42,87 +37,6 @@ iopJITCompile:
|
|||
iopJITCompileInBlock:
|
||||
|
||||
jmp iopJITCompile
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Recompiles the next block, and links the old block directly to it.
|
||||
// This is a one-shot execution for any block which uses it. Once the block
|
||||
// has been statically linked to the new block, this function will be bypassed
|
||||
//
|
||||
// edx - jump address to modify
|
||||
.globl iopDispatcher
|
||||
iopDispatcher:
|
||||
|
||||
# calc PC_GETBLOCK
|
||||
# ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
|
||||
|
||||
mov eax, dword ptr [REGINFO + PCOFFSET]
|
||||
mov ebx, eax
|
||||
shr eax, 16
|
||||
mov ecx, dword ptr [RECLUT+eax*4]
|
||||
mov eax, dword ptr [ecx+ebx]
|
||||
|
||||
cmp eax, offset iopJITCompile
|
||||
je Dispatch_notcompiled
|
||||
cmp eax, offset iopJITCompileInBlock
|
||||
je Dispatch_notcompiled
|
||||
lea ebx, [eax-4]
|
||||
sub ebx, edx
|
||||
mov dword ptr [edx], ebx
|
||||
jmp eax
|
||||
|
||||
.align 16
|
||||
Dispatch_notcompiled:
|
||||
mov esi, edx
|
||||
lea edi, [ecx+ebx]
|
||||
push ebx
|
||||
call iopRecRecompile
|
||||
add esp, 4
|
||||
|
||||
mov eax, dword ptr [edi]
|
||||
lea ebx, [eax-4]
|
||||
sub ebx, esi
|
||||
mov dword ptr [esi], ebx
|
||||
jmp eax
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// edx - baseblock->startpc
|
||||
// stack - x86Ptr
|
||||
.globl iopDispatcherClear
|
||||
iopDispatcherClear:
|
||||
mov [REGINFO + PCOFFSET], edx
|
||||
|
||||
# calc PC_GETBLOCK
|
||||
# ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
|
||||
|
||||
mov ebx, edx
|
||||
shr edx, 16
|
||||
mov ecx, dword ptr [RECLUT+edx*4]
|
||||
mov eax, dword ptr [ecx+ebx]
|
||||
|
||||
cmp eax, offset iopJITCompile
|
||||
je Clear_notcompiled
|
||||
cmp eax, offset iopJITCompileInBlock
|
||||
je Clear_notcompiled
|
||||
add esp, 4
|
||||
jmp eax
|
||||
|
||||
.align 16
|
||||
Clear_notcompiled:
|
||||
lea edi, [ecx+ebx]
|
||||
push ebx
|
||||
call iopRecRecompile
|
||||
add esp, 4
|
||||
mov eax, dword ptr [edi]
|
||||
|
||||
pop ecx
|
||||
mov byte ptr [ecx], 0xe9 // jmp32
|
||||
lea ebx, [eax-5]
|
||||
sub ebx, ecx
|
||||
mov dword ptr [ecx+1], ebx
|
||||
|
||||
jmp eax
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// called when jumping to variable pc address.
|
||||
|
|
|
@ -60,7 +60,8 @@ static u8 *recMem = NULL; // the recompiled blocks will be here
|
|||
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
|
||||
static BASEBLOCK *recROM = NULL; // and here
|
||||
static BASEBLOCK *recROM1 = NULL; // also here
|
||||
static BaseBlocks recBlocks(PSX_NUMBLOCKS);
|
||||
void iopJITCompile();
|
||||
static BaseBlocks recBlocks(PSX_NUMBLOCKS, (uptr)iopJITCompile);
|
||||
static u8 *recPtr = NULL;
|
||||
u32 psxpc; // recompiler psxpc
|
||||
int psxbranch; // set for branch
|
||||
|
@ -621,77 +622,6 @@ static __declspec(naked) void iopJITCompileInBlock()
|
|||
}
|
||||
}
|
||||
|
||||
// jumped to when an immediate branch (IOP side) hasn't been statically linked yet.
|
||||
// Block is compiled if needed, and the link is made.
|
||||
// EDX contains the jump addr to modify
|
||||
static __declspec(naked) void iopDispatcher()
|
||||
{
|
||||
__asm {
|
||||
mov eax, dword ptr [psxRegs.pc]
|
||||
mov ebx, eax
|
||||
shr eax, 16
|
||||
mov ecx, dword ptr [psxRecLUT+eax*4]
|
||||
mov eax, dword ptr [ecx+ebx]
|
||||
cmp eax, offset iopJITCompile
|
||||
je notcompiled
|
||||
cmp eax, offset iopJITCompileInBlock
|
||||
je notcompiled
|
||||
lea ebx, [eax-4]
|
||||
sub ebx, edx
|
||||
mov dword ptr [edx], ebx
|
||||
jmp eax
|
||||
|
||||
align 16
|
||||
notcompiled:
|
||||
mov esi, edx
|
||||
lea edi, [ecx+ebx]
|
||||
push ebx
|
||||
call iopRecRecompile
|
||||
add esp, 4
|
||||
|
||||
mov eax, dword ptr [edi]
|
||||
lea ebx, [eax-4]
|
||||
sub ebx, esi
|
||||
mov dword ptr [esi], ebx
|
||||
jmp eax
|
||||
}
|
||||
}
|
||||
|
||||
// edx - baseblock start pc
|
||||
// stack - x86Ptr[0]
|
||||
static __declspec(naked) void iopDispatcherClear()
|
||||
{
|
||||
__asm {
|
||||
mov [psxRegs.pc], edx
|
||||
mov ebx, edx
|
||||
shr edx, 16
|
||||
mov ecx, dword ptr [psxRecLUT+edx*4]
|
||||
mov eax, dword ptr [ecx+ebx]
|
||||
cmp eax, offset iopJITCompile
|
||||
je notcompiled
|
||||
cmp eax, offset iopJITCompileInBlock
|
||||
je notcompiled
|
||||
add esp, 4
|
||||
jmp eax
|
||||
|
||||
align 16
|
||||
notcompiled:
|
||||
lea edi, [ecx+ebx]
|
||||
push ebx
|
||||
call iopRecRecompile
|
||||
add esp, 4
|
||||
mov eax, dword ptr [edi]
|
||||
|
||||
pop ecx
|
||||
mov byte ptr [ecx], 0xe9 // jmp32
|
||||
lea ebx, [eax-5]
|
||||
sub ebx, ecx
|
||||
mov dword ptr [ecx+1], ebx
|
||||
|
||||
jmp eax
|
||||
}
|
||||
}
|
||||
|
||||
// called when jumping to variable psxpc address
|
||||
static __declspec(naked) void iopDispatcherReg()
|
||||
{
|
||||
|
@ -793,28 +723,6 @@ static __forceinline u32 psxRecClearMem(u32 pc)
|
|||
if (pexblock->startpc >= upperextent)
|
||||
break;
|
||||
|
||||
pblock = PSX_GETBLOCK(pexblock->startpc);
|
||||
x86Ptr[_EmitterId_] = (u8*)pblock->GetFnptr();
|
||||
|
||||
jASSUME((u8*)iopJITCompile != x86Ptr[_EmitterId_]);
|
||||
// jASSUME((u8*)iopJITCompileInside != x86Ptr[_EmitterId_]);
|
||||
|
||||
// This is breaking things currently, rather than figure it out
|
||||
// I'm just using DispatcherReg, it's fast enough now.
|
||||
// Actually, if we want to do this at all maybe keeping a hash
|
||||
// table of const jumps and modifying the jumps straight from
|
||||
// here is the way to go.
|
||||
#if 1
|
||||
// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
|
||||
MOV32ItoR(EDX, pexblock->startpc);
|
||||
assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff);
|
||||
PUSH32I((uptr)x86Ptr[_EmitterId_]); // will be replaced by JMP32
|
||||
JMP32((uptr)iopDispatcherClear - ((uptr)x86Ptr[_EmitterId_] + 5));
|
||||
#else
|
||||
MOV32ItoM((uptr)&psxRegs.pc, pexblock->startpc);
|
||||
JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr[_EmitterId_] + 5));
|
||||
#endif
|
||||
|
||||
lowerextent = min(lowerextent, pexblock->startpc);
|
||||
upperextent = max(upperextent, pexblock->startpc + pexblock->size * 4);
|
||||
recBlocks.Remove(blockidx);
|
||||
|
@ -878,9 +786,8 @@ void psxSetBranchImm( u32 imm )
|
|||
_psxFlushCall(FLUSH_EVERYTHING);
|
||||
iPsxBranchTest(imm, imm <= psxpc);
|
||||
|
||||
MOV32ItoR(EDX, 0);
|
||||
ptr = (u32*)(x86Ptr[0]-4);
|
||||
*ptr = (uptr)JMP32((uptr)iopDispatcher - ( (uptr)x86Ptr[0] + 5 ));
|
||||
ptr = JMP32(0);
|
||||
recBlocks.Link(imm, (uptr)ptr);
|
||||
}
|
||||
|
||||
//fixme : this is all a huge hack, we base the counter advancements on the average an opcode should take (wtf?)
|
||||
|
@ -989,24 +896,6 @@ void psxRecompileNextInstruction(int delayslot)
|
|||
|
||||
BASEBLOCK* pblock = PSX_GETBLOCK(psxpc);
|
||||
|
||||
// need *ppblock != s_pCurBlock because of branches
|
||||
if (HWADDR(psxpc) != s_pCurBlockEx->startpc
|
||||
&& pblock->GetFnptr() != (uptr)iopJITCompile
|
||||
&& pblock->GetFnptr() != (uptr)iopJITCompileInBlock )
|
||||
{
|
||||
if(!delayslot)
|
||||
{
|
||||
// code already in place, so jump to it and exit recomp
|
||||
assert( recBlocks.Get(HWADDR(psxpc))->startpc == HWADDR(psxpc) );
|
||||
|
||||
_psxFlushCall(FLUSH_EVERYTHING);
|
||||
MOV32ItoM((uptr)&psxRegs.pc, psxpc);
|
||||
JMP32((uptr)pblock->GetFnptr() - ((uptr)x86Ptr[0] + 5));
|
||||
psxbranch = 3;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
MOV32ItoR(EAX, psxpc);
|
||||
#endif
|
||||
|
@ -1101,7 +990,6 @@ void iopRecRecompile(u32 startpc)
|
|||
u32 i;
|
||||
u32 branchTo;
|
||||
u32 willbranch3 = 0;
|
||||
u32* ptr;
|
||||
|
||||
#ifdef _DEBUG
|
||||
if( psxdump & 4 )
|
||||
|
@ -1114,6 +1002,10 @@ void iopRecRecompile(u32 startpc)
|
|||
if (((uptr)recPtr - (uptr)recMem) >= (RECMEM_SIZE - 0x10000))
|
||||
recResetIOP();
|
||||
|
||||
x86SetPtr( recPtr );
|
||||
x86Align(16);
|
||||
recPtr = x86Ptr[_EmitterId_];
|
||||
|
||||
s_pCurBlock = PSX_GETBLOCK(startpc);
|
||||
|
||||
assert(s_pCurBlock->GetFnptr() == (uptr)iopJITCompile
|
||||
|
@ -1121,19 +1013,16 @@ void iopRecRecompile(u32 startpc)
|
|||
|
||||
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
|
||||
if(!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc)) {
|
||||
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
|
||||
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
|
||||
|
||||
if( s_pCurBlockEx == NULL ) {
|
||||
DevCon::WriteLn("IOP Recompiler data reset");
|
||||
recResetIOP();
|
||||
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
|
||||
x86SetPtr( recPtr );
|
||||
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
|
||||
}
|
||||
}
|
||||
|
||||
x86SetPtr( recPtr );
|
||||
x86Align(16);
|
||||
recPtr = x86Ptr[0];
|
||||
|
||||
psxbranch = 0;
|
||||
|
||||
s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] );
|
||||
|
@ -1274,7 +1163,6 @@ StartRecomp:
|
|||
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
|
||||
}
|
||||
else {
|
||||
assert( psxbranch != 3 );
|
||||
if( psxbranch ) assert( !willbranch3 );
|
||||
else
|
||||
{
|
||||
|
@ -1282,42 +1170,27 @@ StartRecomp:
|
|||
SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 );
|
||||
}
|
||||
|
||||
if( willbranch3 ) {
|
||||
BASEBLOCK* pblock = PSX_GETBLOCK(s_nEndBlock);
|
||||
if (willbranch3 || !psxbranch) {
|
||||
assert( psxpc == s_nEndBlock );
|
||||
_psxFlushCall(FLUSH_EVERYTHING);
|
||||
MOV32ItoM((uptr)&psxRegs.pc, psxpc);
|
||||
JMP32((uptr)pblock->GetFnptr() - ((uptr)x86Ptr[0] + 5));
|
||||
MOV32ItoM((uptr)&psxRegs.pc, psxpc);
|
||||
u32 *ptr = JMP32(0);
|
||||
recBlocks.Link(s_nEndBlock, (uptr)ptr);
|
||||
psxbranch = 3;
|
||||
}
|
||||
else if( !psxbranch ) {
|
||||
// didn't branch, but had to stop
|
||||
MOV32ItoM( (uptr)&psxRegs.pc, psxpc );
|
||||
|
||||
_psxFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
ptr = JMP32(0);
|
||||
//JMP32((uptr)psxDispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
|
||||
}
|
||||
}
|
||||
|
||||
assert( x86Ptr[0] < recMem+RECMEM_SIZE );
|
||||
|
||||
assert(x86Ptr[_EmitterId_] - recPtr < 0x10000);
|
||||
s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr;
|
||||
|
||||
recPtr = x86Ptr[0];
|
||||
|
||||
assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg );
|
||||
|
||||
if( !psxbranch ) {
|
||||
assert( ptr != NULL );
|
||||
s_pCurBlock = PSX_GETBLOCK(psxpc);
|
||||
|
||||
if (s_pCurBlock->GetFnptr() == (uptr)iopJITCompile
|
||||
|| s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock){
|
||||
iopRecRecompile(psxpc);
|
||||
}
|
||||
|
||||
*ptr = s_pCurBlock->GetFnptr() - ((u32)ptr + 4);
|
||||
}
|
||||
s_pCurBlock = NULL;
|
||||
s_pCurBlockEx = NULL;
|
||||
}
|
||||
|
||||
R3000Acpu psxRec = {
|
||||
|
|
|
@ -18,11 +18,6 @@
|
|||
.extern RECLUT
|
||||
.extern recRecompile
|
||||
|
||||
#define BLOCKTYPE_STARTPC 4 // startpc offset
|
||||
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
|
||||
|
||||
#define BASEBLOCK_SIZE 2 // in dwords
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// The address for all cleared blocks. It recompiles the current pc and then
|
||||
// dispatches to the recompiled block address.
|
||||
|
@ -41,87 +36,6 @@ JITCompile:
|
|||
.global JITCompileInBlock
|
||||
JITCompileInBlock:
|
||||
jmp JITCompile
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Recompiles the next block, and links the old block directly to it.
|
||||
// This is a one-shot execution for any block which uses it. Once the block
|
||||
// has been statically linked to the new block, this function will be bypassed
|
||||
//
|
||||
// edx - jump address to modify
|
||||
.globl Dispatcher
|
||||
Dispatcher:
|
||||
|
||||
# calc PC_GETBLOCK
|
||||
# ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
|
||||
|
||||
mov eax, dword ptr [REGINFO + PCOFFSET]
|
||||
mov ebx, eax
|
||||
shr eax, 16
|
||||
mov ecx, dword ptr [RECLUT+eax*4]
|
||||
mov eax, dword ptr [ecx+ebx]
|
||||
|
||||
cmp eax, offset JITCompile
|
||||
je Dispatch_notcompiled
|
||||
cmp eax, offset JITCompileInBlock
|
||||
je Dispatch_notcompiled
|
||||
lea ebx, [eax-4]
|
||||
sub ebx, edx
|
||||
mov dword ptr [edx], ebx
|
||||
jmp eax
|
||||
|
||||
.align 16
|
||||
Dispatch_notcompiled:
|
||||
mov esi, edx
|
||||
lea edi, [ecx+ebx]
|
||||
push ebx
|
||||
call recRecompile
|
||||
add esp, 4
|
||||
|
||||
mov eax, dword ptr [edi]
|
||||
lea ebx, [eax-4]
|
||||
sub ebx, esi
|
||||
mov dword ptr [esi], ebx
|
||||
jmp eax
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// edx - baseblock->startpc
|
||||
// stack - x86Ptr
|
||||
.globl DispatcherClear
|
||||
DispatcherClear:
|
||||
mov [REGINFO + PCOFFSET], edx
|
||||
|
||||
# calc PC_GETBLOCK
|
||||
# ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
|
||||
|
||||
mov ebx, edx
|
||||
shr edx, 16
|
||||
mov ecx, dword ptr [RECLUT+edx*4]
|
||||
mov eax, dword ptr [ecx+ebx]
|
||||
|
||||
cmp eax, offset JITCompile
|
||||
je Clear_notcompiled
|
||||
cmp eax, offset JITCompileInBlock
|
||||
je Clear_notcompiled
|
||||
add esp, 4
|
||||
jmp eax
|
||||
|
||||
.align 16
|
||||
Clear_notcompiled:
|
||||
lea edi, [ecx+ebx]
|
||||
push ebx
|
||||
call recRecompile
|
||||
add esp, 4
|
||||
mov eax, dword ptr [edi]
|
||||
|
||||
pop ecx
|
||||
mov byte ptr [ecx], 0xe9 // jmp32
|
||||
lea ebx, [eax-5]
|
||||
sub ebx, ecx
|
||||
mov dword ptr [ecx+1], ebx
|
||||
|
||||
jmp eax
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// called when jumping to variable pc address.
|
||||
|
|
|
@ -79,7 +79,8 @@ static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
|
|||
static BASEBLOCK *recROM = NULL; // and here
|
||||
static BASEBLOCK *recROM1 = NULL; // also here
|
||||
static u32 *recRAMCopy = NULL;
|
||||
static BaseBlocks recBlocks(EE_NUMBLOCKS);
|
||||
void JITCompile();
|
||||
static BaseBlocks recBlocks(EE_NUMBLOCKS, (uptr)JITCompile);
|
||||
static u8* recPtr = NULL, *recStackPtr = NULL;
|
||||
static EEINST* s_pInstCache = NULL;
|
||||
static u32 s_nInstCacheSize = 0;
|
||||
|
@ -111,7 +112,7 @@ static u32 dumplog = 0;
|
|||
//static const char *txt2 = "M32 = %x\n";
|
||||
#endif
|
||||
|
||||
static void iBranchTest(u32 newpc, bool noDispatch=false);
|
||||
static void iBranchTest(u32 newpc = 0xffffffff, bool noDispatch=false);
|
||||
static void ClearRecLUT(BASEBLOCK* base, int count);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
|
@ -446,7 +447,6 @@ u32* recAllocStackMem(int size, int align)
|
|||
return (u32*)(recStackPtr-size);
|
||||
}
|
||||
|
||||
|
||||
static const int REC_CACHEMEM = 0x01000000;
|
||||
static void __fastcall dyna_block_discard(u32 start,u32 sz);
|
||||
|
||||
|
@ -600,6 +600,7 @@ static void recShutdown( void )
|
|||
safe_aligned_free( m_recBlockAlloc );
|
||||
recRAM = recROM = recROM1 = NULL;
|
||||
recStack = NULL;
|
||||
recRAMCopy = NULL;
|
||||
|
||||
safe_free( s_pInstCache );
|
||||
s_nInstCacheSize = 0;
|
||||
|
@ -662,79 +663,6 @@ static __declspec(naked) void JITCompileInBlock()
|
|||
}
|
||||
}
|
||||
|
||||
// jumped to when an immediate branch (EE side) hasn't been statically linked yet.
|
||||
// Block is compiled if needed, and the link is made.
|
||||
// EDX contains the jump addr to modify
|
||||
static __naked void Dispatcher()
|
||||
{
|
||||
__asm {
|
||||
mov eax, dword ptr [cpuRegs.pc]
|
||||
mov ebx, eax
|
||||
shr eax, 16
|
||||
mov ecx, dword ptr [recLUT+eax*4]
|
||||
mov eax, dword ptr [ecx+ebx]
|
||||
|
||||
cmp eax, offset JITCompile
|
||||
je notcompiled
|
||||
cmp eax, offset JITCompileInBlock
|
||||
je notcompiled
|
||||
lea ebx, [eax-4]
|
||||
sub ebx, edx
|
||||
mov dword ptr [edx], ebx
|
||||
jmp eax
|
||||
|
||||
align 16
|
||||
notcompiled:
|
||||
mov esi, edx
|
||||
lea edi, [ecx+ebx]
|
||||
push ebx
|
||||
call recRecompile
|
||||
add esp, 4
|
||||
|
||||
mov eax, dword ptr [edi]
|
||||
lea ebx, [eax-4]
|
||||
sub ebx, esi
|
||||
mov dword ptr [esi], ebx
|
||||
jmp eax
|
||||
}
|
||||
}
|
||||
|
||||
// edx - block start pc
|
||||
// stack - x86Ptr[0]
|
||||
static __naked void DispatcherClear()
|
||||
{
|
||||
__asm {
|
||||
mov [cpuRegs.pc], edx
|
||||
mov ebx, edx
|
||||
shr edx, 16
|
||||
mov ecx, dword ptr [recLUT+edx*4]
|
||||
mov eax, dword ptr [ecx+ebx]
|
||||
|
||||
cmp eax, offset JITCompile
|
||||
je notcompiled
|
||||
cmp eax, offset JITCompileInBlock
|
||||
je notcompiled
|
||||
add esp, 4
|
||||
jmp eax
|
||||
|
||||
align 16
|
||||
notcompiled:
|
||||
lea edi, [ecx+ebx]
|
||||
push ebx
|
||||
call recRecompile
|
||||
add esp, 4
|
||||
mov eax, dword ptr [edi]
|
||||
|
||||
pop ecx
|
||||
mov byte ptr [ecx], 0xe9 // jmp32
|
||||
lea ebx, [eax-5]
|
||||
sub ebx, ecx
|
||||
mov dword ptr [ecx+1], ebx
|
||||
|
||||
jmp eax
|
||||
}
|
||||
}
|
||||
|
||||
// called when jumping to variable pc address
|
||||
static void __naked DispatcherReg()
|
||||
{
|
||||
|
@ -943,21 +871,6 @@ void recClear(u32 addr, u32 size)
|
|||
break;
|
||||
}
|
||||
|
||||
x86Ptr[_EmitterId_] = (u8*)pblock->GetFnptr();
|
||||
|
||||
jASSUME((u8*)JITCompile != x86Ptr[_EmitterId_]);
|
||||
jASSUME((u8*)JITCompileInBlock != x86Ptr[_EmitterId_]);
|
||||
|
||||
// Actually, if we want to do this at all maybe keeping a hash
|
||||
// table of const jumps and modifying the jumps straight from
|
||||
// here is the way to go.
|
||||
|
||||
// there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which
|
||||
MOV32ItoR(EDX, blockstart);
|
||||
assert((uptr)x86Ptr[_EmitterId_] <= 0xffffffff);
|
||||
PUSH32I((uptr)x86Ptr[_EmitterId_]); // will be replaced by JMP32
|
||||
JMP32((uptr)DispatcherClear - ((uptr)x86Ptr[_EmitterId_] + 5));
|
||||
|
||||
lowerextent = min(lowerextent, blockstart);
|
||||
upperextent = max(upperextent, blockend);
|
||||
// This might end up inside a block that doesn't contain the clearing range,
|
||||
|
@ -1055,7 +968,7 @@ void SetBranchReg( u32 reg )
|
|||
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
iBranchTest(0xffffffff);
|
||||
iBranchTest();
|
||||
}
|
||||
|
||||
void SetBranchImm( u32 imm )
|
||||
|
@ -1065,9 +978,7 @@ void SetBranchImm( u32 imm )
|
|||
assert( imm );
|
||||
|
||||
// end the current block
|
||||
MOV32ItoM( (uptr)&cpuRegs.pc, imm );
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
iBranchTest(imm);
|
||||
}
|
||||
|
||||
|
@ -1197,6 +1108,19 @@ u32 eeScaleBlockCycles()
|
|||
return temp >> (3+2);
|
||||
}
|
||||
|
||||
// Emits the end-of-block transfer to the guest address newpc and registers the
// emitted jump with the block manager so it is linked directly to the target.
//
//   newpc - guest address to continue at; also stored to cpuRegs.pc by the
//           emitted code before the jump
//   type  - 0: emit JMP32 (unconditional)
//           1: emit JS32 (conditional, as used by iBranchTest)
//
// The jump is emitted with a zero displacement; recBlocks.Link() fills in the
// real displacement and remembers the location for later re-patching.
//
// Any other value of type is a caller bug. It asserts in debug builds and, in
// release builds, returns without linking rather than handing an
// uninitialized pointer to Link(), which would write through it.
static void iBranch(u32 newpc, int type)
{
	u32* ptr;

	MOV32ItoM((uptr)&cpuRegs.pc, newpc);

	if (type == 0)
		ptr = JMP32(0);
	else if (type == 1)
		ptr = JS32(0);
	else
	{
		assert(!"iBranch: unknown jump type");
		return;
	}

	recBlocks.Link(HWADDR(newpc), (uptr)ptr);
}
|
||||
|
||||
// Generates dynarec code for Event tests followed by a block dispatch (branch).
|
||||
// Parameters:
|
||||
// newpc - address to jump to at the end of the block. If newpc == 0xffffffff then
|
||||
|
@ -1212,41 +1136,23 @@ static void iBranchTest(u32 newpc, bool noDispatch)
|
|||
#ifdef _DEBUG
|
||||
//CALLFunc((uptr)testfpu);
|
||||
#endif
|
||||
u32* ptr;
|
||||
|
||||
if( bExecBIOS ) CheckForBIOSEnd();
|
||||
|
||||
MOV32MtoR(EAX, (uptr)&cpuRegs.cycle);
|
||||
if( !noDispatch && newpc != 0xffffffff )
|
||||
{
|
||||
// Optimization note: Instructions order to pair EDX with EAX's load above.
|
||||
|
||||
// Load EDX with the address of the JS32 jump below.
|
||||
// We do this because the Dispatcher will use this info to modify
|
||||
// the JS instruction later on with the address of the block it's jumping
|
||||
// to; creating a static link of blocks that doesn't require the overhead
|
||||
// of a dispatcher.
|
||||
MOV32ItoR(EDX, 0);
|
||||
ptr = (u32*)(x86Ptr[0]-4);
|
||||
}
|
||||
|
||||
// Check the Event scheduler if our "cycle target" has been reached.
|
||||
// Equiv code to:
|
||||
// cpuRegs.cycle += blockcycles;
|
||||
// if( cpuRegs.cycle > g_nextBranchCycle ) { DoEvents(); }
|
||||
MOV32MtoR(EAX, (uptr)&cpuRegs.cycle);
|
||||
ADD32ItoR(EAX, eeScaleBlockCycles());
|
||||
MOV32RtoM((uptr)&cpuRegs.cycle, EAX); // update cycles
|
||||
SUB32MtoR(EAX, (uptr)&g_nextBranchCycle);
|
||||
|
||||
if( newpc != 0xffffffff )
|
||||
{
|
||||
// This is the jump instruction which gets modified by Dispatcher.
|
||||
*ptr = (u32)JS32((u32)Dispatcher - ( (u32)x86Ptr[0] + 6 ));
|
||||
}
|
||||
else if( !noDispatch )
|
||||
{
|
||||
// This instruction is a dynamic link, so it's never modified.
|
||||
JS32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 6 ));
|
||||
if (!noDispatch) {
|
||||
if (newpc == 0xffffffff)
|
||||
JS32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 6 ));
|
||||
else
|
||||
iBranch(newpc, 1);
|
||||
}
|
||||
|
||||
RET();
|
||||
|
@ -1449,7 +1355,6 @@ void recRecompile( const u32 startpc )
|
|||
u32 i = 0;
|
||||
u32 branchTo;
|
||||
u32 willbranch3 = 0;
|
||||
u32* ptr;
|
||||
u32 usecop2;
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
@ -1470,6 +1375,10 @@ void recRecompile( const u32 startpc )
|
|||
recResetEE();
|
||||
}
|
||||
|
||||
x86SetPtr( recPtr );
|
||||
x86Align(16);
|
||||
recPtr = x86Ptr[_EmitterId_];
|
||||
|
||||
s_pCurBlock = PC_GETBLOCK(startpc);
|
||||
|
||||
assert(s_pCurBlock->GetFnptr() == (uptr)JITCompile
|
||||
|
@ -1478,20 +1387,17 @@ void recRecompile( const u32 startpc )
|
|||
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
|
||||
assert(!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc));
|
||||
|
||||
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
|
||||
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
|
||||
|
||||
if( s_pCurBlockEx == NULL ) {
|
||||
//SysPrintf("ee reset (blocks)\n");
|
||||
recResetEE();
|
||||
s_pCurBlockEx = recBlocks.New(HWADDR(startpc));
|
||||
x86SetPtr( recPtr );
|
||||
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
|
||||
}
|
||||
|
||||
assert(s_pCurBlockEx);
|
||||
|
||||
x86SetPtr( recPtr );
|
||||
x86Align(16);
|
||||
recPtr = x86Ptr[0];
|
||||
|
||||
branch = 0;
|
||||
|
||||
// reset recomp state variables
|
||||
|
@ -1852,9 +1758,7 @@ StartRecomp:
|
|||
break;
|
||||
if (memcmp(&recRAMCopy[oldBlock->startpc / 4], PSM(oldBlock->startpc),
|
||||
oldBlock->size * 4)) {
|
||||
u8* oldX86 = x86Ptr[_EmitterId_];
|
||||
recClear(startpc, (pc - startpc) / 4);
|
||||
x86Ptr[_EmitterId_] = oldX86;
|
||||
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
|
||||
assert(s_pCurBlockEx->startpc == HWADDR(startpc));
|
||||
break;
|
||||
|
@ -1893,21 +1797,9 @@ StartRecomp:
|
|||
else
|
||||
ADD32ItoM((int)&cpuRegs.cycle, eeScaleBlockCycles() );
|
||||
|
||||
if( willbranch3 ) {
|
||||
BASEBLOCK* pblock = PC_GETBLOCK(s_nEndBlock);
|
||||
assert( pc == s_nEndBlock );
|
||||
if( willbranch3 || !branch) {
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
MOV32ItoM((uptr)&cpuRegs.pc, pc);
|
||||
JMP32((uptr)pblock->GetFnptr() - ((uptr)x86Ptr[0] + 5));
|
||||
branch = 3;
|
||||
}
|
||||
else if( !branch ) {
|
||||
// didn't branch, but had to stop
|
||||
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
|
||||
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
ptr = JMP32(0);
|
||||
iBranch(pc, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1915,20 +1807,13 @@ StartRecomp:
|
|||
assert( recStackPtr < recStack+RECSTACK_SIZE );
|
||||
assert( x86FpuState == 0 );
|
||||
|
||||
assert(x86Ptr[_EmitterId_] - recPtr < 0x10000);
|
||||
s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr;
|
||||
|
||||
recPtr = x86Ptr[0];
|
||||
|
||||
assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg );
|
||||
|
||||
if( !branch ) {
|
||||
assert( ptr != NULL );
|
||||
BASEBLOCK *pblock = PC_GETBLOCK(pc);
|
||||
|
||||
if (pblock->GetFnptr() == (uptr)JITCompile
|
||||
|| pblock->GetFnptr() == (uptr)JITCompileInBlock)
|
||||
recRecompile(pc);
|
||||
*ptr = pblock->GetFnptr() - ((u32)ptr + 4);
|
||||
}
|
||||
|
||||
s_pCurBlock = NULL;
|
||||
s_pCurBlockEx = NULL;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue