LLE JIT: Added an ASM dispatcher. Should help pave the way for future optimisation.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6486 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
88916a61da
commit
6a6fb9cce9
|
@ -37,6 +37,7 @@
|
|||
SDSP g_dsp;
|
||||
DSPBreakpoints dsp_breakpoints;
|
||||
DSPCoreState core_state = DSPCORE_STOP;
|
||||
int cyclesLeft = 0;
|
||||
DSPEmitter *jit = NULL;
|
||||
Common::Event step_event;
|
||||
|
||||
|
@ -71,6 +72,7 @@ bool DSPCore_Init(const char *irom_filename, const char *coef_filename,
|
|||
bool bUsingJIT)
|
||||
{
|
||||
g_dsp.step_counter = 0;
|
||||
cyclesLeft = 0;
|
||||
jit = NULL;
|
||||
|
||||
g_dsp.irom = (u16*)AllocateMemoryPages(DSP_IROM_BYTE_SIZE);
|
||||
|
@ -224,13 +226,18 @@ void DSPCore_CheckExceptions()
|
|||
// Handle state changes and stepping.
|
||||
int DSPCore_RunCycles(int cycles)
|
||||
{
|
||||
static int spare_cycles = 0;
|
||||
if (jit)
|
||||
{
|
||||
// DSPCore_CheckExceptions();
|
||||
// DSPCore_CheckExternalInterrupt();
|
||||
spare_cycles = jit->RunForCycles(cycles + spare_cycles);
|
||||
return 0;
|
||||
cyclesLeft = cycles;
|
||||
|
||||
CompiledCode pExecAddr = (CompiledCode)jit->enterDispatcher;
|
||||
pExecAddr();
|
||||
|
||||
// To use the C++ dispatcher, uncomment the line below and comment out the two lines above
|
||||
//jit->RunForCycles(cyclesLeft);
|
||||
return cyclesLeft;
|
||||
}
|
||||
|
||||
while (cycles > 0) {
|
||||
|
@ -284,7 +291,8 @@ void DSPCore_Step()
|
|||
step_event.Set();
|
||||
}
|
||||
|
||||
void CompileCurrent() {
|
||||
void CompileCurrent()
|
||||
{
|
||||
jit->Compile(g_dsp.pc);
|
||||
}
|
||||
|
||||
|
|
|
@ -234,6 +234,7 @@ struct SDSP
|
|||
extern SDSP g_dsp;
|
||||
extern DSPBreakpoints dsp_breakpoints;
|
||||
extern DSPEmitter *jit;
|
||||
extern int cyclesLeft;
|
||||
|
||||
bool DSPCore_Init(const char *irom_filename, const char *coef_filename,
|
||||
bool bUsingJIT);
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "ABI.h"
|
||||
|
||||
#define MAX_BLOCK_SIZE 250
|
||||
#define DSP_IDLE_SKIP_CYCLES 1000
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
|
@ -150,9 +151,9 @@ void DSPEmitter::Default(UDSPInstruction _inst)
|
|||
EmitInstruction(_inst);
|
||||
}
|
||||
|
||||
const u8 *DSPEmitter::Compile(int start_addr) {
|
||||
AlignCode16();
|
||||
const u8 *entryPoint = GetCodePtr();
|
||||
void DSPEmitter::Compile(int start_addr)
|
||||
{
|
||||
const u8 *entryPoint = AlignCode16();
|
||||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||
// ABI_AlignStack(0);
|
||||
|
||||
|
@ -185,13 +186,8 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
|||
const DSPOPCTemplate *opcode = GetOpTemplate(inst);
|
||||
|
||||
// Increment PC - we shouldn't need to do this for every instruction, only for branches and at the end of a block.
|
||||
// fallbacks to interpreter need this for fetching immedate values
|
||||
#ifdef _M_IX86 // All32
|
||||
// Fallbacks to interpreter need this for fetching immediate values
|
||||
ADD(16, M(&(g_dsp.pc)), Imm16(1));
|
||||
#else
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
ADD(16, MDisp(RAX,0), Imm16(1));
|
||||
#endif
|
||||
|
||||
EmitInstruction(inst);
|
||||
|
||||
|
@ -202,7 +198,6 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
|||
// by the analyzer.
|
||||
if (DSPAnalyzer::code_flags[addr-1] & DSPAnalyzer::CODE_LOOP_END)
|
||||
{
|
||||
// TODO: Change to TEST for some reason (who added this comment?)
|
||||
#ifdef _M_IX86 // All32
|
||||
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2])));
|
||||
#else
|
||||
|
@ -232,16 +227,19 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
|||
SetJumpTarget(rLoopCounterExit);
|
||||
}
|
||||
|
||||
if (opcode->branch) {
|
||||
if (opcode->uncond_branch) {
|
||||
if (opcode->branch)
|
||||
{
|
||||
if (opcode->uncond_branch)
|
||||
{
|
||||
break;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
// Look at g_dsp.pc to check whether we actually branched
|
||||
#ifdef _M_IX86 // All32
|
||||
MOV(16, R(AX), M(&(g_dsp.pc)));
|
||||
MOV(16, R(AX), M(&g_dsp.pc));
|
||||
#else
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
MOV(16, R(AX), MDisp(RAX,0));
|
||||
MOVZX(32, 16, EAX, M(&g_dsp.pc));
|
||||
#endif
|
||||
CMP(16, R(AX), Imm16(addr));
|
||||
FixupBranch rNoBranch = J_CC(CC_Z);
|
||||
|
@ -256,7 +254,8 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
|||
}
|
||||
|
||||
// End the block if we're before an idle skip address
|
||||
if (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_IDLE_SKIP) {
|
||||
if (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -274,56 +273,129 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
|||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
MOV(32,R(EAX),Imm32(blockSize[start_addr]));
|
||||
RET();
|
||||
|
||||
return entryPoint;
|
||||
}
|
||||
|
||||
void STACKALIGN DSPEmitter::CompileDispatcher()
|
||||
void DSPEmitter::CompileDispatcher()
|
||||
{
|
||||
/*
|
||||
// TODO
|
||||
enterDispatcher = GetCodePtr();
|
||||
AlignCode16();
|
||||
enterDispatcher = AlignCode16();
|
||||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||
|
||||
const u8 *outer_loop = GetCodePtr();
|
||||
|
||||
|
||||
//Landing pad for drec space
|
||||
// Cache pointers into registers
|
||||
#ifdef _M_IX86
|
||||
MOV(32, R(ESI), M(&cyclesLeft));
|
||||
MOV(32, R(EBX), Imm32((u32)blocks));
|
||||
MOV(32, R(EDI), Imm32((u32)blockSize));
|
||||
#else
|
||||
MOV(32, R(ESI), M(&cyclesLeft));
|
||||
MOV(64, R(RBX), Imm64((u64)blocks));
|
||||
MOV(64, R(RDI), Imm64((u64)blockSize));
|
||||
#endif
|
||||
|
||||
const u8 *dispatcherLoop = GetCodePtr();
|
||||
|
||||
// Check for DSP halt
|
||||
TEST(8, M(&g_dsp.cr), Imm8(CR_HALT));
|
||||
FixupBranch halt = J_CC(CC_NE);
|
||||
|
||||
// Check if block has been compiled (blockSize > 0)
|
||||
MOVZX(32, 16, ECX, M(&g_dsp.pc));
|
||||
MOVZX(32, 16, EAX, MComplex(RDI, ECX, SCALE_2, 0));
|
||||
TEST(16, R(AX), R(AX));
|
||||
|
||||
// Compile block if needed
|
||||
FixupBranch found = J_CC(CC_NE);
|
||||
CALL((void *)CompileCurrent);
|
||||
SetJumpTarget(found);
|
||||
|
||||
// Check if we have enough cycles to execute
|
||||
CMP(32, R(ESI), R(EAX));
|
||||
FixupBranch noCycles = J_CC(CC_B);
|
||||
|
||||
// Check for idle skip (C++ version below)
|
||||
// if (code_flags[pc] & CODE_IDLE_SKIP)
|
||||
// if (cycles > DSP_IDLE_SKIP_CYCLES) cycles -= DSP_IDLE_SKIP_CYCLES;
|
||||
// else cycles = 0;
|
||||
#ifdef _M_IX86
|
||||
MOV(32, R(EDX), Imm32((u32)DSPAnalyzer::code_flags));
|
||||
#else
|
||||
MOV(64, R(RDX), Imm64((u64)DSPAnalyzer::code_flags));
|
||||
#endif
|
||||
TEST(8, MComplex(RDX, ECX, SCALE_1, 0), Imm8(DSPAnalyzer::CODE_IDLE_SKIP));
|
||||
FixupBranch noIdleSkip = J_CC(CC_E);
|
||||
SUB(32, R(ESI), Imm32(DSP_IDLE_SKIP_CYCLES));
|
||||
FixupBranch idleSkip = J_CC(CC_A);
|
||||
//MOV(32, M(&cyclesLeft), Imm32(0));
|
||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
RET();*/
|
||||
RET();
|
||||
SetJumpTarget(idleSkip);
|
||||
SetJumpTarget(noIdleSkip);
|
||||
|
||||
// Execute block. Cycles executed returned in EAX.
|
||||
#ifdef _M_IX86
|
||||
CALLptr(MComplex(EBX, ECX, SCALE_4, 0));
|
||||
#else
|
||||
CALLptr(MComplex(RBX, ECX, SCALE_8, 0));
|
||||
#endif
|
||||
|
||||
// Decrement cyclesLeft
|
||||
SUB(32, R(ESI), R(EAX));
|
||||
|
||||
J_CC(CC_A, dispatcherLoop);
|
||||
|
||||
// Not enough cycles.
|
||||
SetJumpTarget(noCycles);
|
||||
//MOV(32, M(&cyclesLeft), R(ESI));
|
||||
//ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
//RET();
|
||||
|
||||
// DSP gave up the remaining cycles.
|
||||
SetJumpTarget(halt);
|
||||
//MOV(32, M(&cyclesLeft), Imm32(0));
|
||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
RET();
|
||||
}
|
||||
|
||||
// Don't use the % operator in the inner loop. It's slow.
|
||||
int STACKALIGN DSPEmitter::RunForCycles(int cycles)
|
||||
{
|
||||
const int idle_cycles = 1000;
|
||||
const int idle_cycles = DSP_IDLE_SKIP_CYCLES;
|
||||
|
||||
while (!(g_dsp.cr & CR_HALT))
|
||||
{
|
||||
// Compile the block if needed
|
||||
u16 block_addr = g_dsp.pc;
|
||||
if (!blocks[block_addr])
|
||||
int block_size = blockSize[block_addr];
|
||||
if (!block_size)
|
||||
{
|
||||
CompileCurrent();
|
||||
block_size = blockSize[block_addr];
|
||||
}
|
||||
int block_size = blockSize[block_addr];
|
||||
|
||||
// Execute the block if we have enough cycles
|
||||
if (cycles > block_size)
|
||||
{
|
||||
int c = blocks[block_addr]();
|
||||
if (DSPAnalyzer::code_flags[block_addr] & DSPAnalyzer::CODE_IDLE_SKIP) {
|
||||
if (DSPAnalyzer::code_flags[block_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
||||
{
|
||||
if (cycles > idle_cycles)
|
||||
cycles -= idle_cycles;
|
||||
else
|
||||
cycles = 0;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
cycles -= c;
|
||||
}
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// DSP gave up the remaining cycles.
|
||||
if (g_dsp.cr & CR_HALT)
|
||||
return 0;
|
||||
|
||||
return cycles;
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ public:
|
|||
|
||||
void CompileDispatcher();
|
||||
|
||||
const u8 *Compile(int start_addr);
|
||||
void Compile(int start_addr);
|
||||
|
||||
int STACKALIGN RunForCycles(int cycles);
|
||||
|
||||
|
@ -113,18 +113,14 @@ public:
|
|||
void mrr(const UDSPInstruction opc);
|
||||
void nx(const UDSPInstruction opc);
|
||||
|
||||
// CALL this to start the dispatcher
|
||||
const u8 *enterDispatcher;
|
||||
|
||||
private:
|
||||
CompiledCode *blocks;
|
||||
u16 *blockSize;
|
||||
u16 compileSR;
|
||||
|
||||
// CALL this to start the dispatcher
|
||||
u8 *enterDispatcher;
|
||||
|
||||
// JMP here when a block should be dispatched. Make sure you're in a block
|
||||
// or at the same stack level already.
|
||||
u8 *dispatcher;
|
||||
|
||||
// The index of the last stored ext value (compile time).
|
||||
int storeIndex;
|
||||
int storeIndex2;
|
||||
|
|
Loading…
Reference in New Issue