LLE JIT: Added an ASM dispatcher. Should help pave the way for future optimisation.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6486 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
skidau 2010-11-28 05:28:21 +00:00
parent 88916a61da
commit 6a6fb9cce9
4 changed files with 124 additions and 47 deletions

View File

@ -37,6 +37,7 @@
SDSP g_dsp; SDSP g_dsp;
DSPBreakpoints dsp_breakpoints; DSPBreakpoints dsp_breakpoints;
DSPCoreState core_state = DSPCORE_STOP; DSPCoreState core_state = DSPCORE_STOP;
int cyclesLeft = 0;
DSPEmitter *jit = NULL; DSPEmitter *jit = NULL;
Common::Event step_event; Common::Event step_event;
@ -71,6 +72,7 @@ bool DSPCore_Init(const char *irom_filename, const char *coef_filename,
bool bUsingJIT) bool bUsingJIT)
{ {
g_dsp.step_counter = 0; g_dsp.step_counter = 0;
cyclesLeft = 0;
jit = NULL; jit = NULL;
g_dsp.irom = (u16*)AllocateMemoryPages(DSP_IROM_BYTE_SIZE); g_dsp.irom = (u16*)AllocateMemoryPages(DSP_IROM_BYTE_SIZE);
@ -224,13 +226,18 @@ void DSPCore_CheckExceptions()
// Handle state changes and stepping. // Handle state changes and stepping.
int DSPCore_RunCycles(int cycles) int DSPCore_RunCycles(int cycles)
{ {
static int spare_cycles = 0;
if (jit) if (jit)
{ {
// DSPCore_CheckExceptions(); // DSPCore_CheckExceptions();
// DSPCore_CheckExternalInterrupt(); // DSPCore_CheckExternalInterrupt();
spare_cycles = jit->RunForCycles(cycles + spare_cycles); cyclesLeft = cycles;
return 0;
CompiledCode pExecAddr = (CompiledCode)jit->enterDispatcher;
pExecAddr();
// To use the C++ dispatcher, uncomment the line below and comment out the two lines above
//jit->RunForCycles(cyclesLeft);
return cyclesLeft;
} }
while (cycles > 0) { while (cycles > 0) {
@ -284,7 +291,8 @@ void DSPCore_Step()
step_event.Set(); step_event.Set();
} }
void CompileCurrent() { void CompileCurrent()
{
jit->Compile(g_dsp.pc); jit->Compile(g_dsp.pc);
} }

View File

@ -234,6 +234,7 @@ struct SDSP
extern SDSP g_dsp; extern SDSP g_dsp;
extern DSPBreakpoints dsp_breakpoints; extern DSPBreakpoints dsp_breakpoints;
extern DSPEmitter *jit; extern DSPEmitter *jit;
extern int cyclesLeft;
bool DSPCore_Init(const char *irom_filename, const char *coef_filename, bool DSPCore_Init(const char *irom_filename, const char *coef_filename,
bool bUsingJIT); bool bUsingJIT);

View File

@ -26,6 +26,7 @@
#include "ABI.h" #include "ABI.h"
#define MAX_BLOCK_SIZE 250 #define MAX_BLOCK_SIZE 250
#define DSP_IDLE_SKIP_CYCLES 1000
using namespace Gen; using namespace Gen;
@ -150,9 +151,9 @@ void DSPEmitter::Default(UDSPInstruction _inst)
EmitInstruction(_inst); EmitInstruction(_inst);
} }
const u8 *DSPEmitter::Compile(int start_addr) { void DSPEmitter::Compile(int start_addr)
AlignCode16(); {
const u8 *entryPoint = GetCodePtr(); const u8 *entryPoint = AlignCode16();
ABI_PushAllCalleeSavedRegsAndAdjustStack(); ABI_PushAllCalleeSavedRegsAndAdjustStack();
// ABI_AlignStack(0); // ABI_AlignStack(0);
@ -185,13 +186,8 @@ const u8 *DSPEmitter::Compile(int start_addr) {
const DSPOPCTemplate *opcode = GetOpTemplate(inst); const DSPOPCTemplate *opcode = GetOpTemplate(inst);
// Increment PC - we shouldn't need to do this for every instruction. only for branches and end of block. // Increment PC - we shouldn't need to do this for every instruction. only for branches and end of block.
// fallbacks to interpreter need this for fetching immedate values // Fallbacks to interpreter need this for fetching immediate values
#ifdef _M_IX86 // All32
ADD(16, M(&(g_dsp.pc)), Imm16(1)); ADD(16, M(&(g_dsp.pc)), Imm16(1));
#else
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
ADD(16, MDisp(RAX,0), Imm16(1));
#endif
EmitInstruction(inst); EmitInstruction(inst);
@ -202,7 +198,6 @@ const u8 *DSPEmitter::Compile(int start_addr) {
// by the analyzer. // by the analyzer.
if (DSPAnalyzer::code_flags[addr-1] & DSPAnalyzer::CODE_LOOP_END) if (DSPAnalyzer::code_flags[addr-1] & DSPAnalyzer::CODE_LOOP_END)
{ {
// TODO: Change to TEST for some reason (who added this comment?)
#ifdef _M_IX86 // All32 #ifdef _M_IX86 // All32
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2]))); MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2])));
#else #else
@ -232,16 +227,19 @@ const u8 *DSPEmitter::Compile(int start_addr) {
SetJumpTarget(rLoopCounterExit); SetJumpTarget(rLoopCounterExit);
} }
if (opcode->branch) { if (opcode->branch)
if (opcode->uncond_branch) { {
if (opcode->uncond_branch)
{
break; break;
} else { }
else
{
//look at g_dsp.pc if we actually branched //look at g_dsp.pc if we actually branched
#ifdef _M_IX86 // All32 #ifdef _M_IX86 // All32
MOV(16, R(AX), M(&(g_dsp.pc))); MOV(16, R(AX), M(&g_dsp.pc));
#else #else
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); MOVZX(32, 16, EAX, M(&g_dsp.pc));
MOV(16, R(AX), MDisp(RAX,0));
#endif #endif
CMP(16, R(AX), Imm16(addr)); CMP(16, R(AX), Imm16(addr));
FixupBranch rNoBranch = J_CC(CC_Z); FixupBranch rNoBranch = J_CC(CC_Z);
@ -256,7 +254,8 @@ const u8 *DSPEmitter::Compile(int start_addr) {
} }
// End the block if we're before an idle skip address // End the block if we're before an idle skip address
if (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_IDLE_SKIP) { if (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_IDLE_SKIP)
{
break; break;
} }
} }
@ -274,56 +273,129 @@ const u8 *DSPEmitter::Compile(int start_addr) {
ABI_PopAllCalleeSavedRegsAndAdjustStack(); ABI_PopAllCalleeSavedRegsAndAdjustStack();
MOV(32,R(EAX),Imm32(blockSize[start_addr])); MOV(32,R(EAX),Imm32(blockSize[start_addr]));
RET(); RET();
return entryPoint;
} }
void STACKALIGN DSPEmitter::CompileDispatcher() void DSPEmitter::CompileDispatcher()
{ {
/* enterDispatcher = AlignCode16();
// TODO
enterDispatcher = GetCodePtr();
AlignCode16();
ABI_PushAllCalleeSavedRegsAndAdjustStack(); ABI_PushAllCalleeSavedRegsAndAdjustStack();
const u8 *outer_loop = GetCodePtr();
// Cache pointers into registers
//Landing pad for drec space #ifdef _M_IX86
MOV(32, R(ESI), M(&cyclesLeft));
MOV(32, R(EBX), Imm32((u32)blocks));
MOV(32, R(EDI), Imm32((u32)blockSize));
#else
MOV(32, R(ESI), M(&cyclesLeft));
MOV(64, R(RBX), Imm64((u64)blocks));
MOV(64, R(RDI), Imm64((u64)blockSize));
#endif
const u8 *dispatcherLoop = GetCodePtr();
// Check for DSP halt
TEST(8, M(&g_dsp.cr), Imm8(CR_HALT));
FixupBranch halt = J_CC(CC_NE);
// Check if block has been compiled (blockSize > 0)
MOVZX(32, 16, ECX, M(&g_dsp.pc));
MOVZX(32, 16, EAX, MComplex(RDI, ECX, SCALE_2, 0));
TEST(16, R(AX), R(AX));
// Compile block if needed
FixupBranch found = J_CC(CC_NE);
CALL((void *)CompileCurrent);
SetJumpTarget(found);
// Check if we have enough cycles to execute
CMP(32, R(ESI), R(EAX));
FixupBranch noCycles = J_CC(CC_B);
// Check for idle skip (C++ version below)
// if (code_flags[pc] & CODE_IDLE_SKIP)
// if (cycles > DSP_IDLE_SKIP_CYCLES) cycles -= DSP_IDLE_SKIP_CYCLES;
// else cycles = 0;
#ifdef _M_IX86
MOV(32, R(EDX), Imm32((u32)DSPAnalyzer::code_flags));
#else
MOV(64, R(RDX), Imm64((u64)DSPAnalyzer::code_flags));
#endif
TEST(8, MComplex(RDX, ECX, SCALE_1, 0), Imm8(DSPAnalyzer::CODE_IDLE_SKIP));
FixupBranch noIdleSkip = J_CC(CC_E);
SUB(32, R(ESI), Imm32(DSP_IDLE_SKIP_CYCLES));
FixupBranch idleSkip = J_CC(CC_A);
//MOV(32, M(&cyclesLeft), Imm32(0));
ABI_PopAllCalleeSavedRegsAndAdjustStack(); ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();*/ RET();
SetJumpTarget(idleSkip);
SetJumpTarget(noIdleSkip);
// Execute block. Cycles executed returned in EAX.
#ifdef _M_IX86
CALLptr(MComplex(EBX, ECX, SCALE_4, 0));
#else
CALLptr(MComplex(RBX, ECX, SCALE_8, 0));
#endif
// Decrement cyclesLeft
SUB(32, R(ESI), R(EAX));
J_CC(CC_A, dispatcherLoop);
// Not enough cycles.
SetJumpTarget(noCycles);
//MOV(32, M(&cyclesLeft), R(ESI));
//ABI_PopAllCalleeSavedRegsAndAdjustStack();
//RET();
// DSP gave up the remaining cycles.
SetJumpTarget(halt);
//MOV(32, M(&cyclesLeft), Imm32(0));
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
} }
// Don't use the % operator in the inner loop. It's slow. // Don't use the % operator in the inner loop. It's slow.
int STACKALIGN DSPEmitter::RunForCycles(int cycles) int STACKALIGN DSPEmitter::RunForCycles(int cycles)
{ {
const int idle_cycles = 1000; const int idle_cycles = DSP_IDLE_SKIP_CYCLES;
while (!(g_dsp.cr & CR_HALT)) while (!(g_dsp.cr & CR_HALT))
{ {
// Compile the block if needed // Compile the block if needed
u16 block_addr = g_dsp.pc; u16 block_addr = g_dsp.pc;
if (!blocks[block_addr]) int block_size = blockSize[block_addr];
if (!block_size)
{ {
CompileCurrent(); CompileCurrent();
block_size = blockSize[block_addr];
} }
int block_size = blockSize[block_addr];
// Execute the block if we have enough cycles // Execute the block if we have enough cycles
if (cycles > block_size) if (cycles > block_size)
{ {
int c = blocks[block_addr](); int c = blocks[block_addr]();
if (DSPAnalyzer::code_flags[block_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { if (DSPAnalyzer::code_flags[block_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
{
if (cycles > idle_cycles) if (cycles > idle_cycles)
cycles -= idle_cycles; cycles -= idle_cycles;
else else
cycles = 0; cycles = 0;
} else { }
else
{
cycles -= c; cycles -= c;
} }
} }
else { else
{
break; break;
} }
} }
// DSP gave up the remaining cycles.
if (g_dsp.cr & CR_HALT)
return 0;
return cycles; return cycles;
} }

View File

@ -42,7 +42,7 @@ public:
void CompileDispatcher(); void CompileDispatcher();
const u8 *Compile(int start_addr); void Compile(int start_addr);
int STACKALIGN RunForCycles(int cycles); int STACKALIGN RunForCycles(int cycles);
@ -113,18 +113,14 @@ public:
void mrr(const UDSPInstruction opc); void mrr(const UDSPInstruction opc);
void nx(const UDSPInstruction opc); void nx(const UDSPInstruction opc);
// CALL this to start the dispatcher
const u8 *enterDispatcher;
private: private:
CompiledCode *blocks; CompiledCode *blocks;
u16 *blockSize; u16 *blockSize;
u16 compileSR; u16 compileSR;
// CALL this to start the dispatcher
u8 *enterDispatcher;
// JMP here when a block should be dispatches. make sure you're in a block
// or at the same stack level already.
u8 *dispatcher;
// The index of the last stored ext value (compile time). // The index of the last stored ext value (compile time).
int storeIndex; int storeIndex;
int storeIndex2; int storeIndex2;