LLE JIT: Added an ASM dispatcher. Should help pave the way for future optimisation.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6486 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
88916a61da
commit
6a6fb9cce9
|
@ -37,6 +37,7 @@
|
||||||
SDSP g_dsp;
|
SDSP g_dsp;
|
||||||
DSPBreakpoints dsp_breakpoints;
|
DSPBreakpoints dsp_breakpoints;
|
||||||
DSPCoreState core_state = DSPCORE_STOP;
|
DSPCoreState core_state = DSPCORE_STOP;
|
||||||
|
int cyclesLeft = 0;
|
||||||
DSPEmitter *jit = NULL;
|
DSPEmitter *jit = NULL;
|
||||||
Common::Event step_event;
|
Common::Event step_event;
|
||||||
|
|
||||||
|
@ -71,6 +72,7 @@ bool DSPCore_Init(const char *irom_filename, const char *coef_filename,
|
||||||
bool bUsingJIT)
|
bool bUsingJIT)
|
||||||
{
|
{
|
||||||
g_dsp.step_counter = 0;
|
g_dsp.step_counter = 0;
|
||||||
|
cyclesLeft = 0;
|
||||||
jit = NULL;
|
jit = NULL;
|
||||||
|
|
||||||
g_dsp.irom = (u16*)AllocateMemoryPages(DSP_IROM_BYTE_SIZE);
|
g_dsp.irom = (u16*)AllocateMemoryPages(DSP_IROM_BYTE_SIZE);
|
||||||
|
@ -224,13 +226,18 @@ void DSPCore_CheckExceptions()
|
||||||
// Handle state changes and stepping.
|
// Handle state changes and stepping.
|
||||||
int DSPCore_RunCycles(int cycles)
|
int DSPCore_RunCycles(int cycles)
|
||||||
{
|
{
|
||||||
static int spare_cycles = 0;
|
|
||||||
if (jit)
|
if (jit)
|
||||||
{
|
{
|
||||||
// DSPCore_CheckExceptions();
|
// DSPCore_CheckExceptions();
|
||||||
// DSPCore_CheckExternalInterrupt();
|
// DSPCore_CheckExternalInterrupt();
|
||||||
spare_cycles = jit->RunForCycles(cycles + spare_cycles);
|
cyclesLeft = cycles;
|
||||||
return 0;
|
|
||||||
|
CompiledCode pExecAddr = (CompiledCode)jit->enterDispatcher;
|
||||||
|
pExecAddr();
|
||||||
|
|
||||||
|
// To use the C++ dispatcher, uncomment the line below and comment out the two lines above
|
||||||
|
//jit->RunForCycles(cyclesLeft);
|
||||||
|
return cyclesLeft;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (cycles > 0) {
|
while (cycles > 0) {
|
||||||
|
@ -284,7 +291,8 @@ void DSPCore_Step()
|
||||||
step_event.Set();
|
step_event.Set();
|
||||||
}
|
}
|
||||||
|
|
||||||
void CompileCurrent() {
|
void CompileCurrent()
|
||||||
|
{
|
||||||
jit->Compile(g_dsp.pc);
|
jit->Compile(g_dsp.pc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -234,6 +234,7 @@ struct SDSP
|
||||||
extern SDSP g_dsp;
|
extern SDSP g_dsp;
|
||||||
extern DSPBreakpoints dsp_breakpoints;
|
extern DSPBreakpoints dsp_breakpoints;
|
||||||
extern DSPEmitter *jit;
|
extern DSPEmitter *jit;
|
||||||
|
extern int cyclesLeft;
|
||||||
|
|
||||||
bool DSPCore_Init(const char *irom_filename, const char *coef_filename,
|
bool DSPCore_Init(const char *irom_filename, const char *coef_filename,
|
||||||
bool bUsingJIT);
|
bool bUsingJIT);
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include "ABI.h"
|
#include "ABI.h"
|
||||||
|
|
||||||
#define MAX_BLOCK_SIZE 250
|
#define MAX_BLOCK_SIZE 250
|
||||||
|
#define DSP_IDLE_SKIP_CYCLES 1000
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
|
|
||||||
|
@ -150,9 +151,9 @@ void DSPEmitter::Default(UDSPInstruction _inst)
|
||||||
EmitInstruction(_inst);
|
EmitInstruction(_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8 *DSPEmitter::Compile(int start_addr) {
|
void DSPEmitter::Compile(int start_addr)
|
||||||
AlignCode16();
|
{
|
||||||
const u8 *entryPoint = GetCodePtr();
|
const u8 *entryPoint = AlignCode16();
|
||||||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||||
// ABI_AlignStack(0);
|
// ABI_AlignStack(0);
|
||||||
|
|
||||||
|
@ -185,13 +186,8 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
||||||
const DSPOPCTemplate *opcode = GetOpTemplate(inst);
|
const DSPOPCTemplate *opcode = GetOpTemplate(inst);
|
||||||
|
|
||||||
// Increment PC - we shouldn't need to do this for every instruction, only for branches and end of block.
|
// Increment PC - we shouldn't need to do this for every instruction, only for branches and end of block.
|
||||||
// fallbacks to interpreter need this for fetching immedate values
|
// Fallbacks to interpreter need this for fetching immediate values
|
||||||
#ifdef _M_IX86 // All32
|
|
||||||
ADD(16, M(&(g_dsp.pc)), Imm16(1));
|
ADD(16, M(&(g_dsp.pc)), Imm16(1));
|
||||||
#else
|
|
||||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
|
||||||
ADD(16, MDisp(RAX,0), Imm16(1));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
EmitInstruction(inst);
|
EmitInstruction(inst);
|
||||||
|
|
||||||
|
@ -202,7 +198,6 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
||||||
// by the analyzer.
|
// by the analyzer.
|
||||||
if (DSPAnalyzer::code_flags[addr-1] & DSPAnalyzer::CODE_LOOP_END)
|
if (DSPAnalyzer::code_flags[addr-1] & DSPAnalyzer::CODE_LOOP_END)
|
||||||
{
|
{
|
||||||
// TODO: Change to TEST for some reason (who added this comment?)
|
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2])));
|
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2])));
|
||||||
#else
|
#else
|
||||||
|
@ -232,16 +227,19 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
||||||
SetJumpTarget(rLoopCounterExit);
|
SetJumpTarget(rLoopCounterExit);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opcode->branch) {
|
if (opcode->branch)
|
||||||
if (opcode->uncond_branch) {
|
{
|
||||||
|
if (opcode->uncond_branch)
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
//look at g_dsp.pc if we actually branched
|
//look at g_dsp.pc if we actually branched
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOV(16, R(AX), M(&(g_dsp.pc)));
|
MOV(16, R(AX), M(&g_dsp.pc));
|
||||||
#else
|
#else
|
||||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
MOVZX(32, 16, EAX, M(&g_dsp.pc));
|
||||||
MOV(16, R(AX), MDisp(RAX,0));
|
|
||||||
#endif
|
#endif
|
||||||
CMP(16, R(AX), Imm16(addr));
|
CMP(16, R(AX), Imm16(addr));
|
||||||
FixupBranch rNoBranch = J_CC(CC_Z);
|
FixupBranch rNoBranch = J_CC(CC_Z);
|
||||||
|
@ -256,7 +254,8 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// End the block if we're before an idle skip address
|
// End the block if we're before an idle skip address
|
||||||
if (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_IDLE_SKIP) {
|
if (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -274,56 +273,129 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
||||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||||
MOV(32,R(EAX),Imm32(blockSize[start_addr]));
|
MOV(32,R(EAX),Imm32(blockSize[start_addr]));
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
return entryPoint;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void STACKALIGN DSPEmitter::CompileDispatcher()
|
void DSPEmitter::CompileDispatcher()
|
||||||
{
|
{
|
||||||
/*
|
enterDispatcher = AlignCode16();
|
||||||
// TODO
|
|
||||||
enterDispatcher = GetCodePtr();
|
|
||||||
AlignCode16();
|
|
||||||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||||
|
|
||||||
const u8 *outer_loop = GetCodePtr();
|
|
||||||
|
|
||||||
|
// Cache pointers into registers
|
||||||
//Landing pad for drec space
|
#ifdef _M_IX86
|
||||||
|
MOV(32, R(ESI), M(&cyclesLeft));
|
||||||
|
MOV(32, R(EBX), Imm32((u32)blocks));
|
||||||
|
MOV(32, R(EDI), Imm32((u32)blockSize));
|
||||||
|
#else
|
||||||
|
MOV(32, R(ESI), M(&cyclesLeft));
|
||||||
|
MOV(64, R(RBX), Imm64((u64)blocks));
|
||||||
|
MOV(64, R(RDI), Imm64((u64)blockSize));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const u8 *dispatcherLoop = GetCodePtr();
|
||||||
|
|
||||||
|
// Check for DSP halt
|
||||||
|
TEST(8, M(&g_dsp.cr), Imm8(CR_HALT));
|
||||||
|
FixupBranch halt = J_CC(CC_NE);
|
||||||
|
|
||||||
|
// Check if block has been compiled (blockSize > 0)
|
||||||
|
MOVZX(32, 16, ECX, M(&g_dsp.pc));
|
||||||
|
MOVZX(32, 16, EAX, MComplex(RDI, ECX, SCALE_2, 0));
|
||||||
|
TEST(16, R(AX), R(AX));
|
||||||
|
|
||||||
|
// Compile block if needed
|
||||||
|
FixupBranch found = J_CC(CC_NE);
|
||||||
|
CALL((void *)CompileCurrent);
|
||||||
|
SetJumpTarget(found);
|
||||||
|
|
||||||
|
// Check if we have enough cycles to execute
|
||||||
|
CMP(32, R(ESI), R(EAX));
|
||||||
|
FixupBranch noCycles = J_CC(CC_B);
|
||||||
|
|
||||||
|
// Check for idle skip (C++ version below)
|
||||||
|
// if (code_flags[pc] & CODE_IDLE_SKIP)
|
||||||
|
// if (cycles > DSP_IDLE_SKIP_CYCLES) cycles -= DSP_IDLE_SKIP_CYCLES;
|
||||||
|
// else cycles = 0;
|
||||||
|
#ifdef _M_IX86
|
||||||
|
MOV(32, R(EDX), Imm32((u32)DSPAnalyzer::code_flags));
|
||||||
|
#else
|
||||||
|
MOV(64, R(RDX), Imm64((u64)DSPAnalyzer::code_flags));
|
||||||
|
#endif
|
||||||
|
TEST(8, MComplex(RDX, ECX, SCALE_1, 0), Imm8(DSPAnalyzer::CODE_IDLE_SKIP));
|
||||||
|
FixupBranch noIdleSkip = J_CC(CC_E);
|
||||||
|
SUB(32, R(ESI), Imm32(DSP_IDLE_SKIP_CYCLES));
|
||||||
|
FixupBranch idleSkip = J_CC(CC_A);
|
||||||
|
//MOV(32, M(&cyclesLeft), Imm32(0));
|
||||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||||
RET();*/
|
RET();
|
||||||
|
SetJumpTarget(idleSkip);
|
||||||
|
SetJumpTarget(noIdleSkip);
|
||||||
|
|
||||||
|
// Execute block. Cycles executed returned in EAX.
|
||||||
|
#ifdef _M_IX86
|
||||||
|
CALLptr(MComplex(EBX, ECX, SCALE_4, 0));
|
||||||
|
#else
|
||||||
|
CALLptr(MComplex(RBX, ECX, SCALE_8, 0));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Decrement cyclesLeft
|
||||||
|
SUB(32, R(ESI), R(EAX));
|
||||||
|
|
||||||
|
J_CC(CC_A, dispatcherLoop);
|
||||||
|
|
||||||
|
// Not enough cycles.
|
||||||
|
SetJumpTarget(noCycles);
|
||||||
|
//MOV(32, M(&cyclesLeft), R(ESI));
|
||||||
|
//ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||||
|
//RET();
|
||||||
|
|
||||||
|
// DSP gave up the remaining cycles.
|
||||||
|
SetJumpTarget(halt);
|
||||||
|
//MOV(32, M(&cyclesLeft), Imm32(0));
|
||||||
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||||
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't use the % operator in the inner loop. It's slow.
|
// Don't use the % operator in the inner loop. It's slow.
|
||||||
int STACKALIGN DSPEmitter::RunForCycles(int cycles)
|
int STACKALIGN DSPEmitter::RunForCycles(int cycles)
|
||||||
{
|
{
|
||||||
const int idle_cycles = 1000;
|
const int idle_cycles = DSP_IDLE_SKIP_CYCLES;
|
||||||
|
|
||||||
while (!(g_dsp.cr & CR_HALT))
|
while (!(g_dsp.cr & CR_HALT))
|
||||||
{
|
{
|
||||||
// Compile the block if needed
|
// Compile the block if needed
|
||||||
u16 block_addr = g_dsp.pc;
|
u16 block_addr = g_dsp.pc;
|
||||||
if (!blocks[block_addr])
|
int block_size = blockSize[block_addr];
|
||||||
|
if (!block_size)
|
||||||
{
|
{
|
||||||
CompileCurrent();
|
CompileCurrent();
|
||||||
|
block_size = blockSize[block_addr];
|
||||||
}
|
}
|
||||||
int block_size = blockSize[block_addr];
|
|
||||||
// Execute the block if we have enough cycles
|
// Execute the block if we have enough cycles
|
||||||
if (cycles > block_size)
|
if (cycles > block_size)
|
||||||
{
|
{
|
||||||
int c = blocks[block_addr]();
|
int c = blocks[block_addr]();
|
||||||
if (DSPAnalyzer::code_flags[block_addr] & DSPAnalyzer::CODE_IDLE_SKIP) {
|
if (DSPAnalyzer::code_flags[block_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
||||||
|
{
|
||||||
if (cycles > idle_cycles)
|
if (cycles > idle_cycles)
|
||||||
cycles -= idle_cycles;
|
cycles -= idle_cycles;
|
||||||
else
|
else
|
||||||
cycles = 0;
|
cycles = 0;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
cycles -= c;
|
cycles -= c;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DSP gave up the remaining cycles.
|
||||||
|
if (g_dsp.cr & CR_HALT)
|
||||||
|
return 0;
|
||||||
|
|
||||||
return cycles;
|
return cycles;
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,7 @@ public:
|
||||||
|
|
||||||
void CompileDispatcher();
|
void CompileDispatcher();
|
||||||
|
|
||||||
const u8 *Compile(int start_addr);
|
void Compile(int start_addr);
|
||||||
|
|
||||||
int STACKALIGN RunForCycles(int cycles);
|
int STACKALIGN RunForCycles(int cycles);
|
||||||
|
|
||||||
|
@ -113,18 +113,14 @@ public:
|
||||||
void mrr(const UDSPInstruction opc);
|
void mrr(const UDSPInstruction opc);
|
||||||
void nx(const UDSPInstruction opc);
|
void nx(const UDSPInstruction opc);
|
||||||
|
|
||||||
|
// CALL this to start the dispatcher
|
||||||
|
const u8 *enterDispatcher;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CompiledCode *blocks;
|
CompiledCode *blocks;
|
||||||
u16 *blockSize;
|
u16 *blockSize;
|
||||||
u16 compileSR;
|
u16 compileSR;
|
||||||
|
|
||||||
// CALL this to start the dispatcher
|
|
||||||
u8 *enterDispatcher;
|
|
||||||
|
|
||||||
// JMP here when a block should be dispatches. make sure you're in a block
|
|
||||||
// or at the same stack level already.
|
|
||||||
u8 *dispatcher;
|
|
||||||
|
|
||||||
// The index of the last stored ext value (compile time).
|
// The index of the last stored ext value (compile time).
|
||||||
int storeIndex;
|
int storeIndex;
|
||||||
int storeIndex2;
|
int storeIndex2;
|
||||||
|
|
Loading…
Reference in New Issue