LLE JIT: Added the loop instructions to the JIT. Added ASM version of HandleLoop. Both x86 and x64 versions have been added.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6659 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
7c5b12c5bc
commit
963ca6f963
|
@ -299,7 +299,7 @@ void DSPEmitter::Compile(int start_addr)
|
|||
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2));
|
||||
#endif
|
||||
CMP(32, R(EAX), Imm32(0));
|
||||
FixupBranch rLoopAddressExit = J_CC(CC_LE);
|
||||
FixupBranch rLoopAddressExit = J_CC(CC_LE, true);
|
||||
|
||||
#ifdef _M_IX86 // All32
|
||||
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST3])));
|
||||
|
@ -307,7 +307,7 @@ void DSPEmitter::Compile(int start_addr)
|
|||
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST3*2));
|
||||
#endif
|
||||
CMP(32, R(EAX), Imm32(0));
|
||||
FixupBranch rLoopCounterExit = J_CC(CC_LE);
|
||||
FixupBranch rLoopCounterExit = J_CC(CC_LE, true);
|
||||
|
||||
if (!opcode->branch)
|
||||
{
|
||||
|
@ -322,7 +322,7 @@ void DSPEmitter::Compile(int start_addr)
|
|||
|
||||
// These functions branch and therefore only need to be called in the
|
||||
// end of each block and in this order
|
||||
ABI_CallFunction((void *)&DSPInterpreter::HandleLoop);
|
||||
HandleLoop();
|
||||
// ABI_RestoreStack(0);
|
||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
||||
|
|
|
@ -104,8 +104,8 @@ public:
|
|||
// Command helpers
|
||||
void dsp_reg_stack_push(int stack_reg);
|
||||
void dsp_reg_stack_pop(int stack_reg);
|
||||
void dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg);
|
||||
void dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg);
|
||||
void dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg = Gen::EDX);
|
||||
void dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg = Gen::EDX);
|
||||
void dsp_reg_store_stack_imm(int stack_reg, u16 val);
|
||||
void dsp_op_write_reg(int reg, Gen::X64Reg host_sreg);
|
||||
void dsp_op_write_reg_imm(int reg, u16 val);
|
||||
|
@ -127,10 +127,15 @@ public:
|
|||
void nx(const UDSPInstruction opc);
|
||||
|
||||
// Branch
|
||||
void HandleLoop();
|
||||
void jcc(const UDSPInstruction opc);
|
||||
void jmprcc(const UDSPInstruction opc);
|
||||
void call(const UDSPInstruction opc);
|
||||
void callr(const UDSPInstruction opc);
|
||||
void loop(const UDSPInstruction opc);
|
||||
void loopi(const UDSPInstruction opc);
|
||||
void bloop(const UDSPInstruction opc);
|
||||
void bloopi(const UDSPInstruction opc);
|
||||
|
||||
// Load/Store
|
||||
void srs(const UDSPInstruction opc);
|
||||
|
|
|
@ -184,10 +184,10 @@ const DSPOPCTemplate opcodes[] =
|
|||
{"ILRRN", 0x021c, 0xfefc, DSPInterpreter::ilrrn, NULL, 1, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_PRG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
||||
|
||||
// LOOPS
|
||||
{"LOOP", 0x0040, 0xffe0, DSPInterpreter::loop, NULL, 1, 1, {{P_REG, 1, 0, 0, 0x001f}}, false, true, false, true, false},
|
||||
{"BLOOP", 0x0060, 0xffe0, DSPInterpreter::bloop, NULL, 2, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
|
||||
{"LOOPI", 0x1000, 0xff00, DSPInterpreter::loopi, NULL, 1, 1, {{P_IMM, 1, 0, 0, 0x00ff}}, false, true, false, true, false},
|
||||
{"BLOOPI", 0x1100, 0xff00, DSPInterpreter::bloopi, NULL, 2, 2, {{P_IMM, 1, 0, 0, 0x00ff}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
|
||||
{"LOOP", 0x0040, 0xffe0, DSPInterpreter::loop, &DSPEmitter::loop, 1, 1, {{P_REG, 1, 0, 0, 0x001f}}, false, true, false, true, false},
|
||||
{"BLOOP", 0x0060, 0xffe0, DSPInterpreter::bloop, &DSPEmitter::bloop, 2, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
|
||||
{"LOOPI", 0x1000, 0xff00, DSPInterpreter::loopi, &DSPEmitter::loopi, 1, 1, {{P_IMM, 1, 0, 0, 0x00ff}}, false, true, false, true, false},
|
||||
{"BLOOPI", 0x1100, 0xff00, DSPInterpreter::bloopi, &DSPEmitter::bloopi, 2, 2, {{P_IMM, 1, 0, 0, 0x00ff}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
|
||||
|
||||
// load and store value pointed by indexing reg and increment; LRR/SRR variants
|
||||
{"LRR", 0x1800, 0xff80, DSPInterpreter::lrr, &DSPEmitter::lrr, 1, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_PRG, 1, 0, 5, 0x0060}}, false, false, false, false, false},
|
||||
|
|
|
@ -142,7 +142,7 @@ void halt(const UDSPInstruction opc)
|
|||
// instructions. Whenever there is value on stack $st2 and current PC is equal
|
||||
// value at $st2, then value at stack $st3 is decremented. If value is not zero
|
||||
// then PC is modified with value from call stack $st0. Otherwise values from
|
||||
// call stack $st0 and both loop stacks $st2 and $st3 are poped and execution
|
||||
// call stack $st0 and both loop stacks $st2 and $st3 are popped and execution
|
||||
// continues at next opcode.
|
||||
void HandleLoop()
|
||||
{
|
||||
|
@ -225,7 +225,7 @@ void loopi(const UDSPInstruction opc)
|
|||
// specified address addrA inclusive, ie. opcode at addrA is the last opcode
|
||||
// included in loop. Counter is pushed on loop stack $st3, end of block address
|
||||
// is pushed on loop stack $st2 and repeat address is pushed on call stack $st0.
|
||||
// Up to 4 nested loops is allowed.
|
||||
// Up to 4 nested loops are allowed.
|
||||
void bloop(const UDSPInstruction opc)
|
||||
{
|
||||
u16 reg = opc & 0x1f;
|
||||
|
@ -253,7 +253,7 @@ void bloop(const UDSPInstruction opc)
|
|||
// address addrA inclusive, ie. opcode at addrA is the last opcode included in
|
||||
// loop. Counter is pushed on loop stack $st3, end of block address is pushed
|
||||
// on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4
|
||||
// nested loops is allowed.
|
||||
// nested loops are allowed.
|
||||
void bloopi(const UDSPInstruction opc)
|
||||
{
|
||||
u16 cnt = opc & 0xff;
|
||||
|
|
|
@ -284,3 +284,216 @@ void DSPEmitter::callr(const UDSPInstruction opc)
|
|||
{
|
||||
ReJitConditional<r_callr>(opc, *this);
|
||||
}
|
||||
|
||||
// LOOP handling: Loop stack is used to control execution of repeated blocks of
|
||||
// instructions. Whenever there is value on stack $st2 and current PC is equal
|
||||
// value at $st2, then value at stack $st3 is decremented. If value is not zero
|
||||
// then PC is modified with value from call stack $st0. Otherwise values from
|
||||
// call stack $st0 and both loop stacks $st2 and $st3 are popped and execution
|
||||
// continues at next opcode.
|
||||
void DSPEmitter::HandleLoop()
|
||||
{
|
||||
#ifdef _M_IX86 // All32
|
||||
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2])));
|
||||
MOVZX(32, 16, ECX, M(&(g_dsp.r[DSP_REG_ST3])));
|
||||
#else
|
||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2));
|
||||
MOVZX(32, 16, ECX, MDisp(R11,DSP_REG_ST3*2));
|
||||
#endif
|
||||
|
||||
CMP(32, R(RCX), Imm32(0));
|
||||
FixupBranch rLoopCntG = J_CC(CC_LE, true);
|
||||
CMP(16, R(RAX), Imm16(compilePC - 1));
|
||||
FixupBranch rLoopAddrG = J_CC(CC_NE, true);
|
||||
|
||||
#ifdef _M_IX86 // All32
|
||||
SUB(16, M(&(g_dsp.r[DSP_REG_ST3])), Imm16(1));
|
||||
CMP(16, M(&(g_dsp.r[DSP_REG_ST3])), Imm16(0));
|
||||
#else
|
||||
SUB(16, MDisp(R11,DSP_REG_ST3*2), Imm16(1));
|
||||
CMP(16, MDisp(R11,DSP_REG_ST3*2), Imm16(0));
|
||||
#endif
|
||||
|
||||
FixupBranch loadStack = J_CC(CC_LE, true);
|
||||
#ifdef _M_IX86 // All32
|
||||
MOVZX(32, 16, ECX, M(&(g_dsp.r[DSP_REG_ST0])));
|
||||
MOV(16, M(&g_dsp.pc), R(RCX));
|
||||
#else
|
||||
MOVZX(32, 16, RCX, MDisp(R11,DSP_REG_ST0*2));
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
MOV(16, MatR(RAX), R(RCX));
|
||||
#endif
|
||||
FixupBranch loopUpdated = J(true);
|
||||
|
||||
SetJumpTarget(loadStack);
|
||||
dsp_reg_load_stack(0);
|
||||
dsp_reg_load_stack(2);
|
||||
dsp_reg_load_stack(3);
|
||||
|
||||
SetJumpTarget(loopUpdated);
|
||||
SetJumpTarget(rLoopAddrG);
|
||||
SetJumpTarget(rLoopCntG);
|
||||
|
||||
}
|
||||
|
||||
// LOOP $R
|
||||
// 0000 0000 010r rrrr
|
||||
// Repeatedly execute following opcode until counter specified by value
|
||||
// from register $R reaches zero. Each execution decrement counter. Register
|
||||
// $R remains unchanged. If register $R is set to zero at the beginning of loop
|
||||
// then looped instruction will not get executed.
|
||||
// Actually, this instruction simply prepares the loop stacks for the above.
|
||||
// The looping hardware takes care of the rest.
|
||||
void DSPEmitter::loop(const UDSPInstruction opc)
|
||||
{
|
||||
u16 reg = opc & 0x1f;
|
||||
// u16 cnt = g_dsp.r[reg];
|
||||
#ifdef _M_IX86 // All32
|
||||
MOVZX(32, 16, EDX, M(&(g_dsp.r[reg])));
|
||||
#else
|
||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||
MOVZX(32, 16, EDX, MDisp(R11,reg*2));
|
||||
#endif
|
||||
u16 loop_pc = compilePC + 1;
|
||||
|
||||
CMP(16, R(EDX), Imm16(0));
|
||||
FixupBranch cnt = J_CC(CC_Z, true);
|
||||
dsp_reg_store_stack(3);
|
||||
MOV(16, R(RDX), Imm16(compilePC + 1));
|
||||
dsp_reg_store_stack(0);
|
||||
MOV(16, R(RDX), Imm16(loop_pc));
|
||||
dsp_reg_store_stack(2);
|
||||
|
||||
SetJumpTarget(cnt);
|
||||
#ifdef _M_IX86 // All32
|
||||
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1));
|
||||
#else
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
MOV(16, MDisp(RAX,0), Imm16(compilePC + 1));
|
||||
#endif
|
||||
}
|
||||
|
||||
// LOOPI #I
|
||||
// 0001 0000 iiii iiii
|
||||
// Repeatedly execute following opcode until counter specified by
|
||||
// immediate value I reaches zero. Each execution decrement counter. If
|
||||
// immediate value I is set to zero at the beginning of loop then looped
|
||||
// instruction will not get executed.
|
||||
// Actually, this instruction simply prepares the loop stacks for the above.
|
||||
// The looping hardware takes care of the rest.
|
||||
void DSPEmitter::loopi(const UDSPInstruction opc)
|
||||
{
|
||||
u16 cnt = opc & 0xff;
|
||||
u16 loop_pc = compilePC + 1;
|
||||
|
||||
if (cnt)
|
||||
{
|
||||
MOV(16, R(RDX), Imm16(compilePC + 1));
|
||||
dsp_reg_store_stack(0);
|
||||
MOV(16, R(RDX), Imm16(loop_pc));
|
||||
dsp_reg_store_stack(2);
|
||||
MOV(16, R(RDX), Imm16(cnt));
|
||||
dsp_reg_store_stack(3);
|
||||
|
||||
#ifdef _M_IX86 // All32
|
||||
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1));
|
||||
#else
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
MOV(16, MDisp(RAX,0), Imm16(compilePC + 1));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// BLOOP $R, addrA
|
||||
// 0000 0000 011r rrrr
|
||||
// aaaa aaaa aaaa aaaa
|
||||
// Repeatedly execute block of code starting at following opcode until
|
||||
// counter specified by value from register $R reaches zero. Block ends at
|
||||
// specified address addrA inclusive, ie. opcode at addrA is the last opcode
|
||||
// included in loop. Counter is pushed on loop stack $st3, end of block address
|
||||
// is pushed on loop stack $st2 and repeat address is pushed on call stack $st0.
|
||||
// Up to 4 nested loops are allowed.
|
||||
void DSPEmitter::bloop(const UDSPInstruction opc)
|
||||
{
|
||||
u16 reg = opc & 0x1f;
|
||||
// u16 cnt = g_dsp.r[reg];
|
||||
#ifdef _M_IX86 // All32
|
||||
MOVZX(32, 16, EDX, M(&(g_dsp.r[reg])));
|
||||
#else
|
||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||
MOVZX(32, 16, EDX, MDisp(R11,reg*2));
|
||||
#endif
|
||||
u16 loop_pc = dsp_imem_read(compilePC + 1);
|
||||
|
||||
CMP(16, R(EDX), Imm16(0));
|
||||
FixupBranch cnt = J_CC(CC_Z, true);
|
||||
dsp_reg_store_stack(3);
|
||||
MOV(16, R(RDX), Imm16(compilePC + 2));
|
||||
dsp_reg_store_stack(0);
|
||||
MOV(16, R(RDX), Imm16(loop_pc));
|
||||
dsp_reg_store_stack(2);
|
||||
#ifdef _M_IX86 // All32
|
||||
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 2));
|
||||
#else
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
MOV(16, MDisp(RAX,0), Imm16(compilePC + 2));
|
||||
#endif
|
||||
FixupBranch exit = J();
|
||||
|
||||
SetJumpTarget(cnt);
|
||||
// g_dsp.pc = loop_pc;
|
||||
// dsp_skip_inst();
|
||||
#ifdef _M_IX86 // All32
|
||||
MOV(16, M(&g_dsp.pc), Imm16(loop_pc + opTable[loop_pc]->size));
|
||||
#else
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size));
|
||||
#endif
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
// BLOOPI #I, addrA
|
||||
// 0001 0001 iiii iiii
|
||||
// aaaa aaaa aaaa aaaa
|
||||
// Repeatedly execute block of code starting at following opcode until
|
||||
// counter specified by immediate value I reaches zero. Block ends at specified
|
||||
// address addrA inclusive, ie. opcode at addrA is the last opcode included in
|
||||
// loop. Counter is pushed on loop stack $st3, end of block address is pushed
|
||||
// on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4
|
||||
// nested loops are allowed.
|
||||
void DSPEmitter::bloopi(const UDSPInstruction opc)
|
||||
{
|
||||
u16 cnt = opc & 0xff;
|
||||
// u16 loop_pc = dsp_fetch_code();
|
||||
u16 loop_pc = dsp_imem_read(compilePC + 1);
|
||||
|
||||
if (cnt)
|
||||
{
|
||||
MOV(16, R(RDX), Imm16(compilePC + 2));
|
||||
dsp_reg_store_stack(0);
|
||||
MOV(16, R(RDX), Imm16(loop_pc));
|
||||
dsp_reg_store_stack(2);
|
||||
MOV(16, R(RDX), Imm16(cnt));
|
||||
dsp_reg_store_stack(3);
|
||||
|
||||
#ifdef _M_IX86 // All32
|
||||
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 2));
|
||||
#else
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
MOV(16, MDisp(RAX,0), Imm16(compilePC + 2));
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
// g_dsp.pc = loop_pc;
|
||||
// dsp_skip_inst();
|
||||
#ifdef _M_IX86 // All32
|
||||
MOV(16, M(&g_dsp.pc), Imm16(loop_pc + opTable[loop_pc]->size));
|
||||
#else
|
||||
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||
MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue