552 lines
13 KiB
C++
552 lines
13 KiB
C++
// Copyright (C) 2010 Dolphin Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
|
|
|
|
#include <cstring>
|
|
|
|
#include "DSPEmitter.h"
|
|
#include "DSPMemoryMap.h"
|
|
#include "DSPCore.h"
|
|
#include "DSPInterpreter.h"
|
|
#include "DSPAnalyzer.h"
|
|
#include "x64Emitter.h"
|
|
#include "ABI.h"
|
|
|
|
#define MAX_BLOCK_SIZE 250
|
|
#define DSP_IDLE_SKIP_CYCLES 0x1000
|
|
|
|
using namespace Gen;
|
|
|
|
const u8 *stubEntryPoint;
|
|
u16 blocksCompiled;
|
|
u16 unresolvedCalls;
|
|
u16 unresolvedCallsThisBlock;
|
|
|
|
// Sets up the emitter: reserves the code buffer, allocates the per-address
// lookup tables, emits the dispatcher and the compile stub, and finally
// points every block entry at the stub so that the first execution of any
// address ends up calling CompileCurrent.
DSPEmitter::DSPEmitter()
	: storeIndex(-1)
	, storeIndex2(-1)
{
	m_compiledCode = NULL;

	AllocCodeSpace(COMPILED_CODE_SIZE);

	// The tables are allocated before CompileDispatcher() runs because the
	// dispatcher embeds the address of 'blocks' as an immediate.
	blocks     = new CompiledCode[MAX_BLOCKS];
	blockLinks = new CompiledCode[MAX_BLOCKS];
	// NOTE(review): blockSize is sized 0x10000 while the loop below walks
	// MAX_BLOCKS entries - confirm that MAX_BLOCKS <= 0x10000.
	blockSize  = new u16[0x10000];

	// Blocks are compiled assuming both interrupt enables are set.
	compileSR = SR_INT_ENABLE | SR_EXT_INT_ENABLE;

	CompileDispatcher();
	stubEntryPoint = CompileStub();

	// Clear all of the block references.
	for (int addr = 0; addr < MAX_BLOCKS; ++addr)
	{
		blocks[addr]     = (CompiledCode)stubEntryPoint;
		blockLinks[addr] = 0;
		blockSize[addr]  = 0;
	}

	blocksCompiled  = 0;
	unresolvedCalls = 0;
}
|
|
|
|
// Releases the three lookup tables allocated by the constructor, then the
// code buffer.
DSPEmitter::~DSPEmitter()
{
	delete[] blockSize;
	delete[] blockLinks;
	delete[] blocks;

	FreeCodeSpace();
}
|
|
|
|
void DSPEmitter::ClearIRAM() {
|
|
// ClearCodeSpace();
|
|
for(int i = 0x0000; i < 0x1000; i++)
|
|
{
|
|
blocks[i] = (CompiledCode)stubEntryPoint;
|
|
blockLinks[i] = 0;
|
|
blockSize[i] = 0;
|
|
}
|
|
blocksCompiled = 0;
|
|
unresolvedCalls = 0;
|
|
}
|
|
|
|
// Must go out of block if exception is detected
|
|
void DSPEmitter::checkExceptions(u32 retval)
|
|
{
|
|
// Check for interrupts and exceptions
|
|
#ifdef _M_IX86 // All32
|
|
TEST(8, M(&g_dsp.exceptions), Imm8(0xff));
|
|
#else
|
|
MOV(64, R(RAX), ImmPtr(&g_dsp.exceptions));
|
|
TEST(8, MatR(RAX), Imm8(0xff));
|
|
#endif
|
|
FixupBranch skipCheck = J_CC(CC_Z);
|
|
|
|
#ifdef _M_IX86 // All32
|
|
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC));
|
|
#else
|
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
|
MOV(16, MatR(RAX), Imm16(compilePC));
|
|
#endif
|
|
|
|
ABI_CallFunction((void *)&DSPCore_CheckExceptions);
|
|
|
|
// ABI_RestoreStack(0);
|
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
|
MOV(32, R(EAX), Imm32(retval));
|
|
RET();
|
|
|
|
SetJumpTarget(skipCheck);
|
|
}
|
|
|
|
// Emits a call to the interpreter implementation of 'inst' for instructions
// that have no JIT implementation.
void DSPEmitter::Default(UDSPInstruction inst)
{
	const DSPOPCTemplate *op = opTable[inst];

	if (op->reads_pc)
	{
		// Increment PC - we shouldn't need to do this for every instruction,
		// only for branches and end of block. Fallbacks to the interpreter
		// need this for fetching immediate values.
#ifdef _M_IX86 // All32
		MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1));
#else
		MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
		MOV(16, MatR(RAX), Imm16(compilePC + 1));
#endif
	}

	// Hand the instruction to the interpreter, with the DSP register state
	// saved before and reloaded after the call.
	SaveDSPRegs();
	ABI_CallFunctionC16((void*)op->intFunc, inst);
	LoadDSPRegs();
}
|
|
|
|
// Emits code for one DSP instruction in three steps: the extended part (if
// the opcode template has one), the main part, and finally the write-back of
// the extended part's result. Each part uses its JIT implementation when
// available and falls back to calling the interpreter otherwise.
void DSPEmitter::EmitInstruction(UDSPInstruction inst)
{
	const DSPOPCTemplate *tinst = GetOpTemplate(inst);
	bool ext_is_jit = false;

	// Call extended
	if (tinst->extended)
	{
		// Opcodes whose top nibble is 0x3 index the extension table with the
		// low 7 bits; every other opcode uses the low 8 bits.
		const int ext_index = ((inst >> 12) == 0x3) ? (inst & 0x7F) : (inst & 0xFF);
		const DSPOPCTemplate *ext = extOpTable[ext_index];

		if (ext->jitFunc)
		{
			(this->*(ext->jitFunc))(inst);
			ext_is_jit = true;
		}
		else
		{
			// Fall back to interpreter
			SaveDSPRegs();
			ABI_CallFunctionC16((void*)ext->intFunc, inst);
			LoadDSPRegs();
			INFO_LOG(DSPLLE, "Instruction not JITed(ext part): %04x\n", inst);
			ext_is_jit = false;
		}
	}

	// Main instruction
	const DSPOPCTemplate *op = opTable[inst];
	if (op->jitFunc)
	{
		(this->*(op->jitFunc))(inst);
	}
	else
	{
		Default(inst);
		INFO_LOG(DSPLLE, "Instruction not JITed(main part): %04x\n", inst);
	}

	// Backlog
	if (tinst->extended)
	{
		if (ext_is_jit)
		{
			popExtValueToReg();
		}
		else
		{
			// Need to call the online cleanup function because the
			// writeBackLog gets populated at runtime.
			SaveDSPRegs();
			ABI_CallFunction((void*)::applyWriteBackLog);
			LoadDSPRegs();
		}
	}
}
|
|
|
|
// Raises a panic alert showing the 16-bit opcode of an instruction the
// emitter does not know how to handle. No code is emitted.
void DSPEmitter::unknown_instruction(UDSPInstruction inst)
{
	PanicAlert("unknown_instruction %04x - Fix me ;)", inst);
}
|
|
|
|
void DSPEmitter::ClearCallFlag()
|
|
{
|
|
--unresolvedCallsThisBlock;
|
|
}
|
|
|
|
void DSPEmitter::Compile(int start_addr)
|
|
{
|
|
// Remember the current block address for later
|
|
startAddr = start_addr;
|
|
blocksCompiled++;
|
|
unresolvedCallsThisBlock = 0;
|
|
|
|
// If the number of unresolved calls exceeds 8, there is a critical
|
|
// block that probably cannot be resolved. If this occurs, quit linking
|
|
// blocks. Currently occurs in the zelda ucode.
|
|
if (unresolvedCalls <= 8)
|
|
{
|
|
// After every 10 blocks, clear out the blocks that have unresolved
|
|
// calls, and reattempt relinking.
|
|
if (blocksCompiled >= 10 && unresolvedCalls > 0)
|
|
{
|
|
for(int i = 0x0000; i < MAX_BLOCKS; ++i)
|
|
{
|
|
if (DSPAnalyzer::code_flags[i] & DSPAnalyzer::CODE_CALL)
|
|
{
|
|
blocks[i] = (CompiledCode)stubEntryPoint;
|
|
blockLinks[i] = 0;
|
|
blockSize[i] = 0;
|
|
}
|
|
}
|
|
// Reset and reattempt relinking
|
|
blocksCompiled = 0;
|
|
unresolvedCalls = 0;
|
|
}
|
|
}
|
|
|
|
const u8 *entryPoint = AlignCode16();
|
|
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
|
// ABI_AlignStack(0);
|
|
|
|
/*
|
|
// Check for other exceptions
|
|
if (dsp_SR_is_flag_set(SR_INT_ENABLE))
|
|
return;
|
|
|
|
if (g_dsp.exceptions == 0)
|
|
return;
|
|
*/
|
|
|
|
blockLinkEntry = GetCodePtr();
|
|
|
|
// ASM version of DSPCore_CheckExternalInterrupt.
|
|
#ifdef _M_IX86 // All32
|
|
TEST(16, M(&g_dsp.cr), Imm16(CR_EXTERNAL_INT));
|
|
FixupBranch noExternalInterrupt = J_CC(CC_Z);
|
|
TEST(16, M(&g_dsp.r.sr), Imm16(SR_EXT_INT_ENABLE));
|
|
FixupBranch externalInterruptDisabled = J_CC(CC_Z);
|
|
OR(8, M(&g_dsp.exceptions), Imm8(1 << EXP_INT));
|
|
AND(16, M(&g_dsp.cr), Imm16(~CR_EXTERNAL_INT));
|
|
SetJumpTarget(externalInterruptDisabled);
|
|
SetJumpTarget(noExternalInterrupt);
|
|
#else
|
|
/* // TODO: Needs to be optimised
|
|
MOV(64, R(RAX), ImmPtr(&g_dsp.cr));
|
|
TEST(16, MatR(RAX), Imm16(CR_EXTERNAL_INT));
|
|
FixupBranch noExternalInterrupt = J_CC(CC_Z);
|
|
MOV(64, R(RAX), ImmPtr(&g_dsp.r.sr));
|
|
TEST(16, MatR(RAX), Imm16(SR_EXT_INT_ENABLE));
|
|
FixupBranch externalInterruptDisabled = J_CC(CC_Z);
|
|
MOV(64, R(RAX), ImmPtr(&g_dsp.exceptions));
|
|
OR(8, MatR(RAX), Imm8(1 << EXP_INT));
|
|
MOV(64, R(RAX), ImmPtr(&g_dsp.cr));
|
|
AND(16, MatR(RAX), Imm16(~CR_EXTERNAL_INT));
|
|
SetJumpTarget(externalInterruptDisabled);
|
|
SetJumpTarget(noExternalInterrupt);
|
|
*/
|
|
ABI_CallFunction((void *)&DSPCore_CheckExternalInterrupt);
|
|
#endif
|
|
|
|
compilePC = start_addr;
|
|
bool fixup_pc = false;
|
|
blockSize[start_addr] = 0;
|
|
|
|
LoadDSPRegs();
|
|
|
|
while (compilePC < start_addr + MAX_BLOCK_SIZE)
|
|
{
|
|
checkExceptions(blockSize[start_addr]);
|
|
|
|
UDSPInstruction inst = dsp_imem_read(compilePC);
|
|
const DSPOPCTemplate *opcode = GetOpTemplate(inst);
|
|
|
|
// Scan for CALL's to delay block link. TODO: Scan for J_CC after it is jitted.
|
|
if (opcode->jitFunc &&
|
|
((opcode->opcode >= 0x0290 && opcode->opcode <= 0x029f) ||
|
|
(opcode->opcode >= 0x02b0 && opcode->opcode <= 0x02bf)))
|
|
{
|
|
++unresolvedCallsThisBlock;
|
|
}
|
|
|
|
EmitInstruction(inst);
|
|
|
|
blockSize[start_addr]++;
|
|
compilePC += opcode->size;
|
|
|
|
fixup_pc = true;
|
|
|
|
// Handle loop condition, only if current instruction was flagged as a loop destination
|
|
// by the analyzer.
|
|
if (DSPAnalyzer::code_flags[compilePC-1] & DSPAnalyzer::CODE_LOOP_END)
|
|
{
|
|
#ifdef _M_IX86 // All32
|
|
MOVZX(32, 16, EAX, M(&(g_dsp.r.st[2])));
|
|
#else
|
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
|
MOVZX(32, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, st[2])));
|
|
#endif
|
|
CMP(32, R(EAX), Imm32(0));
|
|
FixupBranch rLoopAddressExit = J_CC(CC_LE, true);
|
|
|
|
#ifdef _M_IX86 // All32
|
|
MOVZX(32, 16, EAX, M(&g_dsp.r.st[3]));
|
|
#else
|
|
MOVZX(32, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, st[3])));
|
|
#endif
|
|
CMP(32, R(EAX), Imm32(0));
|
|
FixupBranch rLoopCounterExit = J_CC(CC_LE, true);
|
|
|
|
if (!opcode->branch)
|
|
{
|
|
//branch insns update the g_dsp.pc
|
|
#ifdef _M_IX86 // All32
|
|
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC));
|
|
#else
|
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
|
MOV(16, MatR(RAX), Imm16(compilePC));
|
|
#endif
|
|
}
|
|
|
|
// These functions branch and therefore only need to be called in the
|
|
// end of each block and in this order
|
|
HandleLoop();
|
|
// ABI_RestoreStack(0);
|
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
|
if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
|
{
|
|
MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES));
|
|
}
|
|
else
|
|
{
|
|
MOV(16, R(EAX), Imm16(blockSize[start_addr]));
|
|
}
|
|
RET();
|
|
|
|
SetJumpTarget(rLoopAddressExit);
|
|
SetJumpTarget(rLoopCounterExit);
|
|
}
|
|
|
|
if (opcode->branch)
|
|
{
|
|
//don't update g_dsp.pc -- the branch insn already did
|
|
fixup_pc = false;
|
|
if (opcode->uncond_branch)
|
|
{
|
|
break;
|
|
}
|
|
else if (!opcode->jitFunc)
|
|
{
|
|
//look at g_dsp.pc if we actually branched
|
|
#ifdef _M_IX86 // All32
|
|
MOV(16, R(AX), M(&g_dsp.pc));
|
|
#else
|
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
|
MOV(16, R(AX), MatR(RAX));
|
|
#endif
|
|
CMP(16, R(AX), Imm16(compilePC));
|
|
FixupBranch rNoBranch = J_CC(CC_Z);
|
|
|
|
//don't update g_dsp.pc -- the branch insn already did
|
|
// ABI_RestoreStack(0);
|
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
|
if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
|
{
|
|
MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES));
|
|
}
|
|
else
|
|
{
|
|
MOV(16, R(EAX), Imm16(blockSize[start_addr]));
|
|
}
|
|
RET();
|
|
|
|
SetJumpTarget(rNoBranch);
|
|
}
|
|
}
|
|
|
|
// End the block if we're before an idle skip address
|
|
if (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_IDLE_SKIP)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (fixup_pc) {
|
|
#ifdef _M_IX86 // All32
|
|
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC));
|
|
#else
|
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
|
MOV(16, MatR(RAX), Imm16(compilePC));
|
|
#endif
|
|
}
|
|
|
|
blocks[start_addr] = (CompiledCode)entryPoint;
|
|
|
|
// Mark this block as a linkable destination if it does not contain
|
|
// any unresolved CALL's
|
|
if (unresolvedCallsThisBlock == 0)
|
|
{
|
|
DSPAnalyzer::code_flags[start_addr] &= ~DSPAnalyzer::CODE_CALL;
|
|
blockLinks[start_addr] = (CompiledCode)blockLinkEntry;
|
|
}
|
|
else
|
|
{
|
|
DSPAnalyzer::code_flags[start_addr] |= DSPAnalyzer::CODE_CALL;
|
|
blockLinks[start_addr] = 0;
|
|
++unresolvedCalls;
|
|
}
|
|
|
|
if (blockSize[start_addr] == 0)
|
|
{
|
|
// just a safeguard, should never happen anymore.
|
|
// if it does we might get stuck over in RunForCycles.
|
|
ERROR_LOG(DSPLLE, "Block at 0x%04x has zero size", start_addr);
|
|
blockSize[start_addr] = 1;
|
|
}
|
|
|
|
SaveDSPRegs();
|
|
|
|
// ABI_RestoreStack(0);
|
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
|
if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
|
{
|
|
MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES));
|
|
}
|
|
else
|
|
{
|
|
MOV(16, R(EAX), Imm16(blockSize[start_addr]));
|
|
}
|
|
RET();
|
|
}
|
|
|
|
// Emits the stub that every not-yet-compiled block table entry points to.
// The stub calls CompileCurrent and then returns 0 in EAX.
//
// Returns the 16-byte-aligned address at which the stub starts.
const u8 *DSPEmitter::CompileStub()
{
	const u8 *stubStart = AlignCode16();

	ABI_PushAllCalleeSavedRegsAndAdjustStack();
	// ABI_AlignStack(0);
	ABI_CallFunction((void *)&CompileCurrent);
	// ABI_RestoreStack(0);
	ABI_PopAllCalleeSavedRegsAndAdjustStack();

	//MOVZX(32, 16, ECX, M(&g_dsp.pc));
	XOR(32, R(EAX), R(EAX)); // Return 0 cycles executed
	RET();

	return stubStart;
}
|
|
|
|
void DSPEmitter::CompileDispatcher()
|
|
{
|
|
enterDispatcher = AlignCode16();
|
|
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
|
|
|
// Cache pointers into registers
|
|
#ifdef _M_IX86
|
|
MOV(16, R(ESI), M(&cyclesLeft));
|
|
MOV(32, R(EBX), ImmPtr(blocks));
|
|
#else
|
|
// Using R12 here since it is callee save register on both
|
|
// linux and windows 64.
|
|
MOV(64, R(R12), ImmPtr(&cyclesLeft));
|
|
MOV(16, R(R12), MatR(R12));
|
|
MOV(64, R(RBX), ImmPtr(blocks));
|
|
#endif
|
|
|
|
const u8 *dispatcherLoop = GetCodePtr();
|
|
|
|
// Check for DSP halt
|
|
#ifdef _M_IX86
|
|
TEST(8, M(&g_dsp.cr), Imm8(CR_HALT));
|
|
#else
|
|
MOV(64, R(RAX), ImmPtr(&g_dsp.cr));
|
|
TEST(8, MatR(RAX), Imm8(CR_HALT));
|
|
#endif
|
|
FixupBranch _halt = J_CC(CC_NE);
|
|
|
|
#ifdef _M_IX86
|
|
MOVZX(32, 16, ECX, M(&g_dsp.pc));
|
|
#else
|
|
MOV(64, R(RCX), ImmPtr(&g_dsp.pc));
|
|
MOVZX(64, 16, RCX, MatR(RCX));
|
|
#endif
|
|
|
|
// Execute block. Cycles executed returned in EAX.
|
|
#ifdef _M_IX86
|
|
CALLptr(MComplex(EBX, ECX, SCALE_4, 0));
|
|
#else
|
|
CALLptr(MComplex(RBX, RCX, SCALE_8, 0));
|
|
#endif
|
|
|
|
// Decrement cyclesLeft
|
|
#ifdef _M_IX86
|
|
SUB(16, R(ESI), R(EAX));
|
|
#else
|
|
SUB(16, R(R12), R(EAX));
|
|
#endif
|
|
|
|
J_CC(CC_A, dispatcherLoop);
|
|
|
|
// DSP gave up the remaining cycles.
|
|
SetJumpTarget(_halt);
|
|
//MOV(32, M(&cyclesLeft), Imm32(0));
|
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
|
RET();
|
|
}
|
|
|
|
// Don't use the % operator in the inner loop. It's slow.
|
|
int STACKALIGN DSPEmitter::RunForCycles(int cycles)
|
|
{
|
|
while (!(g_dsp.cr & CR_HALT))
|
|
{
|
|
// Compile the block if needed
|
|
u16 block_addr = g_dsp.pc;
|
|
int block_size = blockSize[block_addr];
|
|
if (!block_size)
|
|
{
|
|
CompileCurrent();
|
|
block_size = blockSize[block_addr];
|
|
}
|
|
|
|
// Execute the block if we have enough cycles
|
|
if (cycles > block_size)
|
|
{
|
|
cycles -= blocks[block_addr]();
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
// DSP gave up the remaining cycles.
|
|
if (g_dsp.cr & CR_HALT || cycles < 0)
|
|
return 0;
|
|
|
|
return cycles;
|
|
}
|