diff --git a/Source/Core/DSPCore/DSPCore.vcproj b/Source/Core/DSPCore/DSPCore.vcproj index 05917ca096..058b8788b2 100644 --- a/Source/Core/DSPCore/DSPCore.vcproj +++ b/Source/Core/DSPCore/DSPCore.vcproj @@ -1,7 +1,7 @@ - + Name="JIT" + > + - + - + - + + + + + @@ -511,12 +519,12 @@ > + RelativePath=".\Src\DSPCore.cpp" + > - - - - diff --git a/Source/Core/DSPCore/Src/DSPAnalyzer.cpp b/Source/Core/DSPCore/Src/DSPAnalyzer.cpp index 9dc09eba2a..7319ba7a14 100644 --- a/Source/Core/DSPCore/Src/DSPAnalyzer.cpp +++ b/Source/Core/DSPCore/Src/DSPAnalyzer.cpp @@ -95,7 +95,7 @@ void AnalyzeRange(int start_addr, int end_addr) continue; } code_flags[addr] |= CODE_START_OF_INST; - // Look for loops. (this is not used atm) + // Look for loops. if ((inst & 0xffe0) == 0x0060 || (inst & 0xff00) == 0x1100) { // BLOOP, BLOOPI u16 loop_end = dsp_imem_read(addr + 1); diff --git a/Source/Core/DSPCore/Src/DSPEmitter.cpp b/Source/Core/DSPCore/Src/DSPEmitter.cpp index 8311d0b931..361eae5344 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.cpp +++ b/Source/Core/DSPCore/Src/DSPEmitter.cpp @@ -25,7 +25,7 @@ #include "x64Emitter.h" #include "ABI.h" -#define BLOCK_SIZE 250 +#define MAX_BLOCK_SIZE 250 using namespace Gen; @@ -36,33 +36,30 @@ DSPEmitter::DSPEmitter() : storeIndex(-1) AllocCodeSpace(COMPILED_CODE_SIZE); blocks = new CompiledCode[MAX_BLOCKS]; - endBlock = new bool[MAX_BLOCKS]; + blockSize = new u16[0x10000]; + + ClearIRAM(); - for(int i = 0x0000; i < MAX_BLOCKS; i++) - { - blocks[i] = CompileCurrent; - blockSize[i] = 0; - endBlock[i] = false; - } compileSR = 0; compileSR |= SR_INT_ENABLE; compileSR |= SR_EXT_INT_ENABLE; + + CompileDispatcher(); } DSPEmitter::~DSPEmitter() { delete[] blocks; - delete[] endBlock; + delete[] blockSize; FreeCodeSpace(); } void DSPEmitter::ClearIRAM() { - // TODO: Does not clear codespace + // ClearCodeSpace(); for(int i = 0x0000; i < 0x1000; i++) { - blocks[i] = CompileCurrent; + blocks[i] = NULL; blockSize[i] = 0; - endBlock[i] = false; } } @@ -100,7 +97,7 @@ void DSPEmitter::checkExceptions() { SetJumpTarget(skipCheck); } -void DSPEmitter::WriteCallInterpreter(UDSPInstruction inst) +void DSPEmitter::EmitInstruction(UDSPInstruction inst) { const DSPOPCTemplate *tinst = GetOpTemplate(inst); @@ -108,12 +105,14 @@ void DSPEmitter::WriteCallInterpreter(UDSPInstruction inst) if (tinst->extended) { if ((inst >> 12) == 0x3) { if (! extOpTable[inst & 0x7F]->jitFunc) { + // Fall back to interpreter ABI_CallFunctionC16((void*)extOpTable[inst & 0x7F]->intFunc, inst); } else { (this->*extOpTable[inst & 0x7F]->jitFunc)(inst); } } else { if (!extOpTable[inst & 0xFF]->jitFunc) { + // Fall back to interpreter ABI_CallFunctionC16((void*)extOpTable[inst & 0xFF]->intFunc, inst); } else { (this->*extOpTable[inst & 0xFF]->jitFunc)(inst); @@ -122,10 +121,14 @@ void DSPEmitter::WriteCallInterpreter(UDSPInstruction inst) } // Main instruction - if (!opTable[inst]->jitFunc) + if (!opTable[inst]->jitFunc) { + // Fall back to interpreter ABI_CallFunctionC16((void*)opTable[inst]->intFunc, inst); + } else + { (this->*opTable[inst]->jitFunc)(inst); + } // Backlog if (tinst->extended) { @@ -144,7 +147,7 @@ void DSPEmitter::unknown_instruction(UDSPInstruction inst) void DSPEmitter::Default(UDSPInstruction _inst) { - WriteCallInterpreter(_inst); + EmitInstruction(_inst); } const u8 *DSPEmitter::Compile(int start_addr) { @@ -155,52 +158,50 @@ const u8 *DSPEmitter::Compile(int start_addr) { int addr = start_addr; checkExceptions(); - while (addr < start_addr + BLOCK_SIZE) + while (addr < start_addr + MAX_BLOCK_SIZE) { UDSPInstruction inst = dsp_imem_read(addr); const DSPOPCTemplate *opcode = GetOpTemplate(inst); - - // Increment PC + // Increment PC - we shouldn't need to do this for every instruction. only for branches and end of block. ADD(16, M(&(g_dsp.pc)), Imm16(1)); - WriteCallInterpreter(inst); + EmitInstruction(inst); - blockSize[start_addr]++; + // Handle loop condition, only if current instruction was flagged as a loop destination + // by the analyzer. COMMENTED OUT - this breaks Zelda TP. Bah. - // Handle loop condition. Change to TEST - MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2]))); - CMP(32, R(EAX), Imm32(0)); - FixupBranch rLoopAddressExit = J_CC(CC_LE); + // if (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_LOOP_END) + { + // TODO: Change to TEST for some reason (who added this comment?) + MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2]))); + CMP(32, R(EAX), Imm32(0)); + FixupBranch rLoopAddressExit = J_CC(CC_LE); - MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST3]))); - CMP(32, R(EAX), Imm32(0)); - FixupBranch rLoopCounterExit = J_CC(CC_LE); + MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST3]))); + CMP(32, R(EAX), Imm32(0)); + FixupBranch rLoopCounterExit = J_CC(CC_LE); - // These functions branch and therefore only need to be called in the - // end of each block and in this order - ABI_CallFunction((void *)&DSPInterpreter::HandleLoop); - // ABI_RestoreStack(0); - ABI_PopAllCalleeSavedRegsAndAdjustStack(); - RET(); + // These functions branch and therefore only need to be called in the + // end of each block and in this order + ABI_CallFunction((void *)&DSPInterpreter::HandleLoop); + // ABI_RestoreStack(0); + ABI_PopAllCalleeSavedRegsAndAdjustStack(); + RET(); - SetJumpTarget(rLoopAddressExit); - SetJumpTarget(rLoopCounterExit); - - // End the block where the loop ends - if ((inst & 0xffe0) == 0x0060 || (inst & 0xff00) == 0x1100) { - // BLOOP, BLOOPI - endBlock[dsp_imem_read(addr + 1)] = true; - } else if ((inst & 0xffe0) == 0x0040 || (inst & 0xff00) == 0x1000) { - // LOOP, LOOPI - endBlock[addr + 1] = true; + SetJumpTarget(rLoopAddressExit); + SetJumpTarget(rLoopCounterExit); } - if (opcode->branch || endBlock[addr] - || (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_IDLE_SKIP)) { + // End the block if we're at a loop end. + if (opcode->branch || + (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_LOOP_END) || + (DSPAnalyzer::code_flags[addr] & DSPAnalyzer::CODE_IDLE_SKIP)) { break; } addr += opcode->size; + + blockSize[start_addr]++; } // ABI_RestoreStack(0); @@ -212,40 +213,50 @@ const u8 *DSPEmitter::Compile(int start_addr) { return entryPoint; } +void STACKALIGN DSPEmitter::CompileDispatcher() +{ + // TODO +} + +// Don't use the % operator in the inner loop. It's slow. void STACKALIGN DSPEmitter::RunBlock(int cycles) { + // How does this variable work? static int idleskip = 0; - // Trigger an external interrupt at the start of the cycle - u16 block_cycles = 501; +#define BURST_LENGTH 512 // Must be a power of two + u16 block_cycles = BURST_LENGTH + 1; + + // Trigger an external interrupt at the start of the cycle while (!(g_dsp.cr & CR_HALT)) { - if (block_cycles > 500) + if (block_cycles > BURST_LENGTH) { block_cycles = 0; } // Compile the block if needed - if (blocks[g_dsp.pc] == CompileCurrent) + if (!blocks[g_dsp.pc]) { blockSize[g_dsp.pc] = 0; - blocks[g_dsp.pc](); + CompileCurrent(); } // Execute the block if we have enough cycles if (cycles > blockSize[g_dsp.pc]) { u16 start_addr = g_dsp.pc; - if (idleskip % 100 > 95 && (DSPAnalyzer::code_flags[g_dsp.pc] & DSPAnalyzer::CODE_IDLE_SKIP)) { + + // 5%. Not sure where the rationale originally came from. + if (((idleskip & 127) > 121) && + (DSPAnalyzer::code_flags[g_dsp.pc] & DSPAnalyzer::CODE_IDLE_SKIP)) { block_cycles = 0; - } else + } else { blocks[g_dsp.pc](); - + } idleskip++; - - if (idleskip % 500 == 0) + if ((idleskip & (BURST_LENGTH - 1)) == 0) idleskip = 0; - block_cycles += blockSize[start_addr]; cycles -= blockSize[start_addr]; } diff --git a/Source/Core/DSPCore/Src/DSPEmitter.h b/Source/Core/DSPCore/Src/DSPEmitter.h index 7bbebf2d7c..8995144e4d 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.h +++ b/Source/Core/DSPCore/Src/DSPEmitter.h @@ -29,27 +29,19 @@ typedef void (*CompiledCode)(); class DSPEmitter : public Gen::XCodeBlock { - CompiledCode *blocks; - u16 blockSize[0x10000]; - bool *endBlock; - u16 compileSR; - - // The index of the last stored ext value (compile time). - int storeIndex; - - DISALLOW_COPY_AND_ASSIGN(DSPEmitter); - - void ToMask(Gen::X64Reg value_reg = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI); public: DSPEmitter(); ~DSPEmitter(); const u8 *m_compiledCode; - void WriteCallInterpreter(UDSPInstruction inst); + void EmitInstruction(UDSPInstruction inst); void unknown_instruction(UDSPInstruction inst); void Default(UDSPInstruction _inst); void ClearIRAM(); + + void CompileDispatcher(); + const u8 *Compile(int start_addr); void STACKALIGN RunBlock(int cycles); @@ -103,6 +95,20 @@ public: void sbclr(const UDSPInstruction opc); void sbset(const UDSPInstruction opc); void srbith(const UDSPInstruction opc); + +private: + CompiledCode *blocks; + u16 *blockSize; + u16 compileSR; + + u8 *dispatcher; + + // The index of the last stored ext value (compile time). + int storeIndex; + + DISALLOW_COPY_AND_ASSIGN(DSPEmitter); + + void ToMask(Gen::X64Reg value_reg = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI); }; diff --git a/Source/UnitTests/DSPJitTester.cpp b/Source/UnitTests/DSPJitTester.cpp index af23ec34f2..b374a49a97 100644 --- a/Source/UnitTests/DSPJitTester.cpp +++ b/Source/UnitTests/DSPJitTester.cpp @@ -41,7 +41,7 @@ SDSP DSPJitTester::RunJit(SDSP dsp_settings) ResetJit(); memcpy(&g_dsp, &dsp_settings, sizeof(SDSP)); const u8* code = jit.GetCodePtr(); - jit.WriteCallInterpreter(instruction); + jit.EmitInstruction(instruction); jit.RET(); ((void(*)())code)(); @@ -113,8 +113,8 @@ void DSPJitTester::DumpJittedCode() { ResetJit(); const u8* code = jit.GetCodePtr(); - jit.WriteCallInterpreter(instruction); - int code_size = jit.GetCodePtr() - code; + jit.EmitInstruction(instruction); + size_t code_size = jit.GetCodePtr() - code; printf("%s emitted: ", instruction_name); for (int i = 0; i < code_size; i++)