From 93ae9d03757006f0989209b2884ed9088b973be9 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 7 Nov 2024 18:14:18 +0100 Subject: [PATCH] sh4: refactor interpreter and recompiler with Sh4Executor interface --- core/emulator.cpp | 81 ++++++++++-------- core/emulator.h | 14 +++- core/hw/mem/addrspace.cpp | 2 - core/hw/pvr/Renderer_if.cpp | 4 +- core/hw/sh4/dyna/driver.cpp | 67 +++++---------- core/hw/sh4/dyna/ngen.h | 23 +++++ core/hw/sh4/interpr/sh4_interpreter.cpp | 107 ++++++++++-------------- core/hw/sh4/interpr/sh4_opcodes.cpp | 25 +++--- core/hw/sh4/modules/ccn.cpp | 3 +- core/hw/sh4/sh4_cache.h | 2 + core/hw/sh4/sh4_core.h | 1 - core/hw/sh4/sh4_core_regs.cpp | 1 - core/hw/sh4/sh4_cycles.cpp | 68 +++++++++++++++ core/hw/sh4/sh4_cycles.h | 70 +--------------- core/hw/sh4/sh4_if.h | 29 +++---- core/hw/sh4/sh4_interpreter.h | 58 ++++++------- core/hw/sh4/sh4_opcode_list.h | 33 +++++++- core/hw/sh4/sh4_sched.h | 8 +- core/network/ggpo.cpp | 4 +- core/nullDC.cpp | 2 +- shell/libretro/libretro.cpp | 2 +- tests/src/AicaArmTest.cpp | 2 +- tests/src/CheatManagerTest.cpp | 2 +- tests/src/MmuTest.cpp | 2 +- tests/src/Sh4InterpreterTest.cpp | 6 +- tests/src/div32_test.cpp | 2 +- tests/src/serialize_test.cpp | 2 +- tests/src/sh4_ops.h | 2 +- 28 files changed, 321 insertions(+), 301 deletions(-) diff --git a/core/emulator.cpp b/core/emulator.cpp index 82ddfca1d..1ef865b39 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -47,6 +47,8 @@ #ifndef LIBRETRO #include "ui/gui.h" #endif +#include "hw/sh4/sh4_interpreter.h" +#include "hw/sh4/dyna/ngen.h" settings_t settings; @@ -400,7 +402,7 @@ static void loadSpecialSettings() } } -void dc_reset(bool hard) +void Emulator::dc_reset(bool hard) { if (hard) { @@ -411,7 +413,7 @@ void dc_reset(bool hard) sh4_sched_reset(hard); pvr::reset(hard); aica::reset(hard); - sh4_cpu.Reset(true); + getSh4Executor()->Reset(true); mem_Reset(hard); } @@ -487,22 +489,28 @@ void Emulator::init() // the recompiler may start generating code at 
this point and needs a fully configured machine #if FEAT_SHREC != DYNAREC_NONE - Get_Sh4Recompiler(&sh4_cpu); - sh4_cpu.Init(); // Also initialize the interpreter + recompiler = Get_Sh4Recompiler(); + recompiler->Init(); if(config::DynarecEnabled) - { INFO_LOG(DYNAREC, "Using Recompiler"); - } else #endif - { - Get_Sh4Interpreter(&sh4_cpu); - sh4_cpu.Init(); INFO_LOG(INTERPRETER, "Using Interpreter"); - } + interpreter = Get_Sh4Interpreter(); + interpreter->Init(); state = Init; } +Sh4Executor *Emulator::getSh4Executor() +{ +#if FEAT_SHREC != DYNAREC_NONE + if(config::DynarecEnabled) + return recompiler; + else +#endif + return interpreter; +} + int getGamePlatform(const std::string& filename) { if (settings.naomi.slave) @@ -671,13 +679,13 @@ void Emulator::runInternal() { if (singleStep) { - sh4_cpu.Step(); + getSh4Executor()->Step(); singleStep = false; } else if(stepRangeTo != 0) { while (Sh4cntx.pc >= stepRangeFrom && Sh4cntx.pc <= stepRangeTo) - sh4_cpu.Step(); + getSh4Executor()->Step(); stepRangeFrom = 0; stepRangeTo = 0; @@ -687,7 +695,7 @@ void Emulator::runInternal() do { resetRequested = false; - sh4_cpu.Run(); + getSh4Executor()->Run(); if (resetRequested) { @@ -736,7 +744,18 @@ void Emulator::term() if (state == Init) { debugger::term(); - sh4_cpu.Term(); + if (interpreter != nullptr) + { + interpreter->Term(); + delete interpreter; + interpreter = nullptr; + } + if (recompiler != nullptr) + { + recompiler->Term(); + delete recompiler; + recompiler = nullptr; + } custom_texture.Terminate(); // lr: avoid deadlock on exit (win32) reios_term(); aica::term(); @@ -760,7 +779,7 @@ void Emulator::stop() const std::lock_guard _(mutex); // must be updated after GGPO is stopped since it may run some rollback frames state = Loaded; - sh4_cpu.Stop(); + getSh4Executor()->Stop(); } if (config::ThreadedRendering) { @@ -794,7 +813,7 @@ void Emulator::requestReset() resetRequested = true; if (config::GGPOEnable) NetworkHandshake::term(); - sh4_cpu.Stop(); + 
getSh4Executor()->Stop(); } void loadGameSpecificSettings() @@ -841,7 +860,7 @@ void Emulator::stepRange(u32 from, u32 to) stop(); } -void dc_loadstate(Deserializer& deser) +void Emulator::loadstate(Deserializer& deser) { custom_texture.Terminate(); #if FEAT_AREC == DYNAREC_JIT @@ -857,7 +876,7 @@ void dc_loadstate(Deserializer& deser) dc_deserialize(deser); mmu_set_state(); - sh4_cpu.ResetCache(); + getSh4Executor()->ResetCache(); KillTex = true; } @@ -871,7 +890,7 @@ void Emulator::setNetworkState(bool online) && config::Sh4Clock != 200) { config::Sh4Clock.override(200); - sh4_cpu.ResetCache(); + getSh4Executor()->ResetCache(); } EventManager::event(Event::Network); } @@ -906,7 +925,7 @@ void Emulator::run() startTime = sh4_sched_now64(); renderTimeout = false; if (!singleStep && stepRangeTo == 0) - sh4_cpu.Start(); + getSh4Executor()->Start(); try { runInternal(); if (ggpo::active()) @@ -914,7 +933,7 @@ void Emulator::run() } catch (...) { setNetworkState(false); state = Error; - sh4_cpu.Stop(); + getSh4Executor()->Stop(); EventManager::event(Event::Pause); throw; } @@ -930,18 +949,6 @@ void Emulator::start() if (config::GGPOEnable && config::ThreadedRendering) // Not supported with GGPO config::EmulateFramebuffer.override(false); -#if FEAT_SHREC != DYNAREC_NONE - if (config::DynarecEnabled) - { - Get_Sh4Recompiler(&sh4_cpu); - INFO_LOG(DYNAREC, "Using Recompiler"); - } - else -#endif - { - Get_Sh4Interpreter(&sh4_cpu); - INFO_LOG(DYNAREC, "Using Interpreter"); - } setupPtyPipe(); memwatch::protect(); @@ -949,7 +956,7 @@ void Emulator::start() if (config::ThreadedRendering) { const std::lock_guard lock(mutex); - sh4_cpu.Start(); + getSh4Executor()->Start(); threadResult = std::async(std::launch::async, [this] { ThreadName _("Flycast-emu"); InitAudio(); @@ -966,7 +973,7 @@ void Emulator::start() TermAudio(); } catch (...) 
{ setNetworkState(false); - sh4_cpu.Stop(); + getSh4Executor()->Stop(); TermAudio(); throw; } @@ -1044,7 +1051,7 @@ void Emulator::vblank() if (ggpo::active()) ggpo::endOfFrame(); else if (!config::ThreadedRendering) - sh4_cpu.Stop(); + getSh4Executor()->Stop(); } bool Emulator::restartCpu() @@ -1052,7 +1059,7 @@ bool Emulator::restartCpu() const std::lock_guard _(mutex); if (state != Running) return false; - sh4_cpu.Start(); + getSh4Executor()->Start(); return true; } diff --git a/core/emulator.h b/core/emulator.h index 354a491db..945fc193d 100644 --- a/core/emulator.h +++ b/core/emulator.h @@ -33,12 +33,10 @@ void loadGameSpecificSettings(); void SaveSettings(); int flycast_init(int argc, char* argv[]); -void dc_reset(bool hard); // for tests only void flycast_term(); void dc_exit(); void dc_savestate(int index = 0, const u8 *pngData = nullptr, u32 pngSize = 0); void dc_loadstate(int index = 0); -void dc_loadstate(Deserializer& deser); time_t dc_getStateCreationDate(int index); void dc_getStateScreenshot(int index, std::vector& pngData); @@ -98,6 +96,8 @@ struct LoadProgress std::atomic progress; }; +class Sh4Executor; + class Emulator { public: @@ -170,6 +170,14 @@ public: * Returns true if the cpu was started */ bool restartCpu(); + /* + * Load the machine state from the passed deserializer + */ + void loadstate(Deserializer& deser); + + Sh4Executor *getSh4Executor(); + + void dc_reset(bool hard); // for tests only private: bool checkStatus(bool wait = false); @@ -193,6 +201,8 @@ private: u32 stepRangeTo = 0; bool stopRequested = false; std::mutex mutex; + Sh4Executor *interpreter = nullptr; + Sh4Executor *recompiler = nullptr; }; extern Emulator emu; diff --git a/core/hw/mem/addrspace.cpp b/core/hw/mem/addrspace.cpp index 391f48834..a0e8f1810 100644 --- a/core/hw/mem/addrspace.cpp +++ b/core/hw/mem/addrspace.cpp @@ -380,8 +380,6 @@ bool bm_lockedWrite(u8* address) bool reserve() { - static_assert((sizeof(Sh4RCB) % PAGE_SIZE) == 0, "sizeof(Sh4RCB) not multiple 
of PAGE_SIZE"); - if (ram_base != nullptr) return true; diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index 86ce77c7d..55289b3ac 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -237,7 +237,7 @@ private: { presented = true; if (!config::ThreadedRendering && !ggpo::active()) - sh4_cpu.Stop(); + emu.getSh4Executor()->Stop(); #ifdef LIBRETRO retro_rend_present(); #endif @@ -321,7 +321,7 @@ bool rend_init_renderer() rendererEnabled = true; if (renderer == nullptr) rend_create_renderer(); - bool success = renderer->Init(); + bool success = renderer != nullptr && renderer->Init(); if (!success) { delete renderer; renderer = rend_norend(); diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index c57e5eb36..32b8230b2 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -40,9 +40,9 @@ ptrdiff_t cc_rx_offset; static std::unordered_set smc_hotspots; -static sh4_if sh4Interp; static Sh4CodeBuffer codeBuffer; Sh4Dynarec *sh4Dynarec; +Sh4Recompiler *Sh4Recompiler::Instance; void *Sh4CodeBuffer::get() { @@ -83,14 +83,14 @@ void Sh4CodeBuffer::reset(bool temporary) lastAddr = 0; } -static void clear_temp_cache(bool full) +void Sh4Recompiler::clear_temp_cache(bool full) { //printf("recSh4:Temp Code Cache clear at %08X\n", curr_pc); codeBuffer.reset(true); bm_ResetTempCache(full); } -static void recSh4_ClearCache() +void Sh4Recompiler::ResetCache() { INFO_LOG(DYNAREC, "recSh4:Dynarec Cache clear at %08X free space %d", next_pc, codeBuffer.getFreeSpace()); codeBuffer.reset(false); @@ -99,7 +99,7 @@ static void recSh4_ClearCache() clear_temp_cache(true); } -static void recSh4_Run() +void Sh4Recompiler::Run() { RestoreHostRoundingMode(); @@ -108,7 +108,7 @@ static void recSh4_Run() sh4Dynarec->mainloop(sh4_dyna_rcb); - sh4_int_bCpuRun = false; + ctx->CpuRunning = false; } void AnalyseBlock(RuntimeBlockInfo* blk); @@ -171,7 +171,7 @@ DynarecCodeEntryPtr rdv_CompilePC(u32 
blockcheck_failures) const u32 pc = next_pc; if (codeBuffer.getFreeSpace() < 32_KB || pc == 0x8c0000e0 || pc == 0xac010000 || pc == 0xac008300) - recSh4_ClearCache(); + Sh4Recompiler::Instance->ResetCache(); RuntimeBlockInfo* rbi = sh4Dynarec->allocateBlock(); @@ -185,7 +185,7 @@ DynarecCodeEntryPtr rdv_CompilePC(u32 blockcheck_failures) { codeBuffer.useTempBuffer(true); if (codeBuffer.getFreeSpace() < 32_KB) - clear_temp_cache(false); + Sh4Recompiler::Instance->clear_temp_cache(false); rbi->temp_block = true; if (rbi->read_only) INFO_LOG(DYNAREC, "WARNING: temp block %x (%x) is protected!", rbi->vaddr, rbi->addr); @@ -248,7 +248,7 @@ DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr) else { next_pc = addr; - recSh4_ClearCache(); + Sh4Recompiler::Instance->ResetCache(); } return (DynarecCodeEntryPtr)CC_RW2RX(rdv_CompilePC(blockcheck_failures)); } @@ -340,34 +340,18 @@ void* DYNACALL rdv_LinkBlock(u8* code,u32 dpc) return (void*)rv; } -static void recSh4_Start() +void Sh4Recompiler::Reset(bool hard) { - sh4Interp.Start(); -} - -static void recSh4_Stop() -{ - sh4Interp.Stop(); -} - -static void recSh4_Step() -{ - sh4Interp.Step(); -} - -static void recSh4_Reset(bool hard) -{ - sh4Interp.Reset(hard); - recSh4_ClearCache(); + super::Reset(hard); + ResetCache(); if (hard) bm_Reset(); } -static void recSh4_Init() +void Sh4Recompiler::Init() { - INFO_LOG(DYNAREC, "recSh4 Init"); - Get_Sh4Interpreter(&sh4Interp); - sh4Interp.Init(); + INFO_LOG(DYNAREC, "Sh4Recompiler::Init"); + super::Init(); bm_Init(); if (addrspace::virtmemEnabled()) @@ -389,9 +373,9 @@ static void recSh4_Init() bm_ResetCache(); } -static void recSh4_Term() +void Sh4Recompiler::Term() { - INFO_LOG(DYNAREC, "recSh4 Term"); + INFO_LOG(DYNAREC, "Sh4Recompiler::Term"); #ifdef FEAT_NO_RWX_PAGES if (CodeCache != nullptr) virtmem::release_jit_block(CodeCache, (u8 *)CodeCache + cc_rx_offset, FULL_SIZE); @@ -402,25 +386,12 @@ static void recSh4_Term() CodeCache = nullptr; TempCodeCache = nullptr; 
bm_Term(); - sh4Interp.Term(); + super::Term(); } -static bool recSh4_IsCpuRunning() +Sh4Executor *Get_Sh4Recompiler() { - return sh4Interp.IsCpuRunning(); -} - -void Get_Sh4Recompiler(sh4_if* cpu) -{ - cpu->Run = recSh4_Run; - cpu->Start = recSh4_Start; - cpu->Stop = recSh4_Stop; - cpu->Step = recSh4_Step; - cpu->Reset = recSh4_Reset; - cpu->Init = recSh4_Init; - cpu->Term = recSh4_Term; - cpu->IsCpuRunning = recSh4_IsCpuRunning; - cpu->ResetCache = recSh4_ClearCache; + return new Sh4Recompiler(); } static bool translateAddress(u32 addr, int size, u32 access, u32& outAddr, RuntimeBlockInfo* block) diff --git a/core/hw/sh4/dyna/ngen.h b/core/hw/sh4/dyna/ngen.h index 86b8ca7cb..88c8f1d25 100644 --- a/core/hw/sh4/dyna/ngen.h +++ b/core/hw/sh4/dyna/ngen.h @@ -3,6 +3,7 @@ #pragma once #include "blockmanager.h" #include "oslib/host_context.h" +#include "../sh4_interpreter.h" // When NO_RWX is enabled there's two address-spaces, one executable and // one writtable. The emitter and most of the code in rec-* will work with @@ -121,3 +122,25 @@ public: }; extern Sh4Dynarec *sh4Dynarec; + +class Sh4Recompiler : public Sh4Interpreter +{ + using super = Sh4Interpreter; + +public: + Sh4Recompiler() { + Instance = this; + } + ~Sh4Recompiler() { + Instance = nullptr; + } + void Run() override; + void ResetCache() override; + void Reset(bool hard) override; + void Init() override; + void Term() override; + + void clear_temp_cache(bool full); + + static Sh4Recompiler *Instance; +}; diff --git a/core/hw/sh4/interpr/sh4_interpreter.cpp b/core/hw/sh4/interpr/sh4_interpreter.cpp index 250d6ca9d..c5c5df60e 100644 --- a/core/hw/sh4/interpr/sh4_interpreter.cpp +++ b/core/hw/sh4/interpr/sh4_interpreter.cpp @@ -14,38 +14,30 @@ #include "debug/gdb_server.h" #include "../sh4_cycles.h" -// SH4 underclock factor when using the interpreter so that it's somewhat usable -#ifdef STRICT_MODE -constexpr int CPU_RATIO = 1; -#else -constexpr int CPU_RATIO = 8; -#endif - Sh4ICache icache; Sh4OCache 
ocache; -Sh4Cycles sh4cycles(CPU_RATIO); -static void ExecuteOpcode(u16 op) +void Sh4Interpreter::ExecuteOpcode(u16 op) { if (sr.FD == 1 && OpDesc[op]->IsFloatingPoint()) - throw SH4ThrownException(next_pc - 2, Sh4Ex_FpuDisabled); + throw SH4ThrownException(ctx->pc - 2, Sh4Ex_FpuDisabled); OpPtr[op](op); sh4cycles.executeCycles(op); } -static u16 ReadNexOp() +u16 Sh4Interpreter::ReadNexOp() { - if (!mmu_enabled() && (next_pc & 1)) + u32 addr = ctx->pc; + if (!mmu_enabled() && (addr & 1)) // address error - throw SH4ThrownException(next_pc, Sh4Ex_AddressErrorRead); + throw SH4ThrownException(addr, Sh4Ex_AddressErrorRead); - u32 addr = next_pc; - next_pc += 2; + ctx->pc = addr + 2; return IReadMem16(addr); } -static void Sh4_int_Run() +void Sh4Interpreter::Run() { RestoreHostRoundingMode(); @@ -58,34 +50,34 @@ static void Sh4_int_Run() u32 op = ReadNexOp(); ExecuteOpcode(op); - } while (p_sh4rcb->cntx.cycle_counter > 0); - p_sh4rcb->cntx.cycle_counter += SH4_TIMESLICE; + } while (ctx->cycle_counter > 0); + ctx->cycle_counter += SH4_TIMESLICE; UpdateSystem_INTC(); } catch (const SH4ThrownException& ex) { Do_Exception(ex.epc, ex.expEvn); // an exception requires the instruction pipeline to drain, so approx 5 cycles sh4cycles.addCycles(5 * CPU_RATIO); } - } while (sh4_int_bCpuRun); + } while (ctx->CpuRunning); } catch (const debugger::Stop&) { } - sh4_int_bCpuRun = false; + ctx->CpuRunning = false; } -static void Sh4_int_Start() +void Sh4Interpreter::Start() { - sh4_int_bCpuRun = true; + ctx->CpuRunning = true; } -static void Sh4_int_Stop() +void Sh4Interpreter::Stop() { - sh4_int_bCpuRun = false; + ctx->CpuRunning = false; } -static void Sh4_int_Step() +void Sh4Interpreter::Step() { - verify(!sh4_int_bCpuRun); + verify(!ctx->CpuRunning); RestoreHostRoundingMode(); try { @@ -99,26 +91,26 @@ static void Sh4_int_Step() } } -static void Sh4_int_Reset(bool hard) +void Sh4Interpreter::Reset(bool hard) { - verify(!sh4_int_bCpuRun); + verify(!ctx->CpuRunning); if (hard) { - 
int schedNext = p_sh4rcb->cntx.sh4_sched_next; - memset(&p_sh4rcb->cntx, 0, sizeof(p_sh4rcb->cntx)); - p_sh4rcb->cntx.sh4_sched_next = schedNext; + int schedNext = ctx->sh4_sched_next; + memset(ctx, 0, sizeof(*ctx)); + ctx->sh4_sched_next = schedNext; } - next_pc = 0xA0000000; + ctx->pc = 0xA0000000; - memset(r,0,sizeof(r)); - memset(r_bank,0,sizeof(r_bank)); + memset(r, 0, sizeof(r)); + memset(r_bank, 0, sizeof(r_bank)); - gbr=ssr=spc=sgr=dbr=vbr=0; - mac.full=pr=fpul=0; + gbr = ssr = spc = sgr = dbr = vbr = 0; + mac.full = pr = fpul = 0; sr.setFull(0x700000F0); - old_sr.status=sr.status; + old_sr.status = sr.status; UpdateSR(); fpscr.full = 0x00040001; @@ -127,18 +119,18 @@ static void Sh4_int_Reset(bool hard) icache.Reset(hard); ocache.Reset(hard); sh4cycles.reset(); - p_sh4rcb->cntx.cycle_counter = SH4_TIMESLICE; + ctx->cycle_counter = SH4_TIMESLICE; INFO_LOG(INTERPRETER, "Sh4 Reset"); } -static bool Sh4_int_IsCpuRunning() +bool Sh4Interpreter::IsCpuRunning() { - return sh4_int_bCpuRun; + return ctx->CpuRunning; } //TODO : Check for valid delayslot instruction -void ExecuteDelayslot() +void Sh4Interpreter::ExecuteDelayslot() { try { u32 op = ReadNexOp(); @@ -148,12 +140,12 @@ void ExecuteDelayslot() AdjustDelaySlotException(ex); throw ex; } catch (const debugger::Stop& e) { - next_pc -= 2; // break on previous instruction + ctx->pc -= 2; // break on previous instruction throw e; } } -void ExecuteDelayslot_RTE() +void Sh4Interpreter::ExecuteDelayslot_RTE() { try { // In an RTE delay slot, status register (SR) bits are referenced as follows. 
@@ -169,7 +161,7 @@ void ExecuteDelayslot_RTE() } catch (const SH4ThrownException&) { throw FlycastException("Fatal: SH4 exception in RTE delay slot"); } catch (const debugger::Stop& e) { - next_pc -= 2; // break on previous instruction + ctx->pc -= 2; // break on previous instruction throw e; } } @@ -186,30 +178,19 @@ int UpdateSystem_INTC() return 0; } -static void sh4_int_resetcache() { +void Sh4Interpreter::Init() +{ + ctx = &p_sh4rcb->cntx; + memset(ctx, 0, sizeof(*ctx)); } -static void Sh4_int_Init() +void Sh4Interpreter::Term() { - memset(&p_sh4rcb->cntx, 0, sizeof(p_sh4rcb->cntx)); -} - -static void Sh4_int_Term() -{ - Sh4_int_Stop(); + Stop(); INFO_LOG(INTERPRETER, "Sh4 Term"); } -void Get_Sh4Interpreter(sh4_if* cpu) +Sh4Executor *Get_Sh4Interpreter() { - cpu->Start = Sh4_int_Start; - cpu->Run = Sh4_int_Run; - cpu->Stop = Sh4_int_Stop; - cpu->Step = Sh4_int_Step; - cpu->Reset = Sh4_int_Reset; - cpu->Init = Sh4_int_Init; - cpu->Term = Sh4_int_Term; - cpu->IsCpuRunning = Sh4_int_IsCpuRunning; - - cpu->ResetCache = sh4_int_resetcache; + return new Sh4Interpreter(); } diff --git a/core/hw/sh4/interpr/sh4_opcodes.cpp b/core/hw/sh4/interpr/sh4_opcodes.cpp index 97b71fd81..60713c51e 100644 --- a/core/hw/sh4/interpr/sh4_opcodes.cpp +++ b/core/hw/sh4/interpr/sh4_opcodes.cpp @@ -11,6 +11,7 @@ #include "hw/sh4/sh4_interrupts.h" #include "debug/gdb_server.h" #include "hw/sh4/dyna/decoder.h" +#include "emulator.h" #ifdef STRICT_MODE #include "hw/sh4/sh4_cache.h" @@ -802,12 +803,16 @@ sh4op(i0000_0000_0010_1000) mac.full=0; } +static void executeDelaySlot() { + static_cast<Sh4Interpreter *>(emu.getSh4Executor())->ExecuteDelayslot(); +} + //braf sh4op(i0000_nnnn_0010_0011) { u32 n = GetN(op); u32 newpc = r[n] + next_pc + 2;// - ExecuteDelayslot(); //WARN : r[n] can change here + executeDelaySlot(); //WARN : r[n] can change here next_pc = newpc; } //bsrf @@ -817,7 +822,7 @@ sh4op(i0000_nnnn_0000_0011) u32 newpc = r[n] + next_pc +2; u32 newpr = next_pc + 2; - ExecuteDelayslot(); //WARN : pr 
and r[n] can change here + executeDelaySlot(); //WARN : pr and r[n] can change here pr = newpr; next_pc = newpc; @@ -829,7 +834,7 @@ sh4op(i0000_nnnn_0000_0011) sh4op(i0000_0000_0010_1011) { u32 newpc = spc; - ExecuteDelayslot_RTE(); + static_cast<Sh4Interpreter *>(emu.getSh4Executor())->ExecuteDelayslot_RTE(); next_pc = newpc; if (UpdateSR()) UpdateINTC(); @@ -841,7 +846,7 @@ sh4op(i0000_0000_0010_1011) sh4op(i0000_0000_0000_1011) { u32 newpc=pr; - ExecuteDelayslot(); //WARN : pr can change here + executeDelaySlot(); //WARN : pr can change here next_pc=newpc; debugger::subroutineReturn(); } @@ -868,7 +873,7 @@ sh4op(i1000_1111_iiii_iiii) { //delay 1 instruction u32 newpc=branch_target_s8(op); - ExecuteDelayslot(); + executeDelaySlot(); next_pc = newpc; } } @@ -892,7 +897,7 @@ sh4op(i1000_1101_iiii_iiii) { //delay 1 instruction u32 newpc=branch_target_s8(op); - ExecuteDelayslot(); + executeDelaySlot(); next_pc = newpc; } } @@ -906,7 +911,7 @@ u32 branch_target_s12(u32 op) sh4op(i1010_iiii_iiii_iiii) { u32 newpc = branch_target_s12(op); - ExecuteDelayslot(); + executeDelaySlot(); next_pc=newpc; } @@ -915,7 +920,7 @@ sh4op(i1011_iiii_iiii_iiii) { u32 newpr = next_pc + 2; //return after delayslot u32 newpc = branch_target_s12(op); - ExecuteDelayslot(); + executeDelaySlot(); pr = newpr; next_pc = newpc; @@ -937,7 +942,7 @@ sh4op(i0100_nnnn_0010_1011) u32 n = GetN(op); u32 newpc=r[n]; - ExecuteDelayslot(); //r[n] can change here + executeDelaySlot(); //r[n] can change here next_pc=newpc; } @@ -948,7 +953,7 @@ sh4op(i0100_nnnn_0000_1011) u32 newpr = next_pc + 2; //return after delayslot u32 newpc= r[n]; - ExecuteDelayslot(); //r[n]/pr can change here + executeDelaySlot(); //r[n]/pr can change here pr = newpr; next_pc = newpc; diff --git a/core/hw/sh4/modules/ccn.cpp b/core/hw/sh4/modules/ccn.cpp index 708f33110..19bbb1916 100644 --- a/core/hw/sh4/modules/ccn.cpp +++ b/core/hw/sh4/modules/ccn.cpp @@ -8,6 +8,7 @@ #include "hw/sh4/sh4_core.h" #include "hw/sh4/sh4_cache.h" #include 
"cfg/option.h" +#include "emulator.h" CCNRegisters ccn; @@ -54,7 +55,7 @@ static void CCN_MMUCR_write(u32 addr, u32 value) { //printf("<*******>MMU Enabled , ONLY SQ remaps work<*******>\n"); mmu_set_state(); - sh4_cpu.ResetCache(); + emu.getSh4Executor()->ResetCache(); } } diff --git a/core/hw/sh4/sh4_cache.h b/core/hw/sh4/sh4_cache.h index 5f9ba26e8..2784866c5 100644 --- a/core/hw/sh4/sh4_cache.h +++ b/core/hw/sh4/sh4_cache.h @@ -221,6 +221,7 @@ private: } std::array lines; + Sh4Cycles sh4cycles; }; extern Sh4ICache icache; @@ -589,6 +590,7 @@ private: // TODO serialize u64 writeBackBufferCycles = 0; u64 writeThroughBufferCycles = 0; + Sh4Cycles sh4cycles; }; extern Sh4OCache ocache; diff --git a/core/hw/sh4/sh4_core.h b/core/hw/sh4/sh4_core.h index cf8daf234..eb28ce224 100644 --- a/core/hw/sh4/sh4_core.h +++ b/core/hw/sh4/sh4_core.h @@ -25,7 +25,6 @@ #define xf_hex ((u32*)xf) #define dr_hex ((u64*)fr) #define xd_hex ((u64*)xf) -#define sh4_int_bCpuRun Sh4cntx.CpuRunning void UpdateFPSCR(); bool UpdateSR(); diff --git a/core/hw/sh4/sh4_core_regs.cpp b/core/hw/sh4/sh4_core_regs.cpp index 2fbd1584b..8338ef89b 100644 --- a/core/hw/sh4/sh4_core_regs.cpp +++ b/core/hw/sh4/sh4_core_regs.cpp @@ -10,7 +10,6 @@ #endif Sh4RCB* p_sh4rcb; -sh4_if sh4_cpu; static void ChangeGPR() { diff --git a/core/hw/sh4/sh4_cycles.cpp b/core/hw/sh4/sh4_cycles.cpp index 5588c9d6a..e55ca0760 100644 --- a/core/hw/sh4/sh4_cycles.cpp +++ b/core/hw/sh4/sh4_cycles.cpp @@ -17,6 +17,74 @@ along with Flycast. If not, see . 
*/ #include "sh4_cycles.h" +#include "modules/mmu.h" + +int Sh4Cycles::countCycles(u16 op) +{ + sh4_opcodelistentry *opcode = OpDesc[op]; + int cycles = 0; +#ifndef STRICT_MODE + static const bool isMemOp[45] { + false, + false, + true, // all mem moves, ldtlb, sts.l FPUL/FPSCR, @-Rn, lds.l @Rn+,FPUL + true, // gbr-based load/store + false, + true, // tst.b #, @(R0,GBR) + true, // and/or/xor.b #, @(R0,GBR) + true, // tas.b @Rn + false, + false, + false, + false, + true, // movca.l R0, @Rn + false, + false, + false, + false, + true, // ldc.l @Rn+, VBR/SPC/SSR/Rn_Bank/DBR + true, // ldc.l @Rn+, GBR/SGR + true, // ldc.l @Rn+, SR + false, + false, + true, // stc.l DBR/SR/GBR/VBR/SSR/SPC/Rn_Bank, @-Rn + true, // stc.l SGR, @-Rn + false, + true, // lds.l @Rn+, PR + false, + true, // sts.l PR, @-Rn + false, + true, // lds.l @Rn+, MACH/MACL + false, + true, // sts.l MACH/MACL, @-Rn + false, + true, // lds.l @Rn+,FPSCR + false, + true, // mac.wl @Rm+,@Rn+ + }; + if (isMemOp[opcode->ex_type]) + { + if (++memOps < 4) + cycles = mmu_enabled() ? 5 : 2; + } + // TODO only for mem read? 
+#endif + + if (lastUnit == CO + || opcode->unit == CO + || (lastUnit == opcode->unit && lastUnit != MT)) + { + // cannot run in parallel + lastUnit = opcode->unit; + cycles += opcode->IssueCycles; + } + else + { + // can run in parallel + lastUnit = CO; + } + return cycles * cpuRatio; +} // TODO additional wait cycles depending on area?: // Area Wait cycles (not including external wait) diff --git a/core/hw/sh4/sh4_cycles.h b/core/hw/sh4/sh4_cycles.h index 1d517c675..ef203b907 100644 --- a/core/hw/sh4/sh4_cycles.h +++ b/core/hw/sh4/sh4_cycles.h @@ -21,7 +21,6 @@ #include "sh4_opcode_list.h" #include "sh4_if.h" #include "sh4_sched.h" -#include "modules/mmu.h" class Sh4Cycles { @@ -48,72 +47,7 @@ public: Sh4cntx.cycle_counter -= writeAccessCycles(addr, size); } - int countCycles(u16 op) - { - sh4_opcodelistentry *opcode = OpDesc[op]; - int cycles = 0; -#ifndef STRICT_MODE - static const bool isMemOp[45] { - false, - false, - true, // all mem moves, ldtlb, sts.l FPUL/FPSCR, @-Rn, lds.l @Rn+,FPUL - true, // gbr-based load/store - false, - true, // tst.b #, @(R0,GBR) - true, // and/or/xor.b #, @(R0,GBR) - true, // tas.b @Rn - false, - false, - false, - false, - true, // movca.l R0, @Rn - false, - false, - false, - false, - true, // ldc.l @Rn+, VBR/SPC/SSR/Rn_Bank/DBR - true, // ldc.l @Rn+, GBR/SGR - true, // ldc.l @Rn+, SR - false, - false, - true, // stc.l DBR/SR/GBR/VBR/SSR/SPC/Rn_Bank, @-Rn - true, // stc.l SGR, @-Rn - false, - true, // lds.l @Rn+, PR - false, - true, // sts.l PR, @-Rn - false, - true, // lds.l @Rn+, MACH/MACL - false, - true, // sts.l MACH/MACL, @-Rn - false, - true, // lds.l @Rn+,FPSCR - false, - true, // mac.wl @Rm+,@Rn+ - }; - if (isMemOp[opcode->ex_type]) - { - if (++memOps < 4) - cycles = mmu_enabled() ? 5 : 2; - } - // TODO only for mem read? 
-#endif - - if (lastUnit == CO - || opcode->unit == CO - || (lastUnit == opcode->unit && lastUnit != MT)) - { - // cannot run in parallel - lastUnit = opcode->unit; - cycles += opcode->IssueCycles; - } - else - { - // can run in parallel - lastUnit = CO; - } - return cycles * cpuRatio; - } + int countCycles(u16 op); void reset() { @@ -143,5 +77,3 @@ private: const int cpuRatio; int memOps = 0; }; - -extern Sh4Cycles sh4cycles; diff --git a/core/hw/sh4/sh4_if.h b/core/hw/sh4/sh4_if.h index a564c1543..491f4e666 100644 --- a/core/hw/sh4/sh4_if.h +++ b/core/hw/sh4/sh4_if.h @@ -100,21 +100,21 @@ struct fpscr_t }; //sh4 interface -struct sh4_if +class Sh4Executor { - void (*Start)(); - void (*Run)(); - void (*Stop)(); - void (*Step)(); - void (*Reset)(bool hard); - void (*Init)(); - void (*Term)(); - void (*ResetCache)(); - bool (*IsCpuRunning)(); +public: + virtual ~Sh4Executor() {} + virtual void Run() = 0; + virtual void Start() = 0; + virtual void Stop() = 0; + virtual void Step() = 0; + virtual void Reset(bool hard) = 0; + virtual void Init() = 0; + virtual void Term() = 0; + virtual void ResetCache() = 0; + virtual bool IsCpuRunning() = 0; }; -extern sh4_if sh4_cpu; - struct alignas(32) SQBuffer { u8 data[32]; }; @@ -235,6 +235,7 @@ struct alignas(PAGE_SIZE) Sh4RCB SQWriteFunc *do_sqw_nommu; Sh4Context cntx; }; +static_assert((sizeof(Sh4RCB) % PAGE_SIZE) == 0, "sizeof(Sh4RCB) not multiple of PAGE_SIZE"); extern Sh4RCB* p_sh4rcb; @@ -244,8 +245,8 @@ extern Sh4RCB* p_sh4rcb; #define Sh4cntx (sh4rcb.cntx) //Get an interface to sh4 interpreter -void Get_Sh4Interpreter(sh4_if* cpu); -void Get_Sh4Recompiler(sh4_if* cpu); +Sh4Executor *Get_Sh4Interpreter(); +Sh4Executor *Get_Sh4Recompiler(); enum Sh4ExceptionCode : u16 { diff --git a/core/hw/sh4/sh4_interpreter.h b/core/hw/sh4/sh4_interpreter.h index 4c5edabb3..70e3c7fb8 100644 --- a/core/hw/sh4/sh4_interpreter.h +++ b/core/hw/sh4/sh4_interpreter.h @@ -1,41 +1,37 @@ #pragma once #include "types.h" +#include "sh4_cycles.h" 
-#undef sh4op -#define sh4op(str) void DYNACALL str (u32 op) -typedef void (DYNACALL OpCallFP) (u32 op); - -enum OpcodeType +class Sh4Interpreter : public Sh4Executor { - //basic - Normal = 0, // Heh , nothing special :P - ReadsPC = 1, // PC must be set upon calling it - WritesPC = 2, // It will write PC (branch) - Delayslot = 4, // Has a delayslot opcode , valid only when WritesPC is set +public: + void Run() override; + void ResetCache() override {} + void Start() override; + void Stop() override; + void Step() override; + void Reset(bool hard) override; + void Init() override; + void Term() override; + bool IsCpuRunning() override; + void ExecuteDelayslot(); + void ExecuteDelayslot_RTE(); + Sh4Context *getContext() { return ctx; } - WritesSR = 8, // Writes to SR , and UpdateSR needs to be called - WritesFPSCR = 16, // Writes to FPSCR , and UpdateSR needs to be called - Invalid = 128, // Invalid +protected: + Sh4Context *ctx = nullptr; - UsesFPU = 2048, // Floating point op - FWritesFPSCR = UsesFPU | WritesFPSCR, +private: + void ExecuteOpcode(u16 op); + u16 ReadNexOp(); - // Heh, not basic :P - ReadWritePC = ReadsPC|WritesPC, // Read and writes pc :P - WritesSRRWPC = WritesSR|ReadsPC|WritesPC, - - // Branches (not delay slot): - Branch_dir = ReadWritePC, // Direct (eg , pc=r[xx]) -- this one is ReadWritePC b/c the delayslot may use pc ;) - Branch_rel = ReadWritePC, // Relative (rg pc+=10); - - // Delay slot - Branch_dir_d = Delayslot|Branch_dir, // Direct (eg , pc=r[xx]) - Branch_rel_d = Delayslot|Branch_rel, // Relative (rg pc+=10); + Sh4Cycles sh4cycles{CPU_RATIO}; + // SH4 underclock factor when using the interpreter so that it's somewhat usable +#ifdef STRICT_MODE + static constexpr int CPU_RATIO = 1; +#else + static constexpr int CPU_RATIO = 8; +#endif }; -void ExecuteDelayslot(); -void ExecuteDelayslot_RTE(); - -#define SH4_TIMESLICE 448 // at 112 Bangai-O doesn't start. 
224 is ok - int UpdateSystem_INTC(); diff --git a/core/hw/sh4/sh4_opcode_list.h b/core/hw/sh4/sh4_opcode_list.h index a87be546e..cfdc25f48 100644 --- a/core/hw/sh4/sh4_opcode_list.h +++ b/core/hw/sh4/sh4_opcode_list.h @@ -1,9 +1,10 @@ #pragma once #include "types.h" -#include "sh4_interpreter.h" - #include <string> +#define sh4op(str) void DYNACALL str (u32 op) + +typedef void (DYNACALL OpCallFP) (u32 op); extern OpCallFP* OpPtr[0x10000]; typedef void OpDissasmFP(char* out,const char* const FormatString,u32 pc,u16 opcode); @@ -18,6 +19,34 @@ enum sh4_eu CO, }; +enum OpcodeType +{ + //basic + Normal = 0, // Heh , nothing special :P + ReadsPC = 1, // PC must be set upon calling it + WritesPC = 2, // It will write PC (branch) + Delayslot = 4, // Has a delayslot opcode , valid only when WritesPC is set + + WritesSR = 8, // Writes to SR , and UpdateSR needs to be called + WritesFPSCR = 16, // Writes to FPSCR , and UpdateSR needs to be called + Invalid = 128, // Invalid + + UsesFPU = 2048, // Floating point op + FWritesFPSCR = UsesFPU | WritesFPSCR, + + // Heh, not basic :P + ReadWritePC = ReadsPC|WritesPC, // Read and writes pc :P + WritesSRRWPC = WritesSR|ReadsPC|WritesPC, + + // Branches (not delay slot): + Branch_dir = ReadWritePC, // Direct (eg , pc=r[xx]) -- this one is ReadWritePC b/c the delayslot may use pc ;) + Branch_rel = ReadWritePC, // Relative (rg pc+=10); + + // Delay slot + Branch_dir_d = Delayslot|Branch_dir, // Direct (eg , pc=r[xx]) + Branch_rel_d = Delayslot|Branch_rel, // Relative (rg pc+=10); +}; + std::string disassemble_op(const char* tx1, u32 pc, u16 opcode); typedef void ( RecOpCallFP) (u32 op); diff --git a/core/hw/sh4/sh4_sched.h b/core/hw/sh4/sh4_sched.h index 029f9ba8b..b40368972 100644 --- a/core/hw/sh4/sh4_sched.h +++ b/core/hw/sh4/sh4_sched.h @@ -1,8 +1,8 @@ -#ifndef SH4_SCHED_H -#define SH4_SCHED_H - +#pragma once #include "types.h" +#define SH4_TIMESLICE 448 // at 112 Bangai-O doesn't start. 
224 is ok + /* tag, as passed on sh4_sched_register sch_cycles, the cycle duration that the callback requested (sh4_sched_request) @@ -53,5 +53,3 @@ void sh4_sched_serialize(Serializer& ser); void sh4_sched_deserialize(Deserializer& deser); void sh4_sched_serialize(Serializer& ser, int id); void sh4_sched_deserialize(Deserializer& deser, int id); - -#endif //SH4_SCHED_H diff --git a/core/network/ggpo.cpp b/core/network/ggpo.cpp index 0672af39b..6d4bb5fce 100644 --- a/core/network/ggpo.cpp +++ b/core/network/ggpo.cpp @@ -330,7 +330,7 @@ static bool load_game_state(unsigned char *buffer, int len) */ static bool save_game_state(unsigned char **buffer, int *len, int *checksum, int frame) { - verify(!sh4_cpu.IsCpuRunning()); + verify(!emu.getSh4Executor()->IsCpuRunning()); lastSavedFrame = frame; // TODO this is way too much memory size_t allocSize = settings.platform.isNaomi() ? 20_MB : 10_MB; @@ -914,7 +914,7 @@ void endOfFrame() if (active()) { _endOfFrame = true; - sh4_cpu.Stop(); + emu.getSh4Executor()->Stop(); } } diff --git a/core/nullDC.cpp b/core/nullDC.cpp index abe6669f7..8a50a042e 100644 --- a/core/nullDC.cpp +++ b/core/nullDC.cpp @@ -270,7 +270,7 @@ void dc_loadstate(int index) try { Deserializer deser(data, total_size); - dc_loadstate(deser); + emu.loadstate(deser); NOTICE_LOG(SAVESTATE, "Loaded state ver %d from %s size %d", deser.version(), filename.c_str(), total_size); if (deser.size() != total_size) // Note: this isn't true for RA savestates diff --git a/shell/libretro/libretro.cpp b/shell/libretro/libretro.cpp index 9b4122f7e..3c2c503ee 100644 --- a/shell/libretro/libretro.cpp +++ b/shell/libretro/libretro.cpp @@ -2362,7 +2362,7 @@ bool retro_unserialize(const void * data, size_t size) try { Deserializer deser(data, size); - dc_loadstate(deser); + emu.loadstate(deser); retro_audio_flush_buffer(); if (!first_run) emu.start(); diff --git a/tests/src/AicaArmTest.cpp b/tests/src/AicaArmTest.cpp index ad6ca7e66..994d03b6f 100644 --- 
a/tests/src/AicaArmTest.cpp +++ b/tests/src/AicaArmTest.cpp @@ -27,7 +27,7 @@ protected: if (!addrspace::reserve()) die("addrspace::reserve failed"); emu.init(); - dc_reset(true); + emu.dc_reset(true); Arm7Enabled = true; } diff --git a/tests/src/CheatManagerTest.cpp b/tests/src/CheatManagerTest.cpp index 7300c57d6..813b63213 100644 --- a/tests/src/CheatManagerTest.cpp +++ b/tests/src/CheatManagerTest.cpp @@ -287,7 +287,7 @@ cheats = "2" mgr.reset("TESTSUB8"); mgr.loadCheatFile("test.cht"); mem_map_default(); - dc_reset(true); + emu.dc_reset(true); mgr.enableCheat(0, true); WriteMem8_nommu(0x8c010000, 0xFA); diff --git a/tests/src/MmuTest.cpp b/tests/src/MmuTest.cpp index 74933979e..3956fd4b0 100644 --- a/tests/src/MmuTest.cpp +++ b/tests/src/MmuTest.cpp @@ -30,7 +30,7 @@ protected: if (!addrspace::reserve()) die("addrspace::reserve failed"); emu.init(); - dc_reset(true); + emu.dc_reset(true); CCN_MMUCR.AT = 1; MMU_reset(); } diff --git a/tests/src/Sh4InterpreterTest.cpp b/tests/src/Sh4InterpreterTest.cpp index adb573dcd..bd7642216 100644 --- a/tests/src/Sh4InterpreterTest.cpp +++ b/tests/src/Sh4InterpreterTest.cpp @@ -28,9 +28,9 @@ protected: die("addrspace::reserve failed"); emu.init(); mem_map_default(); - dc_reset(true); + emu.dc_reset(true); ctx = &p_sh4rcb->cntx; - Get_Sh4Interpreter(&sh4); + sh4 = Get_Sh4Interpreter(); } void PrepareOp(u16 op, u16 op2 = 0, u16 op3 = 0) override { @@ -45,7 +45,7 @@ protected: { ctx->pc = START_PC; for (int i = 0; i < numOp; i++) - sh4.Step(); + sh4->Step(); } }; diff --git a/tests/src/div32_test.cpp b/tests/src/div32_test.cpp index 3805abec2..36a5bd1fc 100644 --- a/tests/src/div32_test.cpp +++ b/tests/src/div32_test.cpp @@ -111,7 +111,7 @@ protected: if (!addrspace::reserve()) die("addrspace::reserve failed"); emu.init(); - dc_reset(true); + emu.dc_reset(true); } void div32s(u32 n1, u32 n2, u32 n3) diff --git a/tests/src/serialize_test.cpp b/tests/src/serialize_test.cpp index 75a769128..b02151cdb 100644 --- 
a/tests/src/serialize_test.cpp +++ b/tests/src/serialize_test.cpp @@ -13,7 +13,7 @@ protected: if (!addrspace::reserve()) die("addrspace::reserve failed"); emu.init(); - dc_reset(true); + emu.dc_reset(true); } }; diff --git a/tests/src/sh4_ops.h b/tests/src/sh4_ops.h index af5ab2921..4d5189532 100644 --- a/tests/src/sh4_ops.h +++ b/tests/src/sh4_ops.h @@ -91,7 +91,7 @@ protected: } Sh4Context *ctx; - sh4_if sh4; + Sh4Executor *sh4; std::set checkedRegs; static constexpr u32 START_PC = 0xAC000000;