Merge pull request #12236 from liamwhite/cpu-refactor

core: refactor emulated cpu core activation
2023-12-06 14:19:17 +01:00 · 2023-12-06 14:19:17 +01:00 · 8a79dd2d6c
parent 167efb2d2b f0ee3e29cb
commit 8a79dd2d6c
47 changed files with 2982 additions and 3332 deletions
--- a/.codespellrc
+++ b/.codespellrc
@ -3,4 +3,4 @@

 [codespell]
 skip = ./.git,./build,./dist,./Doxyfile,./externals,./LICENSES,./src/android/app/src/main/res
-ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nce,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink
+ignore-words-list = aci,allright,ba,canonicalizations,deques,fpr,froms,hda,inout,lod,masia,nam,nax,nce,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@ -4,6 +4,8 @@
 add_library(core STATIC
    arm/arm_interface.h
    arm/arm_interface.cpp
+    arm/debug.cpp
+    arm/debug.h
    arm/exclusive_monitor.cpp
    arm/exclusive_monitor.h
    arm/symbols.cpp
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@ -1,231 +1,32 @@
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

-#include <map>
-#include <optional>
-
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "common/demangle.h"
 #include "common/logging/log.h"
 #include "core/arm/arm_interface.h"
-#include "core/arm/symbols.h"
+#include "core/arm/debug.h"
 #include "core/core.h"
-#include "core/debugger/debugger.h"
 #include "core/hle/kernel/k_process.h"
-#include "core/hle/kernel/k_thread.h"
-#include "core/hle/kernel/svc.h"
-#include "core/loader/loader.h"
-#include "core/memory.h"

 namespace Core {

-constexpr u64 SEGMENT_BASE = 0x7100000000ull;
+void ArmInterface::LogBacktrace(const Kernel::KProcess* process) const {
+    Kernel::Svc::ThreadContext ctx;
+    this->GetContext(ctx);

-std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContext(
-    Core::System& system, const ARM_Interface::ThreadContext32& ctx) {
-    std::vector<BacktraceEntry> out;
-    auto& memory = system.ApplicationMemory();
-
-    const auto& reg = ctx.cpu_registers;
-    u32 pc = reg[15], lr = reg[14], fp = reg[11];
-    out.push_back({"", 0, pc, 0, ""});
-
-    // fp (= r11) points to the last frame record.
-    // Frame records are two words long:
-    // fp+0 : pointer to previous frame record
-    // fp+4 : value of lr for frame
-    for (size_t i = 0; i < 256; i++) {
-        out.push_back({"", 0, lr, 0, ""});
-        if (!fp || (fp % 4 != 0) || !memory.IsValidVirtualAddressRange(fp, 8)) {
-            break;
-        }
-        lr = memory.Read32(fp + 4);
-        fp = memory.Read32(fp);
-    }
-
-    SymbolicateBacktrace(system, out);
-
-    return out;
-}
-
-std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContext(
-    Core::System& system, const ARM_Interface::ThreadContext64& ctx) {
-    std::vector<BacktraceEntry> out;
-    auto& memory = system.ApplicationMemory();
-
-    const auto& reg = ctx.cpu_registers;
-    u64 pc = ctx.pc, lr = reg[30], fp = reg[29];
-
-    out.push_back({"", 0, pc, 0, ""});
-
-    // fp (= x29) points to the previous frame record.
-    // Frame records are two words long:
-    // fp+0 : pointer to previous frame record
-    // fp+8 : value of lr for frame
-    for (size_t i = 0; i < 256; i++) {
-        out.push_back({"", 0, lr, 0, ""});
-        if (!fp || (fp % 4 != 0) || !memory.IsValidVirtualAddressRange(fp, 16)) {
-            break;
-        }
-        lr = memory.Read64(fp + 8);
-        fp = memory.Read64(fp);
-    }
-
-    SymbolicateBacktrace(system, out);
-
-    return out;
-}
-
-void ARM_Interface::SymbolicateBacktrace(Core::System& system, std::vector<BacktraceEntry>& out) {
-    std::map<VAddr, std::string> modules;
-    auto& loader{system.GetAppLoader()};
-    if (loader.ReadNSOModules(modules) != Loader::ResultStatus::Success) {
-        return;
-    }
-
-    std::map<std::string, Symbols::Symbols> symbols;
-    for (const auto& module : modules) {
-        symbols.insert_or_assign(module.second,
-                                 Symbols::GetSymbols(module.first, system.ApplicationMemory(),
-                                                     system.ApplicationProcess()->Is64Bit()));
-    }
-
-    for (auto& entry : out) {
-        VAddr base = 0;
-        for (auto iter = modules.rbegin(); iter != modules.rend(); ++iter) {
-            const auto& module{*iter};
-            if (entry.original_address >= module.first) {
-                entry.module = module.second;
-                base = module.first;
-                break;
-            }
-        }
-
-        entry.offset = entry.original_address - base;
-        entry.address = SEGMENT_BASE + entry.offset;
-
-        if (entry.module.empty()) {
-            entry.module = "unknown";
-        }
-
-        const auto symbol_set = symbols.find(entry.module);
-        if (symbol_set != symbols.end()) {
-            const auto symbol = Symbols::GetSymbolName(symbol_set->second, entry.offset);
-            if (symbol) {
-                entry.name = Common::DemangleSymbol(*symbol);
-            }
-        }
-    }
-}
-
-std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const {
-    if (GetArchitecture() == Architecture::Aarch64) {
-        ThreadContext64 ctx;
-        SaveContext(ctx);
-        return GetBacktraceFromContext(system, ctx);
-    } else {
-        ThreadContext32 ctx;
-        SaveContext(ctx);
-        return GetBacktraceFromContext(system, ctx);
-    }
-}
-
-void ARM_Interface::LogBacktrace() const {
-    const VAddr sp = GetSP();
-    const VAddr pc = GetPC();
-    LOG_ERROR(Core_ARM, "Backtrace, sp={:016X}, pc={:016X}", sp, pc);
+    LOG_ERROR(Core_ARM, "Backtrace, sp={:016X}, pc={:016X}", ctx.sp, ctx.pc);
    LOG_ERROR(Core_ARM, "{:20}{:20}{:20}{:20}{}", "Module Name", "Address", "Original Address",
              "Offset", "Symbol");
    LOG_ERROR(Core_ARM, "");
-    const auto backtrace = GetBacktrace();
+    const auto backtrace = GetBacktraceFromContext(process, ctx);
    for (const auto& entry : backtrace) {
        LOG_ERROR(Core_ARM, "{:20}{:016X}    {:016X}    {:016X}    {}", entry.module, entry.address,
                  entry.original_address, entry.offset, entry.name);
    }
 }

-void ARM_Interface::Run() {
-    using Kernel::StepState;
-    using Kernel::SuspendType;
-
-    while (true) {
-        Kernel::KThread* current_thread{Kernel::GetCurrentThreadPointer(system.Kernel())};
-        HaltReason hr{};
-
-        // If the thread is scheduled for termination, exit the thread.
-        if (current_thread->HasDpc()) {
-            if (current_thread->IsTerminationRequested()) {
-                current_thread->Exit();
-                UNREACHABLE();
-            }
-        }
-
-        // Notify the debugger and go to sleep if a step was performed
-        // and this thread has been scheduled again.
-        if (current_thread->GetStepState() == StepState::StepPerformed) {
-            system.GetDebugger().NotifyThreadStopped(current_thread);
-            current_thread->RequestSuspend(SuspendType::Debug);
-            break;
-        }
-
-        // Otherwise, run the thread.
-        system.EnterCPUProfile();
-        if (current_thread->GetStepState() == StepState::StepPending) {
-            hr = StepJit();
-
-            if (True(hr & HaltReason::StepThread)) {
-                current_thread->SetStepState(StepState::StepPerformed);
-            }
-        } else {
-            hr = RunJit();
-        }
-        system.ExitCPUProfile();
-
-        // Notify the debugger and go to sleep if a breakpoint was hit,
-        // or if the thread is unable to continue for any reason.
-        if (True(hr & HaltReason::InstructionBreakpoint) || True(hr & HaltReason::PrefetchAbort)) {
-            if (!True(hr & HaltReason::PrefetchAbort)) {
-                RewindBreakpointInstruction();
-            }
-            if (system.DebuggerEnabled()) {
-                system.GetDebugger().NotifyThreadStopped(current_thread);
-            } else {
-                LogBacktrace();
-            }
-            current_thread->RequestSuspend(SuspendType::Debug);
-            break;
-        }
-
-        // Notify the debugger and go to sleep if a watchpoint was hit.
-        if (True(hr & HaltReason::DataAbort)) {
-            if (system.DebuggerEnabled()) {
-                system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint());
-            } else {
-                LogBacktrace();
-            }
-            current_thread->RequestSuspend(SuspendType::Debug);
-            break;
-        }
-
-        // Handle syscalls and scheduling (this may change the current thread/core)
-        if (True(hr & HaltReason::SupervisorCall)) {
-            Kernel::Svc::Call(system, GetSvcNumber());
-            break;
-        }
-        if (True(hr & HaltReason::BreakLoop) || !uses_wall_clock) {
-            break;
-        }
-    }
-}
-
-void ARM_Interface::LoadWatchpointArray(const WatchpointArray* wp) {
-    watchpoints = wp;
-}
-
-const Kernel::DebugWatchpoint* ARM_Interface::MatchingWatchpoint(
+const Kernel::DebugWatchpoint* ArmInterface::MatchingWatchpoint(
    u64 addr, u64 size, Kernel::DebugWatchpointType access_type) const {
-    if (!watchpoints) {
+    if (!m_watchpoints) {
        return nullptr;
    }

@ -233,7 +34,7 @@ const Kernel::DebugWatchpoint* ARM_Interface::MatchingWatchpoint(
    const u64 end_address{addr + size};

    for (size_t i = 0; i < Core::Hardware::NUM_WATCHPOINTS; i++) {
-        const auto& watch{(*watchpoints)[i]};
+        const auto& watch{(*m_watchpoints)[i]};

        if (end_address <= GetInteger(watch.start_address)) {
            continue;
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@ -12,20 +12,20 @@
 #include "common/common_types.h"
 #include "core/hardware_properties.h"

+#include "core/hle/kernel/svc_types.h"
+
 namespace Common {
 struct PageTable;
 }

 namespace Kernel {
-enum class VMAPermission : u8;
 enum class DebugWatchpointType : u8;
 struct DebugWatchpoint;
+class KThread;
+class KProcess;
 } // namespace Kernel

 namespace Core {
-class System;
-class CPUInterruptHandler;
-
 using WatchpointArray = std::array<Kernel::DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS>;

 // NOTE: these values match the HaltReason enum in Dynarmic
@ -40,197 +40,74 @@ enum class HaltReason : u64 {
 DECLARE_ENUM_FLAG_OPERATORS(HaltReason);

 enum class Architecture {
-    Aarch32,
-    Aarch64,
+    AArch64,
+    AArch32,
 };

 /// Generic ARMv8 CPU interface
-class ARM_Interface {
+class ArmInterface {
 public:
-    YUZU_NON_COPYABLE(ARM_Interface);
-    YUZU_NON_MOVEABLE(ARM_Interface);
+    YUZU_NON_COPYABLE(ArmInterface);
+    YUZU_NON_MOVEABLE(ArmInterface);

-    explicit ARM_Interface(System& system_, bool uses_wall_clock_)
-        : system{system_}, uses_wall_clock{uses_wall_clock_} {}
-    virtual ~ARM_Interface() = default;
+    explicit ArmInterface(bool uses_wall_clock) : m_uses_wall_clock{uses_wall_clock} {}
+    virtual ~ArmInterface() = default;

-    struct ThreadContext32 {
-        std::array<u32, 16> cpu_registers{};
-        std::array<u32, 64> extension_registers{};
-        u32 cpsr{};
-        u32 fpscr{};
-        u32 fpexc{};
-        u32 tpidr{};
-    };
-    // Internally within the kernel, it expects the AArch32 version of the
-    // thread context to be 344 bytes in size.
-    static_assert(sizeof(ThreadContext32) == 0x150);
-
-    struct ThreadContext64 {
-        std::array<u64, 31> cpu_registers{};
-        u64 sp{};
-        u64 pc{};
-        u32 pstate{};
-        std::array<u8, 4> padding{};
-        std::array<u128, 32> vector_registers{};
-        u32 fpcr{};
-        u32 fpsr{};
-        u64 tpidr{};
-    };
-    // Internally within the kernel, it expects the AArch64 version of the
-    // thread context to be 800 bytes in size.
-    static_assert(sizeof(ThreadContext64) == 0x320);
-
-    /// Perform any backend-specific initialization.
+    // Perform any backend-specific initialization.
    virtual void Initialize() {}

-    /// Runs the CPU until an event happens
-    void Run();
+    // Runs the CPU until an event happens.
+    virtual HaltReason RunThread(Kernel::KThread* thread) = 0;

-    /// Clear all instruction cache
+    // Runs the CPU for one instruction or until an event happens.
+    virtual HaltReason StepThread(Kernel::KThread* thread) = 0;
+
+    // Admits a backend-specific mechanism to lock the thread context.
+    virtual void LockThread(Kernel::KThread* thread) {}
+    virtual void UnlockThread(Kernel::KThread* thread) {}
+
+    // Clear the entire instruction cache for this CPU.
    virtual void ClearInstructionCache() = 0;

-    /**
-     * Clear instruction cache range
-     * @param addr Start address of the cache range to clear
-     * @param size Size of the cache range to clear, starting at addr
-     */
+    // Clear a range of the instruction cache for this CPU.
    virtual void InvalidateCacheRange(u64 addr, std::size_t size) = 0;

-    /**
-     * Notifies CPU emulation that the current page table has changed.
-     *  @param new_page_table                 The new page table.
-     *  @param new_address_space_size_in_bits The new usable size of the address space in bits.
-     *                                        This can be either 32, 36, or 39 on official software.
-     */
-    virtual void PageTableChanged(Common::PageTable& new_page_table,
-                                  std::size_t new_address_space_size_in_bits) = 0;
-
-    /**
-     * Set the Program Counter to an address
-     * @param addr Address to set PC to
-     */
-    virtual void SetPC(u64 addr) = 0;
-
-    /*
-     * Get the current Program Counter
-     * @return Returns current PC
-     */
-    virtual u64 GetPC() const = 0;
-
-    /**
-     * Get the current Stack Pointer
-     * @return Returns current SP
-     */
-    virtual u64 GetSP() const = 0;
-
-    /**
-     * Get an ARM register
-     * @param index Register index
-     * @return Returns the value in the register
-     */
-    virtual u64 GetReg(int index) const = 0;
-
-    /**
-     * Set an ARM register
-     * @param index Register index
-     * @param value Value to set register to
-     */
-    virtual void SetReg(int index, u64 value) = 0;
-
-    /**
-     * Gets the value of a specified vector register.
-     *
-     * @param index The index of the vector register.
-     * @return the value within the vector register.
-     */
-    virtual u128 GetVectorReg(int index) const = 0;
-
-    /**
-     * Sets a given value into a vector register.
-     *
-     * @param index The index of the vector register.
-     * @param value The new value to place in the register.
-     */
-    virtual void SetVectorReg(int index, u128 value) = 0;
-
-    /**
-     * Get the current PSTATE register
-     * @return Returns the value of the PSTATE register
-     */
-    virtual u32 GetPSTATE() const = 0;
-
-    /**
-     * Set the current PSTATE register
-     * @param pstate Value to set PSTATE to
-     */
-    virtual void SetPSTATE(u32 pstate) = 0;
-
-    virtual u64 GetTlsAddress() const = 0;
-
-    virtual void SetTlsAddress(u64 address) = 0;
-
-    /**
-     * Gets the value within the TPIDR_EL0 (read/write software thread ID) register.
-     *
-     * @return the value within the register.
-     */
-    virtual u64 GetTPIDR_EL0() const = 0;
-
-    /**
-     * Sets a new value within the TPIDR_EL0 (read/write software thread ID) register.
-     *
-     * @param value The new value to place in the register.
-     */
-    virtual void SetTPIDR_EL0(u64 value) = 0;
-
+    // Get the current architecture.
+    // This returns AArch64 when PSTATE.nRW == 0 and AArch32 when PSTATE.nRW == 1.
    virtual Architecture GetArchitecture() const = 0;
-    virtual void SaveContext(ThreadContext32& ctx) const = 0;
-    virtual void SaveContext(ThreadContext64& ctx) const = 0;
-    virtual void LoadContext(const ThreadContext32& ctx) = 0;
-    virtual void LoadContext(const ThreadContext64& ctx) = 0;
-    void LoadWatchpointArray(const WatchpointArray* wp);

-    /// Clears the exclusive monitor's state.
-    virtual void ClearExclusiveState() = 0;
+    // Context accessors.
+    // These should not be called if the CPU is running.
+    virtual void GetContext(Kernel::Svc::ThreadContext& ctx) const = 0;
+    virtual void SetContext(const Kernel::Svc::ThreadContext& ctx) = 0;
+    virtual void SetTpidrroEl0(u64 value) = 0;

-    /// Signal an interrupt and ask the core to halt as soon as possible.
-    virtual void SignalInterrupt() = 0;
+    virtual void GetSvcArguments(std::span<uint64_t, 8> args) const = 0;
+    virtual void SetSvcArguments(std::span<const uint64_t, 8> args) = 0;
+    virtual u32 GetSvcNumber() const = 0;

-    /// Clear a previous interrupt.
-    virtual void ClearInterrupt() = 0;
+    void SetWatchpointArray(const WatchpointArray* watchpoints) {
+        m_watchpoints = watchpoints;
+    }

-    struct BacktraceEntry {
-        std::string module;
-        u64 address;
-        u64 original_address;
-        u64 offset;
-        std::string name;
-    };
+    // Signal an interrupt for execution to halt as soon as possible.
+    // It is safe to call this if the CPU is not running.
+    virtual void SignalInterrupt(Kernel::KThread* thread) = 0;

-    static std::vector<BacktraceEntry> GetBacktraceFromContext(System& system,
-                                                               const ThreadContext32& ctx);
-    static std::vector<BacktraceEntry> GetBacktraceFromContext(System& system,
-                                                               const ThreadContext64& ctx);
+    // Stack trace generation.
+    void LogBacktrace(const Kernel::KProcess* process) const;

-    std::vector<BacktraceEntry> GetBacktrace() const;
-    void LogBacktrace() const;
+    // Debug functionality.
+    virtual const Kernel::DebugWatchpoint* HaltedWatchpoint() const = 0;
+    virtual void RewindBreakpointInstruction() = 0;

 protected:
-    /// System context that this ARM interface is running under.
-    System& system;
-    const WatchpointArray* watchpoints;
-    bool uses_wall_clock;
-
-    static void SymbolicateBacktrace(Core::System& system, std::vector<BacktraceEntry>& out);
    const Kernel::DebugWatchpoint* MatchingWatchpoint(
        u64 addr, u64 size, Kernel::DebugWatchpointType access_type) const;

-    virtual HaltReason RunJit() = 0;
-    virtual HaltReason StepJit() = 0;
-    virtual u32 GetSvcNumber() const = 0;
-    virtual const Kernel::DebugWatchpoint* HaltedWatchpoint() const = 0;
-    virtual void RewindBreakpointInstruction() = 0;
+protected:
+    const WatchpointArray* m_watchpoints{};
+    bool m_uses_wall_clock{};
 };

 } // namespace Core
--- a/src/core/arm/debug.cpp
+++ b/src/core/arm/debug.cpp
@ -0,0 +1,351 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/demangle.h"
+#include "core/arm/debug.h"
+#include "core/arm/symbols.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/kernel/k_thread.h"
+#include "core/memory.h"
+
+namespace Core {
+
+namespace {
+// Reads a 64-bit guest thread's name via the thread-type pointer in its TLS; nullopt if none.
+std::optional<std::string> GetNameFromThreadType64(Core::Memory::Memory& memory,
+                                                   const Kernel::KThread& thread) {
+    // Read the 64-bit thread-type pointer stored at offset 0x1f8 of the thread's TLS region.
+    const VAddr tls_thread_type{memory.Read64(thread.GetTlsAddress() + 0x1f8)};
+    const VAddr argument_thread_type{thread.GetArgument()};
+    // A non-zero thread argument must equal the TLS pointer, otherwise no name is looked up.
+    if (argument_thread_type && tls_thread_type != argument_thread_type) {
+        // Probably not created by nnsdk, no name available.
+        return std::nullopt;
+    }
+    // A null thread-type pointer leaves nothing to read the name from.
+    if (!tls_thread_type) {
+        return std::nullopt;
+    }
+    // The 16-bit version at +0x46 picks the name-pointer slot: +0x1a0 for 1, +0x1a8 otherwise.
+    const u16 version{memory.Read16(tls_thread_type + 0x46)};
+    VAddr name_pointer{};
+    if (version == 1) {
+        name_pointer = memory.Read64(tls_thread_type + 0x1a0);
+    } else {
+        name_pointer = memory.Read64(tls_thread_type + 0x1a8);
+    }
+
+    if (!name_pointer) {
+        // No name provided.
+        return std::nullopt;
+    }
+    // 256 is presumably the maximum number of characters ReadCString will read -- TODO confirm.
+    return memory.ReadCString(name_pointer, 256);
+}
+// Reads a 32-bit guest thread's name via the thread-type pointer in its TLS; nullopt if none.
+std::optional<std::string> GetNameFromThreadType32(Core::Memory::Memory& memory,
+                                                   const Kernel::KThread& thread) {
+    // Read the 32-bit thread-type pointer stored at offset 0x1fc of the thread's TLS region.
+    const VAddr tls_thread_type{memory.Read32(thread.GetTlsAddress() + 0x1fc)};
+    const VAddr argument_thread_type{thread.GetArgument()};
+    // A non-zero thread argument must equal the TLS pointer, otherwise no name is looked up.
+    if (argument_thread_type && tls_thread_type != argument_thread_type) {
+        // Probably not created by nnsdk, no name available.
+        return std::nullopt;
+    }
+    // A null thread-type pointer leaves nothing to read the name from.
+    if (!tls_thread_type) {
+        return std::nullopt;
+    }
+    // The 16-bit version at +0x26 picks the name-pointer slot: +0xe4 for 1, +0xe8 otherwise.
+    const u16 version{memory.Read16(tls_thread_type + 0x26)};
+    VAddr name_pointer{};
+    if (version == 1) {
+        name_pointer = memory.Read32(tls_thread_type + 0xe4);
+    } else {
+        name_pointer = memory.Read32(tls_thread_type + 0xe8);
+    }
+
+    if (!name_pointer) {
+        // No name provided.
+        return std::nullopt;
+    }
+    // 256 is presumably the maximum number of characters ReadCString will read -- TODO confirm.
+    return memory.ReadCString(name_pointer, 256);
+}
+// Display bases added to module offsets, indexed by Is64Bit(): [0] = 32-bit, [1] = 64-bit.
+constexpr std::array<u64, 2> SegmentBases{
+    0x60000000ULL,
+    0x7100000000ULL,
+};
+// Fills in module, offset, display address and demangled symbol name for each backtrace entry.
+void SymbolicateBacktrace(const Kernel::KProcess* process, std::vector<BacktraceEntry>& out) {
+    auto modules = FindModules(process);
+    // is_64 is passed to GetSymbols and also indexes SegmentBases below.
+    const bool is_64 = process->Is64Bit();
+    // Load one symbol table per module, keyed by module name.
+    std::map<std::string, Symbols::Symbols> symbols;
+    for (const auto& module : modules) {
+        symbols.insert_or_assign(module.second,
+                                 Symbols::GetSymbols(module.first, process->GetMemory(), is_64));
+    }
+
+    for (auto& entry : out) {
+        VAddr base = 0; // Stays 0 when no module base is at or below the address.
+        for (auto iter = modules.rbegin(); iter != modules.rend(); ++iter) {
+            const auto& module{*iter};
+            if (entry.original_address >= module.first) {
+                entry.module = module.second;
+                base = module.first;
+                break; // First match in reverse iteration order wins.
+            }
+        }
+        // offset is relative to the matched base; address re-bases it onto the display base.
+        entry.offset = entry.original_address - base;
+        entry.address = SegmentBases[is_64] + entry.offset;
+        // Reached when no module matched, or when the matched module has an empty name.
+        if (entry.module.empty()) {
+            entry.module = "unknown";
+        }
+        // Look the offset up in this module's symbol table, if one was loaded, and demangle it.
+        const auto symbol_set = symbols.find(entry.module);
+        if (symbol_set != symbols.end()) {
+            const auto symbol = Symbols::GetSymbolName(symbol_set->second, entry.offset);
+            if (symbol) {
+                entry.name = Common::DemangleSymbol(*symbol);
+            }
+        }
+    }
+}
+// Builds a backtrace for a 64-bit context by following the frame-pointer chain in guest memory.
+std::vector<BacktraceEntry> GetAArch64Backtrace(const Kernel::KProcess* process,
+                                                const Kernel::Svc::ThreadContext& ctx) {
+    std::vector<BacktraceEntry> out;
+    auto& memory = process->GetMemory();
+    auto pc = ctx.pc, lr = ctx.lr, fp = ctx.fp;
+    // The first entry is the current pc; only original_address is filled in at this point.
+    out.push_back({"", 0, pc, 0, ""});
+
+    // fp (= x29) points to the previous frame record.
+    // Frame records are two words long:
+    // fp+0 : pointer to previous frame record
+    // fp+8 : value of lr for frame
+    for (size_t i = 0; i < 256; i++) { // Bounded, so a cyclic chain cannot loop forever.
+        out.push_back({"", 0, lr, 0, ""});
+        if (!fp || (fp % 4 != 0) || !memory.IsValidVirtualAddressRange(fp, 16)) {
+            break; // fp is null, not 4-byte aligned, or fails the 16-byte range check.
+        }
+        lr = memory.Read64(fp + 8);
+        fp = memory.Read64(fp);
+    }
+    // Resolve module names, offsets and symbol names for the collected addresses.
+    SymbolicateBacktrace(process, out);
+
+    return out;
+}
+// Builds a backtrace for a 32-bit context by following the frame-pointer chain in guest memory.
+std::vector<BacktraceEntry> GetAArch32Backtrace(const Kernel::KProcess* process,
+                                                const Kernel::Svc::ThreadContext& ctx) {
+    std::vector<BacktraceEntry> out;
+    auto& memory = process->GetMemory();
+    auto pc = ctx.pc, lr = ctx.lr, fp = ctx.fp;
+    // The first entry is the current pc; only original_address is filled in at this point.
+    out.push_back({"", 0, pc, 0, ""});
+
+    // fp (= r11) points to the last frame record.
+    // Frame records are two words long:
+    // fp+0 : pointer to previous frame record
+    // fp+4 : value of lr for frame
+    for (size_t i = 0; i < 256; i++) { // Bounded, so a cyclic chain cannot loop forever.
+        out.push_back({"", 0, lr, 0, ""});
+        if (!fp || (fp % 4 != 0) || !memory.IsValidVirtualAddressRange(fp, 8)) {
+            break; // fp is null, not 4-byte aligned, or fails the 8-byte range check.
+        }
+        lr = memory.Read32(fp + 4);
+        fp = memory.Read32(fp);
+    }
+    // Resolve module names, offsets and symbol names for the collected addresses.
+    SymbolicateBacktrace(process, out);
+
+    return out;
+}
+
+} // namespace
+// Returns the thread's name, choosing the 64-bit or 32-bit reader from the owner process.
+std::optional<std::string> GetThreadName(const Kernel::KThread* thread) {
+    const auto* process = thread->GetOwnerProcess(); // Neither pointer is null-checked here.
+    if (process->Is64Bit()) {
+        return GetNameFromThreadType64(process->GetMemory(), *thread);
+    } else {
+        return GetNameFromThreadType32(process->GetMemory(), *thread);
+    }
+}
+// Maps the thread's debugging wait reason to a display label; unlisted values give "Unknown".
+std::string_view GetThreadWaitReason(const Kernel::KThread* thread) {
+    switch (thread->GetWaitReasonForDebugging()) {
+    case Kernel::ThreadWaitReasonForDebugging::Sleep:
+        return "Sleep";
+    case Kernel::ThreadWaitReasonForDebugging::IPC:
+        return "IPC";
+    case Kernel::ThreadWaitReasonForDebugging::Synchronization:
+        return "Synchronization";
+    case Kernel::ThreadWaitReasonForDebugging::ConditionVar:
+        return "ConditionVar";
+    case Kernel::ThreadWaitReasonForDebugging::Arbitration:
+        return "Arbitration";
+    case Kernel::ThreadWaitReasonForDebugging::Suspended:
+        return "Suspended";
+    default: // Any reason without a case above.
+        return "Unknown";
+    }
+}
+// Describes the thread's state as text; a waiting thread also reports its wait reason.
+std::string GetThreadState(const Kernel::KThread* thread) {
+    switch (thread->GetState()) {
+    case Kernel::ThreadState::Initialized:
+        return "Initialized";
+    case Kernel::ThreadState::Waiting: // Formatted as "Waiting (<reason>)".
+        return fmt::format("Waiting ({})", GetThreadWaitReason(thread));
+    case Kernel::ThreadState::Runnable:
+        return "Runnable";
+    case Kernel::ThreadState::Terminated:
+        return "Terminated";
+    default: // Any state without a case above.
+        return "Unknown";
+    }
+}
+// Returns the last address of the module starting at base, spanning up to three regions.
+Kernel::KProcessAddress GetModuleEnd(const Kernel::KProcess* process,
+                                     Kernel::KProcessAddress base) {
+    Kernel::KMemoryInfo mem_info;
+    Kernel::Svc::MemoryInfo svc_mem_info;
+    Kernel::Svc::PageInfo page_info;
+    VAddr cur_addr{GetInteger(base)};
+    auto& page_table = process->GetPageTable();
+    // Each step queries the region at cur_addr, then moves cur_addr to that region's end.
+    // Expect: r-x Code (.text)
+    R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), cur_addr));
+    svc_mem_info = mem_info.GetSvcMemoryInfo();
+    cur_addr = svc_mem_info.base_address + svc_mem_info.size;
+    if (svc_mem_info.state != Kernel::Svc::MemoryState::Code ||
+        svc_mem_info.permission != Kernel::Svc::MemoryPermission::ReadExecute) {
+        return cur_addr - 1; // Mismatch: stop at the last byte of the region just queried.
+    }
+
+    // Expect: r-- Code (.rodata)
+    R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), cur_addr));
+    svc_mem_info = mem_info.GetSvcMemoryInfo();
+    cur_addr = svc_mem_info.base_address + svc_mem_info.size;
+    if (svc_mem_info.state != Kernel::Svc::MemoryState::Code ||
+        svc_mem_info.permission != Kernel::Svc::MemoryPermission::Read) {
+        return cur_addr - 1; // Mismatch: stop at the last byte of the region just queried.
+    }
+    // The third region's state and permission are not checked; its last byte is returned.
+    // Expect: rw- CodeData (.data)
+    R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), cur_addr));
+    svc_mem_info = mem_info.GetSvcMemoryInfo();
+    cur_addr = svc_mem_info.base_address + svc_mem_info.size;
+    return cur_addr - 1;
+}
+// Scans the process's memory regions from address 0 and collects (base address, name) pairs.
+Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process) {
+    Loader::AppLoader::Modules modules;
+    // Executable regions without a readable, valid path header are skipped, not listed.
+    auto& page_table = process->GetPageTable();
+    auto& memory = process->GetMemory();
+    VAddr cur_addr = 0;
+
+    // Look for executable sections in Code or AliasCode regions.
+    while (true) {
+        Kernel::KMemoryInfo mem_info{};
+        Kernel::Svc::PageInfo page_info{};
+        R_ASSERT(
+            page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), cur_addr));
+        auto svc_mem_info = mem_info.GetSvcMemoryInfo();
+        // Only r-x regions in the Code or AliasCode state are candidates for a module.
+        if (svc_mem_info.permission == Kernel::Svc::MemoryPermission::ReadExecute &&
+            (svc_mem_info.state == Kernel::Svc::MemoryState::Code ||
+             svc_mem_info.state == Kernel::Svc::MemoryState::AliasCode)) {
+            // Try to read the module name from its path.
+            constexpr s32 PathLengthMax = 0x200;
+            struct {
+                u32 zero;
+                s32 path_length;
+                std::array<char, PathLengthMax> path;
+            } module_path;
+            // The path header is read from the address right after the executable region.
+            if (memory.ReadBlock(svc_mem_info.base_address + svc_mem_info.size, &module_path,
+                                 sizeof(module_path))) {
+                if (module_path.zero == 0 && module_path.path_length > 0) {
+                    // Truncate module name by forcing a terminating NUL in the last slot.
+                    module_path.path[PathLengthMax - 1] = '\0';
+
+                    // Ignore leading directories.
+                    char* path_pointer = module_path.path.data();
+                    // Keep the position after the last '/' or '\\' seen within the path length.
+                    for (s32 i = 0; i < std::min(PathLengthMax, module_path.path_length) &&
+                                    module_path.path[i] != '\0';
+                         i++) {
+                        if (module_path.path[i] == '/' || module_path.path[i] == '\\') {
+                            path_pointer = module_path.path.data() + i + 1;
+                        }
+                    }
+
+                    // Insert output: the region's base address with the name after the separator.
+                    modules.emplace(svc_mem_info.base_address, path_pointer);
+                }
+            }
+        }
+        // NOTE(review): uintptr_t is host-sized; confirm it cannot truncate guest addresses.
+        // Check if we're done: the next address must advance past cur_addr.
+        const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size;
+        if (next_address <= cur_addr) {
+            break;
+        }
+
+        cur_addr = next_address;
+    }
+
+    return modules;
+}
+// Returns the base address of the module taken to be main, else the code region start.
+Kernel::KProcessAddress FindMainModuleEntrypoint(const Kernel::KProcess* process) {
+    // Do we have any loaded executable sections?
+    auto modules = FindModules(process);
+    // "First" and "second" below refer to the iteration order of the modules container.
+    if (modules.size() >= 2) {
+        // If we have two or more, the first one is rtld and the second is main.
+        return std::next(modules.begin())->first;
+    } else if (!modules.empty()) {
+        // If we only have one, this is the main module.
+        return modules.begin()->first;
+    }
+
+    // As a last resort, use the start of the code region.
+    return GetInteger(process->GetPageTable().GetCodeRegionStart());
+}
+// Forwards the (address, size) range to InvalidateCacheRange on each core's ARM interface.
+void InvalidateInstructionCacheRange(const Kernel::KProcess* process, u64 address, u64 size) {
+    for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
+        auto* interface = process->GetArmInterface(i);
+        if (interface) { // Cores for which the process returns no interface are skipped.
+            interface->InvalidateCacheRange(address, size);
+        }
+    }
+}
+// Produces a symbolicated backtrace from a saved context, using the walker for the process.
+std::vector<BacktraceEntry> GetBacktraceFromContext(const Kernel::KProcess* process,
+                                                    const Kernel::Svc::ThreadContext& ctx) {
+    if (process->Is64Bit()) { // The walker is chosen from the process, not from ctx.
+        return GetAArch64Backtrace(process, ctx);
+    } else {
+        return GetAArch32Backtrace(process, ctx);
+    }
+}
+// Convenience wrapper: backtrace of a thread from its own context and its owner process.
+std::vector<BacktraceEntry> GetBacktrace(const Kernel::KThread* thread) {
+    Kernel::Svc::ThreadContext ctx = thread->GetContext(); // Works on a local copy.
+    return GetBacktraceFromContext(thread->GetOwnerProcess(), ctx);
+}
+
+} // namespace Core
--- a/src/core/arm/debug.h
+++ b/src/core/arm/debug.h
@ -0,0 +1,35 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <optional>
+
+#include "core/hle/kernel/k_thread.h"
+#include "core/loader/loader.h"
+
+namespace Core {
+
+std::optional<std::string> GetThreadName(const Kernel::KThread* thread);
+std::string_view GetThreadWaitReason(const Kernel::KThread* thread);
+std::string GetThreadState(const Kernel::KThread* thread);
+
+Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process);
+Kernel::KProcessAddress GetModuleEnd(const Kernel::KProcess* process, Kernel::KProcessAddress base);
+Kernel::KProcessAddress FindMainModuleEntrypoint(const Kernel::KProcess* process);
+
+void InvalidateInstructionCacheRange(const Kernel::KProcess* process, u64 address, u64 size);
+
+// One frame of a guest backtrace, as produced by GetBacktraceFromContext() / GetBacktrace().
+//
+// The numeric members are value-initialized so a default-constructed entry never carries
+// indeterminate values. The type stays an aggregate, so brace initialization with all five
+// members keeps working unchanged.
+struct BacktraceEntry {
+    // NOTE(review): presumably the name of the module containing the frame - confirm.
+    std::string module;
+    // NOTE(review): the relationship between address, original_address and offset is not
+    // visible from this header - confirm against the unwinders in debug.cpp.
+    u64 address{};
+    u64 original_address{};
+    u64 offset{};
+    // NOTE(review): presumably the resolved symbol name, if any - confirm.
+    std::string name;
+};
+
+std::vector<BacktraceEntry> GetBacktraceFromContext(const Kernel::KProcess* process,
+                                                    const Kernel::Svc::ThreadContext& ctx);
+std::vector<BacktraceEntry> GetBacktrace(const Kernel::KThread* thread);
+
+} // namespace Core
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@ -1,25 +1,13 @@
 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

-#include <cinttypes>
-#include <memory>
-#include <dynarmic/interface/A32/a32.h>
-#include <dynarmic/interface/A32/config.h>
-#include "common/assert.h"
-#include "common/literals.h"
-#include "common/logging/log.h"
-#include "common/page_table.h"
 #include "common/settings.h"
 #include "core/arm/dynarmic/arm_dynarmic.h"
 #include "core/arm/dynarmic/arm_dynarmic_32.h"
 #include "core/arm/dynarmic/dynarmic_cp15.h"
 #include "core/arm/dynarmic/dynarmic_exclusive_monitor.h"
-#include "core/core.h"
 #include "core/core_timing.h"
-#include "core/debugger/debugger.h"
 #include "core/hle/kernel/k_process.h"
-#include "core/hle/kernel/svc.h"
-#include "core/memory.h"

 namespace Core {

@ -27,78 +15,78 @@ using namespace Common::Literals;

 class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
 public:
-    explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
-        : parent{parent_}, memory(parent.system.ApplicationMemory()),
-          debugger_enabled{parent.system.DebuggerEnabled()},
-          check_memory_access{debugger_enabled ||
-                              !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} {}
    // Binds the callbacks to the owning core (`parent`) and to the memory of `process`.
    // Access checking is switched on when the debugger is enabled or when the
    // cpuopt_ignore_memory_aborts setting is false. m_check_memory_access reads
    // m_debugger_enabled, so it relies on that member being initialized first.
+    explicit DynarmicCallbacks32(ArmDynarmic32& parent, const Kernel::KProcess* process)
+        : m_parent{parent}, m_memory(process->GetMemory()),
+          m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()},
+          m_check_memory_access{m_debugger_enabled ||
+                                !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} {}

    // Guest data reads of 1, 2, 4 and 8 bytes. Each one runs CheckMemoryAccess() for the access
    // and then returns the value read from process memory; the result of the check is not
    // used to skip the read.
    u8 MemoryRead8(u32 vaddr) override {
        CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read);
-        return memory.Read8(vaddr);
+        return m_memory.Read8(vaddr);
    }
    u16 MemoryRead16(u32 vaddr) override {
        CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read);
-        return memory.Read16(vaddr);
+        return m_memory.Read16(vaddr);
    }
    u32 MemoryRead32(u32 vaddr) override {
        CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read);
-        return memory.Read32(vaddr);
+        return m_memory.Read32(vaddr);
    }
    u64 MemoryRead64(u32 vaddr) override {
        CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read);
-        return memory.Read64(vaddr);
+        return m_memory.Read64(vaddr);
    }
    // Instruction fetch: yields std::nullopt when the 4-byte range at `vaddr` is not a valid
    // virtual address range, otherwise the 32-bit word stored there.
    std::optional<u32> MemoryReadCode(u32 vaddr) override {
-        if (!memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) {
+        if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) {
            return std::nullopt;
        }
-        return memory.Read32(vaddr);
+        return m_memory.Read32(vaddr);
    }

    // Guest data writes of 1, 2, 4 and 8 bytes. The store is performed only when
    // CheckMemoryAccess() returns true for the access; otherwise it is dropped.
    void MemoryWrite8(u32 vaddr, u8 value) override {
        if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) {
-            memory.Write8(vaddr, value);
+            m_memory.Write8(vaddr, value);
        }
    }
    void MemoryWrite16(u32 vaddr, u16 value) override {
        if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) {
-            memory.Write16(vaddr, value);
+            m_memory.Write16(vaddr, value);
        }
    }
    void MemoryWrite32(u32 vaddr, u32 value) override {
        if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) {
-            memory.Write32(vaddr, value);
+            m_memory.Write32(vaddr, value);
        }
    }
    void MemoryWrite64(u32 vaddr, u64 value) override {
        if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) {
-            memory.Write64(vaddr, value);
+            m_memory.Write64(vaddr, value);
        }
    }

    // Exclusive stores of 1, 2, 4 and 8 bytes. Each returns false without touching memory when
    // CheckMemoryAccess() rejects the access; otherwise it returns whatever the matching
    // WriteExclusive call on the process memory returns.
    bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override {
        return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive8(vaddr, value, expected);
+               m_memory.WriteExclusive8(vaddr, value, expected);
    }
    bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override {
        return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive16(vaddr, value, expected);
+               m_memory.WriteExclusive16(vaddr, value, expected);
    }
    bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override {
        return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive32(vaddr, value, expected);
+               m_memory.WriteExclusive32(vaddr, value, expected);
    }
    bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override {
        return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive64(vaddr, value, expected);
+               m_memory.WriteExclusive64(vaddr, value, expected);
    }

    // Called for instructions the JIT cannot translate: logs a backtrace for the process and
    // an error containing the 32-bit word read at `pc`.
    // NOTE(review): the A64 callbacks additionally call ReturnException(pc, PrefetchAbort)
    // here; this version only logs - confirm the difference is intended.
    void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
-        parent.LogBacktrace();
+        m_parent.LogBacktrace(m_process);
        LOG_ERROR(Core_ARM,
                  "Unimplemented instruction @ 0x{:X} for {} instructions (instr = {:08X})", pc,
-                  num_instructions, memory.Read32(pc));
+                  num_instructions, m_memory.Read32(pc));
    }

    void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
@ -108,73 +96,64 @@ public:
            ReturnException(pc, PrefetchAbort);
            return;
        default:
-            if (debugger_enabled) {
+            if (m_debugger_enabled) {
                ReturnException(pc, InstructionBreakpoint);
                return;
            }

-            parent.LogBacktrace();
+            m_parent.LogBacktrace(m_process);
            LOG_CRITICAL(Core_ARM,
                         "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X}, thumb = {})",
-                         exception, pc, memory.Read32(pc), parent.IsInThumbMode());
+                         exception, pc, m_memory.Read32(pc), m_parent.IsInThumbMode());
        }
    }

    // Records the SVC immediate on the parent core (read back through GetSvcNumber()) and
    // halts the JIT with the SupervisorCall reason.
    void CallSVC(u32 swi) override {
-        parent.svc_swi = swi;
-        parent.jit.load()->HaltExecution(SupervisorCall);
+        m_parent.m_svc_swi = swi;
+        m_parent.m_jit->HaltExecution(SupervisorCall);
    }

    // Credits executed ticks to CoreTiming. MakeJit() only enables cycle counting when the
    // wall clock is not in use, so reaching this callback in wall-clock mode is asserted
    // against.
    void AddTicks(u64 ticks) override {
-        if (parent.uses_wall_clock) {
-            return;
-        }
+        ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled");

        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
        // rough approximation of the amount of executed ticks in the system, it may be thrown off
        // if not all cores are doing a similar amount of work. Instead of doing this, we should
        // devise a way so that timing is consistent across all cores without increasing the ticks 4
        // times.
-        u64 amortized_ticks =
-            (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
+        u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES;
        // Always execute at least one tick.
        amortized_ticks = std::max<u64>(amortized_ticks, 1);

-        parent.system.CoreTiming().AddTicks(amortized_ticks);
-        num_interpreted_instructions = 0;
+        m_parent.m_system.CoreTiming().AddTicks(amortized_ticks);
    }

    // Returns the CoreTiming downcount clamped to be non-negative. Like AddTicks(), this is
    // asserted never to be reached while the wall clock is in use.
    u64 GetTicksRemaining() override {
-        if (parent.uses_wall_clock) {
-            if (!IsInterrupted()) {
-                return minimum_run_cycles;
-            }
-            return 0U;
-        }
+        ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled");

-        return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
+        return std::max<s64>(m_parent.m_system.CoreTiming().GetDowncount(), 0);
    }

    bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) {
-        if (!check_memory_access) {
+        if (!m_check_memory_access) {
            return true;
        }

-        if (!memory.IsValidVirtualAddressRange(addr, size)) {
+        if (!m_memory.IsValidVirtualAddressRange(addr, size)) {
            LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}",
                         addr);
-            parent.jit.load()->HaltExecution(PrefetchAbort);
+            m_parent.m_jit->HaltExecution(PrefetchAbort);
            return false;
        }

-        if (!debugger_enabled) {
+        if (!m_debugger_enabled) {
            return true;
        }

-        const auto match{parent.MatchingWatchpoint(addr, size, type)};
+        const auto match{m_parent.MatchingWatchpoint(addr, size, type)};
        if (match) {
-            parent.halted_watchpoint = match;
-            parent.jit.load()->HaltExecution(DataAbort);
+            m_parent.m_halted_watchpoint = match;
+            m_parent.m_jit->HaltExecution(DataAbort);
            return false;
        }

@ -182,32 +161,31 @@ public:
    }

    // Snapshots the current register state into the parent's breakpoint context, overrides
    // the saved program counter with `pc`, and halts the JIT with reason `hr`. Both `pc` and
    // `r[15]` are written because ArmDynarmic32::SetContext() restores registers from `r[]`.
    void ReturnException(u32 pc, Dynarmic::HaltReason hr) {
-        parent.SaveContext(parent.breakpoint_context);
-        parent.breakpoint_context.cpu_registers[15] = pc;
-        parent.jit.load()->HaltExecution(hr);
+        m_parent.GetContext(m_parent.m_breakpoint_context);
+        m_parent.m_breakpoint_context.pc = pc;
+        m_parent.m_breakpoint_context.r[15] = pc;
+        m_parent.m_jit->HaltExecution(hr);
    }

-    bool IsInterrupted() {
-        return parent.system.Kernel().PhysicalCore(parent.core_index).IsInterrupted();
-    }
-
-    ARM_Dynarmic_32& parent;
-    Core::Memory::Memory& memory;
-    std::size_t num_interpreted_instructions{};
-    const bool debugger_enabled{};
-    const bool check_memory_access{};
-    static constexpr u64 minimum_run_cycles = 10000U;
+    ArmDynarmic32& m_parent;
+    Core::Memory::Memory& m_memory;
+    const Kernel::KProcess* m_process{};
+    const bool m_debugger_enabled{};
+    const bool m_check_memory_access{};
+    static constexpr u64 MinimumRunCycles = 10000U;
 };

-std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* page_table) const {
+std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* page_table) const {
    Dynarmic::A32::UserConfig config;
-    config.callbacks = cb.get();
-    config.coprocessors[15] = cp15;
+    config.callbacks = m_cb.get();
+    config.coprocessors[15] = m_cp15;
    config.define_unpredictable_behaviour = true;
-    static constexpr std::size_t YUZU_PAGEBITS = 12;
-    static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - YUZU_PAGEBITS);
+
    if (page_table) {
-        config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>(
+        constexpr size_t PageBits = 12;
+        constexpr size_t NumPageTableEntries = 1 << (32 - PageBits);
+
+        config.page_table = reinterpret_cast<std::array<std::uint8_t*, NumPageTableEntries>*>(
            page_table->pointers.data());
        config.absolute_offset_page_table = true;
        config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
@ -221,12 +199,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
    }

    // Multi-process state
-    config.processor_id = core_index;
-    config.global_monitor = &exclusive_monitor.monitor;
+    config.processor_id = m_core_index;
+    config.global_monitor = &m_exclusive_monitor.monitor;

    // Timing
-    config.wall_clock_cntpct = uses_wall_clock;
-    config.enable_cycle_counting = true;
+    config.wall_clock_cntpct = m_uses_wall_clock;
+    config.enable_cycle_counting = !m_uses_wall_clock;

    // Code cache size
 #ifdef ARCHITECTURE_arm64
@ -236,7 +214,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
 #endif

    // Allow memory fault handling to work
-    if (system.DebuggerEnabled()) {
+    if (m_system.DebuggerEnabled()) {
        config.check_halt_on_memory_access = true;
    }

@ -325,137 +303,140 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
    return std::make_unique<Dynarmic::A32::Jit>(config);
 }

-HaltReason ARM_Dynarmic_32::RunJit() {
-    return TranslateHaltReason(jit.load()->Run());
+// Splits an AArch32 FPSCR value into the {FPSR, FPCR} pair stored in the thread context.
+// Every bit keeps its position; the two masks below only decide which half receives it.
+static std::pair<u32, u32> FpscrToFpsrFpcr(u32 fpscr) {
+    // Status half: FPSCR[31:27], FPSCR[7] and FPSCR[4:0] map to the same bits of FPSR.
+    constexpr u32 FpsrMask = 0xf8000000 | 0x80 | 0x1f;
+
+    // Control half: FPSCR[26:15] and FPSCR[12:8] map to the same bits of FPCR.
+    constexpr u32 FpcrMask = 0x7ff8000 | 0x1f00;
+
+    const u32 status = fpscr & FpsrMask;
+    const u32 control = fpscr & FpcrMask;
+    return {status, control};
 }

-HaltReason ARM_Dynarmic_32::StepJit() {
-    return TranslateHaltReason(jit.load()->Step());
+// Recombines FPSR and FPCR into an FPSCR value. The low 32 bits of both inputs are merged and
+// passed back through FpscrToFpsrFpcr(), so only bits that the split maps survive.
+static u32 FpsrFpcrToFpscr(u64 fpsr, u64 fpcr) {
+    const u32 merged = static_cast<u32>(fpsr | fpcr);
+    const auto [status, control] = FpscrToFpsrFpcr(merged);
+    return status | control;
 }

-u32 ARM_Dynarmic_32::GetSvcNumber() const {
-    return svc_swi;
+// Reports whether bit 5 (mask 0x20) is set in the JIT's current CPSR.
+bool ArmDynarmic32::IsInThumbMode() const {
+    constexpr u32 ThumbBit = 0x20;
+    const u32 cpsr = m_jit->Cpsr();
+    return (cpsr & ThumbBit) != 0;
 }

-const Kernel::DebugWatchpoint* ARM_Dynarmic_32::HaltedWatchpoint() const {
-    return halted_watchpoint;
+// Clears the JIT's exclusive state, runs guest code until the JIT halts, and returns the
+// translated halt reason. The `thread` argument is not consulted by this backend.
+HaltReason ArmDynarmic32::RunThread(Kernel::KThread* thread) {
+    m_jit->ClearExclusiveState();
+
+    const auto halt = m_jit->Run();
+    return TranslateHaltReason(halt);
 }

-void ARM_Dynarmic_32::RewindBreakpointInstruction() {
-    LoadContext(breakpoint_context);
+// Same as RunThread() but advances the JIT with Step() instead of Run(). The `thread`
+// argument is not consulted by this backend.
+HaltReason ArmDynarmic32::StepThread(Kernel::KThread* thread) {
+    m_jit->ClearExclusiveState();
+
+    const auto halt = m_jit->Step();
+    return TranslateHaltReason(halt);
 }

-ARM_Dynarmic_32::ARM_Dynarmic_32(System& system_, bool uses_wall_clock_,
-                                 DynarmicExclusiveMonitor& exclusive_monitor_,
-                                 std::size_t core_index_)
-    : ARM_Interface{system_, uses_wall_clock_}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
-      cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index_},
-      exclusive_monitor{exclusive_monitor_}, null_jit{MakeJit(nullptr)}, jit{null_jit.get()} {}
-
-ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
-
-void ARM_Dynarmic_32::SetPC(u64 pc) {
-    jit.load()->Regs()[15] = static_cast<u32>(pc);
+u32 ArmDynarmic32::GetSvcNumber() const {
+    return m_svc_swi; // immediate stored by DynarmicCallbacks32::CallSVC before it halts the JIT
 }

-u64 ARM_Dynarmic_32::GetPC() const {
-    return jit.load()->Regs()[15];
-}
+void ArmDynarmic32::GetSvcArguments(std::span<uint64_t, 8> args) const {
+    Dynarmic::A32::Jit& j = *m_jit;
+    auto& gpr = j.Regs();

-u64 ARM_Dynarmic_32::GetSP() const {
-    return jit.load()->Regs()[13];
-}
-
-u64 ARM_Dynarmic_32::GetReg(int index) const {
-    return jit.load()->Regs()[index];
-}
-
-void ARM_Dynarmic_32::SetReg(int index, u64 value) {
-    jit.load()->Regs()[index] = static_cast<u32>(value);
-}
-
-u128 ARM_Dynarmic_32::GetVectorReg(int index) const {
-    return {};
-}
-
-void ARM_Dynarmic_32::SetVectorReg(int index, u128 value) {}
-
-u32 ARM_Dynarmic_32::GetPSTATE() const {
-    return jit.load()->Cpsr();
-}
-
-void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
-    jit.load()->SetCpsr(cpsr);
-}
-
-u64 ARM_Dynarmic_32::GetTlsAddress() const {
-    return cp15->uro;
-}
-
-void ARM_Dynarmic_32::SetTlsAddress(u64 address) {
-    cp15->uro = static_cast<u32>(address);
-}
-
-u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
-    return cp15->uprw;
-}
-
-void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
-    cp15->uprw = static_cast<u32>(value);
-}
-
-void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) const {
-    Dynarmic::A32::Jit* j = jit.load();
-    ctx.cpu_registers = j->Regs();
-    ctx.extension_registers = j->ExtRegs();
-    ctx.cpsr = j->Cpsr();
-    ctx.fpscr = j->Fpscr();
-}
-
-void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
-    Dynarmic::A32::Jit* j = jit.load();
-    j->Regs() = ctx.cpu_registers;
-    j->ExtRegs() = ctx.extension_registers;
-    j->SetCpsr(ctx.cpsr);
-    j->SetFpscr(ctx.fpscr);
-}
-
-void ARM_Dynarmic_32::SignalInterrupt() {
-    jit.load()->HaltExecution(BreakLoop);
-}
-
-void ARM_Dynarmic_32::ClearInterrupt() {
-    jit.load()->ClearHalt(BreakLoop);
-}
-
-void ARM_Dynarmic_32::ClearInstructionCache() {
-    jit.load()->ClearCache();
-}
-
-void ARM_Dynarmic_32::InvalidateCacheRange(u64 addr, std::size_t size) {
-    jit.load()->InvalidateCacheRange(static_cast<u32>(addr), size);
-}
-
-void ARM_Dynarmic_32::ClearExclusiveState() {
-    jit.load()->ClearExclusiveState();
-}
-
-void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table,
-                                       std::size_t new_address_space_size_in_bits) {
-    ThreadContext32 ctx{};
-    SaveContext(ctx);
-
-    auto key = std::make_pair(&page_table, new_address_space_size_in_bits);
-    auto iter = jit_cache.find(key);
-    if (iter != jit_cache.end()) {
-        jit.store(iter->second.get());
-        LoadContext(ctx);
-        return;
+    for (size_t i = 0; i < 8; i++) {
+        args[i] = gpr[i];
    }
-    std::shared_ptr new_jit = MakeJit(&page_table);
-    jit.store(new_jit.get());
-    LoadContext(ctx);
-    jit_cache.emplace(key, std::move(new_jit));
+}
+
+// Writes the eight SVC argument slots back into r0-r7, keeping only the low 32 bits of each.
+void ArmDynarmic32::SetSvcArguments(std::span<const uint64_t, 8> args) {
+    auto& regs = m_jit->Regs();
+
+    size_t slot = 0;
+    for (const uint64_t value : args) {
+        regs[slot++] = static_cast<u32>(value);
+    }
+}
+
+const Kernel::DebugWatchpoint* ArmDynarmic32::HaltedWatchpoint() const {
+    return m_halted_watchpoint; // stored by CheckMemoryAccess when an access matches a watchpoint
+}
+
+void ArmDynarmic32::RewindBreakpointInstruction() {
+    this->SetContext(m_breakpoint_context); // context captured by the callbacks' ReturnException
+}
+
+// Creates the A32 backend for one core of `process`: wires up the callbacks and the CP15
+// coprocessor, then builds the JIT against the implementation of the process's base page table.
+ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+                             DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index)
+    : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor},
+      m_cb(std::make_unique<DynarmicCallbacks32>(*this, process)),
+      m_cp15(std::make_shared<DynarmicCP15>(*this)), m_core_index{core_index} {
+    // MakeJit() reads m_cb, m_cp15, m_core_index and m_exclusive_monitor, so it has to run in
+    // the body, after all of those members have been initialized.
+    m_jit = MakeJit(&process->GetPageTable().GetBasePageTable().GetImpl());
+}
+
+ArmDynarmic32::~ArmDynarmic32() = default;
+
+// Stores the low 32 bits of `value` in the CP15 `uro` field.
+void ArmDynarmic32::SetTpidrroEl0(u64 value) {
+    const auto truncated = static_cast<u32>(value);
+    m_cp15->uro = truncated;
+}
+
+// Exports the JIT's AArch32 register state into the SVC thread-context layout.
+//
+// r0-r15 are copied into ctx.r, and r11/r13/r14/r15 are mirrored into fp/sp/lr/pc. The
+// extension registers are copied bytewise into the start of ctx.v; any part of ctx.v beyond
+// their size is left untouched. FPSCR is split into ctx.fpsr / ctx.fpcr.
+void ArmDynarmic32::GetContext(Kernel::Svc::ThreadContext& ctx) const {
+    Dynarmic::A32::Jit& jit = *m_jit;
+    const auto& core_regs = jit.Regs();
+    const auto& ext_regs = jit.ExtRegs();
+
+    constexpr size_t NumCoreRegs = 16;
+    for (size_t reg = 0; reg < NumCoreRegs; ++reg) {
+        ctx.r[reg] = core_regs[reg];
+    }
+
+    // Named aliases for the frame pointer, stack pointer, link register and program counter.
+    ctx.fp = core_regs[11];
+    ctx.sp = core_regs[13];
+    ctx.lr = core_regs[14];
+    ctx.pc = core_regs[15];
+    ctx.pstate = jit.Cpsr();
+
+    static_assert(sizeof(ext_regs) <= sizeof(ctx.v));
+    std::memcpy(ctx.v.data(), &ext_regs, sizeof(ext_regs));
+
+    const auto [status, control] = FpscrToFpsrFpcr(jit.Fpscr());
+    ctx.fpcr = control;
+    ctx.fpsr = status;
+    ctx.tpidr = m_cp15->uprw;
+}
+
+// Loads a thread context into the JIT.
+//
+// Only ctx.r[0..15] feed the core registers (each truncated to 32 bits); the separate
+// fp/sp/lr/pc fields of the context are not read here. The extension registers are restored
+// bytewise from the start of ctx.v, and FPSCR is rebuilt from ctx.fpsr and ctx.fpcr.
+void ArmDynarmic32::SetContext(const Kernel::Svc::ThreadContext& ctx) {
+    Dynarmic::A32::Jit& jit = *m_jit;
+    auto& core_regs = jit.Regs();
+    auto& ext_regs = jit.ExtRegs();
+
+    constexpr size_t NumCoreRegs = 16;
+    for (size_t reg = 0; reg < NumCoreRegs; ++reg) {
+        core_regs[reg] = static_cast<u32>(ctx.r[reg]);
+    }
+
+    jit.SetCpsr(ctx.pstate);
+
+    static_assert(sizeof(ext_regs) <= sizeof(ctx.v));
+    std::memcpy(&ext_regs, ctx.v.data(), sizeof(ext_regs));
+
+    const u32 fpscr = FpsrFpcrToFpscr(ctx.fpsr, ctx.fpcr);
+    jit.SetFpscr(fpscr);
+    m_cp15->uprw = static_cast<u32>(ctx.tpidr);
+}
+
+void ArmDynarmic32::SignalInterrupt(Kernel::KThread* thread) {
+    m_jit->HaltExecution(BreakLoop); // `thread` is not consulted by this backend
+}
+
+void ArmDynarmic32::ClearInstructionCache() {
+    m_jit->ClearCache(); // forwards straight to the JIT's ClearCache()
+}
+
+// Forwards a cache-range invalidation to the JIT. The A32 JIT takes a 32-bit start address,
+// so `addr` is truncated to its low 32 bits first.
+void ArmDynarmic32::InvalidateCacheRange(u64 addr, std::size_t size) {
+    const auto start = static_cast<u32>(addr);
+    m_jit->InvalidateCacheRange(start, size);
 }

 } // namespace Core
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@ -3,14 +3,8 @@

 #pragma once

-#include <atomic>
-#include <memory>
-#include <unordered_map>
-
 #include <dynarmic/interface/A32/a32.h>
-#include <dynarmic/interface/A64/a64.h>
-#include "common/common_types.h"
-#include "common/hash.h"
+
 #include "core/arm/arm_interface.h"
 #include "core/arm/dynarmic/dynarmic_exclusive_monitor.h"

@ -20,89 +14,63 @@ class Memory;

 namespace Core {

-class CPUInterruptHandler;
 class DynarmicCallbacks32;
 class DynarmicCP15;
-class DynarmicExclusiveMonitor;
 class System;

-class ARM_Dynarmic_32 final : public ARM_Interface {
 // ArmInterface implementation for AArch32 guests, backed by a Dynarmic::A32::Jit.
 // An instance is constructed for one process and one core index and holds a single JIT.
+class ArmDynarmic32 final : public ArmInterface {
 public:
-    ARM_Dynarmic_32(System& system_, bool uses_wall_clock_,
-                    DynarmicExclusiveMonitor& exclusive_monitor_, std::size_t core_index_);
-    ~ARM_Dynarmic_32() override;
-
-    void SetPC(u64 pc) override;
-    u64 GetPC() const override;
-    u64 GetSP() const override;
-    u64 GetReg(int index) const override;
-    void SetReg(int index, u64 value) override;
-    u128 GetVectorReg(int index) const override;
-    void SetVectorReg(int index, u128 value) override;
-    u32 GetPSTATE() const override;
-    void SetPSTATE(u32 pstate) override;
-    u64 GetTlsAddress() const override;
-    void SetTlsAddress(u64 address) override;
-    void SetTPIDR_EL0(u64 value) override;
-    u64 GetTPIDR_EL0() const override;
-
-    bool IsInThumbMode() const {
-        return (GetPSTATE() & 0x20) != 0;
-    }
+    ArmDynarmic32(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+                  DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+    ~ArmDynarmic32() override;

    Architecture GetArchitecture() const override {
-        return Architecture::Aarch32;
+        return Architecture::AArch32;
    }
-    void SaveContext(ThreadContext32& ctx) const override;
-    void SaveContext(ThreadContext64& ctx) const override {}
-    void LoadContext(const ThreadContext32& ctx) override;
-    void LoadContext(const ThreadContext64& ctx) override {}

-    void SignalInterrupt() override;
-    void ClearInterrupt() override;
-    void ClearExclusiveState() override;
    // True when bit 0x20 of the JIT's CPSR is set.
+    bool IsInThumbMode() const;

    // Execute guest code on the JIT; both clear the JIT's exclusive state first.
+    HaltReason RunThread(Kernel::KThread* thread) override;
+    HaltReason StepThread(Kernel::KThread* thread) override;
+
    // Copy register state between the JIT and the SVC thread-context layout.
+    void GetContext(Kernel::Svc::ThreadContext& ctx) const override;
+    void SetContext(const Kernel::Svc::ThreadContext& ctx) override;
+    void SetTpidrroEl0(u64 value) override;
+
    // SVC plumbing: the eight argument slots correspond to the first eight core registers.
+    void GetSvcArguments(std::span<uint64_t, 8> args) const override;
+    void SetSvcArguments(std::span<const uint64_t, 8> args) override;
+    u32 GetSvcNumber() const override;
+
+    void SignalInterrupt(Kernel::KThread* thread) override;
    void ClearInstructionCache() override;
    void InvalidateCacheRange(u64 addr, std::size_t size) override;
-    void PageTableChanged(Common::PageTable& new_page_table,
-                          std::size_t new_address_space_size_in_bits) override;

 protected:
-    HaltReason RunJit() override;
-    HaltReason StepJit() override;
-    u32 GetSvcNumber() const override;
    const Kernel::DebugWatchpoint* HaltedWatchpoint() const override;
    void RewindBreakpointInstruction() override;

 private:
-    std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable* page_table) const;
-
-    static std::vector<BacktraceEntry> GetBacktrace(Core::System& system, u64 fp, u64 lr, u64 pc);
-
-    using JitCacheKey = std::pair<Common::PageTable*, std::size_t>;
-    using JitCacheType =
-        std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
+    System& m_system;
+    DynarmicExclusiveMonitor& m_exclusive_monitor;

+private:
    // The callback and coprocessor classes access the private members of this class directly.
    friend class DynarmicCallbacks32;
    friend class DynarmicCP15;

-    std::unique_ptr<DynarmicCallbacks32> cb;
-    JitCacheType jit_cache;
-    std::shared_ptr<DynarmicCP15> cp15;
-    std::size_t core_index;
-    DynarmicExclusiveMonitor& exclusive_monitor;
+    std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable* page_table) const;

-    std::shared_ptr<Dynarmic::A32::Jit> null_jit;
+    std::unique_ptr<DynarmicCallbacks32> m_cb{};
+    std::shared_ptr<DynarmicCP15> m_cp15{};
+    std::size_t m_core_index{};

-    // A raw pointer here is fine; we never delete Jit instances.
-    std::atomic<Dynarmic::A32::Jit*> jit;
    // The single JIT for this core; it is assigned in the constructor body via MakeJit().
+    std::shared_ptr<Dynarmic::A32::Jit> m_jit{};

    // SVC callback
-    u32 svc_swi{};
+    u32 m_svc_swi{};

    // Watchpoint info
-    const Kernel::DebugWatchpoint* halted_watchpoint;
-    ThreadContext32 breakpoint_context;
+    const Kernel::DebugWatchpoint* m_halted_watchpoint{};
+    Kernel::Svc::ThreadContext m_breakpoint_context{};
 };

 } // namespace Core
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@ -1,25 +1,12 @@
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

-#include <cinttypes>
-#include <memory>
-#include <dynarmic/interface/A64/a64.h>
-#include <dynarmic/interface/A64/config.h>
-#include "common/assert.h"
-#include "common/literals.h"
-#include "common/logging/log.h"
-#include "common/page_table.h"
 #include "common/settings.h"
 #include "core/arm/dynarmic/arm_dynarmic.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
 #include "core/arm/dynarmic/dynarmic_exclusive_monitor.h"
-#include "core/core.h"
 #include "core/core_timing.h"
-#include "core/debugger/debugger.h"
-#include "core/hardware_properties.h"
 #include "core/hle/kernel/k_process.h"
-#include "core/hle/kernel/svc.h"
-#include "core/memory.h"

 namespace Core {

@ -28,92 +15,92 @@ using namespace Common::Literals;

 class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
 public:
-    explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_)
-        : parent{parent_}, memory(parent.system.ApplicationMemory()),
-          debugger_enabled{parent.system.DebuggerEnabled()},
-          check_memory_access{debugger_enabled ||
-                              !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} {}
    // Binds the callbacks to the owning core (`parent`) and to the memory of `process`.
    // Access checking is switched on when the debugger is enabled or when the
    // cpuopt_ignore_memory_aborts setting is false.
+    explicit DynarmicCallbacks64(ArmDynarmic64& parent, const Kernel::KProcess* process)
+        : m_parent{parent}, m_memory(process->GetMemory()),
+          m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()},
+          m_check_memory_access{m_debugger_enabled ||
+                                !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} {}

    // Guest data reads of 1 to 16 bytes. Each one runs CheckMemoryAccess() for the access and
    // then returns the value read from process memory; the result of the check is not used to
    // skip the read.
    u8 MemoryRead8(u64 vaddr) override {
        CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read);
-        return memory.Read8(vaddr);
+        return m_memory.Read8(vaddr);
    }
    u16 MemoryRead16(u64 vaddr) override {
        CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read);
-        return memory.Read16(vaddr);
+        return m_memory.Read16(vaddr);
    }
    u32 MemoryRead32(u64 vaddr) override {
        CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read);
-        return memory.Read32(vaddr);
+        return m_memory.Read32(vaddr);
    }
    u64 MemoryRead64(u64 vaddr) override {
        CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read);
-        return memory.Read64(vaddr);
+        return m_memory.Read64(vaddr);
    }
    // The 128-bit read is assembled from two 64-bit reads at `vaddr` and `vaddr + 8`.
    Vector MemoryRead128(u64 vaddr) override {
        CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Read);
-        return {memory.Read64(vaddr), memory.Read64(vaddr + 8)};
+        return {m_memory.Read64(vaddr), m_memory.Read64(vaddr + 8)};
    }
    // Instruction fetch: yields std::nullopt when the 4-byte range at `vaddr` is not a valid
    // virtual address range, otherwise the 32-bit word stored there.
    std::optional<u32> MemoryReadCode(u64 vaddr) override {
-        if (!memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) {
+        if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) {
            return std::nullopt;
        }
-        return memory.Read32(vaddr);
+        return m_memory.Read32(vaddr);
    }

    // Guest data writes of 1 to 16 bytes. The store is performed only when
    // CheckMemoryAccess() returns true for the access; otherwise it is dropped.
    void MemoryWrite8(u64 vaddr, u8 value) override {
        if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) {
-            memory.Write8(vaddr, value);
+            m_memory.Write8(vaddr, value);
        }
    }
    void MemoryWrite16(u64 vaddr, u16 value) override {
        if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) {
-            memory.Write16(vaddr, value);
+            m_memory.Write16(vaddr, value);
        }
    }
    void MemoryWrite32(u64 vaddr, u32 value) override {
        if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) {
-            memory.Write32(vaddr, value);
+            m_memory.Write32(vaddr, value);
        }
    }
    void MemoryWrite64(u64 vaddr, u64 value) override {
        if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) {
-            memory.Write64(vaddr, value);
+            m_memory.Write64(vaddr, value);
        }
    }
    // The 128-bit write is issued as two separate 64-bit writes (value[0], then value[1]).
    void MemoryWrite128(u64 vaddr, Vector value) override {
        if (CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write)) {
-            memory.Write64(vaddr, value[0]);
-            memory.Write64(vaddr + 8, value[1]);
+            m_memory.Write64(vaddr, value[0]);
+            m_memory.Write64(vaddr + 8, value[1]);
        }
    }

    // Exclusive stores of 1 to 16 bytes. Each returns false without touching memory when
    // CheckMemoryAccess() rejects the access; otherwise it returns whatever the matching
    // WriteExclusive call on the process memory returns.
    bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override {
        return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive8(vaddr, value, expected);
+               m_memory.WriteExclusive8(vaddr, value, expected);
    }
    bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override {
        return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive16(vaddr, value, expected);
+               m_memory.WriteExclusive16(vaddr, value, expected);
    }
    bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override {
        return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive32(vaddr, value, expected);
+               m_memory.WriteExclusive32(vaddr, value, expected);
    }
    bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override {
        return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive64(vaddr, value, expected);
+               m_memory.WriteExclusive64(vaddr, value, expected);
    }
    bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override {
        return CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write) &&
-               memory.WriteExclusive128(vaddr, value, expected);
+               m_memory.WriteExclusive128(vaddr, value, expected);
    }

    // Called for instructions the JIT cannot translate: logs a backtrace for the process and
    // an error containing the 32-bit word read at `pc`, then reports the fault through
    // ReturnException() with the PrefetchAbort halt reason.
    void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
-        parent.LogBacktrace();
+        m_parent.LogBacktrace(m_process);
        LOG_ERROR(Core_ARM,
                  "Unimplemented instruction @ 0x{:X} for {} instructions (instr = {:08X})", pc,
-                  num_instructions, memory.Read32(pc));
+                  num_instructions, m_memory.Read32(pc));
        ReturnException(pc, PrefetchAbort);
    }

@ -124,11 +111,11 @@ public:
            static constexpr u64 ICACHE_LINE_SIZE = 64;

            const u64 cache_line_start = value & ~(ICACHE_LINE_SIZE - 1);
-            parent.system.InvalidateCpuInstructionCacheRange(cache_line_start, ICACHE_LINE_SIZE);
+            m_parent.InvalidateCacheRange(cache_line_start, ICACHE_LINE_SIZE);
            break;
        }
        case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoU:
-            parent.system.InvalidateCpuInstructionCaches();
+            m_parent.ClearInstructionCache();
            break;
        case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoUInnerSharable:
        default:
@ -136,7 +123,7 @@ public:
            break;
        }

-        parent.jit.load()->HaltExecution(Dynarmic::HaltReason::CacheInvalidation);
+        m_parent.m_jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation);
    }

    void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
@ -152,26 +139,24 @@ public:
            ReturnException(pc, PrefetchAbort);
            return;
        default:
-            if (debugger_enabled) {
+            if (m_debugger_enabled) {
                ReturnException(pc, InstructionBreakpoint);
                return;
            }

-            parent.LogBacktrace();
+            m_parent.LogBacktrace(m_process);
            LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
-                         static_cast<std::size_t>(exception), pc, memory.Read32(pc));
+                         static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
        }
    }

-    void CallSVC(u32 swi) override {
-        parent.svc_swi = swi;
-        parent.jit.load()->HaltExecution(SupervisorCall);
+    void CallSVC(u32 svc) override {
+        m_parent.m_svc = svc; // stored on the parent; ArmDynarmic64::GetSvcNumber() returns this value
+        m_parent.m_jit->HaltExecution(SupervisorCall); // stop the JIT with the SupervisorCall halt reason
    }

    void AddTicks(u64 ticks) override {
-        if (parent.uses_wall_clock) {
-            return;
-        }
+        ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled");

        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
        // rough approximation of the amount of executed ticks in the system, it may be thrown off
@ -182,44 +167,39 @@ public:
        // Always execute at least one tick.
        amortized_ticks = std::max<u64>(amortized_ticks, 1);

-        parent.system.CoreTiming().AddTicks(amortized_ticks);
+        m_parent.m_system.CoreTiming().AddTicks(amortized_ticks);
    }

    u64 GetTicksRemaining() override {
-        if (parent.uses_wall_clock) {
-            if (!IsInterrupted()) {
-                return minimum_run_cycles;
-            }
-            return 0U;
-        }
+        ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); // MakeJit sets enable_cycle_counting = !m_uses_wall_clock, so this is not expected with wall clock

-        return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
+        return std::max<s64>(m_parent.m_system.CoreTiming().GetDowncount(), 0); // a negative downcount is clamped to 0
    }

    u64 GetCNTPCT() override {
-        return parent.system.CoreTiming().GetClockTicks();
+        return m_parent.m_system.CoreTiming().GetClockTicks(); // counter value comes from the System's CoreTiming
    }

    bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) {
-        if (!check_memory_access) {
+        if (!m_check_memory_access) {
            return true;
        }

-        if (!memory.IsValidVirtualAddressRange(addr, size)) {
+        if (!m_memory.IsValidVirtualAddressRange(addr, size)) {
            LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}",
                         addr);
-            parent.jit.load()->HaltExecution(PrefetchAbort);
+            m_parent.m_jit->HaltExecution(PrefetchAbort);
            return false;
        }

-        if (!debugger_enabled) {
+        if (!m_debugger_enabled) {
            return true;
        }

-        const auto match{parent.MatchingWatchpoint(addr, size, type)};
+        const auto match{m_parent.MatchingWatchpoint(addr, size, type)};
        if (match) {
-            parent.halted_watchpoint = match;
-            parent.jit.load()->HaltExecution(DataAbort);
+            m_parent.m_halted_watchpoint = match;
+            m_parent.m_jit->HaltExecution(DataAbort);
            return false;
        }

@ -227,30 +207,27 @@ public:
    }

    void ReturnException(u64 pc, Dynarmic::HaltReason hr) {
-        parent.SaveContext(parent.breakpoint_context);
-        parent.breakpoint_context.pc = pc;
-        parent.jit.load()->HaltExecution(hr);
+        m_parent.GetContext(m_parent.m_breakpoint_context); // snapshot the JIT state; RewindBreakpointInstruction() restores it
+        m_parent.m_breakpoint_context.pc = pc; // overwrite the snapshot's pc with the faulting pc passed in
+        m_parent.m_jit->HaltExecution(hr); // stop the JIT with the caller-supplied halt reason
    }

-    bool IsInterrupted() {
-        return parent.system.Kernel().PhysicalCore(parent.core_index).IsInterrupted();
-    }
-
-    ARM_Dynarmic_64& parent;
-    Core::Memory::Memory& memory;
-    u64 tpidrro_el0 = 0;
-    u64 tpidr_el0 = 0;
-    const bool debugger_enabled{};
-    const bool check_memory_access{};
-    static constexpr u64 minimum_run_cycles = 10000U;
+    ArmDynarmic64& m_parent;
+    Core::Memory::Memory& m_memory;
+    u64 m_tpidrro_el0{};
+    u64 m_tpidr_el0{};
+    const Kernel::KProcess* m_process{};
+    const bool m_debugger_enabled{};
+    const bool m_check_memory_access{};
+    static constexpr u64 MinimumRunCycles = 10000U;
 };

-std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* page_table,
-                                                             std::size_t address_space_bits) const {
+std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* page_table,
+                                                           std::size_t address_space_bits) const {
    Dynarmic::A64::UserConfig config;

    // Callbacks
-    config.callbacks = cb.get();
+    config.callbacks = m_cb.get();

    // Memory
    if (page_table) {
@ -271,12 +248,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
    }

    // Multi-process state
-    config.processor_id = core_index;
-    config.global_monitor = &exclusive_monitor.monitor;
+    config.processor_id = m_core_index;
+    config.global_monitor = &m_exclusive_monitor.monitor;

    // System registers
-    config.tpidrro_el0 = &cb->tpidrro_el0;
-    config.tpidr_el0 = &cb->tpidr_el0;
+    config.tpidrro_el0 = &m_cb->m_tpidrro_el0;
+    config.tpidr_el0 = &m_cb->m_tpidr_el0;
    config.dczid_el0 = 4;
    config.ctr_el0 = 0x8444c004;
    config.cntfrq_el0 = Hardware::CNTFREQ;
@ -285,8 +262,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
    config.define_unpredictable_behaviour = true;

    // Timing
-    config.wall_clock_cntpct = uses_wall_clock;
-    config.enable_cycle_counting = true;
+    config.wall_clock_cntpct = m_uses_wall_clock;
+    config.enable_cycle_counting = !m_uses_wall_clock;

    // Code cache size
 #ifdef ARCHITECTURE_arm64
@ -296,7 +273,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
 #endif

    // Allow memory fault handling to work
-    if (system.DebuggerEnabled()) {
+    if (m_system.DebuggerEnabled()) {
        config.check_halt_on_memory_access = true;
    }

@ -384,147 +361,112 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
    return std::make_shared<Dynarmic::A64::Jit>(config);
 }

-HaltReason ARM_Dynarmic_64::RunJit() {
-    return TranslateHaltReason(jit.load()->Run());
+HaltReason ArmDynarmic64::RunThread(Kernel::KThread* thread) { // thread is not referenced in this body
+    m_jit->ClearExclusiveState(); // exclusive state is cleared on every entry before running
+    return TranslateHaltReason(m_jit->Run()); // JIT halt reason is translated to the Core HaltReason type
 }

-HaltReason ARM_Dynarmic_64::StepJit() {
-    return TranslateHaltReason(jit.load()->Step());
+HaltReason ArmDynarmic64::StepThread(Kernel::KThread* thread) { // thread is not referenced in this body
+    m_jit->ClearExclusiveState(); // exclusive state is cleared on every entry before stepping
+    return TranslateHaltReason(m_jit->Step()); // JIT halt reason is translated to the Core HaltReason type
 }

-u32 ARM_Dynarmic_64::GetSvcNumber() const {
-    return svc_swi;
+u32 ArmDynarmic64::GetSvcNumber() const {
+    return m_svc; // written by DynarmicCallbacks64::CallSVC
 }

-const Kernel::DebugWatchpoint* ARM_Dynarmic_64::HaltedWatchpoint() const {
-    return halted_watchpoint;
-}
+void ArmDynarmic64::GetSvcArguments(std::span<uint64_t, 8> args) const {
+    Dynarmic::A64::Jit& j = *m_jit;

-void ARM_Dynarmic_64::RewindBreakpointInstruction() {
-    LoadContext(breakpoint_context);
-}
-
-ARM_Dynarmic_64::ARM_Dynarmic_64(System& system_, bool uses_wall_clock_,
-                                 DynarmicExclusiveMonitor& exclusive_monitor_,
-                                 std::size_t core_index_)
-    : ARM_Interface{system_, uses_wall_clock_},
-      cb(std::make_unique<DynarmicCallbacks64>(*this)), core_index{core_index_},
-      exclusive_monitor{exclusive_monitor_}, null_jit{MakeJit(nullptr, 48)}, jit{null_jit.get()} {}
-
-ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;
-
-void ARM_Dynarmic_64::SetPC(u64 pc) {
-    jit.load()->SetPC(pc);
-}
-
-u64 ARM_Dynarmic_64::GetPC() const {
-    return jit.load()->GetPC();
-}
-
-u64 ARM_Dynarmic_64::GetSP() const {
-    return jit.load()->GetSP();
-}
-
-u64 ARM_Dynarmic_64::GetReg(int index) const {
-    return jit.load()->GetRegister(index);
-}
-
-void ARM_Dynarmic_64::SetReg(int index, u64 value) {
-    jit.load()->SetRegister(index, value);
-}
-
-u128 ARM_Dynarmic_64::GetVectorReg(int index) const {
-    return jit.load()->GetVector(index);
-}
-
-void ARM_Dynarmic_64::SetVectorReg(int index, u128 value) {
-    jit.load()->SetVector(index, value);
-}
-
-u32 ARM_Dynarmic_64::GetPSTATE() const {
-    return jit.load()->GetPstate();
-}
-
-void ARM_Dynarmic_64::SetPSTATE(u32 pstate) {
-    jit.load()->SetPstate(pstate);
-}
-
-u64 ARM_Dynarmic_64::GetTlsAddress() const {
-    return cb->tpidrro_el0;
-}
-
-void ARM_Dynarmic_64::SetTlsAddress(u64 address) {
-    cb->tpidrro_el0 = address;
-}
-
-u64 ARM_Dynarmic_64::GetTPIDR_EL0() const {
-    return cb->tpidr_el0;
-}
-
-void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) {
-    cb->tpidr_el0 = value;
-}
-
-void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) const {
-    Dynarmic::A64::Jit* j = jit.load();
-    ctx.cpu_registers = j->GetRegisters();
-    ctx.sp = j->GetSP();
-    ctx.pc = j->GetPC();
-    ctx.pstate = j->GetPstate();
-    ctx.vector_registers = j->GetVectors();
-    ctx.fpcr = j->GetFpcr();
-    ctx.fpsr = j->GetFpsr();
-    ctx.tpidr = cb->tpidr_el0;
-}
-
-void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) {
-    Dynarmic::A64::Jit* j = jit.load();
-    j->SetRegisters(ctx.cpu_registers);
-    j->SetSP(ctx.sp);
-    j->SetPC(ctx.pc);
-    j->SetPstate(ctx.pstate);
-    j->SetVectors(ctx.vector_registers);
-    j->SetFpcr(ctx.fpcr);
-    j->SetFpsr(ctx.fpsr);
-    SetTPIDR_EL0(ctx.tpidr);
-}
-
-void ARM_Dynarmic_64::SignalInterrupt() {
-    jit.load()->HaltExecution(BreakLoop);
-}
-
-void ARM_Dynarmic_64::ClearInterrupt() {
-    jit.load()->ClearHalt(BreakLoop);
-}
-
-void ARM_Dynarmic_64::ClearInstructionCache() {
-    jit.load()->ClearCache();
-}
-
-void ARM_Dynarmic_64::InvalidateCacheRange(u64 addr, std::size_t size) {
-    jit.load()->InvalidateCacheRange(addr, size);
-}
-
-void ARM_Dynarmic_64::ClearExclusiveState() {
-    jit.load()->ClearExclusiveState();
-}
-
-void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table,
-                                       std::size_t new_address_space_size_in_bits) {
-    ThreadContext64 ctx{};
-    SaveContext(ctx);
-
-    auto key = std::make_pair(&page_table, new_address_space_size_in_bits);
-    auto iter = jit_cache.find(key);
-    if (iter != jit_cache.end()) {
-        jit.store(iter->second.get());
-        LoadContext(ctx);
-        return;
+    for (size_t i = 0; i < 8; i++) {
+        args[i] = j.GetRegister(i);
    }
-    std::shared_ptr new_jit = MakeJit(&page_table, new_address_space_size_in_bits);
-    jit.store(new_jit.get());
-    LoadContext(ctx);
-    jit_cache.emplace(key, std::move(new_jit));
+}
+
+void ArmDynarmic64::SetSvcArguments(std::span<const uint64_t, 8> args) {
+    Dynarmic::A64::Jit& j = *m_jit;
+
+    for (size_t i = 0; i < 8; i++) { // the 8 arguments are written to JIT registers 0..7
+        j.SetRegister(i, args[i]);
+    }
+}
+
+const Kernel::DebugWatchpoint* ArmDynarmic64::HaltedWatchpoint() const {
+    return m_halted_watchpoint; // assigned in DynarmicCallbacks64::CheckMemoryAccess when a watchpoint matches
+}
+
+void ArmDynarmic64::RewindBreakpointInstruction() {
+    this->SetContext(m_breakpoint_context); // restore the snapshot taken by DynarmicCallbacks64::ReturnException
+}
+
+ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+                             DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index)
+    : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor},
+      m_cb(std::make_unique<DynarmicCallbacks64>(*this, process)), m_core_index{core_index} {
+    auto& page_table = process->GetPageTable().GetBasePageTable(); // process is dereferenced unconditionally, so it must not be null
+    auto& page_table_impl = page_table.GetImpl();
+    m_jit = MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth()); // MakeJit reads m_cb, m_core_index and m_exclusive_monitor, all initialized above
+}
+
+ArmDynarmic64::~ArmDynarmic64() = default;
+
+void ArmDynarmic64::SetTpidrroEl0(u64 value) {
+    m_cb->m_tpidrro_el0 = value; // MakeJit passes the address of this field as config.tpidrro_el0
+}
+
+void ArmDynarmic64::GetContext(Kernel::Svc::ThreadContext& ctx) const {
+    Dynarmic::A64::Jit& j = *m_jit;
+    auto gpr = j.GetRegisters();
+    auto fpr = j.GetVectors();
+
+    // TODO: this is inconvenient
+    for (size_t i = 0; i < 29; i++) { // JIT registers 0..28 go to ctx.r
+        ctx.r[i] = gpr[i];
+    }
+    ctx.fp = gpr[29]; // register 29 is stored separately as fp
+    ctx.lr = gpr[30]; // register 30 is stored separately as lr
+
+    ctx.sp = j.GetSP();
+    ctx.pc = j.GetPC();
+    ctx.pstate = j.GetPstate();
+    ctx.v = fpr;
+    ctx.fpcr = j.GetFpcr();
+    ctx.fpsr = j.GetFpsr();
+    ctx.tpidr = m_cb->m_tpidr_el0; // tpidr lives in the callback object, not in the JIT
+}
+
+void ArmDynarmic64::SetContext(const Kernel::Svc::ThreadContext& ctx) {
+    Dynarmic::A64::Jit& j = *m_jit;
+
+    // TODO: this is inconvenient
+    std::array<u64, 31> gpr; // every element (0..30) is assigned below before use
+
+    for (size_t i = 0; i < 29; i++) { // ctx.r supplies registers 0..28
+        gpr[i] = ctx.r[i];
+    }
+    gpr[29] = ctx.fp; // mirror of GetContext: fp goes back to register 29
+    gpr[30] = ctx.lr; // mirror of GetContext: lr goes back to register 30
+
+    j.SetRegisters(gpr);
+    j.SetSP(ctx.sp);
+    j.SetPC(ctx.pc);
+    j.SetPstate(ctx.pstate);
+    j.SetVectors(ctx.v);
+    j.SetFpcr(ctx.fpcr);
+    j.SetFpsr(ctx.fpsr);
+    m_cb->m_tpidr_el0 = ctx.tpidr; // tpidr lives in the callback object, not in the JIT
+}
+
+void ArmDynarmic64::SignalInterrupt(Kernel::KThread* thread) { // thread is not referenced in this body
+    m_jit->HaltExecution(BreakLoop); // request a halt with the BreakLoop reason
+}
+
+void ArmDynarmic64::ClearInstructionCache() {
+    m_jit->ClearCache(); // also reached from the InvalidateAllToPoU case of the callbacks' cache-operation switch
+}
+
+void ArmDynarmic64::InvalidateCacheRange(u64 addr, std::size_t size) {
+    m_jit->InvalidateCacheRange(addr, size); // also called by the callbacks with a 64-byte-aligned cache line
 }

 } // namespace Core
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@ -23,76 +23,55 @@ class DynarmicCallbacks64;
 class DynarmicExclusiveMonitor;
 class System;

-class ARM_Dynarmic_64 final : public ARM_Interface {
+class ArmDynarmic64 final : public ArmInterface { // Dynarmic A64 JIT implementation of ArmInterface
 public:
-    ARM_Dynarmic_64(System& system_, bool uses_wall_clock_,
-                    DynarmicExclusiveMonitor& exclusive_monitor_, std::size_t core_index_);
-    ~ARM_Dynarmic_64() override;
-
-    void SetPC(u64 pc) override;
-    u64 GetPC() const override;
-    u64 GetSP() const override;
-    u64 GetReg(int index) const override;
-    void SetReg(int index, u64 value) override;
-    u128 GetVectorReg(int index) const override;
-    void SetVectorReg(int index, u128 value) override;
-    u32 GetPSTATE() const override;
-    void SetPSTATE(u32 pstate) override;
-    u64 GetTlsAddress() const override;
-    void SetTlsAddress(u64 address) override;
-    void SetTPIDR_EL0(u64 value) override;
-    u64 GetTPIDR_EL0() const override;
+    ArmDynarmic64(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+                  DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index); // builds the JIT from the process page table; process must not be null
+    ~ArmDynarmic64() override;

    Architecture GetArchitecture() const override {
-        return Architecture::Aarch64;
+        return Architecture::AArch64;
    }
-    void SaveContext(ThreadContext32& ctx) const override {}
-    void SaveContext(ThreadContext64& ctx) const override;
-    void LoadContext(const ThreadContext32& ctx) override {}
-    void LoadContext(const ThreadContext64& ctx) override;

-    void SignalInterrupt() override;
-    void ClearInterrupt() override;
-    void ClearExclusiveState() override;
+    HaltReason RunThread(Kernel::KThread* thread) override; // clears exclusive state, then runs the JIT
+    HaltReason StepThread(Kernel::KThread* thread) override; // clears exclusive state, then single-steps the JIT

+    void GetContext(Kernel::Svc::ThreadContext& ctx) const override; // copies JIT registers and tpidr into ctx
+    void SetContext(const Kernel::Svc::ThreadContext& ctx) override; // loads JIT registers and tpidr from ctx
+    void SetTpidrroEl0(u64 value) override;
+
+    void GetSvcArguments(std::span<uint64_t, 8> args) const override; // reads JIT registers 0..7
+    void SetSvcArguments(std::span<const uint64_t, 8> args) override; // writes JIT registers 0..7
+    u32 GetSvcNumber() const override;
+
+    void SignalInterrupt(Kernel::KThread* thread) override; // halts the JIT with BreakLoop
    void ClearInstructionCache() override;
    void InvalidateCacheRange(u64 addr, std::size_t size) override;
-    void PageTableChanged(Common::PageTable& new_page_table,
-                          std::size_t new_address_space_size_in_bits) override;

 protected:
-    HaltReason RunJit() override;
-    HaltReason StepJit() override;
-    u32 GetSvcNumber() const override;
    const Kernel::DebugWatchpoint* HaltedWatchpoint() const override;
    void RewindBreakpointInstruction() override;

 private:
+    System& m_system;
+    DynarmicExclusiveMonitor& m_exclusive_monitor;
+
+private: // NOTE(review): second consecutive private: label; redundant but harmless
+    friend class DynarmicCallbacks64; // the callbacks access m_jit, m_svc, m_system and other private members directly
+
    std::shared_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable* page_table,
                                                std::size_t address_space_bits) const;
+    std::unique_ptr<DynarmicCallbacks64> m_cb{}; // declared before m_jit; MakeJit stores m_cb.get() in the JIT config
+    std::size_t m_core_index{};

-    using JitCacheKey = std::pair<Common::PageTable*, std::size_t>;
-    using JitCacheType =
-        std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A64::Jit>, Common::PairHash>;
-
-    friend class DynarmicCallbacks64;
-    std::unique_ptr<DynarmicCallbacks64> cb;
-    JitCacheType jit_cache;
-
-    std::size_t core_index;
-    DynarmicExclusiveMonitor& exclusive_monitor;
-
-    std::shared_ptr<Dynarmic::A64::Jit> null_jit;
-
-    // A raw pointer here is fine; we never delete Jit instances.
-    std::atomic<Dynarmic::A64::Jit*> jit;
+    std::shared_ptr<Dynarmic::A64::Jit> m_jit{}; // assigned once in the constructor body

    // SVC callback
-    u32 svc_swi{};
+    u32 m_svc{};

-    // Breakpoint info
-    const Kernel::DebugWatchpoint* halted_watchpoint;
-    ThreadContext64 breakpoint_context;
+    // Watchpoint info
+    const Kernel::DebugWatchpoint* m_halted_watchpoint{};
+    Kernel::Svc::ThreadContext m_breakpoint_context{}; // filled by the callbacks' ReturnException, restored by RewindBreakpointInstruction
 };

 } // namespace Core
--- a/src/core/arm/dynarmic/dynarmic_cp15.cpp
+++ b/src/core/arm/dynarmic/dynarmic_cp15.cpp
@ -124,8 +124,8 @@ CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc
    if (!two && opc == 0 && CRm == CoprocReg::C14) {
        // CNTPCT
        const auto callback = [](void* arg, u32, u32) -> u64 {
-            const auto& parent_arg = *static_cast<ARM_Dynarmic_32*>(arg);
-            return parent_arg.system.CoreTiming().GetClockTicks();
+            const auto& parent_arg = *static_cast<ArmDynarmic32*>(arg);
+            return parent_arg.m_system.CoreTiming().GetClockTicks();
        };
        return Callback{callback, &parent};
    }
--- a/src/core/arm/dynarmic/dynarmic_cp15.h
+++ b/src/core/arm/dynarmic/dynarmic_cp15.h
@ -10,13 +10,13 @@

 namespace Core {

-class ARM_Dynarmic_32;
+class ArmDynarmic32;

 class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
 public:
    using CoprocReg = Dynarmic::A32::CoprocReg;

-    explicit DynarmicCP15(ARM_Dynarmic_32& parent_) : parent{parent_} {}
+    explicit DynarmicCP15(ArmDynarmic32& parent_) : parent{parent_} {}

    std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
                                                     CoprocReg CRn, CoprocReg CRm,
@ -32,11 +32,11 @@ public:
    std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
                                              std::optional<u8> option) override;

-    ARM_Dynarmic_32& parent;
+    ArmDynarmic32& parent;
    u32 uprw = 0;
    u32 uro = 0;

-    friend class ARM_Dynarmic_32;
+    friend class ArmDynarmic32;
 };

 } // namespace Core
--- a/src/core/arm/dynarmic/dynarmic_exclusive_monitor.h
+++ b/src/core/arm/dynarmic/dynarmic_exclusive_monitor.h
@ -14,8 +14,8 @@ class Memory;

 namespace Core {

-class ARM_Dynarmic_32;
-class ARM_Dynarmic_64;
+class ArmDynarmic32;
+class ArmDynarmic64;

 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
 public:
@ -36,8 +36,8 @@ public:
    bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;

 private:
-    friend class ARM_Dynarmic_32;
-    friend class ARM_Dynarmic_64;
+    friend class ArmDynarmic32;
+    friend class ArmDynarmic64;
    Dynarmic::ExclusiveMonitor monitor;
    Core::Memory::Memory& memory;
 };
--- a/src/core/arm/nce/arm_nce.cpp
+++ b/src/core/arm/nce/arm_nce.cpp
@ -6,6 +6,7 @@

 #include "common/signal_chain.h"
 #include "core/arm/nce/arm_nce.h"
+#include "core/arm/nce/guest_context.h"
 #include "core/arm/nce/patcher.h"
 #include "core/core.h"
 #include "core/memory.h"
@ -38,7 +39,7 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {

 } // namespace

-void* ARM_NCE::RestoreGuestContext(void* raw_context) {
+void* ArmNce::RestoreGuestContext(void* raw_context) {
    // Retrieve the host context.
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;

@ -71,7 +72,7 @@ void* ARM_NCE::RestoreGuestContext(void* raw_context) {
    return tpidr;
 }

-void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
+void ArmNce::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
    // Retrieve the host context.
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;

@ -103,7 +104,7 @@ void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
    host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
 }

-bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
+bool ArmNce::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
    auto* info = static_cast<siginfo_t*>(raw_info);

@ -134,7 +135,7 @@ bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* ra
    // - If we lose the race, then SignalInterrupt will send us a signal we are masking,
    //   and it will do nothing when it is unmasked, as we have already left guest code.
    // - If we win the race, then SignalInterrupt will wait for us to unlock first.
-    auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters();
+    auto& thread_params = guest_ctx->parent->m_running_thread->GetNativeExecutionParameters();
    thread_params.lock.store(SpinLockLocked);

    // Return to host.
@ -142,97 +143,93 @@ bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* ra
    return false;
 }

-void ARM_NCE::HandleHostFault(int sig, void* raw_info, void* raw_context) {
+void ArmNce::HandleHostFault(int sig, void* raw_info, void* raw_context) {
    return g_orig_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
 }

-HaltReason ARM_NCE::RunJit() {
-    // Get the thread parameters.
-    // TODO: pass the current thread down from ::Run
-    auto* thread = Kernel::GetCurrentThreadPointer(system.Kernel());
+void ArmNce::LockThread(Kernel::KThread* thread) {
    auto* thread_params = &thread->GetNativeExecutionParameters();
+    LockThreadParameters(thread_params); // takes the lock on this thread's native execution parameters
+}

-    {
-        // Lock our core context.
-        std::scoped_lock lk{lock};
+void ArmNce::UnlockThread(Kernel::KThread* thread) {
+    auto* thread_params = &thread->GetNativeExecutionParameters();
+    UnlockThreadParameters(thread_params); // releases the lock taken by LockThread / LockThreadParameters
+}

-        // We should not be running.
-        ASSERT(running_thread == nullptr);
-
-        // Check if we need to run. If we have already been halted, we are done.
-        u64 halt = guest_ctx.esr_el1.exchange(0);
-        if (halt != 0) {
-            return static_cast<HaltReason>(halt);
-        }
-
-        // Mark that we are running.
-        running_thread = thread;
-
-        // Acquire the lock on the thread parameters.
-        // This allows us to force synchronization with SignalInterrupt.
-        LockThreadParameters(thread_params);
+HaltReason ArmNce::RunThread(Kernel::KThread* thread) {
+    // Check if we're already interrupted.
+    // If we are, we can just return immediately.
+    HaltReason hr = static_cast<HaltReason>(m_guest_ctx.esr_el1.exchange(0)); // reads and clears pending halt bits (SignalInterrupt sets BreakLoop here)
+    if (True(hr)) {
+        return hr;
    }

-    // Assign current members.
-    guest_ctx.parent = this;
-    thread_params->native_context = &guest_ctx;
-    thread_params->tpidr_el0 = guest_ctx.tpidr_el0;
-    thread_params->tpidrro_el0 = guest_ctx.tpidrro_el0;
-    thread_params->is_running = true;
+    // Get the thread context.
+    auto* thread_params = &thread->GetNativeExecutionParameters();
+    auto* process = thread->GetOwnerProcess(); // post handlers are looked up on the thread's owner process

-    HaltReason halt{};
+    // Assign current members.
+    m_running_thread = thread; // HandleGuestFault reaches this via guest_ctx->parent->m_running_thread
+    m_guest_ctx.parent = this;
+    thread_params->native_context = &m_guest_ctx;
+    thread_params->tpidr_el0 = m_guest_ctx.tpidr_el0;
+    thread_params->tpidrro_el0 = m_guest_ctx.tpidrro_el0;
+    thread_params->is_running = true; // SignalInterrupt only sends its signal while this flag is set

    // TODO: finding and creating the post handler needs to be locked
    // to deal with dynamic loading of NROs.
-    const auto& post_handlers = system.ApplicationProcess()->GetPostHandlers();
-    if (auto it = post_handlers.find(guest_ctx.pc); it != post_handlers.end()) {
-        halt = ReturnToRunCodeByTrampoline(thread_params, &guest_ctx, it->second);
+    const auto& post_handlers = process->GetPostHandlers();
+    if (auto it = post_handlers.find(m_guest_ctx.pc); it != post_handlers.end()) { // a handler registered for the current pc selects the trampoline path
+        hr = ReturnToRunCodeByTrampoline(thread_params, &m_guest_ctx, it->second);
    } else {
-        halt = ReturnToRunCodeByExceptionLevelChange(thread_id, thread_params);
+        hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params); // m_thread_id is the host tid captured in Initialize()
    }

    // Unload members.
    // The thread does not change, so we can persist the old reference.
-    guest_ctx.tpidr_el0 = thread_params->tpidr_el0;
+    m_running_thread = nullptr;
+    m_guest_ctx.tpidr_el0 = thread_params->tpidr_el0; // only tpidr_el0 is copied back; tpidrro_el0 is not
    thread_params->native_context = nullptr;
    thread_params->is_running = false;

-    // Unlock the thread parameters.
-    UnlockThreadParameters(thread_params);
-
-    {
-        // Lock the core context.
-        std::scoped_lock lk{lock};
-
-        // On exit, we no longer have an active thread.
-        running_thread = nullptr;
-    }
-
    // Return the halt reason.
-    return halt;
+    return hr;
 }

-HaltReason ARM_NCE::StepJit() {
+HaltReason ArmNce::StepThread(Kernel::KThread* thread) {
    return HaltReason::StepThread; // no guest code is executed here; the step halt reason is returned directly
 }

-u32 ARM_NCE::GetSvcNumber() const {
-    return guest_ctx.svc_swi;
+u32 ArmNce::GetSvcNumber() const {
+    return m_guest_ctx.svc; // the SVC number is read from the guest context
 }

-ARM_NCE::ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_)
-    : ARM_Interface{system_, uses_wall_clock_}, core_index{core_index_} {
-    guest_ctx.system = &system_;
+void ArmNce::GetSvcArguments(std::span<uint64_t, 8> args) const {
+    for (size_t i = 0; i < 8; i++) { // the 8 arguments come from guest registers 0..7
+        args[i] = m_guest_ctx.cpu_registers[i];
+    }
 }

-ARM_NCE::~ARM_NCE() = default;
+void ArmNce::SetSvcArguments(std::span<const uint64_t, 8> args) {
+    for (size_t i = 0; i < 8; i++) { // the 8 arguments are written to guest registers 0..7
+        m_guest_ctx.cpu_registers[i] = args[i];
+    }
+}

-void ARM_NCE::Initialize() {
-    thread_id = gettid();
+ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
+    : ArmInterface{uses_wall_clock}, m_system{system}, m_core_index{core_index} {
+    m_guest_ctx.system = &m_system; // the guest context keeps a pointer back to the System
+}
+
+ArmNce::~ArmNce() = default;
+
+void ArmNce::Initialize() {
+    m_thread_id = gettid();

    // Setup our signals
-    static std::once_flag flag;
-    std::call_once(flag, [] {
+    static std::once_flag signals;
+    std::call_once(signals, [] {
        using HandlerType = decltype(sigaction::sa_sigaction);

        sigset_t signal_mask;
@ -244,7 +241,7 @@ void ARM_NCE::Initialize() {
        struct sigaction return_to_run_code_action {};
        return_to_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
        return_to_run_code_action.sa_sigaction = reinterpret_cast<HandlerType>(
-            &ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
+            &ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
        return_to_run_code_action.sa_mask = signal_mask;
        Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
                          nullptr);
@ -252,14 +249,13 @@ void ARM_NCE::Initialize() {
        struct sigaction break_from_run_code_action {};
        break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
        break_from_run_code_action.sa_sigaction =
-            reinterpret_cast<HandlerType>(&ARM_NCE::BreakFromRunCodeSignalHandler);
+            reinterpret_cast<HandlerType>(&ArmNce::BreakFromRunCodeSignalHandler);
        break_from_run_code_action.sa_mask = signal_mask;
        Common::SigAction(BreakFromRunCodeSignal, &break_from_run_code_action, nullptr);

        struct sigaction fault_action {};
        fault_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
-        fault_action.sa_sigaction =
-            reinterpret_cast<HandlerType>(&ARM_NCE::GuestFaultSignalHandler);
+        fault_action.sa_sigaction = reinterpret_cast<HandlerType>(&ArmNce::GuestFaultSignalHandler);
        fault_action.sa_mask = signal_mask;
        Common::SigAction(GuestFaultSignal, &fault_action, &g_orig_action);

@ -272,111 +268,59 @@ void ARM_NCE::Initialize() {
    });
 }

-void ARM_NCE::SetPC(u64 pc) {
-    guest_ctx.pc = pc;
+void ArmNce::SetTpidrroEl0(u64 value) {
+    m_guest_ctx.tpidrro_el0 = value; // RunThread copies this into the thread parameters before entering guest code
 }

-u64 ARM_NCE::GetPC() const {
-    return guest_ctx.pc;
-}
-
-u64 ARM_NCE::GetSP() const {
-    return guest_ctx.sp;
-}
-
-u64 ARM_NCE::GetReg(int index) const {
-    return guest_ctx.cpu_registers[index];
-}
-
-void ARM_NCE::SetReg(int index, u64 value) {
-    guest_ctx.cpu_registers[index] = value;
-}
-
-u128 ARM_NCE::GetVectorReg(int index) const {
-    return guest_ctx.vector_registers[index];
-}
-
-void ARM_NCE::SetVectorReg(int index, u128 value) {
-    guest_ctx.vector_registers[index] = value;
-}
-
-u32 ARM_NCE::GetPSTATE() const {
-    return guest_ctx.pstate;
-}
-
-void ARM_NCE::SetPSTATE(u32 pstate) {
-    guest_ctx.pstate = pstate;
-}
-
-u64 ARM_NCE::GetTlsAddress() const {
-    return guest_ctx.tpidrro_el0;
-}
-
-void ARM_NCE::SetTlsAddress(u64 address) {
-    guest_ctx.tpidrro_el0 = address;
-}
-
-u64 ARM_NCE::GetTPIDR_EL0() const {
-    return guest_ctx.tpidr_el0;
-}
-
-void ARM_NCE::SetTPIDR_EL0(u64 value) {
-    guest_ctx.tpidr_el0 = value;
-}
-
-void ARM_NCE::SaveContext(ThreadContext64& ctx) const {
-    ctx.cpu_registers = guest_ctx.cpu_registers;
-    ctx.sp = guest_ctx.sp;
-    ctx.pc = guest_ctx.pc;
-    ctx.pstate = guest_ctx.pstate;
-    ctx.vector_registers = guest_ctx.vector_registers;
-    ctx.fpcr = guest_ctx.fpcr;
-    ctx.fpsr = guest_ctx.fpsr;
-    ctx.tpidr = guest_ctx.tpidr_el0;
-}
-
-void ARM_NCE::LoadContext(const ThreadContext64& ctx) {
-    guest_ctx.cpu_registers = ctx.cpu_registers;
-    guest_ctx.sp = ctx.sp;
-    guest_ctx.pc = ctx.pc;
-    guest_ctx.pstate = ctx.pstate;
-    guest_ctx.vector_registers = ctx.vector_registers;
-    guest_ctx.fpcr = ctx.fpcr;
-    guest_ctx.fpsr = ctx.fpsr;
-    guest_ctx.tpidr_el0 = ctx.tpidr;
-}
-
-void ARM_NCE::SignalInterrupt() {
-    // Lock core context.
-    std::scoped_lock lk{lock};
-
-    // Add break loop condition.
-    guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));
-
-    // If there is no thread running, we are done.
-    if (running_thread == nullptr) {
-        return;
+void ArmNce::GetContext(Kernel::Svc::ThreadContext& ctx) const {
+    for (size_t i = 0; i < 29; i++) { // guest registers 0..28 go to ctx.r
+        ctx.r[i] = m_guest_ctx.cpu_registers[i];
    }
+    ctx.fp = m_guest_ctx.cpu_registers[29]; // register 29 is stored separately as fp
+    ctx.lr = m_guest_ctx.cpu_registers[30]; // register 30 is stored separately as lr
+    ctx.sp = m_guest_ctx.sp;
+    ctx.pc = m_guest_ctx.pc;
+    ctx.pstate = m_guest_ctx.pstate;
+    ctx.v = m_guest_ctx.vector_registers;
+    ctx.fpcr = m_guest_ctx.fpcr;
+    ctx.fpsr = m_guest_ctx.fpsr;
+    ctx.tpidr = m_guest_ctx.tpidr_el0;
+}
+
+void ArmNce::SetContext(const Kernel::Svc::ThreadContext& ctx) {
+    for (size_t i = 0; i < 29; i++) { // ctx.r supplies guest registers 0..28
+        m_guest_ctx.cpu_registers[i] = ctx.r[i];
+    }
+    m_guest_ctx.cpu_registers[29] = ctx.fp; // mirror of GetContext: fp goes back to register 29
+    m_guest_ctx.cpu_registers[30] = ctx.lr; // mirror of GetContext: lr goes back to register 30
+    m_guest_ctx.sp = ctx.sp;
+    m_guest_ctx.pc = ctx.pc;
+    m_guest_ctx.pstate = ctx.pstate;
+    m_guest_ctx.vector_registers = ctx.v;
+    m_guest_ctx.fpcr = ctx.fpcr;
+    m_guest_ctx.fpsr = ctx.fpsr;
+    m_guest_ctx.tpidr_el0 = ctx.tpidr;
+}
+
+void ArmNce::SignalInterrupt(Kernel::KThread* thread) {
+    // Add break loop condition.
+    m_guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop)); // RunThread returns early if it observes this bit before entering guest code

    // Lock the thread context.
-    auto* params = &running_thread->GetNativeExecutionParameters();
+    auto* params = &thread->GetNativeExecutionParameters(); // uses the caller-supplied thread; there is no null check here
    LockThreadParameters(params);

    if (params->is_running) {
        // We should signal to the running thread.
        // The running thread will unlock the thread context.
-        syscall(SYS_tkill, thread_id, BreakFromRunCodeSignal);
+        syscall(SYS_tkill, m_thread_id, BreakFromRunCodeSignal); // m_thread_id is the host tid recorded by Initialize() via gettid()
    } else {
        // If the thread is no longer running, we have nothing to do.
        UnlockThreadParameters(params);
    }
 }

-void ARM_NCE::ClearInterrupt() {
-    guest_ctx.esr_el1 = {};
-}
-
-void ARM_NCE::ClearInstructionCache() {
+void ArmNce::ClearInstructionCache() {
    // TODO: This is not possible to implement correctly on Linux because
    // we do not have any access to ic iallu.

@ -384,17 +328,8 @@ void ARM_NCE::ClearInstructionCache() {
    std::atomic_thread_fence(std::memory_order_seq_cst);
 }

-void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) {
+void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) { // addr/size are unused here.
    this->ClearInstructionCache();
 }

-void ARM_NCE::ClearExclusiveState() {
-    // No-op.
-}
-
-void ARM_NCE::PageTableChanged(Common::PageTable& page_table,
-                               std::size_t new_address_space_size_in_bits) {
-    // No-op. Page table is never used.
-}
-
 } // namespace Core
--- a/src/core/arm/nce/arm_nce.h
+++ b/src/core/arm/nce/arm_nce.h
@ -3,11 +3,7 @@

 #pragma once

-#include <atomic>
-#include <memory>
-#include <span>
-#include <unordered_map>
-#include <vector>
+#include <mutex>

 #include "core/arm/arm_interface.h"
 #include "core/arm/nce/guest_context.h"
@ -20,51 +16,36 @@ namespace Core {

 class System;

-class ARM_NCE final : public ARM_Interface {
+class ArmNce final : public ArmInterface {
 public:
-    ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_);
-
-    ~ARM_NCE() override;
+    ArmNce(System& system, bool uses_wall_clock, std::size_t core_index);
+    ~ArmNce() override;

    void Initialize() override;
-    void SetPC(u64 pc) override;
-    u64 GetPC() const override;
-    u64 GetSP() const override;
-    u64 GetReg(int index) const override;
-    void SetReg(int index, u64 value) override;
-    u128 GetVectorReg(int index) const override;
-    void SetVectorReg(int index, u128 value) override;
-
-    u32 GetPSTATE() const override;
-    void SetPSTATE(u32 pstate) override;
-    u64 GetTlsAddress() const override;
-    void SetTlsAddress(u64 address) override;
-    void SetTPIDR_EL0(u64 value) override;
-    u64 GetTPIDR_EL0() const override;

    Architecture GetArchitecture() const override {
-        return Architecture::Aarch64;
+        return Architecture::AArch64;
    }

-    void SaveContext(ThreadContext32& ctx) const override {}
-    void SaveContext(ThreadContext64& ctx) const override;
-    void LoadContext(const ThreadContext32& ctx) override {}
-    void LoadContext(const ThreadContext64& ctx) override;
+    HaltReason RunThread(Kernel::KThread* thread) override;
+    HaltReason StepThread(Kernel::KThread* thread) override;

-    void SignalInterrupt() override;
-    void ClearInterrupt() override;
-    void ClearExclusiveState() override;
-    void ClearInstructionCache() override;
-    void InvalidateCacheRange(u64 addr, std::size_t size) override;
-    void PageTableChanged(Common::PageTable& new_page_table,
-                          std::size_t new_address_space_size_in_bits) override;
-
-protected:
-    HaltReason RunJit() override;
-    HaltReason StepJit() override;
+    void GetContext(Kernel::Svc::ThreadContext& ctx) const override;
+    void SetContext(const Kernel::Svc::ThreadContext& ctx) override;
+    void SetTpidrroEl0(u64 value) override;

+    void GetSvcArguments(std::span<uint64_t, 8> args) const override;
+    void SetSvcArguments(std::span<const uint64_t, 8> args) override;
    u32 GetSvcNumber() const override;

+    void SignalInterrupt(Kernel::KThread* thread) override;
+    void ClearInstructionCache() override;
+    void InvalidateCacheRange(u64 addr, std::size_t size) override;
+
+    void LockThread(Kernel::KThread* thread) override;
+    void UnlockThread(Kernel::KThread* thread) override;
+
+protected:
    const Kernel::DebugWatchpoint* HaltedWatchpoint() const override {
        return nullptr;
    }
@ -93,16 +74,15 @@ private:
    static void HandleHostFault(int sig, void* info, void* raw_context);

 public:
+    Core::System& m_system;
+
    // Members set on initialization.
-    std::size_t core_index{};
-    pid_t thread_id{-1};
+    std::size_t m_core_index{};
+    pid_t m_thread_id{-1};

    // Core context.
-    GuestContext guest_ctx;
-
-    // Thread and invalidation info.
-    std::mutex lock;
-    Kernel::KThread* running_thread{};
+    GuestContext m_guest_ctx{};
+    Kernel::KThread* m_running_thread{};
 };

 } // namespace Core
--- a/src/core/arm/nce/arm_nce.s
+++ b/src/core/arm/nce/arm_nce.s
@ -8,11 +8,11 @@
    movk    reg, #(((val) >> 0x10) & 0xFFFF), lsl #16


-/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
-.section    .text._ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
-.global     _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
-.type       _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
-_ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
+/* static HaltReason Core::ArmNce::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
+.section    .text._ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
+.global     _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
+.type       _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
+_ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
    /* Back up host sp to x3. */
    /* Back up host tpidr_el0 to x4. */
    mov     x3, sp
@ -49,11 +49,11 @@ _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
    br      x2


-/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
-.section    .text._ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
-.global     _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv
-.type       _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
-_ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv:
+/* static HaltReason Core::ArmNce::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
+.section    .text._ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
+.global     _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
+.type       _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
+_ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv:
    /* This jumps to the signal handler, which will restore the entire context. */
    /* On entry, x0 = thread id, which is already in the right place. */

@ -71,17 +71,17 @@ _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv:
    brk     #1000


-/* static void Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
-.section    .text._ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
-.global     _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
-.type       _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
-_ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
+/* static void Core::ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
+.section    .text._ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
+.global     _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
+.type       _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
+_ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
    stp     x29, x30, [sp, #-0x10]!
    mov     x29, sp

    /* Call the context restorer with the raw context. */
    mov     x0, x2
-    bl      _ZN4Core7ARM_NCE19RestoreGuestContextEPv
+    bl      _ZN4Core6ArmNce19RestoreGuestContextEPv

    /* Save the old value of tpidr_el0. */
    mrs     x8, tpidr_el0
@ -92,18 +92,18 @@ _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
    msr     tpidr_el0, x0

    /* Unlock the context. */
-    bl      _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
+    bl      _ZN4Core6ArmNce22UnlockThreadParametersEPv

    /* Returning from here will enter the guest. */
    ldp     x29, x30, [sp], #0x10
    ret


-/* static void Core::ARM_NCE::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
-.section    .text._ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
-.global     _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_
-.type       _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, %function
-_ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_:
+/* static void Core::ArmNce::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
+.section    .text._ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
+.global     _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
+.type       _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, %function
+_ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
    /* Check to see if we have the correct TLS magic. */
    mrs     x8, tpidr_el0
    ldr     w9, [x8, #(TpidrEl0TlsMagic)]
@ -121,7 +121,7 @@ _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_:

    /* Tail call the restorer. */
    mov     x1, x2
-    b       _ZN4Core7ARM_NCE16SaveGuestContextEPNS_12GuestContextEPv
+    b       _ZN4Core6ArmNce16SaveGuestContextEPNS_12GuestContextEPv

    /* Returning from here will enter host code. */

@ -130,11 +130,11 @@ _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_:
    ret


-/* static void Core::ARM_NCE::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */
-.section    .text._ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits
-.global     _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_
-.type       _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, %function
-_ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:
+/* static void Core::ArmNce::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */
+.section    .text._ZN4Core6ArmNce23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits
+.global     _ZN4Core6ArmNce23GuestFaultSignalHandlerEiPvS1_
+.type       _ZN4Core6ArmNce23GuestFaultSignalHandlerEiPvS1_, %function
+_ZN4Core6ArmNce23GuestFaultSignalHandlerEiPvS1_:
    /* Check to see if we have the correct TLS magic. */
    mrs     x8, tpidr_el0
    ldr     w9, [x8, #(TpidrEl0TlsMagic)]
@ -146,7 +146,7 @@ _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:

    /* Incorrect TLS magic, so this is a host fault. */
    /* Tail call the handler. */
-    b       _ZN4Core7ARM_NCE15HandleHostFaultEiPvS1_
+    b       _ZN4Core6ArmNce15HandleHostFaultEiPvS1_

 1:
    /* Correct TLS magic, so this is a guest fault. */
@ -163,7 +163,7 @@ _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:
    msr     tpidr_el0, x3

    /* Call the handler. */
-    bl       _ZN4Core7ARM_NCE16HandleGuestFaultEPNS_12GuestContextEPvS3_
+    bl       _ZN4Core6ArmNce16HandleGuestFaultEPNS_12GuestContextEPvS3_

    /* If the handler returned false, we want to preserve the host tpidr_el0. */
    cbz     x0, 2f
@ -177,11 +177,11 @@ _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:
    ret


-/* static void Core::ARM_NCE::LockThreadParameters(void* tpidr) */
-.section    .text._ZN4Core7ARM_NCE20LockThreadParametersEPv, "ax", %progbits
-.global     _ZN4Core7ARM_NCE20LockThreadParametersEPv
-.type       _ZN4Core7ARM_NCE20LockThreadParametersEPv, %function
-_ZN4Core7ARM_NCE20LockThreadParametersEPv:
+/* static void Core::ArmNce::LockThreadParameters(void* tpidr) */
+.section    .text._ZN4Core6ArmNce20LockThreadParametersEPv, "ax", %progbits
+.global     _ZN4Core6ArmNce20LockThreadParametersEPv
+.type       _ZN4Core6ArmNce20LockThreadParametersEPv, %function
+_ZN4Core6ArmNce20LockThreadParametersEPv:
    /* Offset to lock member. */
    add     x0, x0, #(TpidrEl0Lock)

@ -205,11 +205,11 @@ _ZN4Core7ARM_NCE20LockThreadParametersEPv:
    ret


-/* static void Core::ARM_NCE::UnlockThreadParameters(void* tpidr) */
-.section    .text._ZN4Core7ARM_NCE22UnlockThreadParametersEPv, "ax", %progbits
-.global     _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
-.type       _ZN4Core7ARM_NCE22UnlockThreadParametersEPv, %function
-_ZN4Core7ARM_NCE22UnlockThreadParametersEPv:
+/* static void Core::ArmNce::UnlockThreadParameters(void* tpidr) */
+.section    .text._ZN4Core6ArmNce22UnlockThreadParametersEPv, "ax", %progbits
+.global     _ZN4Core6ArmNce22UnlockThreadParametersEPv
+.type       _ZN4Core6ArmNce22UnlockThreadParametersEPv, %function
+_ZN4Core6ArmNce22UnlockThreadParametersEPv:
    /* Offset to lock member. */
    add     x0, x0, #(TpidrEl0Lock)

--- a/src/core/arm/nce/guest_context.h
+++ b/src/core/arm/nce/guest_context.h
@ -3,6 +3,8 @@

 #pragma once

+#include <atomic>
+
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "core/arm/arm_interface.h"
@ -10,7 +12,7 @@

 namespace Core {

-class ARM_NCE;
+class ArmNce;
 class System;

 struct HostContext {
@ -33,9 +35,9 @@ struct GuestContext {
    u64 tpidr_el0{};
    std::atomic<u64> esr_el1{};
    u32 nzcv{};
-    u32 svc_swi{};
+    u32 svc{};
    System* system{};
-    ARM_NCE* parent{};
+    ArmNce* parent{};
 };

 // Verify assembly offsets.
--- a/src/core/arm/nce/patcher.cpp
+++ b/src/core/arm/nce/patcher.cpp
@ -280,7 +280,7 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {

    // Store SVC number to execute when we return
    c.MOV(X2, svc_id);
-    c.STR(W2, X1, offsetof(GuestContext, svc_swi));
+    c.STR(W2, X1, offsetof(GuestContext, svc));

    // We are calling a SVC. Clear esr_el1 and return it.
    static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@ -323,7 +323,6 @@ struct System::Impl {
                static_cast<u32>(SystemResultStatus::ErrorLoader) + static_cast<u32>(load_result));
        }
        AddGlueRegistrationForProcess(*app_loader, *main_process);
-        kernel.InitializeCores();

        // Initialize cheat engine
        if (cheat_engine) {
@ -600,14 +599,6 @@ bool System::IsPaused() const {
    return impl->IsPaused();
 }

-void System::InvalidateCpuInstructionCaches() {
-    impl->kernel.InvalidateAllInstructionCaches();
-}
-
-void System::InvalidateCpuInstructionCacheRange(u64 addr, std::size_t size) {
-    impl->kernel.InvalidateCpuInstructionCacheRange(addr, size);
-}
-
 void System::ShutdownMainProcess() {
    impl->ShutdownMainProcess();
 }
@ -696,14 +687,6 @@ const TelemetrySession& System::TelemetrySession() const {
    return *impl->telemetry_session;
 }

-ARM_Interface& System::CurrentArmInterface() {
-    return impl->kernel.CurrentPhysicalCore().ArmInterface();
-}
-
-const ARM_Interface& System::CurrentArmInterface() const {
-    return impl->kernel.CurrentPhysicalCore().ArmInterface();
-}
-
 Kernel::PhysicalCore& System::CurrentPhysicalCore() {
    return impl->kernel.CurrentPhysicalCore();
 }
@ -738,14 +721,6 @@ const Kernel::KProcess* System::ApplicationProcess() const {
    return impl->kernel.ApplicationProcess();
 }

-ARM_Interface& System::ArmInterface(std::size_t core_index) {
-    return impl->kernel.PhysicalCore(core_index).ArmInterface();
-}
-
-const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
-    return impl->kernel.PhysicalCore(core_index).ArmInterface();
-}
-
 ExclusiveMonitor& System::Monitor() {
    return impl->kernel.GetExclusiveMonitor();
 }
--- a/src/core/core.h
+++ b/src/core/core.h
@ -108,7 +108,6 @@ class RenderdocAPI;

 namespace Core {

-class ARM_Interface;
 class CpuManager;
 class Debugger;
 class DeviceMemory;
@ -171,15 +170,6 @@ public:
    /// Check if the core is currently paused.
    [[nodiscard]] bool IsPaused() const;

-    /**
-     * Invalidate the CPU instruction caches
-     * This function should only be used by GDB Stub to support breakpoints, memory updates and
-     * step/continue commands.
-     */
-    void InvalidateCpuInstructionCaches();
-
-    void InvalidateCpuInstructionCacheRange(u64 addr, std::size_t size);
-
    /// Shutdown the main emulated process.
    void ShutdownMainProcess();

@ -244,24 +234,12 @@ public:
    /// Gets and resets core performance statistics
    [[nodiscard]] PerfStatsResults GetAndResetPerfStats();

-    /// Gets an ARM interface to the CPU core that is currently running
-    [[nodiscard]] ARM_Interface& CurrentArmInterface();
-
-    /// Gets an ARM interface to the CPU core that is currently running
-    [[nodiscard]] const ARM_Interface& CurrentArmInterface() const;
-
    /// Gets the physical core for the CPU core that is currently running
    [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore();

    /// Gets the physical core for the CPU core that is currently running
    [[nodiscard]] const Kernel::PhysicalCore& CurrentPhysicalCore() const;

-    /// Gets a reference to an ARM interface for the CPU core with the specified index
-    [[nodiscard]] ARM_Interface& ArmInterface(std::size_t core_index);
-
-    /// Gets a const reference to an ARM interface from the CPU core with the specified index
-    [[nodiscard]] const ARM_Interface& ArmInterface(std::size_t core_index) const;
-
    /// Gets a reference to the underlying CPU manager.
    [[nodiscard]] CpuManager& GetCpuManager();

--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@ -73,12 +73,13 @@ void CpuManager::HandleInterrupt() {
 void CpuManager::MultiCoreRunGuestThread() {
    // Similar to UserModeThreadStarter in HOS
    auto& kernel = system.Kernel();
+    auto* thread = Kernel::GetCurrentThreadPointer(kernel);
    kernel.CurrentScheduler()->OnThreadStart();

    while (true) {
        auto* physical_core = &kernel.CurrentPhysicalCore();
        while (!physical_core->IsInterrupted()) {
-            physical_core->Run();
+            physical_core->RunThread(thread);
            physical_core = &kernel.CurrentPhysicalCore();
        }

@ -110,12 +111,13 @@ void CpuManager::MultiCoreRunIdleThread() {

 void CpuManager::SingleCoreRunGuestThread() {
    auto& kernel = system.Kernel();
+    auto* thread = Kernel::GetCurrentThreadPointer(kernel);
    kernel.CurrentScheduler()->OnThreadStart();

    while (true) {
        auto* physical_core = &kernel.CurrentPhysicalCore();
        if (!physical_core->IsInterrupted()) {
-            physical_core->Run();
+            physical_core->RunThread(thread);
            physical_core = &kernel.CurrentPhysicalCore();
        }

@ -211,8 +213,6 @@ void CpuManager::RunThread(std::stop_token token, std::size_t core) {
        system.GPU().ObtainContext();
    }

-    system.ArmInterface(core).Initialize();
-
    auto& kernel = system.Kernel();
    auto& scheduler = *kernel.CurrentScheduler();
    auto* thread = scheduler.GetSchedulerCurrentThread();
--- a/src/core/debugger/gdbstub.cpp
+++ b/src/core/debugger/gdbstub.cpp
@ -16,6 +16,7 @@
 #include "common/settings.h"
 #include "common/string_util.h"
 #include "core/arm/arm_interface.h"
+#include "core/arm/debug.h"
 #include "core/core.h"
 #include "core/debugger/gdbstub.h"
 #include "core/debugger/gdbstub_arch.h"
@ -310,7 +311,7 @@ void GDBStub::ExecuteCommand(std::string_view packet, std::vector<DebuggerAction
        const auto mem{Common::HexStringToVector(mem_substr, false)};

        if (system.ApplicationMemory().WriteBlock(addr, mem.data(), size)) {
-            system.InvalidateCpuInstructionCacheRange(addr, size);
+            Core::InvalidateInstructionCacheRange(system.ApplicationProcess(), addr, size);
            SendReply(GDB_STUB_REPLY_OK);
        } else {
            SendReply(GDB_STUB_REPLY_ERR);
@ -363,7 +364,7 @@ void GDBStub::HandleBreakpointInsert(std::string_view command) {
    case BreakpointType::Software:
        replaced_instructions[addr] = system.ApplicationMemory().Read32(addr);
        system.ApplicationMemory().Write32(addr, arch->BreakpointInstruction());
-        system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
+        Core::InvalidateInstructionCacheRange(system.ApplicationProcess(), addr, sizeof(u32));
        success = true;
        break;
    case BreakpointType::WriteWatch:
@ -411,7 +412,7 @@ void GDBStub::HandleBreakpointRemove(std::string_view command) {
        const auto orig_insn{replaced_instructions.find(addr)};
        if (orig_insn != replaced_instructions.end()) {
            system.ApplicationMemory().Write32(addr, orig_insn->second);
-            system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
+            Core::InvalidateInstructionCacheRange(system.ApplicationProcess(), addr, sizeof(u32));
            replaced_instructions.erase(addr);
            success = true;
        }
@ -442,114 +443,6 @@ void GDBStub::HandleBreakpointRemove(std::string_view command) {
    }
 }

-// Structure offsets are from Atmosphere
-// See osdbg_thread_local_region.os.horizon.hpp and osdbg_thread_type.os.horizon.hpp
-
-static std::optional<std::string> GetNameFromThreadType32(Core::Memory::Memory& memory,
-                                                          const Kernel::KThread& thread) {
-    // Read thread type from TLS
-    const VAddr tls_thread_type{memory.Read32(thread.GetTlsAddress() + 0x1fc)};
-    const VAddr argument_thread_type{thread.GetArgument()};
-
-    if (argument_thread_type && tls_thread_type != argument_thread_type) {
-        // Probably not created by nnsdk, no name available.
-        return std::nullopt;
-    }
-
-    if (!tls_thread_type) {
-        return std::nullopt;
-    }
-
-    const u16 version{memory.Read16(tls_thread_type + 0x26)};
-    VAddr name_pointer{};
-    if (version == 1) {
-        name_pointer = memory.Read32(tls_thread_type + 0xe4);
-    } else {
-        name_pointer = memory.Read32(tls_thread_type + 0xe8);
-    }
-
-    if (!name_pointer) {
-        // No name provided.
-        return std::nullopt;
-    }
-
-    return memory.ReadCString(name_pointer, 256);
-}
-
-static std::optional<std::string> GetNameFromThreadType64(Core::Memory::Memory& memory,
-                                                          const Kernel::KThread& thread) {
-    // Read thread type from TLS
-    const VAddr tls_thread_type{memory.Read64(thread.GetTlsAddress() + 0x1f8)};
-    const VAddr argument_thread_type{thread.GetArgument()};
-
-    if (argument_thread_type && tls_thread_type != argument_thread_type) {
-        // Probably not created by nnsdk, no name available.
-        return std::nullopt;
-    }
-
-    if (!tls_thread_type) {
-        return std::nullopt;
-    }
-
-    const u16 version{memory.Read16(tls_thread_type + 0x46)};
-    VAddr name_pointer{};
-    if (version == 1) {
-        name_pointer = memory.Read64(tls_thread_type + 0x1a0);
-    } else {
-        name_pointer = memory.Read64(tls_thread_type + 0x1a8);
-    }
-
-    if (!name_pointer) {
-        // No name provided.
-        return std::nullopt;
-    }
-
-    return memory.ReadCString(name_pointer, 256);
-}
-
-static std::optional<std::string> GetThreadName(Core::System& system,
-                                                const Kernel::KThread& thread) {
-    if (system.ApplicationProcess()->Is64Bit()) {
-        return GetNameFromThreadType64(system.ApplicationMemory(), thread);
-    } else {
-        return GetNameFromThreadType32(system.ApplicationMemory(), thread);
-    }
-}
-
-static std::string_view GetThreadWaitReason(const Kernel::KThread& thread) {
-    switch (thread.GetWaitReasonForDebugging()) {
-    case Kernel::ThreadWaitReasonForDebugging::Sleep:
-        return "Sleep";
-    case Kernel::ThreadWaitReasonForDebugging::IPC:
-        return "IPC";
-    case Kernel::ThreadWaitReasonForDebugging::Synchronization:
-        return "Synchronization";
-    case Kernel::ThreadWaitReasonForDebugging::ConditionVar:
-        return "ConditionVar";
-    case Kernel::ThreadWaitReasonForDebugging::Arbitration:
-        return "Arbitration";
-    case Kernel::ThreadWaitReasonForDebugging::Suspended:
-        return "Suspended";
-    default:
-        return "Unknown";
-    }
-}
-
-static std::string GetThreadState(const Kernel::KThread& thread) {
-    switch (thread.GetState()) {
-    case Kernel::ThreadState::Initialized:
-        return "Initialized";
-    case Kernel::ThreadState::Waiting:
-        return fmt::format("Waiting ({})", GetThreadWaitReason(thread));
-    case Kernel::ThreadState::Runnable:
-        return "Runnable";
-    case Kernel::ThreadState::Terminated:
-        return "Terminated";
-    default:
-        return "Unknown";
-    }
-}
-
 static std::string PaginateBuffer(std::string_view buffer, std::string_view request) {
    const auto amount{request.substr(request.find(',') + 1)};
    const auto offset_val{static_cast<u64>(strtoll(request.data(), nullptr, 16))};
@ -562,120 +455,6 @@ static std::string PaginateBuffer(std::string_view buffer, std::string_view requ
    }
 }

-static VAddr GetModuleEnd(Kernel::KProcessPageTable& page_table, VAddr base) {
-    Kernel::KMemoryInfo mem_info;
-    Kernel::Svc::MemoryInfo svc_mem_info;
-    Kernel::Svc::PageInfo page_info;
-    VAddr cur_addr{base};
-
-    // Expect: r-x Code (.text)
-    R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), cur_addr));
-    svc_mem_info = mem_info.GetSvcMemoryInfo();
-    cur_addr = svc_mem_info.base_address + svc_mem_info.size;
-    if (svc_mem_info.state != Kernel::Svc::MemoryState::Code ||
-        svc_mem_info.permission != Kernel::Svc::MemoryPermission::ReadExecute) {
-        return cur_addr - 1;
-    }
-
-    // Expect: r-- Code (.rodata)
-    R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), cur_addr));
-    svc_mem_info = mem_info.GetSvcMemoryInfo();
-    cur_addr = svc_mem_info.base_address + svc_mem_info.size;
-    if (svc_mem_info.state != Kernel::Svc::MemoryState::Code ||
-        svc_mem_info.permission != Kernel::Svc::MemoryPermission::Read) {
-        return cur_addr - 1;
-    }
-
-    // Expect: rw- CodeData (.data)
-    R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), cur_addr));
-    svc_mem_info = mem_info.GetSvcMemoryInfo();
-    cur_addr = svc_mem_info.base_address + svc_mem_info.size;
-    return cur_addr - 1;
-}
-
-static Loader::AppLoader::Modules FindModules(Core::System& system) {
-    Loader::AppLoader::Modules modules;
-
-    auto& page_table = system.ApplicationProcess()->GetPageTable();
-    auto& memory = system.ApplicationMemory();
-    VAddr cur_addr = 0;
-
-    // Look for executable sections in Code or AliasCode regions.
-    while (true) {
-        Kernel::KMemoryInfo mem_info{};
-        Kernel::Svc::PageInfo page_info{};
-        R_ASSERT(
-            page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), cur_addr));
-        auto svc_mem_info = mem_info.GetSvcMemoryInfo();
-
-        if (svc_mem_info.permission == Kernel::Svc::MemoryPermission::ReadExecute &&
-            (svc_mem_info.state == Kernel::Svc::MemoryState::Code ||
-             svc_mem_info.state == Kernel::Svc::MemoryState::AliasCode)) {
-            // Try to read the module name from its path.
-            constexpr s32 PathLengthMax = 0x200;
-            struct {
-                u32 zero;
-                s32 path_length;
-                std::array<char, PathLengthMax> path;
-            } module_path;
-
-            if (memory.ReadBlock(svc_mem_info.base_address + svc_mem_info.size, &module_path,
-                                 sizeof(module_path))) {
-                if (module_path.zero == 0 && module_path.path_length > 0) {
-                    // Truncate module name.
-                    module_path.path[PathLengthMax - 1] = '\0';
-
-                    // Ignore leading directories.
-                    char* path_pointer = module_path.path.data();
-
-                    for (s32 i = 0; i < std::min(PathLengthMax, module_path.path_length) &&
-                                    module_path.path[i] != '\0';
-                         i++) {
-                        if (module_path.path[i] == '/' || module_path.path[i] == '\\') {
-                            path_pointer = module_path.path.data() + i + 1;
-                        }
-                    }
-
-                    // Insert output.
-                    modules.emplace(svc_mem_info.base_address, path_pointer);
-                }
-            }
-        }
-
-        // Check if we're done.
-        const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size;
-        if (next_address <= cur_addr) {
-            break;
-        }
-
-        cur_addr = next_address;
-    }
-
-    return modules;
-}
-
-static VAddr FindMainModuleEntrypoint(Core::System& system) {
-    Loader::AppLoader::Modules modules;
-    system.GetAppLoader().ReadNSOModules(modules);
-
-    // Do we have a module named main?
-    const auto main = std::find_if(modules.begin(), modules.end(),
-                                   [](const auto& key) { return key.second == "main"; });
-
-    if (main != modules.end()) {
-        return main->first;
-    }
-
-    // Do we have any loaded executable sections?
-    modules = FindModules(system);
-    if (!modules.empty()) {
-        return modules.begin()->first;
-    }
-
-    // As a last resort, use the start of the code region.
-    return GetInteger(system.ApplicationProcess()->GetPageTable().GetCodeRegionStart());
-}
-
 void GDBStub::HandleQuery(std::string_view command) {
    if (command.starts_with("TStatus")) {
        // no tracepoint support
@ -687,10 +466,10 @@ void GDBStub::HandleQuery(std::string_view command) {
        const auto target_xml{arch->GetTargetXML()};
        SendReply(PaginateBuffer(target_xml, command.substr(30)));
    } else if (command.starts_with("Offsets")) {
-        const auto main_offset = FindMainModuleEntrypoint(system);
-        SendReply(fmt::format("TextSeg={:x}", main_offset));
+        const auto main_offset = Core::FindMainModuleEntrypoint(system.ApplicationProcess());
+        SendReply(fmt::format("TextSeg={:x}", GetInteger(main_offset)));
    } else if (command.starts_with("Xfer:libraries:read::")) {
-        auto modules = FindModules(system);
+        auto modules = Core::FindModules(system.ApplicationProcess());

        std::string buffer;
        buffer += R"(<?xml version="1.0"?>)";
@ -720,14 +499,14 @@ void GDBStub::HandleQuery(std::string_view command) {

        const auto& threads = system.ApplicationProcess()->GetThreadList();
        for (const auto& thread : threads) {
-            auto thread_name{GetThreadName(system, thread)};
+            auto thread_name{Core::GetThreadName(&thread)};
            if (!thread_name) {
                thread_name = fmt::format("Thread {:d}", thread.GetThreadId());
            }

            buffer += fmt::format(R"(<thread id="{:x}" core="{:d}" name="{}">{}</thread>)",
                                  thread.GetThreadId(), thread.GetActiveCore(),
-                                  EscapeXML(*thread_name), GetThreadState(thread));
+                                  EscapeXML(*thread_name), GetThreadState(&thread));
        }

        buffer += "</threads>";
@ -856,7 +635,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
            reply = "Fastmem is not enabled.\n";
        }
    } else if (command_str == "get info") {
-        auto modules = FindModules(system);
+        auto modules = Core::FindModules(process);

        reply = fmt::format("Process:     {:#x} ({})\n"
                            "Program Id:  {:#018x}\n",
@ -880,7 +659,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {

        for (const auto& [vaddr, name] : modules) {
            reply += fmt::format("  {:#012x} - {:#012x} {}\n", vaddr,
-                                 GetModuleEnd(page_table, vaddr), name);
+                                 GetInteger(Core::GetModuleEnd(process, vaddr)), name);
        }
    } else if (command_str == "get mappings") {
        reply = "Mappings:\n";
--- a/src/core/debugger/gdbstub_arch.cpp
+++ b/src/core/debugger/gdbstub_arch.cpp
@ -24,21 +24,6 @@ static std::string ValueToHex(const T value) {
    return Common::HexToString(mem);
 }

-template <typename T>
-static T GetSIMDRegister(const std::array<u32, 64>& simd_regs, size_t offset) {
-    static_assert(std::is_trivially_copyable_v<T>);
-    T value{};
-    std::memcpy(&value, reinterpret_cast<const u8*>(simd_regs.data()) + sizeof(T) * offset,
-                sizeof(T));
-    return value;
-}
-
-template <typename T>
-static void PutSIMDRegister(std::array<u32, 64>& simd_regs, size_t offset, const T value) {
-    static_assert(std::is_trivially_copyable_v<T>);
-    std::memcpy(reinterpret_cast<u8*>(simd_regs.data()) + sizeof(T) * offset, &value, sizeof(T));
-}
-
 // For sample XML files see the GDB source /gdb/features
 // This XML defines what the registers are for this specific ARM device
 std::string_view GDBStubA64::GetTargetXML() const {
@ -184,12 +169,16 @@ std::string GDBStubA64::RegRead(const Kernel::KThread* thread, size_t id) const
        return "";
    }

-    const auto& context{thread->GetContext64()};
-    const auto& gprs{context.cpu_registers};
-    const auto& fprs{context.vector_registers};
+    const auto& context{thread->GetContext()};
+    const auto& gprs{context.r};
+    const auto& fprs{context.v};

-    if (id < SP_REGISTER) {
+    if (id < FP_REGISTER) {
        return ValueToHex(gprs[id]);
+    } else if (id == FP_REGISTER) {
+        return ValueToHex(context.fp);
+    } else if (id == LR_REGISTER) {
+        return ValueToHex(context.lr);
    } else if (id == SP_REGISTER) {
        return ValueToHex(context.sp);
    } else if (id == PC_REGISTER) {
@ -212,10 +201,14 @@ void GDBStubA64::RegWrite(Kernel::KThread* thread, size_t id, std::string_view v
        return;
    }

-    auto& context{thread->GetContext64()};
+    auto& context{thread->GetContext()};

-    if (id < SP_REGISTER) {
-        context.cpu_registers[id] = HexToValue<u64>(value);
+    if (id < FP_REGISTER) {
+        context.r[id] = HexToValue<u64>(value);
+    } else if (id == FP_REGISTER) {
+        context.fp = HexToValue<u64>(value);
+    } else if (id == LR_REGISTER) {
+        context.lr = HexToValue<u64>(value);
    } else if (id == SP_REGISTER) {
        context.sp = HexToValue<u64>(value);
    } else if (id == PC_REGISTER) {
@ -223,7 +216,7 @@ void GDBStubA64::RegWrite(Kernel::KThread* thread, size_t id, std::string_view v
    } else if (id == PSTATE_REGISTER) {
        context.pstate = HexToValue<u32>(value);
    } else if (id >= Q0_REGISTER && id < FPSR_REGISTER) {
-        context.vector_registers[id - Q0_REGISTER] = HexToValue<u128>(value);
+        context.v[id - Q0_REGISTER] = HexToValue<u128>(value);
    } else if (id == FPSR_REGISTER) {
        context.fpsr = HexToValue<u32>(value);
    } else if (id == FPCR_REGISTER) {
@ -381,22 +374,20 @@ std::string GDBStubA32::RegRead(const Kernel::KThread* thread, size_t id) const
        return "";
    }

-    const auto& context{thread->GetContext32()};
-    const auto& gprs{context.cpu_registers};
-    const auto& fprs{context.extension_registers};
+    const auto& context{thread->GetContext()};
+    const auto& gprs{context.r};
+    const auto& fprs{context.v};

    if (id <= PC_REGISTER) {
-        return ValueToHex(gprs[id]);
+        return ValueToHex(static_cast<u32>(gprs[id]));
    } else if (id == CPSR_REGISTER) {
-        return ValueToHex(context.cpsr);
+        return ValueToHex(context.pstate);
    } else if (id >= D0_REGISTER && id < Q0_REGISTER) {
-        const u64 dN{GetSIMDRegister<u64>(fprs, id - D0_REGISTER)};
-        return ValueToHex(dN);
+        return ValueToHex(fprs[id - D0_REGISTER][0]);
    } else if (id >= Q0_REGISTER && id < FPSCR_REGISTER) {
-        const u128 qN{GetSIMDRegister<u128>(fprs, id - Q0_REGISTER)};
-        return ValueToHex(qN);
+        return ValueToHex(fprs[id - Q0_REGISTER]);
    } else if (id == FPSCR_REGISTER) {
-        return ValueToHex(context.fpscr);
+        return ValueToHex(context.fpcr | context.fpsr);
    } else {
        return "";
    }
@ -407,19 +398,20 @@ void GDBStubA32::RegWrite(Kernel::KThread* thread, size_t id, std::string_view v
        return;
    }

-    auto& context{thread->GetContext32()};
-    auto& fprs{context.extension_registers};
+    auto& context{thread->GetContext()};
+    auto& fprs{context.v};

    if (id <= PC_REGISTER) {
-        context.cpu_registers[id] = HexToValue<u32>(value);
+        context.r[id] = HexToValue<u32>(value);
    } else if (id == CPSR_REGISTER) {
-        context.cpsr = HexToValue<u32>(value);
+        context.pstate = HexToValue<u32>(value);
    } else if (id >= D0_REGISTER && id < Q0_REGISTER) {
-        PutSIMDRegister(fprs, id - D0_REGISTER, HexToValue<u64>(value));
+        fprs[id - D0_REGISTER] = {HexToValue<u64>(value), 0};
    } else if (id >= Q0_REGISTER && id < FPSCR_REGISTER) {
-        PutSIMDRegister(fprs, id - Q0_REGISTER, HexToValue<u128>(value));
+        fprs[id - Q0_REGISTER] = HexToValue<u128>(value);
    } else if (id == FPSCR_REGISTER) {
-        context.fpscr = HexToValue<u32>(value);
+        context.fpcr = HexToValue<u32>(value);
+        context.fpsr = HexToValue<u32>(value);
    }
 }

--- a/src/core/debugger/gdbstub_arch.h
+++ b/src/core/debugger/gdbstub_arch.h
@ -36,6 +36,7 @@ public:
    u32 BreakpointInstruction() const override;

 private:
+    static constexpr u32 FP_REGISTER = 29;
    static constexpr u32 LR_REGISTER = 30;
    static constexpr u32 SP_REGISTER = 31;
    static constexpr u32 PC_REGISTER = 32;
--- a/src/core/hle/kernel/k_page_table_base.cpp
+++ b/src/core/hle/kernel/k_page_table_base.cpp
@ -69,8 +69,16 @@ public:
 };

 template <typename AddressType>
-void InvalidateInstructionCache(Core::System& system, AddressType addr, u64 size) {
-    system.InvalidateCpuInstructionCacheRange(GetInteger(addr), size);
+void InvalidateInstructionCache(KernelCore& kernel, AddressType addr, u64 size) {
+    // Forward the (addr, size) range to InvalidateCacheRange() on every per-core ARM interface
+    // of every process in the kernel's process list. Cores for which GetArmInterface()
+    // returns null are skipped.
+    // TODO: lock the process list
+    for (auto& process : kernel.GetProcessList()) {
+        for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
+            auto* interface = process->GetArmInterface(i);
+            if (interface) {
+                interface->InvalidateCacheRange(GetInteger(addr), size);
+            }
+        }
+    }
 }

 template <typename AddressType>
@ -1261,7 +1269,7 @@ Result KPageTableBase::UnmapCodeMemory(KProcessAddress dst_address, KProcessAddr
    bool reprotected_pages = false;
    SCOPE_EXIT({
        if (reprotected_pages && any_code_pages) {
-            InvalidateInstructionCache(m_system, dst_address, size);
+            InvalidateInstructionCache(m_kernel, dst_address, size);
        }
    });

@ -1997,7 +2005,7 @@ Result KPageTableBase::SetProcessMemoryPermission(KProcessAddress addr, size_t s
        for (const auto& block : pg) {
            StoreDataCache(GetHeapVirtualPointer(m_kernel, block.GetAddress()), block.GetSize());
        }
-        InvalidateInstructionCache(m_system, addr, size);
+        InvalidateInstructionCache(m_kernel, addr, size);
    }

    R_SUCCEED();
@ -3239,7 +3247,7 @@ Result KPageTableBase::WriteDebugMemory(KProcessAddress dst_address, KProcessAdd
    R_TRY(PerformCopy());

    // Invalidate the instruction cache, as this svc allows modifying executable pages.
-    InvalidateInstructionCache(m_system, dst_address, size);
+    InvalidateInstructionCache(m_kernel, dst_address, size);

    R_SUCCEED();
 }
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@ -13,6 +13,12 @@
 #include "core/hle/kernel/k_thread_queue.h"
 #include "core/hle/kernel/k_worker_task_manager.h"

+#include "core/arm/dynarmic/arm_dynarmic_32.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#ifdef HAS_NCE
+#include "core/arm/nce/arm_nce.h"
+#endif
+
 namespace Kernel {

 namespace {
@ -957,10 +963,8 @@ Result KProcess::Run(s32 priority, size_t stack_size) {
    R_TRY(m_handle_table.Add(std::addressof(thread_handle), main_thread));

    // Set the thread arguments.
-    main_thread->GetContext32().cpu_registers[0] = 0;
-    main_thread->GetContext64().cpu_registers[0] = 0;
-    main_thread->GetContext32().cpu_registers[1] = thread_handle;
-    main_thread->GetContext64().cpu_registers[1] = thread_handle;
+    main_thread->GetContext().r[0] = 0;
+    main_thread->GetContext().r[1] = thread_handle;

    // Update our state.
    this->ChangeState((state == State::Created) ? State::Running : State::RunningAttached);
@ -1199,6 +1203,9 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
    m_is_hbl = is_hbl;
    m_ideal_core_id = metadata.GetMainThreadCore();

+    // Set up emulation context.
+    this->InitializeInterfaces();
+
    // We succeeded.
    R_SUCCEED();
 }
@ -1227,6 +1234,31 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
 #endif
 }

+void KProcess::InitializeInterfaces() {
+    // Hand this process to its Memory instance via SetCurrentPageTable() before any
+    // CPU backend is constructed.
+    this->GetMemory().SetCurrentPageTable(*this);
+
+    const bool is_64_bit = this->Is64Bit();
+
+#ifdef HAS_NCE
+    // 64-bit processes get the NCE backend on every core when it is enabled in settings.
+    if (is_64_bit && Settings::IsNceEnabled()) {
+        for (size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+            m_arm_interfaces[core] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, core);
+        }
+        return;
+    }
+#endif
+
+    // Otherwise create one Dynarmic backend per core, matching the process's bitness.
+    for (size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+        auto& monitor =
+            static_cast<Core::DynarmicExclusiveMonitor&>(m_kernel.GetExclusiveMonitor());
+        if (is_64_bit) {
+            m_arm_interfaces[core] = std::make_unique<Core::ArmDynarmic64>(
+                m_kernel.System(), m_kernel.IsMulticore(), this, monitor, core);
+        } else {
+            m_arm_interfaces[core] = std::make_unique<Core::ArmDynarmic32>(
+                m_kernel.System(), m_kernel.IsMulticore(), this, monitor, core);
+        }
+    }
+}
+
 bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) {
    const auto watch{std::find_if(m_watchpoints.begin(), m_watchpoints.end(), [&](const auto& wp) {
        return wp.type == DebugWatchpointType::None;
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@ -5,6 +5,7 @@

 #include <map>

+#include "core/arm/arm_interface.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/k_address_arbiter.h"
@ -106,6 +107,8 @@ private:
    bool m_is_suspended{};
    bool m_is_immortal{};
    bool m_is_handle_table_initialized{};
+    std::array<std::unique_ptr<Core::ArmInterface>, Core::Hardware::NUM_CPU_CORES>
+        m_arm_interfaces{};
    std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_running_threads{};
    std::array<u64, Core::Hardware::NUM_CPU_CORES> m_running_thread_idle_counts{};
    std::array<u64, Core::Hardware::NUM_CPU_CORES> m_running_thread_switch_counts{};
@ -476,6 +479,10 @@ public:
    }
 #endif

+    Core::ArmInterface* GetArmInterface(size_t core_index) const {
+        // Non-owning pointer; null when no interface has been stored for this core.
+        const auto& slot = m_arm_interfaces[core_index];
+        return slot.get();
+    }
+
 public:
    // Attempts to insert a watchpoint into a free slot. Returns false if none are available.
    bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type);
@ -493,6 +500,8 @@ public:

    void LoadModule(CodeSet code_set, KProcessAddress base_addr);

+    void InitializeInterfaces();
+
    Core::Memory::Memory& GetMemory() const;

 public:
--- a/src/core/hle/kernel/k_process_page_table.h
+++ b/src/core/hle/kernel/k_process_page_table.h
@ -7,10 +7,6 @@
 #include "core/hle/kernel/k_scoped_lock.h"
 #include "core/hle/kernel/svc_types.h"

-namespace Core {
-class ARM_Interface;
-}
-
 namespace Kernel {

 class KProcessPageTable {
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@ -494,12 +494,7 @@ void KScheduler::ScheduleImplFiber() {
 }

 void KScheduler::Unload(KThread* thread) {
-    auto& cpu_core = m_kernel.System().ArmInterface(m_core_id);
-    cpu_core.SaveContext(thread->GetContext32());
-    cpu_core.SaveContext(thread->GetContext64());
-    // Save the TPIDR_EL0 system register in case it was modified.
-    thread->SetTpidrEl0(cpu_core.GetTPIDR_EL0());
-    cpu_core.ClearExclusiveState();
+    m_kernel.PhysicalCore(m_core_id).SaveContext(thread);

    // Check if the thread is terminated by checking the DPC flags.
    if ((thread->GetStackParameters().dpc_flags & static_cast<u32>(DpcFlag::Terminated)) == 0) {
@ -509,14 +504,7 @@ void KScheduler::Unload(KThread* thread) {
 }

 void KScheduler::Reload(KThread* thread) {
-    auto& cpu_core = m_kernel.System().ArmInterface(m_core_id);
-    auto* process = thread->GetOwnerProcess();
-    cpu_core.LoadContext(thread->GetContext32());
-    cpu_core.LoadContext(thread->GetContext64());
-    cpu_core.SetTlsAddress(GetInteger(thread->GetTlsAddress()));
-    cpu_core.SetTPIDR_EL0(thread->GetTpidrEl0());
-    cpu_core.LoadWatchpointArray(process ? &process->GetWatchpoints() : nullptr);
-    cpu_core.ClearExclusiveState();
+    // Restoring the thread's context is delegated to the physical core for m_core_id.
+    m_kernel.PhysicalCore(m_core_id).LoadContext(thread);
 }

 void KScheduler::ClearPreviousThread(KernelCore& kernel, KThread* thread) {
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@ -41,24 +41,25 @@ namespace {

 constexpr inline s32 TerminatingThreadPriority = Kernel::Svc::SystemThreadPriorityHighest - 1;

-static void ResetThreadContext32(Kernel::KThread::ThreadContext32& context, u32 stack_top,
-                                 u32 entry_point, u32 arg) {
-    context = {};
-    context.cpu_registers[0] = arg;
-    context.cpu_registers[15] = entry_point;
-    context.cpu_registers[13] = stack_top;
-    context.fpscr = 0;
+// Resets ctx to its value-initialized state, then seeds r[0] with arg, r[15] with
+// entry_point and r[13] with stack_top. The values are stored as passed (u64).
+static void ResetThreadContext32(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point,
+                                 u64 arg) {
+    ctx = {};
+    ctx.r[0] = arg;
+    ctx.r[15] = entry_point;
+    ctx.r[13] = stack_top;
+    ctx.fpcr = 0;
+    ctx.fpsr = 0;
 }

-static void ResetThreadContext64(Kernel::KThread::ThreadContext64& context, u64 stack_top,
-                                 u64 entry_point, u64 arg) {
-    context = {};
-    context.cpu_registers[0] = arg;
-    context.cpu_registers[18] = Kernel::KSystemControl::GenerateRandomU64() | 1;
-    context.pc = entry_point;
-    context.sp = stack_top;
-    context.fpcr = 0;
-    context.fpsr = 0;
+// Resets ctx to its value-initialized state, then seeds r[0] with arg, pc with entry_point
+// and sp with stack_top.
+static void ResetThreadContext64(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point,
+                                 u64 arg) {
+    ctx = {};
+    ctx.r[0] = arg;
+    // r[18] receives a random 64-bit value with bit 0 forced to 1.
+    ctx.r[18] = Kernel::KSystemControl::GenerateRandomU64() | 1;
+    ctx.pc = entry_point;
+    ctx.sp = stack_top;
+    ctx.fpcr = 0;
+    ctx.fpsr = 0;
 }
 } // namespace

@ -223,9 +224,11 @@ Result KThread::Initialize(KThreadFunction func, uintptr_t arg, KProcessAddress
    }

    // Initialize thread context.
-    ResetThreadContext64(m_thread_context_64, GetInteger(user_stack_top), GetInteger(func), arg);
-    ResetThreadContext32(m_thread_context_32, static_cast<u32>(GetInteger(user_stack_top)),
-                         static_cast<u32>(GetInteger(func)), static_cast<u32>(arg));
+    if (m_parent != nullptr && !m_parent->Is64Bit()) {
+        ResetThreadContext32(m_thread_context, GetInteger(user_stack_top), GetInteger(func), arg);
+    } else {
+        ResetThreadContext64(m_thread_context, GetInteger(user_stack_top), GetInteger(func), arg);
+    }

    // Setup the stack parameters.
    StackParameters& sp = this->GetStackParameters();
@ -823,20 +826,7 @@ void KThread::CloneFpuStatus() {
    ASSERT(this->GetOwnerProcess() != nullptr);
    ASSERT(this->GetOwnerProcess() == GetCurrentProcessPointer(m_kernel));

-    if (this->GetOwnerProcess()->Is64Bit()) {
-        // Clone FPSR and FPCR.
-        ThreadContext64 cur_ctx{};
-        m_kernel.System().CurrentArmInterface().SaveContext(cur_ctx);
-
-        this->GetContext64().fpcr = cur_ctx.fpcr;
-        this->GetContext64().fpsr = cur_ctx.fpsr;
-    } else {
-        // Clone FPSCR.
-        ThreadContext32 cur_ctx{};
-        m_kernel.System().CurrentArmInterface().SaveContext(cur_ctx);
-
-        this->GetContext32().fpscr = cur_ctx.fpscr;
-    }
+    m_kernel.CurrentPhysicalCore().CloneFpuStatus(this);
 }

 Result KThread::SetActivity(Svc::ThreadActivity activity) {
@ -912,7 +902,7 @@ Result KThread::SetActivity(Svc::ThreadActivity activity) {
    R_SUCCEED();
 }

-Result KThread::GetThreadContext3(Common::ScratchBuffer<u8>& out) {
+Result KThread::GetThreadContext3(Svc::ThreadContext* out) {
    // Lock ourselves.
    KScopedLightLock lk{m_activity_pause_lock};

@ -926,18 +916,16 @@ Result KThread::GetThreadContext3(Common::ScratchBuffer<u8>& out) {

        // If we're not terminating, get the thread's user context.
        if (!this->IsTerminationRequested()) {
+            *out = m_thread_context;
+
+            // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
+            constexpr u32 El0Aarch64PsrMask = 0xF0000000;
+            constexpr u32 El0Aarch32PsrMask = 0xFE0FFE20;
+
            if (m_parent->Is64Bit()) {
-                // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
-                auto context = GetContext64();
-                context.pstate &= 0xFF0FFE20;
-                out.resize_destructive(sizeof(context));
-                std::memcpy(out.data(), std::addressof(context), sizeof(context));
+                out->pstate &= El0Aarch64PsrMask;
            } else {
-                // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
-                auto context = GetContext32();
-                context.cpsr &= 0xFF0FFE20;
-                out.resize_destructive(sizeof(context));
-                std::memcpy(out.data(), std::addressof(context), sizeof(context));
+                out->pstate &= El0Aarch32PsrMask;
            }
        }
    }
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@ -38,7 +38,6 @@ namespace Core {
 namespace Memory {
 class Memory;
 }
-class ARM_Interface;
 class System;
 } // namespace Core

@ -137,8 +136,6 @@ public:
    ~KThread() override;

 public:
-    using ThreadContext32 = Core::ARM_Interface::ThreadContext32;
-    using ThreadContext64 = Core::ARM_Interface::ThreadContext64;
    using WaiterList = Common::IntrusiveListBaseTraits<KThread>::ListType;

    /**
@ -246,31 +243,22 @@ public:
     * @returns The value of the TPIDR_EL0 register.
     */
    u64 GetTpidrEl0() const {
-        return m_thread_context_64.tpidr;
+        // Read from the single Svc::ThreadContext held by this thread.
+        return m_thread_context.tpidr;
    }

    /// Sets the value of the TPIDR_EL0 Read/Write system register for this thread.
    void SetTpidrEl0(u64 value) {
-        m_thread_context_64.tpidr = value;
-        m_thread_context_32.tpidr = static_cast<u32>(value);
+        // Stored untruncated in the single shared context.
+        m_thread_context.tpidr = value;
    }

    void CloneFpuStatus();

-    ThreadContext32& GetContext32() {
-        return m_thread_context_32;
+    /// Returns a mutable reference to this thread's Svc::ThreadContext.
+    Svc::ThreadContext& GetContext() {
+        return m_thread_context;
    }

-    const ThreadContext32& GetContext32() const {
-        return m_thread_context_32;
-    }
-
-    ThreadContext64& GetContext64() {
-        return m_thread_context_64;
-    }
-
-    const ThreadContext64& GetContext64() const {
-        return m_thread_context_64;
+    /// Returns a read-only reference to this thread's Svc::ThreadContext.
+    const Svc::ThreadContext& GetContext() const {
+        return m_thread_context;
    }

    std::shared_ptr<Common::Fiber>& GetHostContext();
@ -577,7 +565,7 @@ public:

    void RemoveWaiter(KThread* thread);

-    Result GetThreadContext3(Common::ScratchBuffer<u8>& out);
+    Result GetThreadContext3(Svc::ThreadContext* out);

    KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) {
        return this->RemoveWaiterByKey(out_has_waiters, key, false);
@ -734,8 +722,7 @@ private:
                                   std::function<void()>&& init_func);

    // For core KThread implementation
-    ThreadContext32 m_thread_context_32{};
-    ThreadContext64 m_thread_context_64{};
+    Svc::ThreadContext m_thread_context{};
    Common::IntrusiveListNode m_process_list_node;
    Common::IntrusiveRedBlackTreeNode m_condvar_arbiter_tree_node{};
    s32 m_priority{};
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@ -99,13 +99,6 @@ struct KernelCore::Impl {
        RegisterHostThread(nullptr);
    }

-    void InitializeCores() {
-        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
-            cores[core_id]->Initialize((*application_process).Is64Bit());
-            system.ApplicationMemory().SetCurrentPageTable(*application_process, core_id);
-        }
-    }
-
    void TerminateApplicationProcess() {
        application_process.load()->Terminate();
    }
@ -205,7 +198,7 @@ struct KernelCore::Impl {
            const s32 core{static_cast<s32>(i)};

            schedulers[i] = std::make_unique<Kernel::KScheduler>(system.Kernel());
-            cores[i] = std::make_unique<Kernel::PhysicalCore>(i, system, *schedulers[i]);
+            cores[i] = std::make_unique<Kernel::PhysicalCore>(system.Kernel(), i);

            auto* main_thread{Kernel::KThread::Create(system.Kernel())};
            main_thread->SetCurrentCore(core);
@ -880,10 +873,6 @@ void KernelCore::Initialize() {
    impl->Initialize(*this);
 }

-void KernelCore::InitializeCores() {
-    impl->InitializeCores();
-}
-
 void KernelCore::Shutdown() {
    impl->Shutdown();
 }
@ -993,21 +982,6 @@ const KAutoObjectWithListContainer& KernelCore::ObjectListContainer() const {
    return *impl->global_object_list_container;
 }

-void KernelCore::InvalidateAllInstructionCaches() {
-    for (auto& physical_core : impl->cores) {
-        physical_core->ArmInterface().ClearInstructionCache();
-    }
-}
-
-void KernelCore::InvalidateCpuInstructionCacheRange(KProcessAddress addr, std::size_t size) {
-    for (auto& physical_core : impl->cores) {
-        if (!physical_core->IsInitialized()) {
-            continue;
-        }
-        physical_core->ArmInterface().InvalidateCacheRange(GetInteger(addr), size);
-    }
-}
-
 void KernelCore::PrepareReschedule(std::size_t id) {
    // TODO: Reimplement, this
 }
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@ -104,9 +104,6 @@ public:
    /// Resets the kernel to a clean slate for use.
    void Initialize();

-    /// Initializes the CPU cores.
-    void InitializeCores();
-
    /// Clears all resources in use by the kernel instance.
    void Shutdown();

@ -181,10 +178,6 @@ public:

    const KAutoObjectWithListContainer& ObjectListContainer() const;

-    void InvalidateAllInstructionCaches();
-
-    void InvalidateCpuInstructionCacheRange(KProcessAddress addr, std::size_t size);
-
    /// Registers all kernel objects with the global emulation state, this is purely for tracking
    /// leaks after emulation has been shutdown.
    void RegisterKernelObject(KAutoObject* object);
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@ -1,62 +1,206 @@
 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include "common/scope_exit.h"
 #include "common/settings.h"
-#include "core/arm/dynarmic/arm_dynarmic_32.h"
-#include "core/arm/dynarmic/arm_dynarmic_64.h"
-#ifdef HAS_NCE
-#include "core/arm/nce/arm_nce.h"
-#endif
 #include "core/core.h"
-#include "core/hle/kernel/k_scheduler.h"
+#include "core/debugger/debugger.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/physical_core.h"
+#include "core/hle/kernel/svc.h"

 namespace Kernel {

-PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KScheduler& scheduler)
-    : m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} {
-#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
-    // TODO(bunnei): Initialization relies on a core being available. We may later replace this with
-    // an NCE interface or a 32-bit instance of Dynarmic. This should be abstracted out to a CPU
-    // manager.
-    auto& kernel = system.Kernel();
-    m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
-        system, kernel.IsMulticore(),
-        reinterpret_cast<Core::DynarmicExclusiveMonitor&>(kernel.GetExclusiveMonitor()),
-        m_core_index);
-#else
-#error Platform not supported yet.
-#endif
+PhysicalCore::PhysicalCore(KernelCore& kernel, std::size_t core_index)
+    : m_kernel{kernel}, m_core_index{core_index} {
+    // Record whether the kernel is running in single-core mode.
+    const bool multicore = kernel.IsMulticore();
+    m_is_single_core = !multicore;
 }
-
 PhysicalCore::~PhysicalCore() = default;

-void PhysicalCore::Initialize(bool is_64_bit) {
-#if defined(HAS_NCE)
-    if (Settings::IsNceEnabled()) {
-        m_arm_interface = std::make_unique<Core::ARM_NCE>(m_system, m_system.Kernel().IsMulticore(),
-                                                          m_core_index);
-        return;
+// Runs (or single-steps) `thread` on the ARM interface its owner process holds for this core,
+// then dispatches on the returned halt reason:
+//  - instruction breakpoint / prefetch abort: notify the debugger (or log a backtrace when no
+//    debugger is enabled), request a debug suspend and return;
+//  - data abort: notify the debugger of the halted watchpoint (if enabled), suspend and return;
+//  - supervisor call: perform the SVC and return;
+//  - otherwise: return on BreakLoop or when not in single-core mode; in single-core mode
+//    without BreakLoop the loop runs the thread again.
+// Returns immediately, without running, if the core is already marked interrupted.
+void PhysicalCore::RunThread(Kernel::KThread* thread) {
+    auto* process = thread->GetOwnerProcess();
+    auto& system = m_kernel.System();
+    auto* interface = process->GetArmInterface(m_core_index);
+
+    interface->Initialize();
+
+    // Publishes `interface`/`thread` as this core's running state under m_guard.
+    // Returns false (publishing nothing) when an interrupt is already pending.
+    const auto EnterContext = [&]() {
+        system.EnterCPUProfile();
+
+        // Lock the core context.
+        std::scoped_lock lk{m_guard};
+
+        // Check if we are already interrupted. If we are, we can just stop immediately.
+        if (m_is_interrupted) {
+            return false;
+        }
+
+        // Mark that we are running.
+        m_arm_interface = interface;
+        m_current_thread = thread;
+
+        // Acquire the lock on the thread parameters.
+        // This allows us to force synchronization with Interrupt.
+        interface->LockThread(thread);
+
+        return true;
+    };
+
+    // Reverses EnterContext: unlocks the thread, then clears the running state under m_guard.
+    const auto ExitContext = [&]() {
+        // Unlock the thread.
+        interface->UnlockThread(thread);
+
+        // Lock the core context.
+        std::scoped_lock lk{m_guard};
+
+        // On exit, we no longer are running.
+        m_arm_interface = nullptr;
+        m_current_thread = nullptr;
+
+        system.ExitCPUProfile();
+    };
+
+    while (true) {
+        // If the thread is scheduled for termination, exit.
+        if (thread->HasDpc() && thread->IsTerminationRequested()) {
+            thread->Exit();
+        }
+
+        // Notify the debugger and go to sleep if a step was performed
+        // and this thread has been scheduled again.
+        if (thread->GetStepState() == StepState::StepPerformed) {
+            system.GetDebugger().NotifyThreadStopped(thread);
+            thread->RequestSuspend(SuspendType::Debug);
+            return;
+        }
+
+        // Otherwise, run the thread.
+        Core::HaltReason hr{};
+        {
+            // If we were interrupted, exit immediately.
+            if (!EnterContext()) {
+                return;
+            }
+
+            if (thread->GetStepState() == StepState::StepPending) {
+                hr = interface->StepThread(thread);
+
+                if (True(hr & Core::HaltReason::StepThread)) {
+                    thread->SetStepState(StepState::StepPerformed);
+                }
+            } else {
+                hr = interface->RunThread(thread);
+            }
+
+            ExitContext();
+        }
+
+        // Determine why we stopped.
+        const bool supervisor_call = True(hr & Core::HaltReason::SupervisorCall);
+        const bool prefetch_abort = True(hr & Core::HaltReason::PrefetchAbort);
+        const bool breakpoint = True(hr & Core::HaltReason::InstructionBreakpoint);
+        const bool data_abort = True(hr & Core::HaltReason::DataAbort);
+        const bool interrupt = True(hr & Core::HaltReason::BreakLoop);
+
+        // Since scheduling may occur here, we cannot use any cached
+        // state after returning from calls we make.
+
+        // Notify the debugger and go to sleep if a breakpoint was hit,
+        // or if the thread is unable to continue for any reason.
+        if (breakpoint || prefetch_abort) {
+            if (breakpoint) {
+                interface->RewindBreakpointInstruction();
+            }
+            if (system.DebuggerEnabled()) {
+                system.GetDebugger().NotifyThreadStopped(thread);
+            } else {
+                interface->LogBacktrace(process);
+            }
+            thread->RequestSuspend(SuspendType::Debug);
+            return;
+        }
+
+        // Notify the debugger and go to sleep on data abort.
+        if (data_abort) {
+            if (system.DebuggerEnabled()) {
+                // NOTE(review): HaltedWatchpoint() is dereferenced unconditionally here;
+                // presumably it is non-null whenever DataAbort is reported - confirm.
+                system.GetDebugger().NotifyThreadWatchpoint(thread, *interface->HaltedWatchpoint());
+            }
+            thread->RequestSuspend(SuspendType::Debug);
+            return;
+        }
+
+        // Handle system calls.
+        if (supervisor_call) {
+            // Perform call.
+            Svc::Call(system, interface->GetSvcNumber());
+            return;
+        }
+
+        // Handle external interrupt sources.
+        if (interrupt || !m_is_single_core) {
+            return;
+        }
    }
-#endif
-#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
-    auto& kernel = m_system.Kernel();
-    if (!is_64_bit) {
-        // We already initialized a 64-bit core, replace with a 32-bit one.
-        m_arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
-            m_system, kernel.IsMulticore(),
-            reinterpret_cast<Core::DynarmicExclusiveMonitor&>(kernel.GetExclusiveMonitor()),
-            m_core_index);
-    }
-#else
-#error Platform not supported yet.
-#endif
 }

-void PhysicalCore::Run() {
-    m_arm_interface->Run();
-    m_arm_interface->ClearExclusiveState();
+void PhysicalCore::LoadContext(const KThread* thread) {
+    // Kernel threads have no owner process and do not run on emulated CPU cores.
+    auto* const owner = thread->GetOwnerProcess();
+    if (owner == nullptr) {
+        return;
+    }
+
+    // Nothing to load into when the process has no interface for this core.
+    auto* const arm = owner->GetArmInterface(m_core_index);
+    if (arm == nullptr) {
+        return;
+    }
+
+    // Push the thread's context, its TLS address (as TPIDRRO_EL0) and the owner's
+    // watchpoint array into the interface.
+    arm->SetContext(thread->GetContext());
+    arm->SetTpidrroEl0(GetInteger(thread->GetTlsAddress()));
+    arm->SetWatchpointArray(&owner->GetWatchpoints());
+}
+
+void PhysicalCore::LoadSvcArguments(const KProcess& process, std::span<const uint64_t, 8> args) {
+    // Hand the eight SVC argument values to the process's interface for this core.
+    auto* const arm = process.GetArmInterface(m_core_index);
+    arm->SetSvcArguments(args);
+}
+
+void PhysicalCore::SaveContext(KThread* thread) const {
+    // Kernel threads have no owner process and do not run on emulated CPU cores.
+    auto* const owner = thread->GetOwnerProcess();
+    if (owner == nullptr) {
+        return;
+    }
+
+    // Copy the interface's current context back into the thread, if an interface exists.
+    if (auto* const arm = owner->GetArmInterface(m_core_index); arm != nullptr) {
+        arm->GetContext(thread->GetContext());
+    }
+}
+
+void PhysicalCore::SaveSvcArguments(KProcess& process, std::span<uint64_t, 8> args) const {
+    // Fill `args` with the eight SVC argument values from the process's interface for this core.
+    auto* const arm = process.GetArmInterface(m_core_index);
+    arm->GetSvcArguments(args);
+}
+
+void PhysicalCore::CloneFpuStatus(KThread* dst) const {
+    // Snapshot the context currently held by the owner process's interface for this core.
+    Svc::ThreadContext live{};
+    dst->GetOwnerProcess()->GetArmInterface(m_core_index)->GetContext(live);
+
+    // Only fpcr and fpsr are copied into the destination thread's context.
+    auto& saved = dst->GetContext();
+    saved.fpcr = live.fpcr;
+    saved.fpsr = live.fpsr;
+}
+
+void PhysicalCore::LogBacktrace() {
+    // Without a current process there is nothing to trace.
+    auto* const current = GetCurrentProcessPointer(m_kernel);
+    if (current == nullptr) {
+        return;
+    }
+
+    // Log through the process's interface for this core, when one exists.
+    if (auto* const arm = current->GetArmInterface(m_core_index); arm != nullptr) {
+        arm->LogBacktrace(current);
+    }
 }

 void PhysicalCore::Idle() {
@ -69,16 +213,31 @@ bool PhysicalCore::IsInterrupted() const {
 }

+// Marks this core as interrupted while holding m_guard, wakes one waiter on m_on_interrupt
+// and, when an ARM interface is currently recorded in m_arm_interface, signals an interrupt
+// to it for the thread recorded in m_current_thread.
 void PhysicalCore::Interrupt() {
-    std::unique_lock lk{m_guard};
+    // Lock core context.
+    std::scoped_lock lk{m_guard};
+
+    // Load members.
+    auto* arm_interface = m_arm_interface;
+    auto* thread = m_current_thread;
+
+    // Add interrupt flag.
    m_is_interrupted = true;
-    m_arm_interface->SignalInterrupt();
-    m_on_interrupt.notify_all();
+
+    // Interrupt ourselves.
+    m_on_interrupt.notify_one();
+
+    // If there is no thread running, we are done.
+    if (arm_interface == nullptr) {
+        return;
+    }
+
+    // Interrupt the CPU.
+    arm_interface->SignalInterrupt(thread);
 }

+// Clears this core's interrupted flag while holding m_guard.
 void PhysicalCore::ClearInterrupt() {
-    std::unique_lock lk{m_guard};
+    std::scoped_lock lk{m_guard};
    m_is_interrupted = false;
-    m_arm_interface->ClearInterrupt();
 }

 } // namespace Kernel
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@ -11,7 +11,7 @@
 #include "core/arm/arm_interface.h"

 namespace Kernel {
-class KScheduler;
+class KernelCore;
 } // namespace Kernel

 namespace Core {
@ -23,62 +23,55 @@ namespace Kernel {

+// One emulated CPU core as seen by the kernel: identified by its core index, it moves thread
+// context and SVC arguments to and from the owning process's ARM interface and carries the
+// per-core interrupt flag together with the mutex and condition variable that protect it.
 class PhysicalCore {
 public:
-    PhysicalCore(std::size_t core_index_, Core::System& system_, KScheduler& scheduler_);
+    PhysicalCore(KernelCore& kernel, std::size_t core_index);
    ~PhysicalCore();

    YUZU_NON_COPYABLE(PhysicalCore);
    YUZU_NON_MOVEABLE(PhysicalCore);

-    /// Initialize the core for the specified parameters.
-    void Initialize(bool is_64_bit);
+    // Execute guest code running on the given thread.
+    void RunThread(KThread* thread);

-    /// Execute current jit state
-    void Run();
+    // Copy context from thread to current core.
+    void LoadContext(const KThread* thread);
+    void LoadSvcArguments(const KProcess& process, std::span<const uint64_t, 8> args);

+    // Copy context from current core to thread.
+    void SaveContext(KThread* thread) const;
+    void SaveSvcArguments(KProcess& process, std::span<uint64_t, 8> args) const;
+
+    // Copy floating point status registers to the target thread.
+    void CloneFpuStatus(KThread* dst) const;
+
+    // Log backtrace of current processor state.
+    void LogBacktrace();
+
+    // Wait for an interrupt.
    void Idle();

-    /// Interrupt this physical core.
+    // Interrupt this core.
    void Interrupt();

-    /// Clear this core's interrupt
+    // Clear this core's interrupt.
    void ClearInterrupt();

-    /// Check if this core is interrupted
+    // Check if this core is interrupted.
    bool IsInterrupted() const;

-    bool IsInitialized() const {
-        return m_arm_interface != nullptr;
-    }
-
-    Core::ARM_Interface& ArmInterface() {
-        return *m_arm_interface;
-    }
-
-    const Core::ARM_Interface& ArmInterface() const {
-        return *m_arm_interface;
-    }
-
    std::size_t CoreIndex() const {
        return m_core_index;
    }

-    Kernel::KScheduler& Scheduler() {
-        return m_scheduler;
-    }
-
-    const Kernel::KScheduler& Scheduler() const {
-        return m_scheduler;
-    }
-
 private:
+    KernelCore& m_kernel;
    const std::size_t m_core_index;
-    Core::System& m_system;
-    Kernel::KScheduler& m_scheduler;

+    // Held by Interrupt() and ClearInterrupt() while they touch the interrupt state below.
    std::mutex m_guard;
+    // Notified by Interrupt().
    std::condition_variable m_on_interrupt;
-    std::unique_ptr<Core::ARM_Interface> m_arm_interface;
+    // Raw pointers read by Interrupt() to decide whether, and for which thread, to signal
+    // the CPU. NOTE(review): presumably set for the duration of RunThread() -- confirm.
+    Core::ArmInterface* m_arm_interface{};
+    KThread* m_current_thread{};
    bool m_is_interrupted{};
+    // NOTE(review): not referenced in the code visible here -- confirm where this is set.
+    bool m_is_single_core{};
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
--- a/src/core/hle/kernel/svc.h
+++ b/src/core/hle/kernel/svc.h
@ -9,6 +9,8 @@ namespace Core {
 class System;
 }

+#include <span>
+
 #include "common/common_types.h"
 #include "core/hle/kernel/svc_types.h"
 #include "core/hle/result.h"
@ -520,15 +522,15 @@ void CallSecureMonitor64From32(Core::System& system, ilp32::SecureMonitorArgumen
 void CallSecureMonitor64(Core::System& system, lp64::SecureMonitorArguments* args);

 // Defined in svc_light_ipc.cpp.
-void SvcWrap_ReplyAndReceiveLight64From32(Core::System& system);
-void SvcWrap_ReplyAndReceiveLight64(Core::System& system);
+void SvcWrap_ReplyAndReceiveLight64From32(Core::System& system, std::span<uint64_t, 8> args);
+void SvcWrap_ReplyAndReceiveLight64(Core::System& system, std::span<uint64_t, 8> args);

-void SvcWrap_SendSyncRequestLight64From32(Core::System& system);
-void SvcWrap_SendSyncRequestLight64(Core::System& system);
+void SvcWrap_SendSyncRequestLight64From32(Core::System& system, std::span<uint64_t, 8> args);
+void SvcWrap_SendSyncRequestLight64(Core::System& system, std::span<uint64_t, 8> args);

 // Defined in svc_secure_monitor_call.cpp.
-void SvcWrap_CallSecureMonitor64From32(Core::System& system);
-void SvcWrap_CallSecureMonitor64(Core::System& system);
+void SvcWrap_CallSecureMonitor64From32(Core::System& system, std::span<uint64_t, 8> args);
+void SvcWrap_CallSecureMonitor64(Core::System& system, std::span<uint64_t, 8> args);

 // Perform a supervisor call by index.
 void Call(Core::System& system, u32 imm);
--- a/src/core/hle/kernel/svc/svc_exception.cpp
+++ b/src/core/hle/kernel/svc/svc_exception.cpp
@ -103,9 +103,7 @@ void Break(Core::System& system, BreakReason reason, u64 info1, u64 info2) {

        handle_debug_buffer(info1, info2);

-        auto* const current_thread = GetCurrentThreadPointer(system.Kernel());
-        const auto thread_processor_id = current_thread->GetActiveCore();
-        system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
+        system.CurrentPhysicalCore().LogBacktrace();
    }

    const bool is_hbl = GetCurrentProcess(system.Kernel()).IsHbl();
--- a/src/core/hle/kernel/svc/svc_light_ipc.cpp
+++ b/src/core/hle/kernel/svc/svc_light_ipc.cpp
@ -37,37 +37,36 @@ Result ReplyAndReceiveLight64From32(Core::System& system, Handle session_handle,
 // Custom ABI implementation for light IPC.

+// Shared wrapper for the light IPC SVCs. Takes args[0] as the session handle and args[1..7],
+// truncated to 32 bits, as the message words; invokes `cb` with them; then stores the raw
+// result code in args[0] and the message words back into args[1..7].
 template <typename F>
-static void SvcWrap_LightIpc(Core::System& system, F&& cb) {
-    auto& core = system.CurrentArmInterface();
-    std::array<u32, 7> arguments{};
+static void SvcWrap_LightIpc(Core::System& system, std::span<uint64_t, 8> args, F&& cb) {
+    std::array<u32, 7> ipc_args{};

-    Handle session_handle = static_cast<Handle>(core.GetReg(0));
+    Handle session_handle = static_cast<Handle>(args[0]);
    for (int i = 0; i < 7; i++) {
-        arguments[i] = static_cast<u32>(core.GetReg(i + 1));
+        ipc_args[i] = static_cast<u32>(args[i + 1]);
    }

-    Result ret = cb(system, session_handle, arguments.data());
+    Result ret = cb(system, session_handle, ipc_args.data());

-    core.SetReg(0, ret.raw);
+    args[0] = ret.raw;
    for (int i = 0; i < 7; i++) {
-        core.SetReg(i + 1, arguments[i]);
+        args[i + 1] = ipc_args[i];
    }
 }

-void SvcWrap_SendSyncRequestLight64(Core::System& system) {
-    SvcWrap_LightIpc(system, SendSyncRequestLight64);
+// Runs SendSyncRequestLight64 on the SVC argument array via SvcWrap_LightIpc.
+void SvcWrap_SendSyncRequestLight64(Core::System& system, std::span<uint64_t, 8> args) {
+    SvcWrap_LightIpc(system, args, SendSyncRequestLight64);
 }

-void SvcWrap_ReplyAndReceiveLight64(Core::System& system) {
-    SvcWrap_LightIpc(system, ReplyAndReceiveLight64);
+// Runs ReplyAndReceiveLight64 on the SVC argument array via SvcWrap_LightIpc.
+void SvcWrap_ReplyAndReceiveLight64(Core::System& system, std::span<uint64_t, 8> args) {
+    SvcWrap_LightIpc(system, args, ReplyAndReceiveLight64);
 }

-void SvcWrap_SendSyncRequestLight64From32(Core::System& system) {
-    SvcWrap_LightIpc(system, SendSyncRequestLight64From32);
+// Runs SendSyncRequestLight64From32 on the SVC argument array via SvcWrap_LightIpc.
+void SvcWrap_SendSyncRequestLight64From32(Core::System& system, std::span<uint64_t, 8> args) {
+    SvcWrap_LightIpc(system, args, SendSyncRequestLight64From32);
 }

-void SvcWrap_ReplyAndReceiveLight64From32(Core::System& system) {
-    SvcWrap_LightIpc(system, ReplyAndReceiveLight64From32);
+// Runs ReplyAndReceiveLight64From32 on the SVC argument array via SvcWrap_LightIpc.
+void SvcWrap_ReplyAndReceiveLight64From32(Core::System& system, std::span<uint64_t, 8> args) {
+    SvcWrap_LightIpc(system, args, ReplyAndReceiveLight64From32);
 }

 } // namespace Kernel::Svc
--- a/src/core/hle/kernel/svc/svc_secure_monitor_call.cpp
+++ b/src/core/hle/kernel/svc/svc_secure_monitor_call.cpp
@ -22,31 +22,29 @@ void CallSecureMonitor64From32(Core::System& system, ilp32::SecureMonitorArgumen

 // Custom ABI for CallSecureMonitor.

-void SvcWrap_CallSecureMonitor64(Core::System& system) {
-    auto& core = system.CurrentPhysicalCore().ArmInterface();
-    lp64::SecureMonitorArguments args{};
+// Copies all eight SVC argument values into an lp64::SecureMonitorArguments, calls
+// CallSecureMonitor64 on it, then copies the eight values back into `args`.
+void SvcWrap_CallSecureMonitor64(Core::System& system, std::span<uint64_t, 8> args) {
+    lp64::SecureMonitorArguments smc_args{};
    for (int i = 0; i < 8; i++) {
-        args.r[i] = core.GetReg(i);
+        smc_args.r[i] = args[i];
    }

-    CallSecureMonitor64(system, std::addressof(args));
+    CallSecureMonitor64(system, std::addressof(smc_args));

    for (int i = 0; i < 8; i++) {
-        core.SetReg(i, args.r[i]);
+        args[i] = smc_args.r[i];
    }
 }

-void SvcWrap_CallSecureMonitor64From32(Core::System& system) {
-    auto& core = system.CurrentPhysicalCore().ArmInterface();
-    ilp32::SecureMonitorArguments args{};
+// Copies all eight SVC argument values, truncated to 32 bits, into an
+// ilp32::SecureMonitorArguments, calls CallSecureMonitor64From32 on it, then copies the
+// eight values back into `args`.
+void SvcWrap_CallSecureMonitor64From32(Core::System& system, std::span<uint64_t, 8> args) {
+    ilp32::SecureMonitorArguments smc_args{};
    for (int i = 0; i < 8; i++) {
-        args.r[i] = static_cast<u32>(core.GetReg(i));
+        smc_args.r[i] = static_cast<u32>(args[i]);
    }

-    CallSecureMonitor64From32(system, std::addressof(args));
+    CallSecureMonitor64From32(system, std::addressof(smc_args));

    for (int i = 0; i < 8; i++) {
-        core.SetReg(i, args.r[i]);
+        args[i] = smc_args.r[i];
    }
 }

--- a/src/core/hle/kernel/svc/svc_thread.cpp
+++ b/src/core/hle/kernel/svc/svc_thread.cpp
@ -90,8 +90,6 @@ Result StartThread(Core::System& system, Handle thread_handle) {

 /// Called when a thread exits
 void ExitThread(Core::System& system) {
-    LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
-
    auto* const current_thread = GetCurrentThreadPointer(system.Kernel());
    system.GlobalSchedulerContext().RemoveThread(current_thread);
    current_thread->Exit();
@ -147,47 +145,19 @@ Result GetThreadContext3(Core::System& system, u64 out_context, Handle thread_ha
    R_UNLESS(thread.IsNotNull(), ResultInvalidHandle);

    // Require the handle be to a non-current thread in the current process.
-    const auto* current_process = GetCurrentProcessPointer(kernel);
-    R_UNLESS(current_process == thread->GetOwnerProcess(), ResultInvalidId);
+    R_UNLESS(thread->GetOwnerProcess() == GetCurrentProcessPointer(kernel), ResultInvalidHandle);
+    R_UNLESS(thread.GetPointerUnsafe() != GetCurrentThreadPointer(kernel), ResultBusy);

-    // Verify that the thread isn't terminated.
-    R_UNLESS(thread->GetState() != ThreadState::Terminated, ResultTerminationRequested);
+    // Get the thread context.
+    Svc::ThreadContext context{};
+    R_TRY(thread->GetThreadContext3(std::addressof(context)));

-    /// Check that the thread is not the current one.
-    /// NOTE: Nintendo does not check this, and thus the following loop will deadlock.
-    R_UNLESS(thread.GetPointerUnsafe() != GetCurrentThreadPointer(kernel), ResultInvalidId);
+    // Copy the thread context to user space.
+    R_UNLESS(
+        GetCurrentMemory(kernel).WriteBlock(out_context, std::addressof(context), sizeof(context)),
+        ResultInvalidPointer);

-    // Try to get the thread context until the thread isn't current on any core.
-    while (true) {
-        KScopedSchedulerLock sl{kernel};
-
-        // TODO(bunnei): Enforce that thread is suspended for debug here.
-
-        // If the thread's raw state isn't runnable, check if it's current on some core.
-        if (thread->GetRawState() != ThreadState::Runnable) {
-            bool current = false;
-            for (auto i = 0; i < static_cast<s32>(Core::Hardware::NUM_CPU_CORES); ++i) {
-                if (thread.GetPointerUnsafe() == kernel.Scheduler(i).GetSchedulerCurrentThread()) {
-                    current = true;
-                    break;
-                }
-            }
-
-            // If the thread is current, retry until it isn't.
-            if (current) {
-                continue;
-            }
-        }
-
-        // Get the thread context.
-        static thread_local Common::ScratchBuffer<u8> context;
-        R_TRY(thread->GetThreadContext3(context));
-
-        // Copy the thread context to user space.
-        GetCurrentMemory(kernel).WriteBlock(out_context, context.data(), context.size());
-
-        R_SUCCEED();
-    }
+    R_SUCCEED();
 }

 /// Gets the priority for the specified thread
--- a/src/core/hle/kernel/svc_generator.py
+++ b/src/core/hle/kernel/svc_generator.py
@ -374,11 +374,11 @@ def get_registers(parse_result, bitness):

 # Collects possibly multiple source registers into the named C++ value.
 def emit_gather(sources, name, type_name, reg_size):
-    get_fn = f"GetReg{reg_size*8}"
+    get_fn = f"GetArg{reg_size*8}"

    if len(sources) == 1:
        s, = sources
-        line = f"{name} = Convert<{type_name}>({get_fn}(system, {s}));"
+        line = f"{name} = Convert<{type_name}>({get_fn}(args, {s}));"
        return [line]

    var_type = f"std::array<uint{reg_size*8}_t, {len(sources)}>"
@ -387,7 +387,7 @@ def emit_gather(sources, name, type_name, reg_size):
    ]
    for i in range(0, len(sources)):
        lines.append(
-            f"{name}_gather[{i}] = {get_fn}(system, {sources[i]});")
+            f"{name}_gather[{i}] = {get_fn}(args, {sources[i]});")

    lines.append(f"{name} = Convert<{type_name}>({name}_gather);")
    return lines
@ -396,12 +396,12 @@ def emit_gather(sources, name, type_name, reg_size):
 # Produces one or more statements which assign the named C++ value
 # into possibly multiple registers.
 def emit_scatter(destinations, name, reg_size):
-    set_fn = f"SetReg{reg_size*8}"
+    set_fn = f"SetArg{reg_size*8}"
    reg_type = f"uint{reg_size*8}_t"

    if len(destinations) == 1:
        d, = destinations
-        line = f"{set_fn}(system, {d}, Convert<{reg_type}>({name}));"
+        line = f"{set_fn}(args, {d}, Convert<{reg_type}>({name}));"
        return [line]

    var_type = f"std::array<{reg_type}, {len(destinations)}>"
@ -411,7 +411,7 @@ def emit_scatter(destinations, name, reg_size):

    for i in range(0, len(destinations)):
        lines.append(
-            f"{set_fn}(system, {destinations[i]}, {name}_scatter[{i}]);")
+            f"{set_fn}(args, {destinations[i]}, {name}_scatter[{i}]);")

    return lines

@ -433,7 +433,7 @@ def emit_lines(lines, indent='    '):
 def emit_wrapper(wrapped_fn, suffix, register_info, arguments, byte_size):
    return_write, output_writes, input_reads = register_info
    lines = [
-        f"static void SvcWrap_{wrapped_fn}{suffix}(Core::System& system) {{"
+        f"static void SvcWrap_{wrapped_fn}{suffix}(Core::System& system, std::span<uint64_t, 8> args) {{"
    ]

    # Get everything ready.
@ -498,6 +498,8 @@ namespace Core {
 class System;
 }

+#include <span>
+
 #include "common/common_types.h"
 #include "core/hle/kernel/svc_types.h"
 #include "core/hle/result.h"
@ -524,15 +526,15 @@ void CallSecureMonitor64From32(Core::System& system, ilp32::SecureMonitorArgumen
 void CallSecureMonitor64(Core::System& system, lp64::SecureMonitorArguments* args);

 // Defined in svc_light_ipc.cpp.
-void SvcWrap_ReplyAndReceiveLight64From32(Core::System& system);
-void SvcWrap_ReplyAndReceiveLight64(Core::System& system);
+void SvcWrap_ReplyAndReceiveLight64From32(Core::System& system, std::span<uint64_t, 8> args);
+void SvcWrap_ReplyAndReceiveLight64(Core::System& system, std::span<uint64_t, 8> args);

-void SvcWrap_SendSyncRequestLight64From32(Core::System& system);
-void SvcWrap_SendSyncRequestLight64(Core::System& system);
+void SvcWrap_SendSyncRequestLight64From32(Core::System& system, std::span<uint64_t, 8> args);
+void SvcWrap_SendSyncRequestLight64(Core::System& system, std::span<uint64_t, 8> args);

 // Defined in svc_secure_monitor_call.cpp.
-void SvcWrap_CallSecureMonitor64From32(Core::System& system);
-void SvcWrap_CallSecureMonitor64(Core::System& system);
+void SvcWrap_CallSecureMonitor64From32(Core::System& system, std::span<uint64_t, 8> args);
+void SvcWrap_CallSecureMonitor64(Core::System& system, std::span<uint64_t, 8> args);

 // Perform a supervisor call by index.
 void Call(Core::System& system, u32 imm);
@ -550,20 +552,20 @@ PROLOGUE_CPP = """

 namespace Kernel::Svc {

-static uint32_t GetReg32(Core::System& system, int n) {
-    return static_cast<uint32_t>(system.CurrentArmInterface().GetReg(n));
+static uint32_t GetArg32(std::span<uint64_t, 8> args, int n) {
+    return static_cast<uint32_t>(args[n]);
 }

-static void SetReg32(Core::System& system, int n, uint32_t result) {
-    system.CurrentArmInterface().SetReg(n, static_cast<uint64_t>(result));
+static void SetArg32(std::span<uint64_t, 8> args, int n, uint32_t result) {
+    args[n] = result;
 }

-static uint64_t GetReg64(Core::System& system, int n) {
-    return system.CurrentArmInterface().GetReg(n);
+static uint64_t GetArg64(std::span<uint64_t, 8> args, int n) {
+    return args[n];
 }

-static void SetReg64(Core::System& system, int n, uint64_t result) {
-    system.CurrentArmInterface().SetReg(n, result);
+static void SetArg64(std::span<uint64_t, 8> args, int n, uint64_t result) {
+    args[n] = result;
 }

 // Like bit_cast, but handles the case when the source and dest
@ -590,15 +592,20 @@ EPILOGUE_CPP = """

 void Call(Core::System& system, u32 imm) {
    auto& kernel = system.Kernel();
+    auto& process = GetCurrentProcess(kernel);
+
+    std::array<uint64_t, 8> args;
+    kernel.CurrentPhysicalCore().SaveSvcArguments(process, args);
    kernel.EnterSVCProfile();

-    if (GetCurrentProcess(system.Kernel()).Is64Bit()) {
-        Call64(system, imm);
+    if (process.Is64Bit()) {
+        Call64(system, imm, args);
    } else {
-        Call32(system, imm);
+        Call32(system, imm, args);
    }

    kernel.ExitSVCProfile();
+    kernel.CurrentPhysicalCore().LoadSvcArguments(process, args);
 }

 } // namespace Kernel::Svc
@ -609,13 +616,13 @@ def emit_call(bitness, names, suffix):
    bit_size = REG_SIZES[bitness]*8
    indent = "    "
    lines = [
-        f"static void Call{bit_size}(Core::System& system, u32 imm) {{",
+        f"static void Call{bit_size}(Core::System& system, u32 imm, std::span<uint64_t, 8> args) {{",
        f"{indent}switch (static_cast<SvcId>(imm)) {{"
    ]

    for _, name in names:
        lines.append(f"{indent}case SvcId::{name}:")
-        lines.append(f"{indent*2}return SvcWrap_{name}{suffix}(system);")
+        lines.append(f"{indent*2}return SvcWrap_{name}{suffix}(system, args);")

    lines.append(f"{indent}default:")
    lines.append(
--- a/src/core/hle/service/jit/jit.cpp
+++ b/src/core/hle/service/jit/jit.cpp
@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include "core/arm/debug.h"
 #include "core/arm/symbols.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_code_memory.h"
@ -98,8 +99,9 @@ public:
        if (return_value == 0) {
            // The callback has written to the output executable code range,
            // requiring an instruction cache invalidation
-            system.InvalidateCpuInstructionCacheRange(configuration.user_rx_memory.offset,
-                                                      configuration.user_rx_memory.size);
+            Core::InvalidateInstructionCacheRange(process.GetPointerUnsafe(),
+                                                  configuration.user_rx_memory.offset,
+                                                  configuration.user_rx_memory.size);

            // Write back to the IPC output buffer, if provided
            if (ctx.CanWriteBuffer()) {
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@ -43,13 +43,9 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
 struct Memory::Impl {
    explicit Impl(Core::System& system_) : system{system_} {}

-    void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) {
+    // Makes `process`'s page table implementation the current one and points its fastmem
+    // arena at the device memory buffer's virtual base pointer.
+    void SetCurrentPageTable(Kernel::KProcess& process) {
        current_page_table = &process.GetPageTable().GetImpl();
        current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
-
-        const std::size_t address_space_width = process.GetPageTable().GetAddressSpaceWidth();
-
-        system.ArmInterface(core_id).PageTableChanged(*current_page_table, address_space_width);
    }

    void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
@ -871,8 +867,8 @@ void Memory::Reset() {
    impl = std::make_unique<Impl>(system);
 }

-void Memory::SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) {
-    impl->SetCurrentPageTable(process, core_id);
+// Forwards to Impl::SetCurrentPageTable for `process`.
+void Memory::SetCurrentPageTable(Kernel::KProcess& process) {
+    impl->SetCurrentPageTable(process);
 }

 void Memory::MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
--- a/src/core/memory.h
+++ b/src/core/memory.h
@ -73,7 +73,7 @@ public:
     *
     * @param process The process to use the page table of.
     */
-    void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id);
+    void SetCurrentPageTable(Kernel::KProcess& process);

    /**
     * Maps an allocated buffer onto a region of the emulated process address space.
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@ -109,41 +109,11 @@ json GetProcessorStateData(const std::string& architecture, u64 entry_point, u64
    return out;
 }

-json GetProcessorStateDataAuto(Core::System& system) {
-    const auto* process{system.ApplicationProcess()};
-    auto& arm{system.CurrentArmInterface()};
-
-    Core::ARM_Interface::ThreadContext64 context{};
-    arm.SaveContext(context);
-
-    return GetProcessorStateData(process->Is64Bit() ? "AArch64" : "AArch32",
-                                 GetInteger(process->GetEntryPoint()), context.sp, context.pc,
-                                 context.pstate, context.cpu_registers);
-}
-
-json GetBacktraceData(Core::System& system) {
-    auto out = json::array();
-    const auto& backtrace{system.CurrentArmInterface().GetBacktrace()};
-    for (const auto& entry : backtrace) {
-        out.push_back({
-            {"module", entry.module},
-            {"address", fmt::format("{:016X}", entry.address)},
-            {"original_address", fmt::format("{:016X}", entry.original_address)},
-            {"offset", fmt::format("{:016X}", entry.offset)},
-            {"symbol_name", entry.name},
-        });
-    }
-
-    return out;
-}
-
+// Builds a JSON report containing the "yuzu_version" and "report_common" sections, the
+// latter generated from `title_id`, ResultSuccess and `timestamp`.
+// NOTE(review): `system` is no longer referenced in this body -- confirm whether the
+// parameter is kept only for caller compatibility.
 json GetFullDataAuto(const std::string& timestamp, u64 title_id, Core::System& system) {
    json out;

    out["yuzu_version"] = GetYuzuVersionData();
    out["report_common"] = GetReportCommonData(title_id, ResultSuccess, timestamp);
-    out["processor_state"] = GetProcessorStateDataAuto(system);
-    out["backtrace"] = GetBacktraceData(system);

    return out;
 }
@ -351,8 +321,6 @@ void Reporter::SaveErrorReport(u64 title_id, Result result,

    out["yuzu_version"] = GetYuzuVersionData();
    out["report_common"] = GetReportCommonData(title_id, result, timestamp);
-    out["processor_state"] = GetProcessorStateDataAuto(system);
-    out["backtrace"] = GetBacktraceData(system);

    out["error_custom_text"] = {
        {"main", custom_text_main.value_or("")},
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@ -7,7 +7,7 @@
 #include "yuzu/debugger/wait_tree.h"
 #include "yuzu/uisettings.h"

-#include "core/arm/arm_interface.h"
+#include "core/arm/debug.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_class_token.h"
 #include "core/hle/kernel/k_handle_table.h"
@ -129,7 +129,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons
        return list;
    }

-    auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(system, thread.GetContext64());
+    auto backtrace = Core::GetBacktraceFromContext(thread.GetOwnerProcess(), thread.GetContext());

    for (auto& entry : backtrace) {
        std::string s = fmt::format("{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address,
@ -238,10 +238,10 @@ QString WaitTreeThread::GetText() const {
        break;
    }

-    const auto& context = thread.GetContext64();
+    const auto& context = thread.GetContext();
    const QString pc_info = tr(" PC = 0x%1 LR = 0x%2")
                                .arg(context.pc, 8, 16, QLatin1Char{'0'})
-                                .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'});
+                                .arg(context.lr, 8, 16, QLatin1Char{'0'});
    return QStringLiteral("%1%2 (%3) ")
        .arg(WaitTreeSynchronizationObject::GetText(), pc_info, status);
 }