WIP: ASM dispatcher for recompiler

This commit is contained in:
Connor McLaughlin 2020-09-06 00:17:21 +10:00
parent 5099d0e62f
commit b90dbf34c4
12 changed files with 354 additions and 38 deletions

View File

@ -1,6 +1,5 @@
#pragma once
#include "common/bitfield.h"
#include "cpu_code_cache.h"
#include "types.h"
#include <array>
#include <bitset>
@ -97,16 +96,4 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
}
/// Invalidates any code pages which overlap the specified range.
/// `address` is the start of the written range and `word_count` the number of 32-bit words written.
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
{
  // Nothing written means nothing overlaps; this also keeps the last-byte computation below from underflowing.
  if (word_count == 0)
    return;

  const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;

  // The written range is [address, address + size), so the last page is the one holding the final byte.
  // Using the one-past-the-end address would touch an extra page whenever the range ends on a page boundary,
  // and that extra page index may lie beyond m_ram_code_bits.
  const u32 end_page = static_cast<u32>((address + word_count * sizeof(u32) - 1) / CPU_CODE_CACHE_PAGE_SIZE);
  for (u32 page = start_page; page <= end_page; page++)
  {
    // Only pages flagged as containing code need their blocks invalidated.
    if (m_ram_code_bits[page])
      CPU::CodeCache::InvalidateBlocksWithPageIndex(page);
  }
}
} // namespace Bus

View File

@ -35,14 +35,8 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
static JitCodeBuffer s_code_buffer;
// Sizing of the fast block lookup table: one slot per 32-bit word of RAM, plus one slot per 32-bit word of BIOS.
enum : u32
{
  FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE >> 2,
  FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE >> 2,
  FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
};
std::array<CodeBlock::HostCodePointer, FAST_MAP_TOTAL_SLOT_COUNT> s_fast_map;
CodeBlock::HostCodePointer s_asm_dispatcher;
ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
{
@ -51,6 +45,7 @@ ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
((pc & Bus::RAM_MASK) >> 2);
}
static void CompileDispatcher();
static void FastCompileBlockFunction();
static void ResetFastMap()
@ -111,6 +106,7 @@ void Initialize(bool use_recompiler)
}
ResetFastMap();
CompileDispatcher();
#else
s_use_recompiler = false;
#endif
@ -238,9 +234,21 @@ void Execute()
#ifdef WITH_RECOMPILER
void CompileDispatcher()
{
Recompiler::CodeGenerator cg(&s_code_buffer);
s_asm_dispatcher = cg.CompileDispatcher();
}
/// Returns a pointer to the first slot of the fast block lookup table.
CodeBlock::HostCodePointer* GetFastMapPointer()
{
  CodeBlock::HostCodePointer* const first_slot = s_fast_map.data();
  return first_slot;
}
void ExecuteRecompiler()
{
g_state.frame_done = false;
#if 0
while (!g_state.frame_done)
{
if (HasPendingInterrupt())
@ -261,6 +269,9 @@ void ExecuteRecompiler()
TimingEvents::RunEvents();
}
#else
s_asm_dispatcher();
#endif
// in case we switch to interpreter...
g_state.regs.npc = g_state.regs.pc;
@ -291,6 +302,7 @@ void Flush()
#ifdef WITH_RECOMPILER
s_code_buffer.Reset();
ResetFastMap();
CompileDispatcher();
#endif
}

View File

@ -1,4 +1,5 @@
#pragma once
#include "bus.h"
#include "common/bitfield.h"
#include "common/jit_code_buffer.h"
#include "cpu_types.h"
@ -9,6 +10,13 @@
namespace CPU {
// Sizing of the fast block lookup table: one slot per 32-bit word of RAM, plus one slot per 32-bit word of BIOS.
enum : u32
{
  FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE >> 2,
  FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE >> 2,
  FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
};
union CodeBlockKey
{
u32 bits;
@ -86,6 +94,7 @@ void Shutdown();
void Execute();
#ifdef WITH_RECOMPILER
CodeBlock::HostCodePointer* GetFastMapPointer();
void ExecuteRecompiler();
#endif
@ -102,6 +111,18 @@ template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block);
void InterpretUncachedBlock();
/// Invalidates any code pages which overlap the specified range.
/// `address` is the start of the written range and `word_count` the number of 32-bit words written.
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
{
  // Nothing written means nothing overlaps; this also keeps the last-byte computation below from underflowing.
  if (word_count == 0)
    return;

  const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;

  // The written range is [address, address + size), so the last page is the one holding the final byte.
  // Using the one-past-the-end address would touch an extra page whenever the range ends on a page boundary,
  // and that extra page index may lie beyond Bus::m_ram_code_bits.
  const u32 end_page = static_cast<u32>((address + word_count * sizeof(u32) - 1) / CPU_CODE_CACHE_PAGE_SIZE);
  for (u32 page = start_page; page <= end_page; page++)
  {
    // Only pages flagged as containing code need their blocks invalidated.
    if (Bus::m_ram_code_bits[page])
      CPU::CodeCache::InvalidateBlocksWithPageIndex(page);
  }
}
}; // namespace CodeCache
} // namespace CPU

View File

@ -1381,6 +1381,7 @@ void DispatchInterrupt()
{
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
GTE::ExecuteInstruction(g_state.next_instruction.bits);

View File

@ -25,6 +25,8 @@ public:
bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
CodeBlock::HostCodePointer CompileDispatcher();
//////////////////////////////////////////////////////////////////////////
// Code Generation
//////////////////////////////////////////////////////////////////////////
@ -67,6 +69,7 @@ public:
void EmitAddCPUStructField(u32 offset, const Value& value);
void EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr);
void EmitStoreGlobal(void* ptr, const Value& value);
void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr);
// Automatically generates an exception handler.
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size);

View File

@ -6,6 +6,7 @@
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "settings.h"
#include "timing_event.h"
Log_SetChannel(CPU::Recompiler);
namespace a64 = vixl::aarch64;
@ -26,6 +27,16 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u64 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
/// Returns the signed distance from `current` to `target`, expressed in 4-byte units.
/// Both pointers are asserted to be 4-byte aligned.
static s64 GetPCDisplacement(const void* current, const void* target)
{
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));

  const ptrdiff_t from_address = reinterpret_cast<ptrdiff_t>(current);
  const ptrdiff_t to_address = reinterpret_cast<ptrdiff_t>(target);
  const ptrdiff_t byte_delta = to_address - from_address;

  // Convert the byte distance into a count of 4-byte units.
  return static_cast<s64>(byte_delta >> 2);
}
static const a64::WRegister GetHostReg8(HostReg reg)
{
return a64::WRegister(reg);
@ -172,11 +183,11 @@ void CodeGenerator::EmitBeginBlock()
// Save the link register, since we'll be calling functions.
const bool link_reg_allocated = m_register_cache.AllocateHostReg(30);
DebugAssert(link_reg_allocated);
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
// Store the CPU struct pointer. TODO: make this better.
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
DebugAssert(cpu_reg_allocated);
m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
}
void CodeGenerator::EmitEndBlock()
@ -185,6 +196,7 @@ void CodeGenerator::EmitEndBlock()
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret();
}
@ -200,6 +212,7 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret();
}
@ -958,13 +971,6 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
m_register_cache.PopCallerSavedRegisters();
}
/// Returns the signed distance from `current` to `target`, expressed in 4-byte units.
/// Both pointers are asserted to be 4-byte aligned.
static s64 GetBranchDisplacement(const void* current, const void* target)
{
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));

  const ptrdiff_t from_address = reinterpret_cast<ptrdiff_t>(current);
  const ptrdiff_t to_address = reinterpret_cast<ptrdiff_t>(target);
  const ptrdiff_t byte_delta = to_address - from_address;

  // Convert the byte distance into a count of 4-byte units.
  return static_cast<s64>(byte_delta >> 2);
}
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
{
if (return_value)
@ -974,7 +980,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
const u32 adjust_size = PrepareStackForCall();
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1009,7 +1015,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG1, arg1);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1045,7 +1051,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG2, arg2);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1083,7 +1089,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG3, arg3);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1122,7 +1128,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG4, arg4);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1510,7 +1516,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
EmitLoadGlobalAddress(RSCRATCH, ptr);
switch (size)
{
case RegSize_8:
@ -1535,7 +1541,7 @@ void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
{
Value value_in_hr = GetValueInHostRegister(value);
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
EmitLoadGlobalAddress(RSCRATCH, ptr);
switch (value.size)
{
case RegSize_8:
@ -1882,4 +1888,130 @@ void CodeGenerator::EmitBindLabel(LabelType* label)
m_emit->Bind(label);
}
/// Emits code which loads the absolute address `ptr` into `host_reg`.
/// Uses an adrp + orr pair when the page displacement and the in-page offset are both encodable,
/// otherwise falls back to a full-width immediate move.
void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
  const uintptr_t in_page_mask = static_cast<uintptr_t>(0xFFF);
  const uintptr_t code_address = reinterpret_cast<uintptr_t>(GetCurrentCodePointer());
  const uintptr_t target_address = reinterpret_cast<uintptr_t>(ptr);

  const void* code_page = reinterpret_cast<const void*>(code_address & ~in_page_mask);
  const void* target_page = reinterpret_cast<const void*>(target_address & ~in_page_mask);

  // GetPCDisplacement() already shifts the byte distance right by 2; a further 10 bits gives a distance in 4KiB pages.
  const s64 page_displacement = GetPCDisplacement(code_page, target_page) >> 10;
  const u32 page_offset = static_cast<u32>(target_address & 0xFFFu);

  const bool pc_relative_ok = vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64);
  if (!pc_relative_ok)
  {
    m_emit->Mov(GetHostReg64(host_reg), reinterpret_cast<uintptr_t>(ptr));
    return;
  }

  // Page base via adrp, then merge the low 12 bits (the page base has them clear, so orr acts as an add).
  m_emit->adrp(GetHostReg64(host_reg), page_displacement);
  m_emit->orr(GetHostReg64(host_reg), GetHostReg64(host_reg), page_offset);
}
/// Emits the AArch64 dispatcher and returns its entry point (as produced by FinalizeBlock()).
///
/// Shape of the generated code:
///   frame_done_loop: exit if g_state.frame_done; dispatch a pending interrupt if one is enabled and raised;
///                    copy the head timing event's downcount into g_state.downcount.
///   main_loop:       while pending_ticks < downcount, look up the block for regs.pc in the fast map and call it.
///   downcount_hit:   call TimingEvents::RunEvents(), then go back to frame_done_loop.
CodeBlock::HostCodePointer CodeGenerator::CompileDispatcher()
{
  m_emit->Sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
  m_register_cache.ReserveCallerSavedRegisters();

  // RCPUPTR <- &g_state, used as the base for every State field access below
  EmitLoadGlobalAddress(RCPUPTR, &g_state);

  a64::Label frame_done_loop;
  a64::Label exit_dispatcher;
  m_emit->Bind(&frame_done_loop);

  // if frame_done goto exit_dispatcher
  m_emit->ldrb(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, frame_done)));
  m_emit->tbnz(a64::w8, 0, &exit_dispatcher);

  // w8 <- sr
  a64::Label no_interrupt;
  m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));

  // if Iec == 0 then goto no_interrupt
  m_emit->tbz(a64::w8, 0, &no_interrupt);

  // w9 <- cause
  // w8 <- sr & cause
  m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
  m_emit->and_(a64::w8, a64::w8, a64::w9);

  // ((sr & cause) & 0xff00) == 0 goto no_interrupt
  m_emit->tst(a64::w8, 0xFF00);
  m_emit->b(&no_interrupt, a64::eq);

  // we have an interrupt
  EmitFunctionCall(nullptr, &DispatchInterrupt);

  // no interrupt or we just serviced it
  m_emit->Bind(&no_interrupt);

  // TimingEvents::UpdateCPUDowncount:
  // x8 <- head event
  // w8 <- head event->downcount
  // downcount <- w8
  EmitLoadGlobalAddress(8, TimingEvents::GetHeadEventPtr());
  m_emit->ldr(a64::x8, a64::MemOperand(a64::x8));
  m_emit->ldr(a64::w8, a64::MemOperand(a64::x8, offsetof(TimingEvent, m_downcount)));
  m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));

  // main dispatch loop
  a64::Label main_loop;
  m_emit->Bind(&main_loop);
  s_dispatcher_return_address = GetCurrentCodePointer();

  // w8 <- pending_ticks
  // w9 <- downcount
  m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
  m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));

  // while pending_ticks < downcount
  a64::Label downcount_hit;
  m_emit->cmp(a64::w8, a64::w9);
  m_emit->b(&downcount_hit, a64::ge);

  // time to lookup the block
  // w8 <- pc
  m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc)));

  // w9 <- (pc & RAM_MASK) >> 2
  m_emit->and_(a64::w9, a64::w8, Bus::RAM_MASK);
  m_emit->lsr(a64::w9, a64::w9, 2);

  // w10 <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
  m_emit->and_(a64::w10, a64::w8, Bus::BIOS_MASK);
  m_emit->lsr(a64::w10, a64::w10, 2);
  m_emit->add(a64::w10, a64::w10, FAST_MAP_RAM_SLOT_COUNT);

  // current_instruction_pc <- pc (w8)
  m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, current_instruction_pc)));

  // if ((w8 (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use w10 as index } else { use w9 }
  m_emit->and_(a64::w8, a64::w8, PHYSICAL_MEMORY_ADDRESS_MASK);
  m_emit->Mov(a64::w11, Bus::BIOS_BASE);
  m_emit->cmp(a64::w8, a64::w11);
  m_emit->csel(a64::w8, a64::w9, a64::w10, a64::lt);

  // w8 contains our index, x8 <- fast_map[w8 * 8], x8(), continue
  EmitLoadGlobalAddress(9, CodeCache::GetFastMapPointer());
  m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
  m_emit->blr(a64::x8);

  // The block returned: re-check the downcount and dispatch the next block. Without this branch every block
  // return would fall through into RunEvents() below, unlike the x64 dispatcher which jumps back to main_loop.
  m_emit->b(&main_loop);

  // end while
  m_emit->Bind(&downcount_hit);

  // check events then for frame done
  EmitFunctionCall(nullptr, &TimingEvents::RunEvents);
  m_emit->b(&frame_done_loop);

  // all done
  m_emit->Bind(&exit_dispatcher);
  m_register_cache.PopCalleeSavedRegisters(true);
  m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
  m_emit->Ret();

  CodeBlock::HostCodePointer ptr;
  u32 code_size;
  FinalizeBlock(&ptr, &code_size);
  Log_InfoPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
  return ptr;
}
} // namespace CPU::Recompiler

View File

@ -1,9 +1,12 @@
#include "common/align.h"
#include "common/log.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "settings.h"
#include "timing_event.h"
Log_SetChannel(Recompiler::CodeGenerator);
namespace CPU::Recompiler {
@ -187,10 +190,12 @@ Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_
/// Emits the prologue of a compiled block: clears the callee-saved flag on registers that are not yet
/// allocated, reserves RCPUPTR in the register cache, and loads it with the address of g_state.
void CodeGenerator::EmitBeginBlock()
{
// Clear the callee-saved flag on unallocated registers, per AssumeCalleeSavedRegistersAreSaved().
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
// Store the CPU struct pointer.
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
DebugAssert(cpu_reg_allocated);
// NOTE(review): the active mov and its commented-out copy both appear below — confirm which is intended.
m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
}
void CodeGenerator::EmitEndBlock()
@ -2516,4 +2521,118 @@ void CodeGenerator::EmitBindLabel(LabelType* label)
m_emit->L(*label);
}
/// Emits code which loads the absolute address `ptr` into `host_reg`.
/// A RIP-relative lea is used when the estimated displacement fits in a signed 32-bit value,
/// otherwise a 64-bit immediate mov is emitted.
void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
  const size_t target_address = reinterpret_cast<size_t>(ptr);
  const size_t emit_address = reinterpret_cast<size_t>(m_emit->getCurr());
  const s64 displacement = static_cast<s64>(target_address - emit_address) + 2;

  if (!Xbyak::inner::IsInInt32(static_cast<u64>(displacement)))
  {
    // Too far away for a 32-bit displacement: materialize the full pointer.
    m_emit->mov(GetHostReg64(host_reg), reinterpret_cast<size_t>(ptr));
    return;
  }

  m_emit->lea(GetHostReg64(host_reg), m_emit->dword[m_emit->rip + ptr]);
}
/// Emits the x64 dispatcher and returns its entry point (as produced by FinalizeBlock()).
///
/// Shape of the generated code:
///   frame_done_loop: exit if g_state.frame_done; dispatch a pending interrupt if one is enabled and raised;
///                    copy the head timing event's downcount into g_state.downcount.
///   main_loop:       while pending_ticks < downcount, look up the block for regs.pc in the fast map and call it.
///   downcount_hit:   call TimingEvents::RunEvents(), then go back to frame_done_loop.
CodeBlock::HostCodePointer CodeGenerator::CompileDispatcher()
{
// Despite its name, this pushes the callee-saved host registers (see RegisterCache::ReserveCallerSavedRegisters).
m_register_cache.ReserveCallerSavedRegisters();
// rbp <- &g_state, used as the base for every State field access below
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
Xbyak::Label frame_done_loop;
Xbyak::Label exit_dispatcher;
m_emit->L(frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->test(m_emit->byte[m_emit->rbp + offsetof(State, frame_done)], 1);
m_emit->jnz(exit_dispatcher, Xbyak::CodeGenerator::T_NEAR);
// eax <- sr
Xbyak::Label no_interrupt;
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.sr.bits)]);
// if Iec == 0 then goto no_interrupt
m_emit->test(m_emit->eax, 1);
m_emit->jz(no_interrupt);
// sr & cause
m_emit->and_(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.cause.bits)]);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->test(m_emit->eax, 0xFF00);
m_emit->jz(no_interrupt);
// we have an interrupt
EmitFunctionCall(nullptr, &DispatchInterrupt);
// no interrupt or we just serviced it
m_emit->L(no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// rax <- head event (loaded through the pointer returned by GetHeadEventPtr())
// eax <- head event->downcount
// downcount <- eax
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, downcount)], m_emit->eax);
// main dispatch loop (entry aligned to 16 bytes)
Xbyak::Label main_loop;
m_emit->align(16);
m_emit->L(main_loop);
// eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// while eax < downcount
Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jge(downcount_hit);
// time to lookup the block
// eax <- pc
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, regs.pc)]);
// ebx <- (pc & RAM_MASK) >> 2
m_emit->mov(m_emit->ebx, m_emit->eax);
m_emit->and_(m_emit->ebx, Bus::RAM_MASK);
m_emit->shr(m_emit->ebx, 2);
// ecx <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
m_emit->mov(m_emit->ecx, m_emit->eax);
m_emit->and_(m_emit->ecx, Bus::BIOS_MASK);
m_emit->shr(m_emit->ecx, 2);
m_emit->add(m_emit->ecx, FAST_MAP_RAM_SLOT_COUNT);
// current_instruction_pc <- pc (eax)
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, current_instruction_pc)], m_emit->eax);
// if ((eax (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use ecx as index }
// NOTE(review): cmovge is a signed comparison — presumably the mask keeps eax non-negative; confirm.
m_emit->and_(m_emit->eax, PHYSICAL_MEMORY_ADDRESS_MASK);
m_emit->cmp(m_emit->eax, Bus::BIOS_BASE);
m_emit->cmovge(m_emit->ebx, m_emit->ecx);
// ebx contains our index, rax <- fast_map[ebx * 8], rax(), continue
EmitLoadGlobalAddress(Xbyak::Operand::RAX, CodeCache::GetFastMapPointer());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax + m_emit->rbx * 8]);
m_emit->call(m_emit->rax);
// block returned: re-check pending_ticks against downcount before dispatching the next block
m_emit->jmp(main_loop);
// end while
m_emit->L(downcount_hit);
// check events then for frame done
EmitFunctionCall(nullptr, &TimingEvents::RunEvents);
m_emit->jmp(frame_done_loop);
// all done
m_emit->L(exit_dispatcher);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret();
// Finalize the emitted code and report its size and location.
CodeBlock::HostCodePointer ptr;
u32 code_size;
FinalizeBlock(&ptr, &code_size);
Log_InfoPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
return ptr;
}
} // namespace CPU::Recompiler

View File

@ -351,6 +351,33 @@ u32 RegisterCache::PopCalleeSavedRegisters(bool commit)
return count;
}
void RegisterCache::ReserveCallerSavedRegisters()
{
for (u32 reg = 0; reg < HostReg_Count; reg++)
{
if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
}
}
}
void RegisterCache::AssumeCalleeSavedRegistersAreSaved()
{
for (u32 i = 0; i < HostReg_Count; i++)
{
if ((m_state.host_reg_state[i] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
m_state.host_reg_state[i] &= ~HostRegState::CalleeSaved;
}
}
}
void RegisterCache::PushState()
{
// need to copy this manually because of the load delay values

View File

@ -248,6 +248,12 @@ public:
/// Restore callee-saved registers. Call at the end of the function.
u32 PopCalleeSavedRegisters(bool commit);
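/// Pushes every callee-saved register that is not yet allocated (despite the name, this acts on callee-saved, not caller-saved, registers), enabling later use without stack pushes.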
void ReserveCallerSavedRegisters();
/// Removes the callee-saved register flag from all registers. Call when compiling code blocks.
void AssumeCalleeSavedRegistersAreSaved();
/// Pushes the register allocator state, use when entering branched code.
void PushState();

View File

@ -4,6 +4,7 @@
#include "common/log.h"
#include "common/state_wrapper.h"
#include "common/string_util.h"
#include "cpu_code_cache.h"
#include "cpu_core.h"
#include "gpu.h"
#include "interrupt_controller.h"
@ -499,7 +500,7 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
const u32 terminator = UINT32_C(0xFFFFFF);
std::memcpy(&ram_pointer[address], &terminator, sizeof(terminator));
Bus::InvalidateCodePages(address, word_count);
CPU::CodeCache::InvalidateCodePages(address, word_count);
return Bus::GetDMARAMTickCount(word_count);
}
@ -547,6 +548,6 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
}
}
Bus::InvalidateCodePages(address, word_count);
CPU::CodeCache::InvalidateCodePages(address, word_count);
return Bus::GetDMARAMTickCount(word_count);
}

View File

@ -53,6 +53,11 @@ void UpdateCPUDowncount()
CPU::g_state.downcount = s_active_events_head->GetDowncount();
}
/// Returns the address of the pointer to the head of the active event list.
TimingEvent** GetHeadEventPtr()
{
  TimingEvent** const head_slot = &s_active_events_head;
  return head_slot;
}
static void SortEvent(TimingEvent* event)
{
const TickCount event_downcount = event->m_downcount;

View File

@ -88,6 +88,8 @@ void RunEvents();
void UpdateCPUDowncount();
TimingEvent** GetHeadEventPtr();
} // namespace TimingEventManager