CPU/CodeCache: Dynamically compute BIOS memory access timing

The timings can change if the game reprograms them at runtime. Instead of forcing the
blocks to recompile, we can just multiply size * word_time on the fly.

Improves stability of Nightmare Creatures booting, and fixes corrupted
text in Formula Circus when using the cached interpreter.
This commit is contained in:
Stenzek 2024-07-19 19:31:33 +10:00
parent 5f36c2948f
commit 2e96931c32
No known key found for this signature in database
18 changed files with 294 additions and 65 deletions

View File

@ -708,6 +708,15 @@ bool Bus::HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
return false; return false;
} }
/// Returns a pointer to the dynamic access-time entry for the given physical address,
/// or nullptr if accesses to this address have fixed timing. Currently only the BIOS
/// region is covered; EXP1 could be added later.
const TickCount* Bus::GetMemoryAccessTimePtr(PhysicalMemoryAddress address, MemoryAccessSize size)
{
  // Anything outside the (mirrored) BIOS window has no dynamic timing entry.
  if (address < BIOS_BASE || address >= (BIOS_BASE + BIOS_MIRROR_SIZE))
    return nullptr;

  return &g_bios_access_time[static_cast<size_t>(size)];
}
std::optional<Bus::MemoryRegion> Bus::GetMemoryRegionForAddress(PhysicalMemoryAddress address) std::optional<Bus::MemoryRegion> Bus::GetMemoryRegionForAddress(PhysicalMemoryAddress address)
{ {
if (address < RAM_2MB_SIZE) if (address < RAM_2MB_SIZE)

View File

@ -191,6 +191,9 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
return static_cast<TickCount>(word_count + ((word_count + 15) / 16)); return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
} }
/// Returns a pointer to the cycle count for a non-RAM memory access.
const TickCount* GetMemoryAccessTimePtr(PhysicalMemoryAddress address, MemoryAccessSize size);
enum class MemoryRegion enum class MemoryRegion
{ {
RAM, RAM,

View File

@ -823,8 +823,20 @@ template<PGXPMode pgxp_mode>
} }
DebugAssert(!(HasPendingInterrupt())); DebugAssert(!(HasPendingInterrupt()));
if (g_settings.cpu_recompiler_icache) if (block->HasFlag(BlockFlags::IsUsingICache))
CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks); {
CheckAndUpdateICacheTags(block->icache_line_count);
}
else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks))
{
AddPendingTicks(
static_cast<TickCount>(block->size * static_cast<u32>(*Bus::GetMemoryAccessTimePtr(
block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word))));
}
else
{
AddPendingTicks(block->uncached_fetch_ticks);
}
InterpretCachedBlock<pgxp_mode>(block); InterpretCachedBlock<pgxp_mode>(block);
@ -893,6 +905,9 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
// TODO: Jump to other block if it exists at this pc? // TODO: Jump to other block if it exists at this pc?
const PageProtectionMode protection = GetProtectionModeForPC(start_pc); const PageProtectionMode protection = GetProtectionModeForPC(start_pc);
const bool use_icache = CPU::IsCachedAddress(start_pc);
const bool dynamic_fetch_ticks = (!use_icache && Bus::GetMemoryAccessTimePtr(start_pc & PHYSICAL_MEMORY_ADDRESS_MASK,
MemoryAccessSize::Word) != nullptr);
u32 pc = start_pc; u32 pc = start_pc;
bool is_branch_delay_slot = false; bool is_branch_delay_slot = false;
bool is_load_delay_slot = false; bool is_load_delay_slot = false;
@ -905,7 +920,8 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
instructions->clear(); instructions->clear();
metadata->icache_line_count = 0; metadata->icache_line_count = 0;
metadata->uncached_fetch_ticks = 0; metadata->uncached_fetch_ticks = 0;
metadata->flags = BlockFlags::None; metadata->flags = use_icache ? BlockFlags::IsUsingICache :
(dynamic_fetch_ticks ? BlockFlags::NeedsDynamicFetchTicks : BlockFlags::None);
u32 last_cache_line = ICACHE_LINES; u32 last_cache_line = ICACHE_LINES;
u32 last_page = (protection == PageProtectionMode::WriteProtected) ? Bus::GetRAMCodePageIndex(start_pc) : 0; u32 last_page = (protection == PageProtectionMode::WriteProtected) ? Bus::GetRAMCodePageIndex(start_pc) : 0;
@ -956,17 +972,23 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
info.is_store_instruction = IsMemoryStoreInstruction(instruction); info.is_store_instruction = IsMemoryStoreInstruction(instruction);
info.has_load_delay = InstructionHasLoadDelay(instruction); info.has_load_delay = InstructionHasLoadDelay(instruction);
if (g_settings.cpu_recompiler_icache) if (use_icache)
{ {
const u32 icache_line = GetICacheLine(pc); if (g_settings.cpu_recompiler_icache)
if (icache_line != last_cache_line)
{ {
metadata->icache_line_count++; const u32 icache_line = GetICacheLine(pc);
last_cache_line = icache_line; if (icache_line != last_cache_line)
{
metadata->icache_line_count++;
last_cache_line = icache_line;
}
} }
} }
else if (!dynamic_fetch_ticks)
{
metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
}
metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
if (info.is_load_instruction || info.is_store_instruction) if (info.is_load_instruction || info.is_store_instruction)
metadata->flags |= BlockFlags::ContainsLoadStoreInstructions; metadata->flags |= BlockFlags::ContainsLoadStoreInstructions;
@ -1022,6 +1044,8 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
#ifdef _DEBUG #ifdef _DEBUG
SmallString disasm; SmallString disasm;
DEBUG_LOG("Block at 0x{:08X}", start_pc); DEBUG_LOG("Block at 0x{:08X}", start_pc);
DEBUG_LOG(" Uncached fetch ticks: {}", metadata->uncached_fetch_ticks);
DEBUG_LOG(" ICache line count: {}", metadata->icache_line_count);
for (const auto& cbi : *instructions) for (const auto& cbi : *instructions)
{ {
CPU::DisassembleInstruction(&disasm, cbi.second.pc, cbi.first.bits); CPU::DisassembleInstruction(&disasm, cbi.second.pc, cbi.first.bits);

View File

@ -94,6 +94,8 @@ enum class BlockFlags : u8
ContainsLoadStoreInstructions = (1 << 0), ContainsLoadStoreInstructions = (1 << 0),
SpansPages = (1 << 1), SpansPages = (1 << 1),
BranchDelaySpansPages = (1 << 2), BranchDelaySpansPages = (1 << 2),
IsUsingICache = (1 << 3),
NeedsDynamicFetchTicks = (1 << 4),
}; };
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(BlockFlags); IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(BlockFlags);

View File

@ -2620,7 +2620,7 @@ TickCount CPU::GetInstructionReadTicks(VirtualMemoryAddress address)
{ {
return RAM_READ_TICKS; return RAM_READ_TICKS;
} }
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_MIRROR_SIZE))
{ {
return g_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)]; return g_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)];
} }
@ -2640,7 +2640,7 @@ TickCount CPU::GetICacheFillTicks(VirtualMemoryAddress address)
{ {
return 1 * ((ICACHE_LINE_SIZE - (address & (ICACHE_LINE_SIZE - 1))) / sizeof(u32)); return 1 * ((ICACHE_LINE_SIZE - (address & (ICACHE_LINE_SIZE - 1))) / sizeof(u32));
} }
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_MIRROR_SIZE))
{ {
return g_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] * return g_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] *
((ICACHE_LINE_SIZE - (address & (ICACHE_LINE_SIZE - 1))) / sizeof(u32)); ((ICACHE_LINE_SIZE - (address & (ICACHE_LINE_SIZE - 1))) / sizeof(u32));
@ -2651,29 +2651,23 @@ TickCount CPU::GetICacheFillTicks(VirtualMemoryAddress address)
} }
} }
void CPU::CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks) void CPU::CheckAndUpdateICacheTags(u32 line_count)
{ {
VirtualMemoryAddress current_pc = g_state.pc & ICACHE_TAG_ADDRESS_MASK; VirtualMemoryAddress current_pc = g_state.pc & ICACHE_TAG_ADDRESS_MASK;
if (IsCachedAddress(current_pc))
{
TickCount ticks = 0;
TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc);
for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE)
{
const u32 line = GetICacheLine(current_pc);
if (g_state.icache_tags[line] != current_pc)
{
g_state.icache_tags[line] = current_pc;
ticks += cached_ticks_per_line;
}
}
g_state.pending_ticks += ticks; TickCount ticks = 0;
} TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc);
else for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE)
{ {
g_state.pending_ticks += uncached_ticks; const u32 line = GetICacheLine(current_pc);
if (g_state.icache_tags[line] != current_pc)
{
g_state.icache_tags[line] = current_pc;
ticks += cached_ticks_per_line;
}
} }
g_state.pending_ticks += ticks;
} }
u32 CPU::FillICache(VirtualMemoryAddress address) u32 CPU::FillICache(VirtualMemoryAddress address)

View File

@ -65,7 +65,7 @@ ALWAYS_INLINE static bool CompareICacheTag(VirtualMemoryAddress address)
TickCount GetInstructionReadTicks(VirtualMemoryAddress address); TickCount GetInstructionReadTicks(VirtualMemoryAddress address);
TickCount GetICacheFillTicks(VirtualMemoryAddress address); TickCount GetICacheFillTicks(VirtualMemoryAddress address);
u32 FillICache(VirtualMemoryAddress address); u32 FillICache(VirtualMemoryAddress address);
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks); void CheckAndUpdateICacheTags(u32 line_count);
ALWAYS_INLINE static Segment GetSegmentForAddress(VirtualMemoryAddress address) ALWAYS_INLINE static Segment GetSegmentForAddress(VirtualMemoryAddress address)
{ {

View File

@ -77,8 +77,7 @@ void CPU::NewRec::Compiler::BeginBlock()
GenerateBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction)); GenerateBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
} }
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0) GenerateICacheCheckAndUpdate();
GenerateICacheCheckAndUpdate();
if (g_settings.bios_tty_logging) if (g_settings.bios_tty_logging)
{ {
@ -1719,6 +1718,14 @@ void CPU::NewRec::Compiler::TruncateBlock()
iinfo->is_last_instruction = true; iinfo->is_last_instruction = true;
} }
/// Resolves the dynamic fetch-timing pointer for this block's start PC.
/// Only meaningful for blocks flagged NeedsDynamicFetchTicks; asserts that a
/// timing entry actually exists for the address.
const TickCount* CPU::NewRec::Compiler::GetFetchMemoryAccessTimePtr() const
{
  const PhysicalMemoryAddress phys_pc = m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK;
  const TickCount* const access_time = Bus::GetMemoryAccessTimePtr(phys_pc, MemoryAccessSize::Word);
  AssertMsg(access_time, "Address has dynamic fetch ticks");
  return access_time;
}
void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store, void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
bool use_fastmem) bool use_fastmem)
{ {

View File

@ -201,6 +201,8 @@ protected:
void SetCompilerPC(u32 newpc); void SetCompilerPC(u32 newpc);
void TruncateBlock(); void TruncateBlock();
const TickCount* GetFetchMemoryAccessTimePtr() const;
virtual const void* GetCurrentCodePointer() = 0; virtual const void* GetCurrentCodePointer() = 0;
virtual void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, virtual void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,

View File

@ -28,6 +28,7 @@ using namespace vixl::aarch32;
using CPU::Recompiler::armEmitCall; using CPU::Recompiler::armEmitCall;
using CPU::Recompiler::armEmitCondBranch; using CPU::Recompiler::armEmitCondBranch;
using CPU::Recompiler::armEmitFarLoad;
using CPU::Recompiler::armEmitJmp; using CPU::Recompiler::armEmitJmp;
using CPU::Recompiler::armEmitMov; using CPU::Recompiler::armEmitMov;
using CPU::Recompiler::armGetJumpTrampoline; using CPU::Recompiler::armGetJumpTrampoline;
@ -302,13 +303,25 @@ bool foo(const void* a, const void* b)
void CPU::NewRec::AArch32Compiler::GenerateICacheCheckAndUpdate() void CPU::NewRec::AArch32Compiler::GenerateICacheCheckAndUpdate()
{ {
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1) if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{ {
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
armAsm->add(RARG1, RARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks))); {
armAsm->str(RARG1, PTR(&g_state.pending_ticks)); armEmitFarLoad(armAsm, RARG2, GetFetchMemoryAccessTimePtr());
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
armEmitMov(armAsm, RARG3, m_block->size);
armAsm->mul(RARG2, RARG2, RARG3);
armAsm->add(RARG1, RARG1, RARG2);
armAsm->str(RARG1, PTR(&g_state.pending_ticks));
}
else
{
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
armAsm->add(RARG1, RARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
armAsm->str(RARG1, PTR(&g_state.pending_ticks));
}
} }
else else if (m_block->icache_line_count > 0)
{ {
const auto& ticks_reg = RARG1; const auto& ticks_reg = RARG1;
const auto& current_tag_reg = RARG2; const auto& current_tag_reg = RARG2;

View File

@ -27,6 +27,7 @@ using namespace vixl::aarch64;
using CPU::Recompiler::armEmitCall; using CPU::Recompiler::armEmitCall;
using CPU::Recompiler::armEmitCondBranch; using CPU::Recompiler::armEmitCondBranch;
using CPU::Recompiler::armEmitFarLoad;
using CPU::Recompiler::armEmitJmp; using CPU::Recompiler::armEmitJmp;
using CPU::Recompiler::armEmitMov; using CPU::Recompiler::armEmitMov;
using CPU::Recompiler::armGetJumpTrampoline; using CPU::Recompiler::armGetJumpTrampoline;
@ -274,13 +275,25 @@ void CPU::NewRec::AArch64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr,
void CPU::NewRec::AArch64Compiler::GenerateICacheCheckAndUpdate() void CPU::NewRec::AArch64Compiler::GenerateICacheCheckAndUpdate()
{ {
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1) if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{ {
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks)); if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks))); {
armAsm->str(RWARG1, PTR(&g_state.pending_ticks)); armEmitFarLoad(armAsm, RWARG2, GetFetchMemoryAccessTimePtr());
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
armEmitMov(armAsm, RWARG3, m_block->size);
armAsm->mul(RWARG2, RWARG2, RWARG3);
armAsm->add(RWARG1, RWARG1, RWARG2);
armAsm->str(RWARG1, PTR(&g_state.pending_ticks));
}
else
{
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
armAsm->str(RWARG1, PTR(&g_state.pending_ticks));
}
} }
else else if (m_block->icache_line_count > 0)
{ {
const auto& ticks_reg = RWARG1; const auto& ticks_reg = RWARG1;
const auto& current_tag_reg = RWARG2; const auto& current_tag_reg = RWARG2;

View File

@ -40,6 +40,7 @@ using namespace biscuit;
using CPU::Recompiler::rvEmitCall; using CPU::Recompiler::rvEmitCall;
using CPU::Recompiler::rvEmitDSExtW; using CPU::Recompiler::rvEmitDSExtW;
using CPU::Recompiler::rvEmitDUExtW; using CPU::Recompiler::rvEmitDUExtW;
using CPU::Recompiler::rvEmitFarLoad;
using CPU::Recompiler::rvEmitJmp; using CPU::Recompiler::rvEmitJmp;
using CPU::Recompiler::rvEmitMov; using CPU::Recompiler::rvEmitMov;
using CPU::Recompiler::rvEmitMov64; using CPU::Recompiler::rvEmitMov64;
@ -130,6 +131,25 @@ u32 CPU::Recompiler::rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr)
return rvEmitJmp(rvAsm, ptr, biscuit::ra); return rvEmitJmp(rvAsm, ptr, biscuit::ra);
} }
/// Emits a PC-relative load of a 32-bit value from an arbitrary absolute address.
/// AUIPC materializes the upper bits into 'reg'; the remaining low 12 bits are folded
/// into the load instruction's immediate, so only the destination register is touched.
void CPU::Recompiler::rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                    bool sign_extend_word)
{
  const auto [upper, lower] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(reg, upper);
  if (sign_extend_word)
    rvAsm->LW(reg, lower, reg);   // sign-extends the loaded word to 64 bits
  else
    rvAsm->LWU(reg, lower, reg);  // zero-extends
}
/// Emits a PC-relative store of a 32-bit value to an arbitrary absolute address.
/// 'tempreg' is clobbered to hold the PC-relative base; 'reg' itself is preserved.
void CPU::Recompiler::rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                     const biscuit::GPR& tempreg)
{
  const auto [upper, lower] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(tempreg, upper);
  rvAsm->SW(reg, lower, tempreg);
}
void CPU::Recompiler::rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) void CPU::Recompiler::rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{ {
rvAsm->SLLI(rd, rs, 24); rvAsm->SLLI(rd, rs, 24);
@ -525,13 +545,25 @@ void CPU::NewRec::RISCV64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr,
void CPU::NewRec::RISCV64Compiler::GenerateICacheCheckAndUpdate() void CPU::NewRec::RISCV64Compiler::GenerateICacheCheckAndUpdate()
{ {
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1) if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{ {
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks)); if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks)); {
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks)); rvEmitFarLoad(rvAsm, RARG2, GetFetchMemoryAccessTimePtr());
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
rvEmitMov(rvAsm, RARG3, m_block->size);
rvAsm->MULW(RARG2, RARG2, RARG3);
rvAsm->ADD(RARG1, RARG1, RARG2);
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
}
else
{
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
}
} }
else else if (m_block->icache_line_count > 0)
{ {
const auto& ticks_reg = RARG1; const auto& ticks_reg = RARG1;
const auto& current_tag_reg = RARG2; const auto& current_tag_reg = RARG2;

View File

@ -179,9 +179,18 @@ void CPU::NewRec::X64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, cons
void CPU::NewRec::X64Compiler::GenerateICacheCheckAndUpdate() void CPU::NewRec::X64Compiler::GenerateICacheCheckAndUpdate()
{ {
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1) if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{ {
cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast<u32>(m_block->uncached_fetch_ticks)); if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
{
cg->mov(cg->eax, m_block->size);
cg->mul(cg->dword[cg->rip + GetFetchMemoryAccessTimePtr()]);
cg->add(cg->dword[PTR(&g_state.pending_ticks)], cg->eax);
}
else
{
cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast<u32>(m_block->uncached_fetch_ticks));
}
} }
else if (m_block->icache_line_count > 0) else if (m_block->icache_line_count > 0)
{ {

View File

@ -926,6 +926,14 @@ Value CodeGenerator::NotValue(const Value& val)
return res; return res;
} }
/// Resolves the dynamic fetch-timing pointer for this block's start PC.
/// Asserts that a timing entry exists; callers should only invoke this for blocks
/// compiled with dynamic fetch ticks.
const TickCount* CodeGenerator::GetFetchMemoryAccessTimePtr() const
{
  const PhysicalMemoryAddress phys_pc = m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK;
  const TickCount* const access_time = Bus::GetMemoryAccessTimePtr(phys_pc, MemoryAccessSize::Word);
  AssertMsg(access_time, "Address has dynamic fetch ticks");
  return access_time;
}
void CodeGenerator::GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info, void CodeGenerator::GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info,
Exception excode, Condition condition /* = Condition::Always */) Exception excode, Condition condition /* = Condition::Always */)
{ {
@ -996,8 +1004,7 @@ void CodeGenerator::BlockPrologue()
EmitFunctionCall(nullptr, &CPU::HandleB0Syscall); EmitFunctionCall(nullptr, &CPU::HandleB0Syscall);
} }
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0) EmitICacheCheckAndUpdate();
EmitICacheCheckAndUpdate();
// we don't know the state of the last block, so assume load delays might be in progress // we don't know the state of the last block, so assume load delays might be in progress
// TODO: Pull load delay into register cache // TODO: Pull load delay into register cache

View File

@ -198,6 +198,8 @@ public:
Value XorValues(const Value& lhs, const Value& rhs); Value XorValues(const Value& lhs, const Value& rhs);
Value NotValue(const Value& val); Value NotValue(const Value& val);
const TickCount* GetFetchMemoryAccessTimePtr() const;
// Raising exception if condition is true. // Raising exception if condition is true.
void GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info, Exception excode, void GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info, Exception excode,
Condition condition = Condition::Always); Condition condition = Condition::Always);

View File

@ -140,6 +140,20 @@ void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::
} }
} }
/// Emits a load of a 32-bit value from an arbitrary absolute address.
/// The destination register doubles as the address scratch (it is written with the
/// address first, then overwritten by the loaded value), so no temporary is needed.
void CPU::Recompiler::armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
                                     const void* addr)
{
  armMoveAddressToReg(armAsm, reg, addr);
  armAsm->ldr(reg, vixl::aarch32::MemOperand(reg));
}
/// Emits a store of a 32-bit value to an arbitrary absolute address.
/// 'tempreg' is clobbered to hold the target address; 'reg' itself is preserved.
/// Fix: 'tempreg' was declared as vixl::aarch64::Register in this aarch32 emitter,
/// mismatching the aarch32 declaration (which defaults tempreg to RSCRATCH).
void CPU::Recompiler::armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
                                      const void* addr, const vixl::aarch32::Register& tempreg)
{
  armMoveAddressToReg(armAsm, tempreg, addr);
  armAsm->str(reg, vixl::aarch32::MemOperand(tempreg));
}
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size) void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
{ {
#ifdef ENABLE_HOST_DISASSEMBLY #ifdef ENABLE_HOST_DISASSEMBLY
@ -1913,12 +1927,24 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
void CodeGenerator::EmitICacheCheckAndUpdate() void CodeGenerator::EmitICacheCheckAndUpdate()
{ {
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1) if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{ {
EmitAddCPUStructField(OFFSETOF(State, pending_ticks), if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks))); {
armEmitFarLoad(m_emit, RARG2, GetFetchMemoryAccessTimePtr());
m_emit->ldr(RARG1, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
m_emit->Mov(RARG3, m_block->size);
m_emit->mul(RARG2, RARG2, RARG3);
m_emit->add(RARG1, RARG1, RARG2);
m_emit->str(RARG1, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
}
else
{
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
}
} }
else else if (m_block->icache_line_count > 0)
{ {
const auto& ticks_reg = a32::r0; const auto& ticks_reg = a32::r0;
const auto& current_tag_reg = a32::r1; const auto& current_tag_reg = a32::r1;

View File

@ -261,6 +261,61 @@ void CPU::Recompiler::armEmitCondBranch(a64::Assembler* armAsm, a64::Condition c
} }
} }
/// Emits a load from an arbitrary absolute address, clobbering only the destination
/// register. Uses a short ADRP+LDR pair when the target page is reachable from the
/// current code cursor; otherwise materializes the full 64-bit address first.
void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
                                     const void* addr, bool sign_extend_word)
{
  // Compute the 4KB-page-aligned bases of the code cursor and the target address.
  const void* cur = armAsm->GetCursorAddress<const void*>();
  const void* current_code_ptr_page =
    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
  const void* ptr_page =
    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
  // NOTE(review): the >>10 here presumably combines with armGetPCDisplacement's own
  // scaling to produce a page count suitable for adrp — confirm against its definition.
  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
  a64::MemOperand memop;
  const vixl::aarch64::Register xreg = reg.X();
  if (vixl::IsInt21(page_displacement))
  {
    // In range for adrp: page base in the register, low 12 bits as the load offset.
    armAsm->adrp(xreg, page_displacement);
    memop = vixl::aarch64::MemOperand(xreg, static_cast<int64_t>(page_offset));
  }
  else
  {
    // Out of adrp range: fall back to materializing the full address.
    armMoveAddressToReg(armAsm, xreg, addr);
    memop = vixl::aarch64::MemOperand(xreg);
  }
  // ldrsw sign-extends a 32-bit load into a 64-bit register; plain ldr uses the
  // destination register's own width.
  if (sign_extend_word)
    armAsm->ldrsw(reg, memop);
  else
    armAsm->ldr(reg, memop);
}
/// Emits a store of 'reg' to an arbitrary absolute address. 'tempreg' (which must be
/// an X register) is clobbered to form the address; 'reg' is preserved. Mirrors the
/// ADRP-or-materialize strategy of armEmitFarLoad above.
void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
                                      const void* addr, const vixl::aarch64::Register& tempreg)
{
  DebugAssert(tempreg.IsX());
  // Compute the 4KB-page-aligned bases of the code cursor and the target address.
  const void* cur = armAsm->GetCursorAddress<const void*>();
  const void* current_code_ptr_page =
    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
  const void* ptr_page =
    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
  // NOTE(review): the >>10 presumably combines with armGetPCDisplacement's own scaling
  // to yield a page count for adrp — confirm against its definition.
  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
  if (vixl::IsInt21(page_displacement))
  {
    // In range for adrp: page base in the temp register, low 12 bits as the store offset.
    armAsm->adrp(tempreg, page_displacement);
    armAsm->str(reg, vixl::aarch64::MemOperand(tempreg, static_cast<int64_t>(page_offset)));
  }
  else
  {
    // Out of adrp range: materialize the full address instead.
    armMoveAddressToReg(armAsm, tempreg, addr);
    armAsm->str(reg, vixl::aarch64::MemOperand(tempreg));
  }
}
u8* CPU::Recompiler::armGetJumpTrampoline(const void* target) u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
{ {
auto it = s_trampoline_targets.find(target); auto it = s_trampoline_targets.find(target);
@ -2240,12 +2295,24 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
void CodeGenerator::EmitICacheCheckAndUpdate() void CodeGenerator::EmitICacheCheckAndUpdate()
{ {
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1) if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{ {
EmitAddCPUStructField(OFFSETOF(State, pending_ticks), if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks))); {
armEmitFarLoad(m_emit, RWARG2, GetFetchMemoryAccessTimePtr());
m_emit->Ldr(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
m_emit->Mov(RWARG3, m_block->size);
m_emit->Mul(RWARG2, RWARG2, RWARG3);
m_emit->Add(RWARG1, RWARG1, RWARG2);
m_emit->Str(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
}
else
{
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
}
} }
else else if (m_block->icache_line_count > 0)
{ {
const auto& ticks_reg = a64::w0; const auto& ticks_reg = a64::w0;
const auto& current_tag_reg = a64::w1; const auto& current_tag_reg = a64::w1;

View File

@ -2782,12 +2782,21 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
void CodeGenerator::EmitICacheCheckAndUpdate() void CodeGenerator::EmitICacheCheckAndUpdate()
{ {
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1) if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{ {
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
static_cast<u32>(m_block->uncached_fetch_ticks)); {
m_emit->mov(m_emit->eax, m_block->size);
m_emit->mul(m_emit->dword[m_emit->rip + GetFetchMemoryAccessTimePtr()]);
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], m_emit->eax);
}
else
{
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)],
static_cast<u32>(m_block->uncached_fetch_ticks));
}
} }
else else if (m_block->icache_line_count > 0)
{ {
VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK; VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)

View File

@ -93,6 +93,9 @@ void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register&
void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline); void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline); void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr); void armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr);
void armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr);
void armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr,
const vixl::aarch32::Register& tempreg = RSCRATCH);
u8* armGetJumpTrampoline(const void* target); u8* armGetJumpTrampoline(const void* target);
} // namespace CPU::Recompiler } // namespace CPU::Recompiler
@ -129,6 +132,10 @@ void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register&
void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline); void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline); void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr); void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
bool sign_extend_word = false);
void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
const vixl::aarch64::Register& tempreg = RXSCRATCH);
u8* armGetJumpTrampoline(const void* target); u8* armGetJumpTrampoline(const void* target);
} // namespace CPU::Recompiler } // namespace CPU::Recompiler
@ -157,8 +164,11 @@ std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target);
void rvMoveAddressToReg(biscuit::Assembler* armAsm, const biscuit::GPR& reg, const void* addr); void rvMoveAddressToReg(biscuit::Assembler* armAsm, const biscuit::GPR& reg, const void* addr);
void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm); void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm);
void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm); void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm);
u32 rvEmitJmp(biscuit::Assembler* armAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero); u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);
u32 rvEmitCall(biscuit::Assembler* armAsm, const void* ptr); u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr);
void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word = false);
void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
const biscuit::GPR& tempreg = RSCRATCH);
void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word