diff --git a/src/core/cpu_recompiler_arm32.cpp b/src/core/cpu_recompiler_arm32.cpp index 418346c30..922895619 100644 --- a/src/core/cpu_recompiler_arm32.cpp +++ b/src/core/cpu_recompiler_arm32.cpp @@ -617,13 +617,19 @@ void CPU::ARM32Recompiler::GenerateICacheCheckAndUpdate() } else if (m_block->icache_line_count > 0) { + VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK; + const TickCount fill_ticks = GetICacheFillTicks(current_pc); + if (fill_ticks <= 0) + return; + const auto& ticks_reg = RARG1; const auto& current_tag_reg = RARG2; const auto& existing_tag_reg = RARG3; + const auto& fill_ticks_reg = r5; - VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK; armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks)); armEmitMov(armAsm, current_tag_reg, current_pc); + armEmitMov(armAsm, fill_ticks_reg, fill_ticks); for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) { @@ -644,12 +650,9 @@ void CPU::ARM32Recompiler::GenerateICacheCheckAndUpdate() Label cache_hit; armAsm->ldr(existing_tag_reg, line_addr); - armAsm->cmp(existing_tag_reg, current_tag_reg); - armAsm->b(eq, &cache_hit); - armAsm->str(current_tag_reg, line_addr); - armAsm->add(ticks_reg, ticks_reg, armCheckAddSubConstant(static_cast(fill_ticks))); - armAsm->bind(&cache_hit); + armAsm->cmp(existing_tag_reg, current_tag_reg); + armAsm->add(ne, ticks_reg, ticks_reg, fill_ticks_reg); if (i != (m_block->icache_line_count - 1)) armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE)); diff --git a/src/core/cpu_recompiler_arm64.cpp b/src/core/cpu_recompiler_arm64.cpp index f949435e5..fb40f307d 100644 --- a/src/core/cpu_recompiler_arm64.cpp +++ b/src/core/cpu_recompiler_arm64.cpp @@ -780,28 +780,29 @@ void CPU::ARM64Recompiler::GenerateICacheCheckAndUpdate() const auto& ticks_reg = RWARG1; const auto& current_tag_reg = RWARG2; const auto& existing_tag_reg = RWARG3; + const auto& fill_ticks_reg = w4; + const auto& ticks_to_add_reg = w5; VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK; + const TickCount fill_ticks = GetICacheFillTicks(current_pc); + if (fill_ticks <= 0) + return; + armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks)); armEmitMov(armAsm, current_tag_reg, current_pc); + armEmitMov(armAsm, fill_ticks_reg, fill_ticks); for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) { - const TickCount fill_ticks = GetICacheFillTicks(current_pc); - if (fill_ticks <= 0) - continue; - const u32 line = GetICacheLine(current_pc); const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32)); Label cache_hit; armAsm->ldr(existing_tag_reg, MemOperand(RSTATE, offset)); - armAsm->cmp(existing_tag_reg, current_tag_reg); - armAsm->b(&cache_hit, eq); - armAsm->str(current_tag_reg, MemOperand(RSTATE, offset)); - armAsm->add(ticks_reg, ticks_reg, armCheckAddSubConstant(static_cast(fill_ticks))); - armAsm->bind(&cache_hit); + armAsm->cmp(existing_tag_reg, current_tag_reg); + armAsm->csel(ticks_to_add_reg, fill_ticks_reg, wzr, ne); + armAsm->add(ticks_reg, ticks_reg, ticks_to_add_reg); if (i != (m_block->icache_line_count - 1)) armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE)); diff --git a/src/core/cpu_recompiler_x64.cpp b/src/core/cpu_recompiler_x64.cpp index 2beefc633..2fdf108d1 100644 --- a/src/core/cpu_recompiler_x64.cpp +++ b/src/core/cpu_recompiler_x64.cpp @@ -506,27 +506,32 @@ void CPU::X64Recompiler::GenerateICacheCheckAndUpdate() } else if (m_block->icache_line_count > 0) { + // RAM to ROM is not contiguous, therefore the cost will be the same across the entire block. + VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK; + const TickCount fill_ticks = GetICacheFillTicks(current_pc); + if (fill_ticks <= 0) + return; + cg->lea(RXARG1, cg->dword[PTR(&g_state.icache_tags)]); + cg->xor_(RWARG2, RWARG2); + cg->mov(RWARG4, fill_ticks); // TODO: Vectorize this... - VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK; for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) { const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc); - const TickCount fill_ticks = GetICacheFillTicks(current_pc); - if (fill_ticks <= 0) - continue; const u32 line = GetICacheLine(current_pc); const u32 offset = (line * sizeof(u32)); - Xbyak::Label cache_hit; + cg->xor_(RWARG3, RWARG3); cg->cmp(cg->dword[RXARG1 + offset], tag); - cg->je(cache_hit); cg->mov(cg->dword[RXARG1 + offset], tag); - cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast(fill_ticks)); - cg->L(cache_hit); + cg->cmovne(RWARG3, RWARG4); + cg->add(RWARG2, RWARG3); } + + cg->add(cg->dword[PTR(&g_state.pending_ticks)], RWARG2); } }