CPU/Recompiler: Use condition select for ICache updates
Gives a tidy ~4% performance boost.
This commit is contained in:
parent
666fee2df7
commit
9d52e27e16
|
@ -617,13 +617,19 @@ void CPU::ARM32Recompiler::GenerateICacheCheckAndUpdate()
|
|||
}
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
||||
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
||||
if (fill_ticks <= 0)
|
||||
return;
|
||||
|
||||
const auto& ticks_reg = RARG1;
|
||||
const auto& current_tag_reg = RARG2;
|
||||
const auto& existing_tag_reg = RARG3;
|
||||
const auto& fill_ticks_reg = r5;
|
||||
|
||||
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
||||
armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks));
|
||||
armEmitMov(armAsm, current_tag_reg, current_pc);
|
||||
armEmitMov(armAsm, fill_ticks_reg, fill_ticks);
|
||||
|
||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||
{
|
||||
|
@ -644,12 +650,9 @@ void CPU::ARM32Recompiler::GenerateICacheCheckAndUpdate()
|
|||
|
||||
Label cache_hit;
|
||||
armAsm->ldr(existing_tag_reg, line_addr);
|
||||
armAsm->cmp(existing_tag_reg, current_tag_reg);
|
||||
armAsm->b(eq, &cache_hit);
|
||||
|
||||
armAsm->str(current_tag_reg, line_addr);
|
||||
armAsm->add(ticks_reg, ticks_reg, armCheckAddSubConstant(static_cast<u32>(fill_ticks)));
|
||||
armAsm->bind(&cache_hit);
|
||||
armAsm->cmp(existing_tag_reg, current_tag_reg);
|
||||
armAsm->add(ne, ticks_reg, ticks_reg, fill_ticks_reg);
|
||||
|
||||
if (i != (m_block->icache_line_count - 1))
|
||||
armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE));
|
||||
|
|
|
@ -780,28 +780,29 @@ void CPU::ARM64Recompiler::GenerateICacheCheckAndUpdate()
|
|||
const auto& ticks_reg = RWARG1;
|
||||
const auto& current_tag_reg = RWARG2;
|
||||
const auto& existing_tag_reg = RWARG3;
|
||||
const auto& fill_ticks_reg = w4;
|
||||
const auto& ticks_to_add_reg = w5;
|
||||
|
||||
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
||||
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
||||
if (fill_ticks <= 0)
|
||||
return;
|
||||
|
||||
armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks));
|
||||
armEmitMov(armAsm, current_tag_reg, current_pc);
|
||||
armEmitMov(armAsm, fill_ticks_reg, fill_ticks);
|
||||
|
||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||
{
|
||||
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
||||
if (fill_ticks <= 0)
|
||||
continue;
|
||||
|
||||
const u32 line = GetICacheLine(current_pc);
|
||||
const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
|
||||
|
||||
Label cache_hit;
|
||||
armAsm->ldr(existing_tag_reg, MemOperand(RSTATE, offset));
|
||||
armAsm->cmp(existing_tag_reg, current_tag_reg);
|
||||
armAsm->b(&cache_hit, eq);
|
||||
|
||||
armAsm->str(current_tag_reg, MemOperand(RSTATE, offset));
|
||||
armAsm->add(ticks_reg, ticks_reg, armCheckAddSubConstant(static_cast<u32>(fill_ticks)));
|
||||
armAsm->bind(&cache_hit);
|
||||
armAsm->cmp(existing_tag_reg, current_tag_reg);
|
||||
armAsm->csel(ticks_to_add_reg, fill_ticks_reg, wzr, ne);
|
||||
armAsm->add(ticks_reg, ticks_reg, ticks_to_add_reg);
|
||||
|
||||
if (i != (m_block->icache_line_count - 1))
|
||||
armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE));
|
||||
|
|
|
@ -506,27 +506,32 @@ void CPU::X64Recompiler::GenerateICacheCheckAndUpdate()
|
|||
}
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
// RAM to ROM is not contiguous, therefore the cost will be the same across the entire block.
|
||||
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
||||
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
||||
if (fill_ticks <= 0)
|
||||
return;
|
||||
|
||||
cg->lea(RXARG1, cg->dword[PTR(&g_state.icache_tags)]);
|
||||
cg->xor_(RWARG2, RWARG2);
|
||||
cg->mov(RWARG4, fill_ticks);
|
||||
|
||||
// TODO: Vectorize this...
|
||||
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||
{
|
||||
const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc);
|
||||
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
||||
if (fill_ticks <= 0)
|
||||
continue;
|
||||
|
||||
const u32 line = GetICacheLine(current_pc);
|
||||
const u32 offset = (line * sizeof(u32));
|
||||
Xbyak::Label cache_hit;
|
||||
|
||||
cg->xor_(RWARG3, RWARG3);
|
||||
cg->cmp(cg->dword[RXARG1 + offset], tag);
|
||||
cg->je(cache_hit);
|
||||
cg->mov(cg->dword[RXARG1 + offset], tag);
|
||||
cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast<u32>(fill_ticks));
|
||||
cg->L(cache_hit);
|
||||
cg->cmovne(RWARG3, RWARG4);
|
||||
cg->add(RWARG2, RWARG3);
|
||||
}
|
||||
|
||||
cg->add(cg->dword[PTR(&g_state.pending_ticks)], RWARG2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue