CPU/Recompiler: Use condition select for ICache updates
Gives a tidy ~4% performance boost.
This commit is contained in:
parent
666fee2df7
commit
9d52e27e16
|
@ -617,13 +617,19 @@ void CPU::ARM32Recompiler::GenerateICacheCheckAndUpdate()
|
||||||
}
|
}
|
||||||
else if (m_block->icache_line_count > 0)
|
else if (m_block->icache_line_count > 0)
|
||||||
{
|
{
|
||||||
|
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
||||||
|
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
||||||
|
if (fill_ticks <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
const auto& ticks_reg = RARG1;
|
const auto& ticks_reg = RARG1;
|
||||||
const auto& current_tag_reg = RARG2;
|
const auto& current_tag_reg = RARG2;
|
||||||
const auto& existing_tag_reg = RARG3;
|
const auto& existing_tag_reg = RARG3;
|
||||||
|
const auto& fill_ticks_reg = r5;
|
||||||
|
|
||||||
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
|
||||||
armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks));
|
armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks));
|
||||||
armEmitMov(armAsm, current_tag_reg, current_pc);
|
armEmitMov(armAsm, current_tag_reg, current_pc);
|
||||||
|
armEmitMov(armAsm, fill_ticks_reg, fill_ticks);
|
||||||
|
|
||||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||||
{
|
{
|
||||||
|
@ -644,12 +650,9 @@ void CPU::ARM32Recompiler::GenerateICacheCheckAndUpdate()
|
||||||
|
|
||||||
Label cache_hit;
|
Label cache_hit;
|
||||||
armAsm->ldr(existing_tag_reg, line_addr);
|
armAsm->ldr(existing_tag_reg, line_addr);
|
||||||
armAsm->cmp(existing_tag_reg, current_tag_reg);
|
|
||||||
armAsm->b(eq, &cache_hit);
|
|
||||||
|
|
||||||
armAsm->str(current_tag_reg, line_addr);
|
armAsm->str(current_tag_reg, line_addr);
|
||||||
armAsm->add(ticks_reg, ticks_reg, armCheckAddSubConstant(static_cast<u32>(fill_ticks)));
|
armAsm->cmp(existing_tag_reg, current_tag_reg);
|
||||||
armAsm->bind(&cache_hit);
|
armAsm->add(ne, ticks_reg, ticks_reg, fill_ticks_reg);
|
||||||
|
|
||||||
if (i != (m_block->icache_line_count - 1))
|
if (i != (m_block->icache_line_count - 1))
|
||||||
armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE));
|
armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE));
|
||||||
|
|
|
@ -780,28 +780,29 @@ void CPU::ARM64Recompiler::GenerateICacheCheckAndUpdate()
|
||||||
const auto& ticks_reg = RWARG1;
|
const auto& ticks_reg = RWARG1;
|
||||||
const auto& current_tag_reg = RWARG2;
|
const auto& current_tag_reg = RWARG2;
|
||||||
const auto& existing_tag_reg = RWARG3;
|
const auto& existing_tag_reg = RWARG3;
|
||||||
|
const auto& fill_ticks_reg = w4;
|
||||||
|
const auto& ticks_to_add_reg = w5;
|
||||||
|
|
||||||
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
||||||
|
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
||||||
|
if (fill_ticks <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks));
|
armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks));
|
||||||
armEmitMov(armAsm, current_tag_reg, current_pc);
|
armEmitMov(armAsm, current_tag_reg, current_pc);
|
||||||
|
armEmitMov(armAsm, fill_ticks_reg, fill_ticks);
|
||||||
|
|
||||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||||
{
|
{
|
||||||
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
|
||||||
if (fill_ticks <= 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const u32 line = GetICacheLine(current_pc);
|
const u32 line = GetICacheLine(current_pc);
|
||||||
const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
|
const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
|
||||||
|
|
||||||
Label cache_hit;
|
Label cache_hit;
|
||||||
armAsm->ldr(existing_tag_reg, MemOperand(RSTATE, offset));
|
armAsm->ldr(existing_tag_reg, MemOperand(RSTATE, offset));
|
||||||
armAsm->cmp(existing_tag_reg, current_tag_reg);
|
|
||||||
armAsm->b(&cache_hit, eq);
|
|
||||||
|
|
||||||
armAsm->str(current_tag_reg, MemOperand(RSTATE, offset));
|
armAsm->str(current_tag_reg, MemOperand(RSTATE, offset));
|
||||||
armAsm->add(ticks_reg, ticks_reg, armCheckAddSubConstant(static_cast<u32>(fill_ticks)));
|
armAsm->cmp(existing_tag_reg, current_tag_reg);
|
||||||
armAsm->bind(&cache_hit);
|
armAsm->csel(ticks_to_add_reg, fill_ticks_reg, wzr, ne);
|
||||||
|
armAsm->add(ticks_reg, ticks_reg, ticks_to_add_reg);
|
||||||
|
|
||||||
if (i != (m_block->icache_line_count - 1))
|
if (i != (m_block->icache_line_count - 1))
|
||||||
armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE));
|
armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE));
|
||||||
|
|
|
@ -506,27 +506,32 @@ void CPU::X64Recompiler::GenerateICacheCheckAndUpdate()
|
||||||
}
|
}
|
||||||
else if (m_block->icache_line_count > 0)
|
else if (m_block->icache_line_count > 0)
|
||||||
{
|
{
|
||||||
|
// RAM to ROM is not contiguous, therefore the cost will be the same across the entire block.
|
||||||
|
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
||||||
|
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
||||||
|
if (fill_ticks <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
cg->lea(RXARG1, cg->dword[PTR(&g_state.icache_tags)]);
|
cg->lea(RXARG1, cg->dword[PTR(&g_state.icache_tags)]);
|
||||||
|
cg->xor_(RWARG2, RWARG2);
|
||||||
|
cg->mov(RWARG4, fill_ticks);
|
||||||
|
|
||||||
// TODO: Vectorize this...
|
// TODO: Vectorize this...
|
||||||
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
|
|
||||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||||
{
|
{
|
||||||
const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc);
|
const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc);
|
||||||
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
|
|
||||||
if (fill_ticks <= 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const u32 line = GetICacheLine(current_pc);
|
const u32 line = GetICacheLine(current_pc);
|
||||||
const u32 offset = (line * sizeof(u32));
|
const u32 offset = (line * sizeof(u32));
|
||||||
Xbyak::Label cache_hit;
|
|
||||||
|
|
||||||
|
cg->xor_(RWARG3, RWARG3);
|
||||||
cg->cmp(cg->dword[RXARG1 + offset], tag);
|
cg->cmp(cg->dword[RXARG1 + offset], tag);
|
||||||
cg->je(cache_hit);
|
|
||||||
cg->mov(cg->dword[RXARG1 + offset], tag);
|
cg->mov(cg->dword[RXARG1 + offset], tag);
|
||||||
cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast<u32>(fill_ticks));
|
cg->cmovne(RWARG3, RWARG4);
|
||||||
cg->L(cache_hit);
|
cg->add(RWARG2, RWARG3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cg->add(cg->dword[PTR(&g_state.pending_ticks)], RWARG2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue