CPU/Interpreter: Address ICache lines in words

Might help on ARM32, because ICache lines are now read as aligned words rather than through potentially unaligned byte accesses.

Otherwise, this gives a ~23% perf boost in debug builds, but that is
pretty meaningless.
This commit is contained in:
Stenzek 2024-12-13 20:40:16 +10:00
parent 0dbab167a8
commit 2f6eaa1d43
No known key found for this signature in database
4 changed files with 27 additions and 25 deletions

View File

@ -1476,10 +1476,10 @@ template<MemoryAccessSize size>
u32 Bus::ICacheReadHandler(VirtualMemoryAddress address) u32 Bus::ICacheReadHandler(VirtualMemoryAddress address)
{ {
const u32 line = CPU::GetICacheLine(address); const u32 line = CPU::GetICacheLine(address);
const u8* line_data = &CPU::g_state.icache_data[line * CPU::ICACHE_LINE_SIZE]; const u32* line_data = &CPU::g_state.icache_data[line * CPU::ICACHE_WORDS_PER_LINE];
const u32 offset = CPU::GetICacheLineOffset(address); const u32 offset = CPU::GetICacheLineOffset(address);
u32 result; u32 result;
std::memcpy(&result, &line_data[offset], sizeof(result)); std::memcpy(&result, reinterpret_cast<const u8*>(line_data) + offset, sizeof(result));
return result; return result;
} }
@ -1487,14 +1487,15 @@ template<MemoryAccessSize size>
void Bus::ICacheWriteHandler(VirtualMemoryAddress address, u32 value) void Bus::ICacheWriteHandler(VirtualMemoryAddress address, u32 value)
{ {
const u32 line = CPU::GetICacheLine(address); const u32 line = CPU::GetICacheLine(address);
u32* line_data = &CPU::g_state.icache_data[line * CPU::ICACHE_WORDS_PER_LINE];
const u32 offset = CPU::GetICacheLineOffset(address); const u32 offset = CPU::GetICacheLineOffset(address);
CPU::g_state.icache_tags[line] = CPU::GetICacheTagForAddress(address) | CPU::ICACHE_INVALID_BITS; CPU::g_state.icache_tags[line] = CPU::GetICacheTagForAddress(address) | CPU::ICACHE_INVALID_BITS;
if constexpr (size == MemoryAccessSize::Byte) if constexpr (size == MemoryAccessSize::Byte)
std::memcpy(&CPU::g_state.icache_data[line * CPU::ICACHE_LINE_SIZE + offset], &value, sizeof(u8)); std::memcpy(reinterpret_cast<u8*>(line_data) + offset, &value, sizeof(u8));
else if constexpr (size == MemoryAccessSize::HalfWord) else if constexpr (size == MemoryAccessSize::HalfWord)
std::memcpy(&CPU::g_state.icache_data[line * CPU::ICACHE_LINE_SIZE + offset], &value, sizeof(u16)); std::memcpy(reinterpret_cast<u8*>(line_data) + offset, &value, sizeof(u16));
else else
std::memcpy(&CPU::g_state.icache_data[line * CPU::ICACHE_LINE_SIZE + offset], &value, sizeof(u32)); std::memcpy(reinterpret_cast<u8*>(line_data) + offset, &value, sizeof(u32));
} }
template<MemoryAccessSize size> template<MemoryAccessSize size>

View File

@ -85,7 +85,7 @@ template<PGXPMode pgxp_mode, bool debug>
static bool FetchInstruction(); static bool FetchInstruction();
static bool FetchInstructionForInterpreterFallback(); static bool FetchInstructionForInterpreterFallback();
template<bool add_ticks, bool icache_read = false, u32 word_count = 1, bool raise_exceptions> template<bool add_ticks, bool icache_read = false, u32 word_count = 1, bool raise_exceptions>
static bool DoInstructionRead(PhysicalMemoryAddress address, void* data); static bool DoInstructionRead(PhysicalMemoryAddress address, u32* data);
template<MemoryAccessType type, MemoryAccessSize size> template<MemoryAccessType type, MemoryAccessSize size>
static bool DoSafeMemoryAccess(VirtualMemoryAddress address, u32& value); static bool DoSafeMemoryAccess(VirtualMemoryAddress address, u32& value);
template<MemoryAccessType type, MemoryAccessSize size> template<MemoryAccessType type, MemoryAccessSize size>
@ -2671,7 +2671,7 @@ void CPU::UpdateMemoryPointers()
} }
template<bool add_ticks, bool icache_read, u32 word_count, bool raise_exceptions> template<bool add_ticks, bool icache_read, u32 word_count, bool raise_exceptions>
ALWAYS_INLINE_RELEASE bool CPU::DoInstructionRead(PhysicalMemoryAddress address, void* data) ALWAYS_INLINE_RELEASE bool CPU::DoInstructionRead(PhysicalMemoryAddress address, u32* data)
{ {
using namespace Bus; using namespace Bus;
@ -2766,34 +2766,33 @@ void CPU::CheckAndUpdateICacheTags(u32 line_count)
u32 CPU::FillICache(VirtualMemoryAddress address) u32 CPU::FillICache(VirtualMemoryAddress address)
{ {
const u32 line = GetICacheLine(address); const u32 line = GetICacheLine(address);
u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE]; const u32 line_word_offset = GetICacheLineWordOffset(address);
u32* const line_data = g_state.icache_data.data() + (line * ICACHE_WORDS_PER_LINE);
u32* const offset_line_data = line_data + line_word_offset;
u32 line_tag; u32 line_tag;
switch ((address >> 2) & 0x03u) switch (line_word_offset)
{ {
case 0: case 0:
DoInstructionRead<true, true, 4, false>(address & ~(ICACHE_LINE_SIZE - 1u), line_data); DoInstructionRead<true, true, 4, false>(address & ~(ICACHE_LINE_SIZE - 1u), offset_line_data);
line_tag = GetICacheTagForAddress(address); line_tag = GetICacheTagForAddress(address);
break; break;
case 1: case 1:
DoInstructionRead<true, true, 3, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0x4), line_data + 0x4); DoInstructionRead<true, true, 3, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0x4), offset_line_data);
line_tag = GetICacheTagForAddress(address) | 0x1; line_tag = GetICacheTagForAddress(address) | 0x1;
break; break;
case 2: case 2:
DoInstructionRead<true, true, 2, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0x8), line_data + 0x8); DoInstructionRead<true, true, 2, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0x8), offset_line_data);
line_tag = GetICacheTagForAddress(address) | 0x3; line_tag = GetICacheTagForAddress(address) | 0x3;
break; break;
case 3: case 3:
default: default:
DoInstructionRead<true, true, 1, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0xC), line_data + 0xC); DoInstructionRead<true, true, 1, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0xC), offset_line_data);
line_tag = GetICacheTagForAddress(address) | 0x7; line_tag = GetICacheTagForAddress(address) | 0x7;
break; break;
} }
g_state.icache_tags[line] = line_tag;
const u32 offset = GetICacheLineOffset(address); g_state.icache_tags[line] = line_tag;
u32 result; return offset_line_data[0];
std::memcpy(&result, &line_data[offset], sizeof(result));
return result;
} }
void CPU::ClearICache() void CPU::ClearICache()
@ -2806,11 +2805,9 @@ namespace CPU {
ALWAYS_INLINE_RELEASE static u32 ReadICache(VirtualMemoryAddress address) ALWAYS_INLINE_RELEASE static u32 ReadICache(VirtualMemoryAddress address)
{ {
const u32 line = GetICacheLine(address); const u32 line = GetICacheLine(address);
const u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE]; const u32 line_word_offset = GetICacheLineWordOffset(address);
const u32 offset = GetICacheLineOffset(address); const u32* const line_data = g_state.icache_data.data() + (line * ICACHE_WORDS_PER_LINE);
u32 result; return line_data[line_word_offset];
std::memcpy(&result, &line_data[offset], sizeof(result));
return result;
} }
} // namespace CPU } // namespace CPU

View File

@ -32,7 +32,7 @@ enum : PhysicalMemoryAddress
ICACHE_SLOTS = ICACHE_SIZE / sizeof(u32), ICACHE_SLOTS = ICACHE_SIZE / sizeof(u32),
ICACHE_LINE_SIZE = 16, ICACHE_LINE_SIZE = 16,
ICACHE_LINES = ICACHE_SIZE / ICACHE_LINE_SIZE, ICACHE_LINES = ICACHE_SIZE / ICACHE_LINE_SIZE,
ICACHE_SLOTS_PER_LINE = ICACHE_SLOTS / ICACHE_LINES, ICACHE_WORDS_PER_LINE = ICACHE_SLOTS / ICACHE_LINES,
ICACHE_TAG_ADDRESS_MASK = 0xFFFFFFF0u, ICACHE_TAG_ADDRESS_MASK = 0xFFFFFFF0u,
ICACHE_INVALID_BITS = 0x0Fu, ICACHE_INVALID_BITS = 0x0Fu,
}; };
@ -117,7 +117,7 @@ struct State
PGXPValue pgxp_gte[64] = {}; PGXPValue pgxp_gte[64] = {};
std::array<u32, ICACHE_LINES> icache_tags = {}; std::array<u32, ICACHE_LINES> icache_tags = {};
std::array<u8, ICACHE_SIZE> icache_data = {}; std::array<u32, ICACHE_LINES * ICACHE_WORDS_PER_LINE> icache_data = {};
std::array<u8, SCRATCHPAD_SIZE> scratchpad = {}; std::array<u8, SCRATCHPAD_SIZE> scratchpad = {};

View File

@ -48,6 +48,10 @@ ALWAYS_INLINE static u32 GetICacheLineOffset(VirtualMemoryAddress address)
{ {
return (address & (ICACHE_LINE_SIZE - 1)); return (address & (ICACHE_LINE_SIZE - 1));
} }
ALWAYS_INLINE static u32 GetICacheLineWordOffset(VirtualMemoryAddress address)
{
return (address >> 2) & 0x03u;
}
ALWAYS_INLINE static u32 GetICacheTagForAddress(VirtualMemoryAddress address) ALWAYS_INLINE static u32 GetICacheTagForAddress(VirtualMemoryAddress address)
{ {
return (address & ICACHE_TAG_ADDRESS_MASK); return (address & ICACHE_TAG_ADDRESS_MASK);