CPU/Interpreter: Address ICache lines in words

Might help on ARM32, because instruction fetches from the ICache no longer
need unaligned access.

Otherwise, ~23% perf boost in debug builds, but that is pretty
meaningless.
This commit is contained in:
Stenzek 2024-12-13 20:40:16 +10:00
parent 0dbab167a8
commit 2f6eaa1d43
No known key found for this signature in database
4 changed files with 27 additions and 25 deletions

View File

@ -1476,10 +1476,10 @@ template<MemoryAccessSize size>
// Reads four bytes from the CPU instruction cache data array, at the line and byte offset
// selected by `address`, and returns them as a u32.
// NOTE(review): this text is a rendered diff without +/- markers. The two `line_data`
// declarations and the two memcpy calls below look like removed/added pairs (old byte-indexed
// form first, new word-indexed form second); presumably only one of each exists in the real file.
u32 Bus::ICacheReadHandler(VirtualMemoryAddress address)
{
const u32 line = CPU::GetICacheLine(address);
// Byte-array form: the line starts line * ICACHE_LINE_SIZE elements into icache_data.
const u8* line_data = &CPU::g_state.icache_data[line * CPU::ICACHE_LINE_SIZE];
// Word-array form: the line starts line * ICACHE_WORDS_PER_LINE elements into icache_data.
const u32* line_data = &CPU::g_state.icache_data[line * CPU::ICACHE_WORDS_PER_LINE];
// Byte offset of the address within its cache line.
const u32 offset = CPU::GetICacheLineOffset(address);
u32 result;
std::memcpy(&result, &line_data[offset], sizeof(result));
// The offset is in bytes, so the u32 pointer is viewed as bytes before the offset is applied.
// NOTE(review): nothing visible here restricts offset to a multiple of four; an offset above 12
// would make this 4-byte copy run past the end of the line. Presumably callers only pass
// word-aligned addresses; confirm.
std::memcpy(&result, reinterpret_cast<const u8*>(line_data) + offset, sizeof(result));
return result;
}
@ -1487,14 +1487,15 @@ template<MemoryAccessSize size>
// Stores part of `value` into the CPU instruction cache data array at the line and byte offset
// selected by `address`, and rewrites that line's tag with ICACHE_INVALID_BITS set.
// The number of bytes copied depends on the `size` template argument: one for Byte, two for
// HalfWord, four otherwise.
// NOTE(review): this text is a rendered diff without +/- markers. Each branch below shows two
// memcpy calls that look like a removed/added pair (old direct indexing into icache_data first,
// new `line_data`-based form second); presumably only one of each exists in the real file.
void Bus::ICacheWriteHandler(VirtualMemoryAddress address, u32 value)
{
const u32 line = CPU::GetICacheLine(address);
// Start of the selected line in the word-typed icache_data array.
u32* line_data = &CPU::g_state.icache_data[line * CPU::ICACHE_WORDS_PER_LINE];
// Byte offset of the address within its cache line.
const u32 offset = CPU::GetICacheLineOffset(address);
// The tag keeps the address bits and ORs in ICACHE_INVALID_BITS.
CPU::g_state.icache_tags[line] = CPU::GetICacheTagForAddress(address) | CPU::ICACHE_INVALID_BITS;
// Each copy takes the first N bytes of `value` as laid out in memory.
// NOTE(review): nothing visible here keeps offset + N within the 16-byte line for unaligned
// addresses. Presumably callers pass suitably aligned addresses; confirm.
if constexpr (size == MemoryAccessSize::Byte)
std::memcpy(&CPU::g_state.icache_data[line * CPU::ICACHE_LINE_SIZE + offset], &value, sizeof(u8));
std::memcpy(reinterpret_cast<u8*>(line_data) + offset, &value, sizeof(u8));
else if constexpr (size == MemoryAccessSize::HalfWord)
std::memcpy(&CPU::g_state.icache_data[line * CPU::ICACHE_LINE_SIZE + offset], &value, sizeof(u16));
std::memcpy(reinterpret_cast<u8*>(line_data) + offset, &value, sizeof(u16));
else
std::memcpy(&CPU::g_state.icache_data[line * CPU::ICACHE_LINE_SIZE + offset], &value, sizeof(u32));
std::memcpy(reinterpret_cast<u8*>(line_data) + offset, &value, sizeof(u32));
}
template<MemoryAccessSize size>

View File

@ -85,7 +85,7 @@ template<PGXPMode pgxp_mode, bool debug>
static bool FetchInstruction();
static bool FetchInstructionForInterpreterFallback();
// Forward declaration of the instruction read helper. Template arguments are, in order:
// add_ticks, icache_read, word_count and raise_exceptions.
// NOTE(review): this text is a rendered diff without +/- markers. The two prototypes below look
// like a removed/added pair: the destination parameter changes from `void*` to `u32*`.
// Presumably only one of them exists in the real file.
template<bool add_ticks, bool icache_read = false, u32 word_count = 1, bool raise_exceptions>
static bool DoInstructionRead(PhysicalMemoryAddress address, void* data);
static bool DoInstructionRead(PhysicalMemoryAddress address, u32* data);
template<MemoryAccessType type, MemoryAccessSize size>
static bool DoSafeMemoryAccess(VirtualMemoryAddress address, u32& value);
template<MemoryAccessType type, MemoryAccessSize size>
@ -2671,7 +2671,7 @@ void CPU::UpdateMemoryPointers()
}
template<bool add_ticks, bool icache_read, u32 word_count, bool raise_exceptions>
ALWAYS_INLINE_RELEASE bool CPU::DoInstructionRead(PhysicalMemoryAddress address, void* data)
ALWAYS_INLINE_RELEASE bool CPU::DoInstructionRead(PhysicalMemoryAddress address, u32* data)
{
using namespace Bus;
@ -2766,34 +2766,33 @@ void CPU::CheckAndUpdateICacheTags(u32 line_count)
// Fills part of the instruction cache line selected by `address`, updates that line's tag, and
// returns the 32-bit word stored at `address`'s position in the line.
// The read starts at the word containing `address` and runs to the end of the 16-byte line, so
// the word count passed to DoInstructionRead is 4, 3, 2 or 1 for word offsets 0, 1, 2 or 3.
// NOTE(review): this text is a rendered diff without +/- markers. Throughout the body, adjacent
// near-duplicate statements look like removed/added pairs (old byte-pointer form first, new
// word-pointer form second); presumably only one of each exists in the real file.
u32 CPU::FillICache(VirtualMemoryAddress address)
{
const u32 line = GetICacheLine(address);
// Byte-pointer form of the line start.
u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE];
// Word-pointer form: index of the word within the line, the line start, and the position of
// the requested word inside the line.
const u32 line_word_offset = GetICacheLineWordOffset(address);
u32* const line_data = g_state.icache_data.data() + (line * ICACHE_WORDS_PER_LINE);
u32* const offset_line_data = line_data + line_word_offset;
u32 line_tag;
// Both switch expressions select on bits 2-3 of the address, i.e. the word index in the line.
switch ((address >> 2) & 0x03u)
switch (line_word_offset)
{
case 0:
// Whole line: read four words starting at the line-aligned address.
DoInstructionRead<true, true, 4, false>(address & ~(ICACHE_LINE_SIZE - 1u), line_data);
DoInstructionRead<true, true, 4, false>(address & ~(ICACHE_LINE_SIZE - 1u), offset_line_data);
line_tag = GetICacheTagForAddress(address);
break;
case 1:
// Skip the first word: read three words starting four bytes into the line.
// The byte-pointer form adds 0x4 bytes; offset_line_data is already one u32 past the start.
DoInstructionRead<true, true, 3, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0x4), line_data + 0x4);
DoInstructionRead<true, true, 3, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0x4), offset_line_data);
// Low tag bits grow with the number of leading words not fetched.
// NOTE(review): presumably one flag bit per skipped word; confirm against the tag check code.
line_tag = GetICacheTagForAddress(address) | 0x1;
break;
case 2:
// Skip the first two words: read two words starting eight bytes into the line.
DoInstructionRead<true, true, 2, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0x8), line_data + 0x8);
DoInstructionRead<true, true, 2, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0x8), offset_line_data);
line_tag = GetICacheTagForAddress(address) | 0x3;
break;
case 3:
default:
// Skip the first three words: read only the last word of the line.
DoInstructionRead<true, true, 1, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0xC), line_data + 0xC);
DoInstructionRead<true, true, 1, false>(address & (~(ICACHE_LINE_SIZE - 1u) | 0xC), offset_line_data);
line_tag = GetICacheTagForAddress(address) | 0x7;
break;
}
// Byte-pointer form of the epilogue: store the tag, then copy the requested word out of the line.
g_state.icache_tags[line] = line_tag;
const u32 offset = GetICacheLineOffset(address);
u32 result;
std::memcpy(&result, &line_data[offset], sizeof(result));
return result;
// Word-pointer form of the epilogue: store the tag, then return the requested word directly.
g_state.icache_tags[line] = line_tag;
return offset_line_data[0];
}
void CPU::ClearICache()
@ -2806,11 +2805,9 @@ namespace CPU {
// Returns the 32-bit word held in the instruction cache data array at the line and position
// selected by `address`. No tag check is performed in this function.
// NOTE(review): this text is a rendered diff without +/- markers. The memcpy-based statements
// and the indexed statements below look like the removed and added versions of the same body;
// presumably only one version exists in the real file.
ALWAYS_INLINE_RELEASE static u32 ReadICache(VirtualMemoryAddress address)
{
const u32 line = GetICacheLine(address);
// Byte-pointer form: locate the line in bytes and copy four bytes from the byte offset.
const u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE];
const u32 offset = GetICacheLineOffset(address);
u32 result;
std::memcpy(&result, &line_data[offset], sizeof(result));
return result;
// Word-pointer form: locate the line in u32 elements and index the word directly.
const u32 line_word_offset = GetICacheLineWordOffset(address);
const u32* const line_data = g_state.icache_data.data() + (line * ICACHE_WORDS_PER_LINE);
return line_data[line_word_offset];
}
} // namespace CPU

View File

@ -32,7 +32,7 @@ enum : PhysicalMemoryAddress
ICACHE_SLOTS = ICACHE_SIZE / sizeof(u32),
ICACHE_LINE_SIZE = 16,
ICACHE_LINES = ICACHE_SIZE / ICACHE_LINE_SIZE,
ICACHE_SLOTS_PER_LINE = ICACHE_SLOTS / ICACHE_LINES,
ICACHE_WORDS_PER_LINE = ICACHE_SLOTS / ICACHE_LINES,
ICACHE_TAG_ADDRESS_MASK = 0xFFFFFFF0u,
ICACHE_INVALID_BITS = 0x0Fu,
};
@ -117,7 +117,7 @@ struct State
PGXPValue pgxp_gte[64] = {};
std::array<u32, ICACHE_LINES> icache_tags = {};
std::array<u8, ICACHE_SIZE> icache_data = {};
std::array<u32, ICACHE_LINES * ICACHE_WORDS_PER_LINE> icache_data = {};
std::array<u8, SCRATCHPAD_SIZE> scratchpad = {};

View File

@ -48,6 +48,10 @@ ALWAYS_INLINE static u32 GetICacheLineOffset(VirtualMemoryAddress address)
{
return (address & (ICACHE_LINE_SIZE - 1));
}
/// Returns the index (0-3) of the 32-bit word that `address` falls in, taken from bits 2-3 of the address.
ALWAYS_INLINE static u32 GetICacheLineWordOffset(VirtualMemoryAddress address)
{
  // Drop the two byte-within-word bits, then keep the two bits that select the word.
  const u32 word_index = static_cast<u32>(address >> 2);
  return word_index & 0x03u;
}
ALWAYS_INLINE static u32 GetICacheTagForAddress(VirtualMemoryAddress address)
{
return (address & ICACHE_TAG_ADDRESS_MASK);