From 4deecc7d65e61c13d214b46c105dcfb381aacc54 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Sun, 21 Jul 2019 13:36:48 +0200 Subject: [PATCH] jit: decrease blockcache AddrMapping size for ARM9 --- src/ARM.cpp | 8 ++-- src/ARMJIT.cpp | 18 +++++--- src/ARMJIT.h | 73 +++++++++++++++++++++--------- src/ARMJIT_x64/ARMJIT_Compiler.cpp | 4 +- src/NDS.cpp | 12 ++--- 5 files changed, 77 insertions(+), 38 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index bfe18902..dd0be6af 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -609,14 +609,14 @@ void ARMv5::ExecuteJIT() while (NDS::ARM9Timestamp < NDS::ARM9Target) { u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); - if (!ARMJIT::IsMapped(0, instrAddr)) + if (!ARMJIT::IsMapped<0>(instrAddr)) { NDS::ARM9Timestamp = NDS::ARM9Target; printf("ARMv5 PC in non executable region %08X\n", R[15]); return; } - ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, instrAddr); + ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock<0>(instrAddr); Cycles += (block ? block : ARMJIT::CompileBlock(this))(); if (Halted) @@ -740,13 +740,13 @@ void ARMv4::ExecuteJIT() while (NDS::ARM7Timestamp < NDS::ARM7Target) { u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); - if (!ARMJIT::IsMapped(1, instrAddr)) + if (!ARMJIT::IsMapped<1>(instrAddr)) { NDS::ARM7Timestamp = NDS::ARM7Target; printf("ARMv4 PC in non executable region %08X\n", R[15]); return; } - ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, instrAddr); + ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock<1>(instrAddr); Cycles += (block ? block : ARMJIT::CompileBlock(this))(); // TODO optimize this shit!!! diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index e8e6be0f..aad14c0d 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -109,11 +109,14 @@ void Init() { memset(&cache, 0, sizeof(BlockCache)); - for (int cpu = 0; cpu < 2; cpu++) - for (int i = 0; i < 0x4000; i++) - cache.AddrMapping[cpu][i] = JIT_MEM[cpu][i >> 9] == -1 ? 
NULL : - (CompiledBlock*)((u8*)&cache + JIT_MEM[cpu][i >> 9]) - + (((i << 14) & JIT_MASK[cpu][i >> 9]) >> 1); + for (int i = 0; i < 0x2000; i++) + cache.AddrMapping9[i] = JIT_MEM[0][i >> 8] == -1 ? NULL : + (CompiledBlock*)((u8*)&cache + JIT_MEM[0][i >> 8]) + + (((i << 15) & JIT_MASK[0][i >> 8]) >> 1); + for (int i = 0; i < 0x4000; i++) + cache.AddrMapping7[i] = JIT_MEM[1][i >> 9] == -1 ? NULL : + (CompiledBlock*)((u8*)&cache + JIT_MEM[1][i >> 9]) + + (((i << 14) & JIT_MASK[1][i >> 9]) >> 1); compiler = new Compiler(); } @@ -175,7 +178,10 @@ CompiledBlock CompileBlock(ARM* cpu) CompiledBlock block = compiler->CompileBlock(cpu, instrs, i); - InsertBlock(cpu->Num, blockAddr, block); + if (cpu->Num == 0) + InsertBlock<0>(blockAddr, block); + else + InsertBlock<1>(blockAddr, block); return block; } diff --git a/src/ARMJIT.h b/src/ARMJIT.h index 004256c2..0fc1c385 100644 --- a/src/ARMJIT.h +++ b/src/ARMJIT.h @@ -47,9 +47,11 @@ struct FetchedInstr a function which executes a block instructions starting from there. The most significant 4 bits of each address is ignored. This 28 bit space is - divided into 0x4000 16 KB blocks, each of which a pointer to the relevant - place inside the before mentioned arrays. Only half of the bytes need to be - addressed (ARM address are aligned to 4, Thumb addresses to a 2 byte boundary). + divided into 0x2000 32 KB blocks for ARM9 and 0x4000 16 KB blocks for ARM7, each of which has + a pointer to the relevant place inside the aforementioned arrays. 32 and 16 KB + are the sizes of the smallest contiguous memory region mapped to the respective CPU. + Because ARM addresses are always aligned to 4 bytes and Thumb to a 2 byte boundary, + we only need every second byte (i.e. each half word) to be addressable. In case a memory write hits mapped memory, the function block at this address is set to null, so it's recompiled the next time it's executed. 
@@ -61,7 +63,8 @@ struct FetchedInstr struct BlockCache { - CompiledBlock* AddrMapping[2][0x4000] = {0}; + CompiledBlock* AddrMapping9[0x2000] = {0}; + CompiledBlock* AddrMapping7[0x4000] = {0}; CompiledBlock MainRAM[4*1024*1024/2]; CompiledBlock SWRAM[0x8000/2]; // Shared working RAM @@ -75,35 +78,63 @@ struct BlockCache extern BlockCache cache; -inline bool IsMapped(u32 num, u32 addr) +template +inline bool IsMapped(u32 addr) { - return cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14]; + if (num == 0) + return cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15]; + else + return cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14]; } -inline CompiledBlock LookUpBlock(u32 num, u32 addr) +template +inline CompiledBlock LookUpBlock(u32 addr) { - return cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1]; + if (num == 0) + return cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15][(addr & 0x7FFF) >> 1]; + else + return cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1]; } -inline void Invalidate16(u32 num, u32 addr) +template +inline void Invalidate16(u32 addr) { - if (IsMapped(num, addr)) - cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1] = NULL; -} - -inline void Invalidate32(u32 num, u32 addr) -{ - if (IsMapped(num, addr)) + if (IsMapped(addr)) { - CompiledBlock* page = cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14]; - page[(addr & 0x3FFF) >> 1] = NULL; - page[((addr + 2) & 0x3FFF) >> 1] = NULL; + if (num == 0) + cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15][(addr & 0x7FFF) >> 1] = NULL; + else + cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1] = NULL; } } -inline void InsertBlock(u32 num, u32 addr, CompiledBlock func) +template +inline void Invalidate32(u32 addr) { - cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1] = func; + if (IsMapped(addr)) + { + if (num == 0) + { + CompiledBlock* page = cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15]; + page[(addr & 0x7FFF) >> 1] = NULL; + page[((addr + 
2) & 0x7FFF) >> 1] = NULL; + } + else + { + CompiledBlock* page = cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14]; + page[(addr & 0x3FFF) >> 1] = NULL; + page[((addr + 2) & 0x3FFF) >> 1] = NULL; + } + } +} + +template +inline void InsertBlock(u32 addr, CompiledBlock func) +{ + if (num == 0) + cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15][(addr & 0x7FFF) >> 1] = func; + else + cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1] = func; } void Init(); diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 1e871fdd..cb11f732 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -363,7 +363,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs CompiledBlock res = (CompiledBlock)GetWritableCodePtr(); - if (!IsMapped(Num, R15 - Thumb ? 2 : 4)) + if (!(Num == 0 + ? IsMapped<0>(R15 - (Thumb ? 2 : 4)) + : IsMapped<1>(R15 - (Thumb ? 2 : 4)))) { printf("Trying to compile a block in unmapped memory\n"); } diff --git a/src/NDS.cpp b/src/NDS.cpp index 7636a07b..3de9c1fa 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -2013,7 +2013,7 @@ u32 ARM9Read32(u32 addr) void ARM9Write8(u32 addr, u8 val) { #ifdef JIT_ENABLED - ARMJIT::Invalidate16(0, addr); + ARMJIT::Invalidate16<0>(addr); #endif switch (addr & 0xFF000000) @@ -2067,7 +2067,7 @@ void ARM9Write8(u32 addr, u8 val) void ARM9Write16(u32 addr, u16 val) { #ifdef JIT_ENABLED - ARMJIT::Invalidate16(0, addr); + ARMJIT::Invalidate16<0>(addr); #endif switch (addr & 0xFF000000) @@ -2137,7 +2137,7 @@ void ARM9Write16(u32 addr, u16 val) void ARM9Write32(u32 addr, u32 val) { #ifdef JIT_ENABLED - ARMJIT::Invalidate32(0, addr); + ARMJIT::Invalidate32<0>(addr); #endif switch (addr & 0xFF000000) @@ -2435,7 +2435,7 @@ u32 ARM7Read32(u32 addr) void ARM7Write8(u32 addr, u8 val) { #ifdef JIT_ENABLED - ARMJIT::Invalidate16(1, addr); + ARMJIT::Invalidate16<1>(addr); #endif switch (addr & 0xFF800000) @@ -2498,7 +2498,7 @@ void 
ARM7Write8(u32 addr, u8 val) void ARM7Write16(u32 addr, u16 val) { #ifdef JIT_ENABLED - ARMJIT::Invalidate16(1, addr); + ARMJIT::Invalidate16<1>(addr); #endif switch (addr & 0xFF800000) @@ -2571,7 +2571,7 @@ void ARM7Write16(u32 addr, u16 val) void ARM7Write32(u32 addr, u32 val) { #ifdef JIT_ENABLED - ARMJIT::Invalidate32(1, addr); + ARMJIT::Invalidate32<1>(addr); #endif switch (addr & 0xFF800000)