Optimize JitCache::InvalidateICache by maintaining a "valid blocks" bitset
Most InvalidateICache calls cover a single 32-byte block: this is the number of bytes invalidated by the PowerPC dcb*/icb* instructions. Profiling shows that a lot of CPU time is spent checking whether any JIT blocks are covered by these 32 bytes (using std::map::lower_bound).

This patch adds a bitset containing the state of every 32-byte block in RAM (JIT cached/not JIT cached). Using that, a 32-byte InvalidateICache can check in the bitset whether any JIT block might be invalidated. A bitset check is a lot faster than a std::map::lower_bound operation, improving performance of JitCache::InvalidateICache by more than 100%.

Some practical numbers:
* Xenoblade Chronicles (PAL) 56.04FPS -> 59.28FPS (+5.78%)
* The Last Story (PAL) 30.9FPS -> 32.83FPS (+6.25%)
* Super Mario Galaxy (PAL) 59.76FPS -> 62.46FPS (+4.52%)

This function still takes more time than it should - more optimization in this area might be possible (for example, specializing for 32-byte blocks to avoid a useless memcpy).
This commit is contained in:
parent
8cefcaa94c
commit
3990002250
|
@ -142,6 +142,7 @@ bool JitBlock::ContainsAddress(u32 em_address)
|
|||
}
|
||||
links_to.clear();
|
||||
block_map.clear();
|
||||
valid_block.reset();
|
||||
num_blocks = 0;
|
||||
memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS);
|
||||
}
|
||||
|
@ -220,6 +221,9 @@ bool JitBlock::ContainsAddress(u32 em_address)
|
|||
// Convert the logical address to a physical address for the block map
|
||||
u32 pAddr = b.originalAddress & 0x1FFFFFFF;
|
||||
|
||||
for (u32 i = 0; i < (b.originalSize + 7) / 8; ++i)
|
||||
valid_block[pAddr / 32 + i] = true;
|
||||
|
||||
block_map[std::make_pair(pAddr + 4 * b.originalSize - 1, pAddr)] = block_num;
|
||||
if (block_link)
|
||||
{
|
||||
|
@ -440,35 +444,48 @@ bool JitBlock::ContainsAddress(u32 em_address)
|
|||
// Convert the logical address to a physical address for the block map
|
||||
u32 pAddr = address & 0x1FFFFFFF;
|
||||
|
||||
// Optimize the common case of length == 32 which is used by Interpreter::dcb*
|
||||
bool destroy_block = true;
|
||||
if (length == 32)
|
||||
{
|
||||
if (!valid_block[pAddr / 32])
|
||||
destroy_block = false;
|
||||
else
|
||||
valid_block[pAddr / 32] = false;
|
||||
}
|
||||
|
||||
// destroy JIT blocks
|
||||
// !! this works correctly under assumption that any two overlapping blocks end at the same address
|
||||
std::map<pair<u32,u32>, u32>::iterator it1 = block_map.lower_bound(std::make_pair(pAddr, 0)), it2 = it1, it;
|
||||
while (it2 != block_map.end() && it2->first.second < pAddr + length)
|
||||
if (destroy_block)
|
||||
{
|
||||
std::map<pair<u32,u32>, u32>::iterator it1 = block_map.lower_bound(std::make_pair(pAddr, 0)), it2 = it1, it;
|
||||
while (it2 != block_map.end() && it2->first.second < pAddr + length)
|
||||
{
|
||||
#ifdef JIT_UNLIMITED_ICACHE
|
||||
JitBlock &b = blocks[it2->second];
|
||||
if (b.originalAddress & JIT_ICACHE_VMEM_BIT)
|
||||
{
|
||||
u32 cacheaddr = b.originalAddress & JIT_ICACHE_MASK;
|
||||
memset(iCacheVMEM + cacheaddr, JIT_ICACHE_INVALID_BYTE, 4);
|
||||
}
|
||||
else if (b.originalAddress & JIT_ICACHE_EXRAM_BIT)
|
||||
{
|
||||
u32 cacheaddr = b.originalAddress & JIT_ICACHEEX_MASK;
|
||||
memset(iCacheEx + cacheaddr, JIT_ICACHE_INVALID_BYTE, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cacheaddr = b.originalAddress & JIT_ICACHE_MASK;
|
||||
memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, 4);
|
||||
}
|
||||
JitBlock &b = blocks[it2->second];
|
||||
if (b.originalAddress & JIT_ICACHE_VMEM_BIT)
|
||||
{
|
||||
u32 cacheaddr = b.originalAddress & JIT_ICACHE_MASK;
|
||||
memset(iCacheVMEM + cacheaddr, JIT_ICACHE_INVALID_BYTE, 4);
|
||||
}
|
||||
else if (b.originalAddress & JIT_ICACHE_EXRAM_BIT)
|
||||
{
|
||||
u32 cacheaddr = b.originalAddress & JIT_ICACHEEX_MASK;
|
||||
memset(iCacheEx + cacheaddr, JIT_ICACHE_INVALID_BYTE, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cacheaddr = b.originalAddress & JIT_ICACHE_MASK;
|
||||
memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, 4);
|
||||
}
|
||||
#endif
|
||||
DestroyBlock(it2->second, true);
|
||||
it2++;
|
||||
}
|
||||
if (it1 != it2)
|
||||
{
|
||||
block_map.erase(it1, it2);
|
||||
DestroyBlock(it2->second, true);
|
||||
it2++;
|
||||
}
|
||||
if (it1 != it2)
|
||||
{
|
||||
block_map.erase(it1, it2);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef JIT_UNLIMITED_ICACHE
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#ifndef _JITCACHE_H
|
||||
#define _JITCACHE_H
|
||||
|
||||
#include <bitset>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
|
@ -84,6 +85,7 @@ class JitBlockCache
|
|||
int num_blocks;
|
||||
std::multimap<u32, int> links_to;
|
||||
std::map<std::pair<u32,u32>, u32> block_map; // (end_addr, start_addr) -> number
|
||||
std::bitset<0x20000000 / 32> valid_block;
|
||||
#ifdef JIT_UNLIMITED_ICACHE
|
||||
u8 *iCache;
|
||||
u8 *iCacheEx;
|
||||
|
|
Loading…
Reference in New Issue