better caching?

This commit is contained in:
Joseph Mattiello 2025-07-06 16:49:22 -04:00
parent 2d7503153d
commit 381bccc292
3 changed files with 233 additions and 30 deletions

View File

@ -3,6 +3,11 @@
#include "hw/sh4/sh4_interrupts.h"
#include "hw/sh4/sh4_mem.h"
#include "oslib/oslib.h"
#include "shil_interpreter.h"
// Forward declarations for cache-friendly functions
extern "C" void CacheFriendlyShil_on_block_compiled();
extern "C" void shil_print_block_check_stats_wrapper();
// Simple stub function that will be used as the "compiled" code pointer
static void shil_interpreter_stub() {
@ -40,59 +45,96 @@ void ShilDynarec::mainloop(void* cntx) {
continue;
}
// Get the next block to execute
DynarecCodeEntryPtr code_ptr = rdv_FindOrCompile();
// **CACHE-FRIENDLY BLOCK LOOKUP**: Avoid using rdv_FindOrCompile which can trigger cache clears
RuntimeBlockInfoPtr block = bm_GetBlock(next_pc);
if (code_ptr == ngen_FailedToFindBlock) {
code_ptr = rdv_FailedToFindBlock(next_pc);
}
// Check if this is a SHIL interpreter block
if (code_ptr == (DynarecCodeEntryPtr)shil_interpreter_stub) {
// This is a SHIL block - execute via interpreter
RuntimeBlockInfoPtr block = bm_GetBlock(next_pc);
if (block) {
executeShilBlock(block.get());
if (block) {
// Block exists - execute it via SHIL interpreter
executeShilBlock(block.get());
// After executing the block, determine next PC based on block type
switch (BET_GET_CLS(block->BlockType)) {
case BET_CLS_Static:
if (block->BlockType == BET_StaticIntr) {
next_pc = block->NextBlock;
} else {
next_pc = block->BranchBlock;
}
break;
case BET_CLS_Dynamic:
// PC should have been set by the block execution
next_pc = Sh4cntx.pc;
break;
case BET_CLS_COND:
// Conditional branch - check the condition
if (sr.T) {
next_pc = block->BranchBlock;
} else {
next_pc = block->NextBlock;
}
break;
}
} else {
// **CACHE-FRIENDLY BLOCK COMPILATION**: Don't use rdv_FindOrCompile
// Compile new block using cache-friendly approach
RuntimeBlockInfo* new_block = allocateBlock();
if (new_block->Setup(next_pc, fpscr)) {
// Compile the block for SHIL interpretation
compile(new_block, !new_block->read_only, true);
// After executing the block, determine next PC based on block type
switch (BET_GET_CLS(block->BlockType)) {
// Add to block manager
bm_AddBlock(new_block);
// Track compilation for cache management
CacheFriendlyShil_on_block_compiled();
// Execute the newly compiled block
executeShilBlock(new_block);
// Update PC based on block type
switch (BET_GET_CLS(new_block->BlockType)) {
case BET_CLS_Static:
if (block->BlockType == BET_StaticIntr) {
next_pc = block->NextBlock;
if (new_block->BlockType == BET_StaticIntr) {
next_pc = new_block->NextBlock;
} else {
next_pc = block->BranchBlock;
next_pc = new_block->BranchBlock;
}
break;
case BET_CLS_Dynamic:
// PC should have been set by the block execution
next_pc = Sh4cntx.pc;
break;
case BET_CLS_COND:
// Conditional branch - check the condition
if (sr.T) {
next_pc = block->BranchBlock;
next_pc = new_block->BranchBlock;
} else {
next_pc = block->NextBlock;
next_pc = new_block->NextBlock;
}
break;
}
} else {
// Block not found - this shouldn't happen
ERROR_LOG(DYNAREC, "SHIL block not found for PC %08X", next_pc);
// Block setup failed - this shouldn't happen often
ERROR_LOG(DYNAREC, "SHIL: Block setup failed for PC %08X", next_pc);
delete new_block;
break;
}
} else {
// This is a regular JIT block - this shouldn't happen in SHIL mode
ERROR_LOG(DYNAREC, "Unexpected JIT block in SHIL mode at PC %08X", next_pc);
break;
}
// Print statistics periodically
static u32 stats_counter = 0;
if (++stats_counter % 50000 == 0) {
shil_interpreter_print_stats();
shil_print_block_check_stats_wrapper();
}
}
} catch (const SH4ThrownException& ex) {
// Handle SH4 exceptions
Do_Exception(next_pc, ex.expEvn);
} catch (const FlycastException& ex) {
} catch (const std::exception& ex) {
// Handle other exceptions
ERROR_LOG(DYNAREC, "Exception in SHIL mainloop: %s", ex.what());
sh4_int_bCpuRun = false;

View File

@ -932,6 +932,63 @@ void HybridRegisterCache::asm_mega_store() {
}
#endif
// === CACHE-FRIENDLY SHIL SYSTEM ===
// This prevents excessive cache clearing that destroys performance
struct CacheFriendlyShil {
// Track cache clears to prevent excessive clearing
static u32 cache_clear_count;
static u32 last_clear_time;
static u32 blocks_compiled_since_clear;
// Cache clear prevention thresholds
static constexpr u32 MIN_CLEAR_INTERVAL_MS = 5000; // Don't clear more than once per 5 seconds
static constexpr u32 MIN_BLOCKS_BEFORE_CLEAR = 100; // Need at least 100 blocks before clearing
// Override the aggressive cache clearing behavior
static bool should_prevent_cache_clear(u32 pc) {
u32 current_time = sh4_sched_now64() / (SH4_MAIN_CLOCK / 1000); // Convert to milliseconds
// Check if we're clearing too frequently
if (current_time - last_clear_time < MIN_CLEAR_INTERVAL_MS) {
INFO_LOG(DYNAREC, "SHIL: Preventing cache clear - too frequent (last clear %u ms ago)",
current_time - last_clear_time);
return true;
}
// Check if we have enough blocks to justify clearing
if (blocks_compiled_since_clear < MIN_BLOCKS_BEFORE_CLEAR) {
INFO_LOG(DYNAREC, "SHIL: Preventing cache clear - not enough blocks (%u < %u)",
blocks_compiled_since_clear, MIN_BLOCKS_BEFORE_CLEAR);
return true;
}
// Allow the clear but update tracking
cache_clear_count++;
last_clear_time = current_time;
blocks_compiled_since_clear = 0;
INFO_LOG(DYNAREC, "SHIL: Allowing cache clear #%u at PC=0x%08X", cache_clear_count, pc);
return false;
}
// Called when a new block is compiled
static void on_block_compiled() {
blocks_compiled_since_clear++;
}
// Statistics
static void print_cache_stats() {
INFO_LOG(DYNAREC, "SHIL Cache Stats: %u total clears, %u blocks since last clear",
cache_clear_count, blocks_compiled_since_clear);
}
};
// Static member definitions
u32 CacheFriendlyShil::cache_clear_count = 0;
u32 CacheFriendlyShil::last_clear_time = 0;
u32 CacheFriendlyShil::blocks_compiled_since_clear = 0;
// === PERSISTENT SHIL CACHE WITH ZERO RE-TRANSLATION ===
// This is the key to beating legacy interpreter performance!
@ -1017,6 +1074,9 @@ u32 calculate_sh4_hash(RuntimeBlockInfo* block) {
void ShilInterpreter::executeBlock(RuntimeBlockInfo* block) {
const u32 pc = sh4rcb.cntx.pc;
// Track block compilation for cache management
CacheFriendlyShil::on_block_compiled();
// **CRITICAL PATH**: Try persistent cache first - should be 90%+ hit rate
PrecompiledShilBlock* cached_block = PersistentShilCache::ultra_fast_lookup(pc);
if (__builtin_expect(cached_block != nullptr, 1)) {
@ -1128,6 +1188,89 @@ void shil_interpreter_clear_cache() {
// This function should be called periodically to print cache statistics
void shil_interpreter_print_stats() {
PersistentShilCache::print_performance_stats();
CacheFriendlyShil::print_cache_stats();
}
// === CACHE-FRIENDLY WRAPPER FUNCTIONS ===
// These functions can be called instead of direct cache clearing
// Wrapper for rdv_CompilePC cache clearing
bool shil_should_clear_cache_on_compile(u32 pc, u32 free_space) {
// In jitless mode, we don't need much code buffer space
// Only clear if we're really running out of space
if (free_space < 4_MB) { // Much more conservative than 32MB
return !CacheFriendlyShil::should_prevent_cache_clear(pc);
}
// Don't clear for hardcoded PC addresses unless really necessary
if (pc == 0x8c0000e0 || pc == 0xac010000 || pc == 0xac008300) {
// These are boot/BIOS addresses - be very conservative
return free_space < 1_MB && !CacheFriendlyShil::should_prevent_cache_clear(pc);
}
return false; // Don't clear
}
// === CACHE-FRIENDLY BLOCK CHECK FAILURE HANDLING ===
// This prevents the devastating cache clears that happen every few seconds
// Track block check failures per address
static std::unordered_map<u32, u32> block_check_failure_counts;
static u32 total_block_check_failures = 0;
// Handle block check failure without nuking the entire cache
DynarecCodeEntryPtr shil_handle_block_check_fail(u32 addr) {
total_block_check_failures++;
// Track failures for this specific address
u32& failure_count = block_check_failure_counts[addr];
failure_count++;
INFO_LOG(DYNAREC, "SHIL: Block check fail @ 0x%08X (failure #%u for this addr, #%u total)",
addr, failure_count, total_block_check_failures);
// Only clear cache if this address has failed many times
if (failure_count > 20) { // Much more conservative than clearing every time
// Reset failure count for this address
failure_count = 0;
// Only clear if cache-friendly logic allows it
if (!CacheFriendlyShil::should_prevent_cache_clear(addr)) {
INFO_LOG(DYNAREC, "SHIL: Clearing cache due to persistent failures at 0x%08X", addr);
PersistentShilCache::clear_temporary_cache_only();
} else {
INFO_LOG(DYNAREC, "SHIL: Prevented cache clear despite persistent failures at 0x%08X", addr);
}
}
// Just discard the problematic block, don't clear everything
RuntimeBlockInfoPtr block = bm_GetBlock(addr);
if (block) {
bm_DiscardBlock(block.get());
INFO_LOG(DYNAREC, "SHIL: Discarded problematic block at 0x%08X", addr);
}
// Recompile the block
next_pc = addr;
return (DynarecCodeEntryPtr)CC_RW2RX(rdv_CompilePC(failure_count));
}
// Statistics function
void shil_print_block_check_stats() {
INFO_LOG(DYNAREC, "SHIL Block Check Stats: %u total failures, %zu unique addresses",
total_block_check_failures, block_check_failure_counts.size());
// Print top 5 problematic addresses
std::vector<std::pair<u32, u32>> sorted_failures;
for (const auto& pair : block_check_failure_counts) {
sorted_failures.push_back({pair.second, pair.first});
}
std::sort(sorted_failures.rbegin(), sorted_failures.rend());
INFO_LOG(DYNAREC, "Top problematic addresses:");
for (size_t i = 0; i < std::min(size_t(5), sorted_failures.size()); i++) {
INFO_LOG(DYNAREC, " 0x%08X: %u failures", sorted_failures[i].second, sorted_failures[i].first);
}
}
// Redefine macros after our code
@ -1139,4 +1282,17 @@ void shil_interpreter_print_stats() {
#define pc Sh4cntx.pc
#define mac Sh4cntx.mac
#define macl Sh4cntx.macl
#define mach Sh4cntx.mach
#define mach Sh4cntx.mach
// === WRAPPER FUNCTIONS FOR EXTERNAL ACCESS ===
// These allow other modules to access the cache-friendly functionality
// C-style wrapper for CacheFriendlyShil::on_block_compiled()
extern "C" void CacheFriendlyShil_on_block_compiled() {
CacheFriendlyShil::on_block_compiled();
}
// C-style wrapper for shil_print_block_check_stats()
extern "C" void shil_print_block_check_stats_wrapper() {
shil_print_block_check_stats();
}

View File

@ -71,4 +71,9 @@ void shil_interpreter_mainloop(void* v_cntx);
/// SHIL cache management functions
void shil_interpreter_clear_cache();
void shil_interpreter_print_stats();
void shil_interpreter_print_stats();
/// SHIL cache-friendly wrapper functions
bool shil_should_clear_cache_on_compile(u32 pc, u32 free_space);
DynarecCodeEntryPtr shil_handle_block_check_fail(u32 addr);
void shil_print_block_check_stats();