// Review notes for this patch (core/hw/sh4/dyna: shil_dynarec.cpp, shil_interpreter.cpp, shil_interpreter.h).
// This text sits before the first "diff --git" header and is not part of any hunk.
//
// What the hunks below do:
//  * ShilDynarec::mainloop: looks the block up with bm_GetBlock(next_pc) instead of calling
//    rdv_FindOrCompile(). On a miss it calls allocateBlock(), Setup(), compile() and bm_AddBlock(),
//    runs the block with executeShilBlock(), and picks next_pc from BET_GET_CLS(BlockType).
//    Statistics are printed whenever a static counter reaches a multiple of 50000.
//    The catch clause is widened from FlycastException to std::exception.
//  * shil_interpreter.cpp: adds struct CacheFriendlyShil (refuses a cache clear when the last one was
//    less than MIN_CLEAR_INTERVAL_MS ago, or when fewer than MIN_BLOCKS_BEFORE_CLEAR blocks were counted),
//    shil_should_clear_cache_on_compile(), shil_handle_block_check_fail(),
//    shil_print_block_check_stats() and two extern "C" wrappers.
//  * shil_interpreter.h: declares the three new shil_* functions.
//
// Changes made in review (neither changes the number of added lines, so the @@ counts are untouched):
//  1. Restored the template arguments of block_check_failure_counts (std::unordered_map<u32, u32>) and
//     sorted_failures (std::vector<std::pair<u32, u32>>); the angle-bracket parts were missing.
//  2. shil_should_clear_cache_on_compile(): the boot/BIOS address check now runs before the generic
//     4_MB check. In the previous order its "free_space < 1_MB" test could never be true, because
//     every free_space below 4_MB had already returned.
//
// NOTE(review): the hunks appear to be flattened (line breaks replaced by spaces); confirm against the
//   original commit before trying to apply this patch.
// NOTE(review): CacheFriendlyShil::on_block_compiled() is also called at the top of
//   ShilInterpreter::executeBlock, so blocks_compiled_since_clear advances on every executeBlock call
//   as well as after compile() in mainloop - confirm that is intended.
// NOTE(review): last_clear_time starts at 0, so should_prevent_cache_clear() returns true until the
//   computed current_time reaches MIN_CLEAR_INTERVAL_MS.
// NOTE(review): the next_pc selection switch is duplicated for the hit and miss paths in mainloop; a
//   shared helper would remove the copy. Left as is to keep the hunk sizes unchanged.
// NOTE(review): this patch adds no caller for shil_should_clear_cache_on_compile() or
//   shil_handle_block_check_fail(), and no #include for <unordered_map>, <vector> or <algorithm> is
//   visible in these hunks - verify both in the full files.
diff --git a/core/hw/sh4/dyna/shil_dynarec.cpp b/core/hw/sh4/dyna/shil_dynarec.cpp index 64558a144..d37980546 100644 --- a/core/hw/sh4/dyna/shil_dynarec.cpp +++ b/core/hw/sh4/dyna/shil_dynarec.cpp @@ -3,6 +3,11 @@ #include "hw/sh4/sh4_interrupts.h" #include "hw/sh4/sh4_mem.h" #include "oslib/oslib.h" +#include "shil_interpreter.h" + +// Forward declarations for cache-friendly functions +extern "C" void CacheFriendlyShil_on_block_compiled(); +extern "C" void shil_print_block_check_stats_wrapper(); // Simple stub function that will be used as the "compiled" code pointer static void shil_interpreter_stub() { @@ -40,59 +45,96 @@ void ShilDynarec::mainloop(void* cntx) { continue; } - // Get the next block to execute - DynarecCodeEntryPtr code_ptr = rdv_FindOrCompile(); + // **CACHE-FRIENDLY BLOCK LOOKUP**: Avoid using rdv_FindOrCompile which can trigger cache clears + RuntimeBlockInfoPtr block = bm_GetBlock(next_pc); - if (code_ptr == ngen_FailedToFindBlock) { - code_ptr = rdv_FailedToFindBlock(next_pc); - } - - // Check if this is a SHIL interpreter block - if (code_ptr == (DynarecCodeEntryPtr)shil_interpreter_stub) { - // This is a SHIL block - execute via interpreter - RuntimeBlockInfoPtr block = bm_GetBlock(next_pc); - if (block) { - executeShilBlock(block.get()); + if (block) { + // Block exists - execute it via SHIL interpreter + executeShilBlock(block.get()); + + // After executing the block, determine next PC based on block type + switch (BET_GET_CLS(block->BlockType)) { + case BET_CLS_Static: + if (block->BlockType == BET_StaticIntr) { + next_pc = block->NextBlock; + } else { + next_pc = block->BranchBlock; + } + break; + + case BET_CLS_Dynamic: + // PC should have been set by the block execution + next_pc = Sh4cntx.pc; + break; + + case BET_CLS_COND: + // Conditional branch - check the condition + if (sr.T) { + next_pc = block->BranchBlock; + } else { + next_pc = block->NextBlock; + } + break; + } + } else { + // **CACHE-FRIENDLY BLOCK COMPILATION**: Don't use 
rdv_FindOrCompile + // Compile new block using cache-friendly approach + + RuntimeBlockInfo* new_block = allocateBlock(); + if (new_block->Setup(next_pc, fpscr)) { + // Compile the block for SHIL interpretation + compile(new_block, !new_block->read_only, true); - // After executing the block, determine next PC based on block type - switch (BET_GET_CLS(block->BlockType)) { + // Add to block manager + bm_AddBlock(new_block); + + // Track compilation for cache management + CacheFriendlyShil_on_block_compiled(); + + // Execute the newly compiled block + executeShilBlock(new_block); + + // Update PC based on block type + switch (BET_GET_CLS(new_block->BlockType)) { case BET_CLS_Static: - if (block->BlockType == BET_StaticIntr) { - next_pc = block->NextBlock; + if (new_block->BlockType == BET_StaticIntr) { + next_pc = new_block->NextBlock; } else { - next_pc = block->BranchBlock; + next_pc = new_block->BranchBlock; } break; case BET_CLS_Dynamic: - // PC should have been set by the block execution next_pc = Sh4cntx.pc; break; case BET_CLS_COND: - // Conditional branch - check the condition if (sr.T) { - next_pc = block->BranchBlock; + next_pc = new_block->BranchBlock; } else { - next_pc = block->NextBlock; + next_pc = new_block->NextBlock; } break; } } else { - // Block not found - this shouldn't happen - ERROR_LOG(DYNAREC, "SHIL block not found for PC %08X", next_pc); + // Block setup failed - this shouldn't happen often + ERROR_LOG(DYNAREC, "SHIL: Block setup failed for PC %08X", next_pc); + delete new_block; break; } - } else { - // This is a regular JIT block - this shouldn't happen in SHIL mode - ERROR_LOG(DYNAREC, "Unexpected JIT block in SHIL mode at PC %08X", next_pc); - break; + } + + // Print statistics periodically + static u32 stats_counter = 0; + if (++stats_counter % 50000 == 0) { + shil_interpreter_print_stats(); + shil_print_block_check_stats_wrapper(); } } } catch (const SH4ThrownException& ex) { // Handle SH4 exceptions Do_Exception(next_pc, ex.expEvn); 
- } catch (const FlycastException& ex) { + } catch (const std::exception& ex) { // Handle other exceptions ERROR_LOG(DYNAREC, "Exception in SHIL mainloop: %s", ex.what()); sh4_int_bCpuRun = false; diff --git a/core/hw/sh4/dyna/shil_interpreter.cpp b/core/hw/sh4/dyna/shil_interpreter.cpp index 3d10cce75..d91711cc7 100644 --- a/core/hw/sh4/dyna/shil_interpreter.cpp +++ b/core/hw/sh4/dyna/shil_interpreter.cpp @@ -932,6 +932,63 @@ void HybridRegisterCache::asm_mega_store() { } #endif +// === CACHE-FRIENDLY SHIL SYSTEM === +// This prevents excessive cache clearing that destroys performance + +struct CacheFriendlyShil { + // Track cache clears to prevent excessive clearing + static u32 cache_clear_count; + static u32 last_clear_time; + static u32 blocks_compiled_since_clear; + + // Cache clear prevention thresholds + static constexpr u32 MIN_CLEAR_INTERVAL_MS = 5000; // Don't clear more than once per 5 seconds + static constexpr u32 MIN_BLOCKS_BEFORE_CLEAR = 100; // Need at least 100 blocks before clearing + + // Override the aggressive cache clearing behavior + static bool should_prevent_cache_clear(u32 pc) { + u32 current_time = sh4_sched_now64() / (SH4_MAIN_CLOCK / 1000); // Convert to milliseconds + + // Check if we're clearing too frequently + if (current_time - last_clear_time < MIN_CLEAR_INTERVAL_MS) { + INFO_LOG(DYNAREC, "SHIL: Preventing cache clear - too frequent (last clear %u ms ago)", + current_time - last_clear_time); + return true; + } + + // Check if we have enough blocks to justify clearing + if (blocks_compiled_since_clear < MIN_BLOCKS_BEFORE_CLEAR) { + INFO_LOG(DYNAREC, "SHIL: Preventing cache clear - not enough blocks (%u < %u)", + blocks_compiled_since_clear, MIN_BLOCKS_BEFORE_CLEAR); + return true; + } + + // Allow the clear but update tracking + cache_clear_count++; + last_clear_time = current_time; + blocks_compiled_since_clear = 0; + + INFO_LOG(DYNAREC, "SHIL: Allowing cache clear #%u at PC=0x%08X", cache_clear_count, pc); + return false; + } + 
+ // Called when a new block is compiled + static void on_block_compiled() { + blocks_compiled_since_clear++; + } + + // Statistics + static void print_cache_stats() { + INFO_LOG(DYNAREC, "SHIL Cache Stats: %u total clears, %u blocks since last clear", + cache_clear_count, blocks_compiled_since_clear); + } +}; + +// Static member definitions +u32 CacheFriendlyShil::cache_clear_count = 0; +u32 CacheFriendlyShil::last_clear_time = 0; +u32 CacheFriendlyShil::blocks_compiled_since_clear = 0; + // === PERSISTENT SHIL CACHE WITH ZERO RE-TRANSLATION === // This is the key to beating legacy interpreter performance! @@ -1017,6 +1074,9 @@ u32 calculate_sh4_hash(RuntimeBlockInfo* block) { void ShilInterpreter::executeBlock(RuntimeBlockInfo* block) { const u32 pc = sh4rcb.cntx.pc; + // Track block compilation for cache management + CacheFriendlyShil::on_block_compiled(); + // **CRITICAL PATH**: Try persistent cache first - should be 90%+ hit rate PrecompiledShilBlock* cached_block = PersistentShilCache::ultra_fast_lookup(pc); if (__builtin_expect(cached_block != nullptr, 1)) { @@ -1128,6 +1188,89 @@ void shil_interpreter_clear_cache() { // This function should be called periodically to print cache statistics void shil_interpreter_print_stats() { PersistentShilCache::print_performance_stats(); + CacheFriendlyShil::print_cache_stats(); +} + +// === CACHE-FRIENDLY WRAPPER FUNCTIONS === +// These functions can be called instead of direct cache clearing + +// Wrapper for rdv_CompilePC cache clearing +bool shil_should_clear_cache_on_compile(u32 pc, u32 free_space) { + // Don't clear for hardcoded PC addresses unless really necessary + if (pc == 0x8c0000e0 || pc == 0xac010000 || pc == 0xac008300) { + // These are boot/BIOS addresses - be very conservative + return free_space < 1_MB && !CacheFriendlyShil::should_prevent_cache_clear(pc); + } + + // In jitless mode, we don't need much code buffer space + // Only clear if we're really running out of 
space + if (free_space < 4_MB) { // Much more conservative than 32MB + return !CacheFriendlyShil::should_prevent_cache_clear(pc); + } + + return false; // Don't clear +} + +// === CACHE-FRIENDLY BLOCK CHECK FAILURE HANDLING === +// This prevents the devastating cache clears that happen every few seconds + +// Track block check failures per address +static std::unordered_map<u32, u32> block_check_failure_counts; +static u32 total_block_check_failures = 0; + +// Handle block check failure without nuking the entire cache +DynarecCodeEntryPtr shil_handle_block_check_fail(u32 addr) { + total_block_check_failures++; + + // Track failures for this specific address + u32& failure_count = block_check_failure_counts[addr]; + failure_count++; + + INFO_LOG(DYNAREC, "SHIL: Block check fail @ 0x%08X (failure #%u for this addr, #%u total)", + addr, failure_count, total_block_check_failures); + + // Only clear cache if this address has failed many times + if (failure_count > 20) { // Much more conservative than clearing every time + // Reset failure count for this address + failure_count = 0; + + // Only clear if cache-friendly logic allows it + if (!CacheFriendlyShil::should_prevent_cache_clear(addr)) { + INFO_LOG(DYNAREC, "SHIL: Clearing cache due to persistent failures at 0x%08X", addr); + PersistentShilCache::clear_temporary_cache_only(); + } else { + INFO_LOG(DYNAREC, "SHIL: Prevented cache clear despite persistent failures at 0x%08X", addr); + } + } + + // Just discard the problematic block, don't clear everything + RuntimeBlockInfoPtr block = bm_GetBlock(addr); + if (block) { + bm_DiscardBlock(block.get()); + INFO_LOG(DYNAREC, "SHIL: Discarded problematic block at 0x%08X", addr); + } + + // Recompile the block + next_pc = addr; + return (DynarecCodeEntryPtr)CC_RW2RX(rdv_CompilePC(failure_count)); +} + +// Statistics function +void shil_print_block_check_stats() { + INFO_LOG(DYNAREC, "SHIL Block Check Stats: %u total failures, %zu unique addresses", + total_block_check_failures, block_check_failure_counts.size()); + + 
// Print top 5 problematic addresses + std::vector<std::pair<u32, u32>> sorted_failures; + for (const auto& pair : block_check_failure_counts) { + sorted_failures.push_back({pair.second, pair.first}); + } + std::sort(sorted_failures.rbegin(), sorted_failures.rend()); + + INFO_LOG(DYNAREC, "Top problematic addresses:"); + for (size_t i = 0; i < std::min(size_t(5), sorted_failures.size()); i++) { + INFO_LOG(DYNAREC, " 0x%08X: %u failures", sorted_failures[i].second, sorted_failures[i].first); + } } // Redefine macros after our code @@ -1139,4 +1282,17 @@ void shil_interpreter_print_stats() { #define pc Sh4cntx.pc #define mac Sh4cntx.mac #define macl Sh4cntx.macl -#define mach Sh4cntx.mach \ No newline at end of file +#define mach Sh4cntx.mach + +// === WRAPPER FUNCTIONS FOR EXTERNAL ACCESS === +// These allow other modules to access the cache-friendly functionality + +// C-style wrapper for CacheFriendlyShil::on_block_compiled() +extern "C" void CacheFriendlyShil_on_block_compiled() { + CacheFriendlyShil::on_block_compiled(); +} + +// C-style wrapper for shil_print_block_check_stats() +extern "C" void shil_print_block_check_stats_wrapper() { + shil_print_block_check_stats(); +} \ No newline at end of file diff --git a/core/hw/sh4/dyna/shil_interpreter.h b/core/hw/sh4/dyna/shil_interpreter.h index cbdb5e04a..acbe4e8a3 100644 --- a/core/hw/sh4/dyna/shil_interpreter.h +++ b/core/hw/sh4/dyna/shil_interpreter.h @@ -71,4 +71,9 @@ void shil_interpreter_mainloop(void* v_cntx); /// SHIL cache management functions void shil_interpreter_clear_cache(); -void shil_interpreter_print_stats(); \ No newline at end of file +void shil_interpreter_print_stats(); + +/// SHIL cache-friendly wrapper functions +bool shil_should_clear_cache_on_compile(u32 pc, u32 free_space); +DynarecCodeEntryPtr shil_handle_block_check_fail(u32 addr); +void shil_print_block_check_stats(); \ No newline at end of file