From ee9477dc21f8052d95a5144a3efadf2a787e23fe Mon Sep 17 00:00:00 2001 From: Eladash Date: Tue, 29 Aug 2023 15:32:26 +0300 Subject: [PATCH] SPU: support pure SPU code precompilation discovery --- rpcs3/Emu/Cell/PPUModule.cpp | 186 ++++++++++++++++++++++++++++++- rpcs3/Emu/Cell/SPURecompiler.cpp | 10 +- rpcs3/Loader/ELF.h | 7 ++ 3 files changed, 198 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index 62f80e42b1..6e100adfa4 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -13,6 +13,7 @@ #include "Emu/VFS.h" #include "Emu/Cell/PPUOpcodes.h" +#include "Emu/Cell/SPUThread.h" #include "Emu/Cell/PPUAnalyser.h" #include "Emu/Cell/lv2/sys_process.h" @@ -1070,12 +1071,186 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment& return; } + const bool is_firmware = mod.path.starts_with(vfs::get("/dev_flash/")); + const std::string_view seg_view{ensure(mod.get_ptr(seg.addr)), seg.size}; - for (usz i = seg_view.find("\177ELF"); i < seg.size; i = seg_view.find("\177ELF", i + 4)) + auto find_first_of_multiple = [](std::string_view data, std::initializer_list values, usz index) + { + usz pos = umax; + + for (std::string_view value : values) + { + if (usz pos0 = data.substr(index, pos - index).find(value); pos0 != umax && pos0 + index < pos) + { + pos = pos0 + index; + } + } + + return pos; + }; + + extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size); + + // Search for [stqd lr,0x10(sp)] instruction or ELF file signature, whichever comes first + const std::initializer_list prefixes = {"\177ELF"sv, "\x24\0\x40\x80"sv}; + + usz prev_bound = 0; + + for (usz i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, utils::align(i + 1, 4))) { const auto elf_header = ensure(mod.get_ptr(seg.addr + i)); + if (i % 4 == 0 && std::memcmp(elf_header, "\x24\0\x40\x80", 4) == 0) + { + bool next = 
true; + const u32 old_i = i; + + for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries--, search = utils::sub_saturate(search, 128)) + { + if (seg_view[search] != 0x42 && seg_view[search] != 0x43) + { + continue; + } + + const u32 inst1 = read_from_ptr>(seg_view, search); + const u32 inst2 = read_from_ptr>(seg_view, search + 4); + const u32 inst3 = read_from_ptr>(seg_view, search + 8); + const u32 inst4 = read_from_ptr>(seg_view, search + 12); + + if ((inst1 & 0xfe'00'00'7f) != 0x42000002 || (inst2 & 0xfe'00'00'7f) != 0x42000002 || (inst3 & 0xfe'00'00'7f) != 0x42000002 || (inst4 & 0xfe'00'00'7f) != 0x42000002) + { + continue; + } + + ppu_log.success("Found SPURS GUID Pattern at 0x%05x", search + seg.addr); + i = search; + next = false; + break; + } + + if (next) + { + continue; + } + + std::string_view ls_segment = seg_view.substr(i); + + // Bound to a bit less than LS size + ls_segment = ls_segment.substr(0, 0x38000); + + for (usz addr_last = 0, valid_count = 0, invalid_count = 0;;) + { + usz instruction = ls_segment.find("\x24\0\x40\x80"sv, addr_last); + + if (instruction != umax) + { + if (instruction % 4 != i % 4) + { + // Unaligned, continue + addr_last = instruction + (i % 4 - instruction % 4) % 4; + continue; + } + + // FIXME: This seems to terminate SPU code prematurely in some cases + // Likely due to absolute branches + if (spu_thread::is_exec_code(instruction, {reinterpret_cast(ls_segment.data()), ls_segment.size()}, 0)) + { + addr_last = instruction + 4; + valid_count++; + invalid_count = 0; + continue; + } + + if (invalid_count == 0) + { + // Allow a single case of invalid data + addr_last = instruction + 4; + invalid_count++; + continue; + } + + addr_last = instruction; + } + + if (addr_last >= 0x80 && valid_count >= 2) + { + const u32 begin = i & -128; + u32 end = std::min(seg.size, utils::align(i + addr_last + 256, 128)); + + u32 guessed_ls_addr = 0; + + // Try to guess LS address by observing the pattern for disable/enable 
interrupts + // ILA R2, PC + 8 + // BIE/BID R2 + + for (u32 found = 0, last_vaddr = 0, it = begin + 16; it < end - 16; it += 4) + { + const u32 inst1 = read_from_ptr>(seg_view, it); + const u32 inst2 = read_from_ptr>(seg_view, it + 4); + const u32 inst3 = read_from_ptr>(seg_view, it + 8); + const u32 inst4 = read_from_ptr>(seg_view, it + 12); + + if ((inst1 & 0xfe'00'00'7f) == 0x42000002 && (inst2 & 0xfe'00'00'7f) == 0x42000002 && (inst3 & 0xfe'00'00'7f) == 0x42000002 && (inst4 & 0xfe'00'00'7f) == 0x42000002) + { + // SPURS GUID pattern + end = it; + ppu_log.success("Found SPURS GUID Pattern for terminagtor at 0x%05x", end + seg.addr); + break; + } + + if ((inst1 >> 7) % 4 == 0 && (inst1 & 0xfe'00'00'7f) == 0x42000002 && (inst2 == 0x35040100 || inst2 == 0x35080100)) + { + const u32 addr_inst = (inst1 >> 7) % 0x40000; + + if (u32 addr_seg = addr_inst - std::min(it + 8 - begin, addr_inst)) + { + if (last_vaddr != addr_seg) + { + guessed_ls_addr = 0; + found = 0; + } + + found++; + last_vaddr = addr_seg; + + if (found >= 2) + { + // Good segment address + guessed_ls_addr = last_vaddr; + ppu_log.notice("Found IENABLE/IDSIABLE Pattern at 0x%05x", it + seg.addr); + } + } + } + } + + if (guessed_ls_addr) + { + end = begin + std::min(end - begin, SPU_LS_SIZE - guessed_ls_addr); + } + + ppu_log.success("Found valid roaming SPU code at 0x%x..0x%x (guessed_ls_addr=0x%x)", seg.addr + begin, seg.addr + end, guessed_ls_addr); + + if (!is_firmware) + { + // Signify that the base address is unknown by passing 0 + utilize_spu_data_segment(guessed_ls_addr ? 
guessed_ls_addr : 0x4000, seg_view.data() + begin, end - begin); + } + + i = std::max(end, i + 4) - 4; + prev_bound = i + 4; + } + else + { + i = old_i; + } + + break; + } + + continue; + } + // Try to load SPU image const spu_exec_object obj(fs::file(elf_header, seg.size - i)); @@ -1107,7 +1282,7 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment& if (prog.p_type == 0x1u /* LOAD */ && prog.p_filesz > 0u) { - if (prog.p_vaddr) + if (prog.p_vaddr && !is_firmware) { extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size); @@ -1126,11 +1301,13 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment& if (!name.empty()) { - fmt::append(dump, "\n\tSPUNAME: '%s' (image addr: 0x%x)", name, seg.addr + i); + fmt::append(dump, "\n\tSPUNAME: '%s'", name); } } } + fmt::append(dump, " (image addr: 0x%x, size: 0x%x)", seg.addr + i, obj.highest_offset); + sha1_finish(&sha2, sha1_hash); // Format patch name @@ -1173,6 +1350,9 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment& { ppu_loader.success("SPU executable hash: %s (<- %u)%s", hash, applied.size(), dump); } + + i += obj.highest_offset - 4; + prev_bound = i + 4; } } diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 1441ed91d0..55b5e3c8dc 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -561,7 +561,7 @@ extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 s spu_section_data::data_t obj{vaddr, std::move(data)}; - obj.funcs = spu_thread::discover_functions(vaddr, { reinterpret_cast(ls_data_vaddr), size }, true, umax); + obj.funcs = spu_thread::discover_functions(vaddr, { reinterpret_cast(ls_data_vaddr), size }, vaddr != 0, umax); if (obj.funcs.empty()) { @@ -703,7 +703,7 @@ void spu_cache::initialize(bool build_existing_cache) total_precompile += sec.funcs.size(); } - const bool spu_precompilation_enabled = 
(build_existing_cache ? func_list.empty() : func_list.size() < total_precompile) && g_cfg.core.spu_cache && g_cfg.core.llvm_precompilation; + const bool spu_precompilation_enabled = func_list.empty() && g_cfg.core.spu_cache && g_cfg.core.llvm_precompilation; if (spu_precompilation_enabled) { @@ -716,6 +716,7 @@ void spu_cache::initialize(bool build_existing_cache) } else { + total_precompile = 0; data_list.clear(); } @@ -959,12 +960,17 @@ void spu_cache::initialize(bool build_existing_cache) return result; }); + u32 built_total = 0; + // Join (implicitly) and print individual results for (u32 i = 0; i < workers.size(); i++) { spu_log.notice("SPU Runtime: Worker %u built %u programs.", i + 1, workers[i]); + built_total += workers[i]; } + spu_log.notice("SPU Runtime: Workers built %u programs.", built_total); + if (Emu.IsStopped()) { spu_log.error("SPU Runtime: Cache building aborted."); diff --git a/rpcs3/Loader/ELF.h b/rpcs3/Loader/ELF.h index abb9979c19..34bb06c267 100644 --- a/rpcs3/Loader/ELF.h +++ b/rpcs3/Loader/ELF.h @@ -260,6 +260,8 @@ public: std::vector progs{}; std::vector shdrs{}; + usz highest_offset = 0; + public: elf_object() = default; @@ -270,6 +272,8 @@ public: elf_error open(const fs::file& stream, u64 offset = 0, bs_t opts = {}) { + highest_offset = 0; + // Check stream if (!stream) return set_error(elf_error::stream); @@ -322,6 +326,7 @@ public: stream.seek(offset + header.e_phoff); if (!stream.read(_phdrs, header.e_phnum)) return set_error(elf_error::stream_phdrs); + highest_offset = std::max(highest_offset, stream.pos()); } if (!(opts & elf_opt::no_sections)) @@ -329,6 +334,7 @@ public: stream.seek(offset + header.e_shoff); if (!stream.read(_shdrs, header.e_shnum)) return set_error(elf_error::stream_shdrs); + highest_offset = std::max(highest_offset, stream.pos()); } progs.clear(); @@ -342,6 +348,7 @@ public: stream.seek(offset + hdr.p_offset); if (!stream.read(progs.back().bin, hdr.p_filesz)) return set_error(elf_error::stream_data); + 
highest_offset = std::max(highest_offset, stream.pos()); } }