mirror of https://github.com/RPCS3/rpcs3.git
SPU: support pure SPU code precompilation discovery
This commit is contained in:
parent
37212a632c
commit
ee9477dc21
|
@ -13,6 +13,7 @@
|
||||||
#include "Emu/VFS.h"
|
#include "Emu/VFS.h"
|
||||||
|
|
||||||
#include "Emu/Cell/PPUOpcodes.h"
|
#include "Emu/Cell/PPUOpcodes.h"
|
||||||
|
#include "Emu/Cell/SPUThread.h"
|
||||||
#include "Emu/Cell/PPUAnalyser.h"
|
#include "Emu/Cell/PPUAnalyser.h"
|
||||||
|
|
||||||
#include "Emu/Cell/lv2/sys_process.h"
|
#include "Emu/Cell/lv2/sys_process.h"
|
||||||
|
@ -1070,12 +1071,186 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool is_firmware = mod.path.starts_with(vfs::get("/dev_flash/"));
|
||||||
|
|
||||||
const std::string_view seg_view{ensure(mod.get_ptr<char>(seg.addr)), seg.size};
|
const std::string_view seg_view{ensure(mod.get_ptr<char>(seg.addr)), seg.size};
|
||||||
|
|
||||||
for (usz i = seg_view.find("\177ELF"); i < seg.size; i = seg_view.find("\177ELF", i + 4))
|
// Returns the lowest offset >= index at which any of the strings in `values` occurs in `data`,
// or umax when none of them is found.
// NOTE(review): std::string_view::substr throws std::out_of_range when index > data.size() - callers must keep index in bounds.
auto find_first_of_multiple = [](std::string_view data, std::initializer_list<std::string_view> values, usz index)
|
||||||
|
{
|
||||||
|
// Lowest match offset found so far; umax is the value find()'s result is checked against to detect "no match"
usz pos = umax;
|
||||||
|

||||||
|
for (std::string_view value : values)
|
||||||
|
{
|
||||||
|
// Search only the window [index, pos): a candidate has to lie entirely before the current best match to replace it
if (usz pos0 = data.substr(index, pos - index).find(value); pos0 != umax && pos0 + index < pos)
|
||||||
|
{
|
||||||
|
// pos0 is relative to the window start, convert it back to an offset within data
pos = pos0 + index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|

||||||
|
return pos;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size);
|
||||||
|
|
||||||
|
// Search for [stqd lr,0x10(sp)] instruction or ELF file signature, whichever comes first
|
||||||
|
const std::initializer_list<std::string_view> prefixes = {"\177ELF"sv, "\x24\0\x40\x80"sv};
|
||||||
|
|
||||||
|
usz prev_bound = 0;
|
||||||
|
|
||||||
|
for (usz i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, utils::align<u32>(i + 1, 4)))
|
||||||
{
|
{
|
||||||
const auto elf_header = ensure(mod.get_ptr<u8>(seg.addr + i));
|
const auto elf_header = ensure(mod.get_ptr<u8>(seg.addr + i));
|
||||||
|
|
||||||
|
if (i % 4 == 0 && std::memcmp(elf_header, "\x24\0\x40\x80", 4) == 0)
|
||||||
|
{
|
||||||
|
bool next = true;
|
||||||
|
const u32 old_i = i;
|
||||||
|
|
||||||
|
for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries--, search = utils::sub_saturate<u32>(search, 128))
|
||||||
|
{
|
||||||
|
if (seg_view[search] != 0x42 && seg_view[search] != 0x43)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 inst1 = read_from_ptr<be_t<u32>>(seg_view, search);
|
||||||
|
const u32 inst2 = read_from_ptr<be_t<u32>>(seg_view, search + 4);
|
||||||
|
const u32 inst3 = read_from_ptr<be_t<u32>>(seg_view, search + 8);
|
||||||
|
const u32 inst4 = read_from_ptr<be_t<u32>>(seg_view, search + 12);
|
||||||
|
|
||||||
|
if ((inst1 & 0xfe'00'00'7f) != 0x42000002 || (inst2 & 0xfe'00'00'7f) != 0x42000002 || (inst3 & 0xfe'00'00'7f) != 0x42000002 || (inst4 & 0xfe'00'00'7f) != 0x42000002)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
ppu_log.success("Found SPURS GUID Pattern at 0x%05x", search + seg.addr);
|
||||||
|
i = search;
|
||||||
|
next = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (next)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string_view ls_segment = seg_view.substr(i);
|
||||||
|
|
||||||
|
// Bound to a bit less than LS size
|
||||||
|
ls_segment = ls_segment.substr(0, 0x38000);
|
||||||
|
|
||||||
|
for (usz addr_last = 0, valid_count = 0, invalid_count = 0;;)
|
||||||
|
{
|
||||||
|
usz instruction = ls_segment.find("\x24\0\x40\x80"sv, addr_last);
|
||||||
|
|
||||||
|
if (instruction != umax)
|
||||||
|
{
|
||||||
|
if (instruction % 4 != i % 4)
|
||||||
|
{
|
||||||
|
// Unaligned, continue
|
||||||
|
addr_last = instruction + (i % 4 - instruction % 4) % 4;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: This seems to terminate SPU code prematurely in some cases
|
||||||
|
// Likely due to absolute branches
|
||||||
|
if (spu_thread::is_exec_code(instruction, {reinterpret_cast<const u8*>(ls_segment.data()), ls_segment.size()}, 0))
|
||||||
|
{
|
||||||
|
addr_last = instruction + 4;
|
||||||
|
valid_count++;
|
||||||
|
invalid_count = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (invalid_count == 0)
|
||||||
|
{
|
||||||
|
// Allow a single case of invalid data
|
||||||
|
addr_last = instruction + 4;
|
||||||
|
invalid_count++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
addr_last = instruction;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (addr_last >= 0x80 && valid_count >= 2)
|
||||||
|
{
|
||||||
|
const u32 begin = i & -128;
|
||||||
|
u32 end = std::min<u32>(seg.size, utils::align<u32>(i + addr_last + 256, 128));
|
||||||
|
|
||||||
|
u32 guessed_ls_addr = 0;
|
||||||
|
|
||||||
|
// Try to guess LS address by observing the pattern for disable/enable interrupts
|
||||||
|
// ILA R2, PC + 8
|
||||||
|
// BIE/BID R2
|
||||||
|
|
||||||
|
for (u32 found = 0, last_vaddr = 0, it = begin + 16; it < end - 16; it += 4)
|
||||||
|
{
|
||||||
|
const u32 inst1 = read_from_ptr<be_t<u32>>(seg_view, it);
|
||||||
|
const u32 inst2 = read_from_ptr<be_t<u32>>(seg_view, it + 4);
|
||||||
|
const u32 inst3 = read_from_ptr<be_t<u32>>(seg_view, it + 8);
|
||||||
|
const u32 inst4 = read_from_ptr<be_t<u32>>(seg_view, it + 12);
|
||||||
|
|
||||||
|
if ((inst1 & 0xfe'00'00'7f) == 0x42000002 && (inst2 & 0xfe'00'00'7f) == 0x42000002 && (inst3 & 0xfe'00'00'7f) == 0x42000002 && (inst4 & 0xfe'00'00'7f) == 0x42000002)
|
||||||
|
{
|
||||||
|
// SPURS GUID pattern
|
||||||
|
end = it;
|
||||||
|
ppu_log.success("Found SPURS GUID Pattern for terminagtor at 0x%05x", end + seg.addr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((inst1 >> 7) % 4 == 0 && (inst1 & 0xfe'00'00'7f) == 0x42000002 && (inst2 == 0x35040100 || inst2 == 0x35080100))
|
||||||
|
{
|
||||||
|
const u32 addr_inst = (inst1 >> 7) % 0x40000;
|
||||||
|
|
||||||
|
if (u32 addr_seg = addr_inst - std::min<u32>(it + 8 - begin, addr_inst))
|
||||||
|
{
|
||||||
|
if (last_vaddr != addr_seg)
|
||||||
|
{
|
||||||
|
guessed_ls_addr = 0;
|
||||||
|
found = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
found++;
|
||||||
|
last_vaddr = addr_seg;
|
||||||
|
|
||||||
|
if (found >= 2)
|
||||||
|
{
|
||||||
|
// Good segment address
|
||||||
|
guessed_ls_addr = last_vaddr;
|
||||||
|
ppu_log.notice("Found IENABLE/IDSIABLE Pattern at 0x%05x", it + seg.addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (guessed_ls_addr)
|
||||||
|
{
|
||||||
|
end = begin + std::min<u32>(end - begin, SPU_LS_SIZE - guessed_ls_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
ppu_log.success("Found valid roaming SPU code at 0x%x..0x%x (guessed_ls_addr=0x%x)", seg.addr + begin, seg.addr + end, guessed_ls_addr);
|
||||||
|
|
||||||
|
if (!is_firmware)
|
||||||
|
{
|
||||||
|
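// Signify that the base address is unknown by passing 0 (NOTE(review): the call below falls back to 0x4000, not 0, when no LS address was guessed - confirm intent)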
|
||||||
|
utilize_spu_data_segment(guessed_ls_addr ? guessed_ls_addr : 0x4000, seg_view.data() + begin, end - begin);
|
||||||
|
}
|
||||||
|
|
||||||
|
i = std::max<u32>(end, i + 4) - 4;
|
||||||
|
prev_bound = i + 4;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
i = old_i;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Try to load SPU image
|
// Try to load SPU image
|
||||||
const spu_exec_object obj(fs::file(elf_header, seg.size - i));
|
const spu_exec_object obj(fs::file(elf_header, seg.size - i));
|
||||||
|
|
||||||
|
@ -1107,7 +1282,7 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&
|
||||||
|
|
||||||
if (prog.p_type == 0x1u /* LOAD */ && prog.p_filesz > 0u)
|
if (prog.p_type == 0x1u /* LOAD */ && prog.p_filesz > 0u)
|
||||||
{
|
{
|
||||||
if (prog.p_vaddr)
|
if (prog.p_vaddr && !is_firmware)
|
||||||
{
|
{
|
||||||
extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size);
|
extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size);
|
||||||
|
|
||||||
|
@ -1126,11 +1301,13 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&
|
||||||
|
|
||||||
if (!name.empty())
|
if (!name.empty())
|
||||||
{
|
{
|
||||||
fmt::append(dump, "\n\tSPUNAME: '%s' (image addr: 0x%x)", name, seg.addr + i);
|
fmt::append(dump, "\n\tSPUNAME: '%s'", name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt::append(dump, " (image addr: 0x%x, size: 0x%x)", seg.addr + i, obj.highest_offset);
|
||||||
|
|
||||||
sha1_finish(&sha2, sha1_hash);
|
sha1_finish(&sha2, sha1_hash);
|
||||||
|
|
||||||
// Format patch name
|
// Format patch name
|
||||||
|
@ -1173,6 +1350,9 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&
|
||||||
{
|
{
|
||||||
ppu_loader.success("SPU executable hash: %s (<- %u)%s", hash, applied.size(), dump);
|
ppu_loader.success("SPU executable hash: %s (<- %u)%s", hash, applied.size(), dump);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
i += obj.highest_offset - 4;
|
||||||
|
prev_bound = i + 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -561,7 +561,7 @@ extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 s
|
||||||
|
|
||||||
spu_section_data::data_t obj{vaddr, std::move(data)};
|
spu_section_data::data_t obj{vaddr, std::move(data)};
|
||||||
|
|
||||||
obj.funcs = spu_thread::discover_functions(vaddr, { reinterpret_cast<const u8*>(ls_data_vaddr), size }, true, umax);
|
obj.funcs = spu_thread::discover_functions(vaddr, { reinterpret_cast<const u8*>(ls_data_vaddr), size }, vaddr != 0, umax);
|
||||||
|
|
||||||
if (obj.funcs.empty())
|
if (obj.funcs.empty())
|
||||||
{
|
{
|
||||||
|
@ -703,7 +703,7 @@ void spu_cache::initialize(bool build_existing_cache)
|
||||||
total_precompile += sec.funcs.size();
|
total_precompile += sec.funcs.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool spu_precompilation_enabled = (build_existing_cache ? func_list.empty() : func_list.size() < total_precompile) && g_cfg.core.spu_cache && g_cfg.core.llvm_precompilation;
|
const bool spu_precompilation_enabled = func_list.empty() && g_cfg.core.spu_cache && g_cfg.core.llvm_precompilation;
|
||||||
|
|
||||||
if (spu_precompilation_enabled)
|
if (spu_precompilation_enabled)
|
||||||
{
|
{
|
||||||
|
@ -716,6 +716,7 @@ void spu_cache::initialize(bool build_existing_cache)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
total_precompile = 0;
|
||||||
data_list.clear();
|
data_list.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -959,12 +960,17 @@ void spu_cache::initialize(bool build_existing_cache)
|
||||||
return result;
|
return result;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
u32 built_total = 0;
|
||||||
|
|
||||||
// Join (implicitly) and print individual results
|
// Join (implicitly) and print individual results
|
||||||
for (u32 i = 0; i < workers.size(); i++)
|
for (u32 i = 0; i < workers.size(); i++)
|
||||||
{
|
{
|
||||||
spu_log.notice("SPU Runtime: Worker %u built %u programs.", i + 1, workers[i]);
|
spu_log.notice("SPU Runtime: Worker %u built %u programs.", i + 1, workers[i]);
|
||||||
|
built_total += workers[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spu_log.notice("SPU Runtime: Workers built %u programs.", built_total);
|
||||||
|
|
||||||
if (Emu.IsStopped())
|
if (Emu.IsStopped())
|
||||||
{
|
{
|
||||||
spu_log.error("SPU Runtime: Cache building aborted.");
|
spu_log.error("SPU Runtime: Cache building aborted.");
|
||||||
|
|
|
@ -260,6 +260,8 @@ public:
|
||||||
std::vector<prog_t> progs{};
|
std::vector<prog_t> progs{};
|
||||||
std::vector<shdata_t> shdrs{};
|
std::vector<shdata_t> shdrs{};
|
||||||
|
|
||||||
|
usz highest_offset = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
elf_object() = default;
|
elf_object() = default;
|
||||||
|
|
||||||
|
@ -270,6 +272,8 @@ public:
|
||||||
|
|
||||||
elf_error open(const fs::file& stream, u64 offset = 0, bs_t<elf_opt> opts = {})
|
elf_error open(const fs::file& stream, u64 offset = 0, bs_t<elf_opt> opts = {})
|
||||||
{
|
{
|
||||||
|
highest_offset = 0;
|
||||||
|
|
||||||
// Check stream
|
// Check stream
|
||||||
if (!stream)
|
if (!stream)
|
||||||
return set_error(elf_error::stream);
|
return set_error(elf_error::stream);
|
||||||
|
@ -322,6 +326,7 @@ public:
|
||||||
stream.seek(offset + header.e_phoff);
|
stream.seek(offset + header.e_phoff);
|
||||||
if (!stream.read(_phdrs, header.e_phnum))
|
if (!stream.read(_phdrs, header.e_phnum))
|
||||||
return set_error(elf_error::stream_phdrs);
|
return set_error(elf_error::stream_phdrs);
|
||||||
|
highest_offset = std::max<usz>(highest_offset, stream.pos());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(opts & elf_opt::no_sections))
|
if (!(opts & elf_opt::no_sections))
|
||||||
|
@ -329,6 +334,7 @@ public:
|
||||||
stream.seek(offset + header.e_shoff);
|
stream.seek(offset + header.e_shoff);
|
||||||
if (!stream.read(_shdrs, header.e_shnum))
|
if (!stream.read(_shdrs, header.e_shnum))
|
||||||
return set_error(elf_error::stream_shdrs);
|
return set_error(elf_error::stream_shdrs);
|
||||||
|
highest_offset = std::max<usz>(highest_offset, stream.pos());
|
||||||
}
|
}
|
||||||
|
|
||||||
progs.clear();
|
progs.clear();
|
||||||
|
@ -342,6 +348,7 @@ public:
|
||||||
stream.seek(offset + hdr.p_offset);
|
stream.seek(offset + hdr.p_offset);
|
||||||
if (!stream.read(progs.back().bin, hdr.p_filesz))
|
if (!stream.read(progs.back().bin, hdr.p_filesz))
|
||||||
return set_error(elf_error::stream_data);
|
return set_error(elf_error::stream_data);
|
||||||
|
highest_offset = std::max<usz>(highest_offset, stream.pos());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue