Patches/PPU: Extend and improve patching capabilities (code allocations, jumps to any address) (#10779)

* Patches/PPU: Implement dynamic code allocation + Any-Address jump patches

Also fix deallocation path of fixed allocation patches.
This commit is contained in:
Eladash 2021-09-01 13:38:17 +03:00 committed by GitHub
parent ee6e4c493d
commit b40ed5bdb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 339 additions and 72 deletions

View File

@ -38,6 +38,8 @@ void fmt_class_string<patch_type>::format(std::string& out, u64 arg)
{
case patch_type::invalid: return "invalid";
case patch_type::alloc: return "alloc";
case patch_type::code_alloc: return "calloc";
case patch_type::jump: return "jump";
case patch_type::load: return "load";
case patch_type::byte: return "byte";
case patch_type::le16: return "le16";
@ -516,10 +518,23 @@ void patch_engine::append_title_patches(const std::string& title_id)
}
void ppu_register_range(u32 addr, u32 size);
void ppu_register_function_at(u32 addr, u32 size, u64 ptr);
bool ppu_form_branch_to_code(u32 entry, u32 target);
static std::basic_string<u32> apply_modification(const patch_engine::patch_info& patch, u8* dst, u32 filesz, u32 min_addr)
// Releases a vm area previously reserved by the patch engine.
// Only blocks carrying the unique patch marker (bit 62 of flags) are
// unmapped; any other block is left untouched. The shared_ptr is reset
// before unmapping so no dangling reference survives the call.
void unmap_vm_area(std::shared_ptr<vm::block_t>& ptr)
{
	if (!ptr)
	{
		return;
	}

	if (!(ptr->flags & (1ull << 62)))
	{
		return;
	}

	const u32 area_addr = ptr->addr;
	ptr.reset();
	vm::unmap(area_addr, true);
}
// Returns old 'applied' size
static usz apply_modification(std::basic_string<u32>& applied, const patch_engine::patch_info& patch, u8* dst, u32 filesz, u32 min_addr)
{
const usz old_applied_size = applied.size();
for (const auto& p : patch.data_list)
{
@ -531,22 +546,22 @@ static std::basic_string<u32> apply_modification(const patch_engine::patch_info&
const u32 alloc_size = utils::align(static_cast<u32>(p.value.long_value) + alloc_at % 4096, 4096);
// Allocate map if needed, if allocated flags will indicate that bit 62 is set (unique identifier)
auto alloc_map = vm::reserve_map(vm::any, alloc_at & -0x10000, utils::align(alloc_size, 0x10000), vm::page_size_64k | vm::preallocated | vm::bf0_0x2 | (1ull << 62));
auto alloc_map = vm::reserve_map(vm::any, alloc_at & -0x10000, utils::align(alloc_size, 0x10000), vm::page_size_64k | vm::preallocated | (1ull << 62));
u64 flags = 0;
u64 flags = vm::page_readable;
switch (p.offset % patch_engine::mem_protection::mask)
{
case patch_engine::mem_protection::wx: flags |= vm::page_writable + vm::page_readable + vm::page_executable; break;
case patch_engine::mem_protection::ro: flags |= vm::page_readable; break;
case patch_engine::mem_protection::rx: flags |= vm::page_writable + vm::page_executable; break;
case patch_engine::mem_protection::rw: flags |= vm::page_writable + vm::page_readable; break;
case patch_engine::mem_protection::wx: flags |= vm::page_writable + vm::page_executable; break;
case patch_engine::mem_protection::ro: break;
case patch_engine::mem_protection::rx: flags |= vm::page_executable; break;
case patch_engine::mem_protection::rw: flags |= vm::page_writable; break;
default: ensure(false);
}
if (alloc_map)
{
if (alloc_map->falloc(alloc_at, alloc_size))
if ((p.alloc_addr = alloc_map->falloc(alloc_at, alloc_size)))
{
vm::page_protect(alloc_at, alloc_size, 0, flags, flags ^ (vm::page_writable + vm::page_readable + vm::page_executable));
@ -560,45 +575,41 @@ static std::basic_string<u32> apply_modification(const patch_engine::patch_info&
}
// Revert if allocated map before failure
if (alloc_map->flags & (1ull << 62))
{
vm::unmap(vm::any, alloc_map->addr);
}
unmap_vm_area(alloc_map);
}
}
// Revert in case of failure
for (u32 index : applied)
std::for_each(applied.begin() + old_applied_size, applied.end(), [&](u32 index)
{
const u32 addr = patch.data_list[index].offset & -4096;
const u32 addr = std::exchange(patch.data_list[index].alloc_addr, 0);
// Try different alignments until works
if (!vm::dealloc(addr))
{
if (!vm::dealloc(addr & -0x10000))
{
vm::dealloc(addr & -0x100000);
}
}
vm::dealloc(addr);
if (auto alloc_map = vm::get(vm::any, addr); alloc_map->flags & (1ull << 62))
{
vm::unmap(vm::any, alloc_map->addr);
}
}
auto alloc_map = vm::get(vm::any, addr);
unmap_vm_area(alloc_map);
});
applied.clear();
return applied;
applied.resize(old_applied_size);
return old_applied_size;
}
// Fixup values from before
std::fill(applied.begin(), applied.end(), u32{umax});
std::fill(applied.begin() + old_applied_size, applied.end(), u32{umax});
u32 relocate_instructions_at = 0;
for (const auto& p : patch.data_list)
{
u32 offset = p.offset;
if (offset < min_addr || offset - min_addr >= filesz)
if (relocate_instructions_at && vm::read32(relocate_instructions_at) != 0x6000'0000u)
{
// No longer points to a NOP to be filled, meaning we ran out of instructions
relocate_instructions_at = 0;
}
if (!relocate_instructions_at && (offset < min_addr || offset - min_addr >= filesz))
{
// This patch is out of range for this segment
continue;
@ -608,6 +619,13 @@ static std::basic_string<u32> apply_modification(const patch_engine::patch_info&
auto ptr = dst + offset;
if (relocate_instructions_at)
{
offset = relocate_instructions_at;
ptr = vm::get_super_ptr<u8>(relocate_instructions_at);
relocate_instructions_at += 4; // Advance to the next instruction on dynamic memory
}
u32 resval = umax;
switch (p.type)
@ -623,6 +641,86 @@ static std::basic_string<u32> apply_modification(const patch_engine::patch_info&
// Applied before
continue;
}
case patch_type::code_alloc:
{
relocate_instructions_at = 0;
const u32 out_branch = vm::try_get_addr(dst + (offset & -4)).first;
// Allow only if points to a PPU executable instruction
if (out_branch < 0x10000 || out_branch >= 0x4000'0000 || !vm::check_addr<4>(out_branch, vm::page_executable))
{
continue;
}
const u32 alloc_size = utils::align(static_cast<u32>(p.value.long_value + 1) * 4, 0x10000);
// Always executable
u64 flags = vm::page_executable | vm::page_readable;
switch (p.offset % patch_engine::mem_protection::mask)
{
case patch_engine::mem_protection::rw:
case patch_engine::mem_protection::wx:
{
flags |= vm::page_writable;
break;
}
case patch_engine::mem_protection::ro:
case patch_engine::mem_protection::rx:
{
break;
}
default: ensure(false);
}
const auto alloc_map = ensure(vm::get(vm::any, out_branch));
// Range allowed for absolute branches to operate at
// It takes into account that we need to put a branch for return at the end of memory space
const u32 addr = p.alloc_addr = alloc_map->alloc(alloc_size, nullptr, 0x10000, flags);
if (!addr)
{
patch_log.error("Failed to allocate 0x%x bytes for code (entry=0x%x)", alloc_size, addr, out_branch);
continue;
}
patch_log.success("Allocated 0x%x for code at 0x%x (entry=0x%x)", alloc_size, addr, out_branch);
// NOP filled
std::fill_n(vm::get_super_ptr<u32>(addr), p.value.long_value, 0x60000000);
// Register code
ppu_register_range(addr, alloc_size);
ppu_register_function_at(addr, static_cast<u32>(p.value.long_value), 0);
// Write branch to code
ppu_form_branch_to_code(out_branch, addr);
resval = out_branch & -4;
// Write address of the allocated memory to the code entry
*vm::get_super_ptr<u32>(resval) = addr;
// Write branch to return to code
ppu_form_branch_to_code(addr + static_cast<u32>(p.value.long_value) * 4, resval + 4);
relocate_instructions_at = addr;
break;
}
case patch_type::jump:
{
const u32 out_branch = vm::try_get_addr(dst + (offset & -4)).first;
const u32 dest = static_cast<u32>(p.value.long_value);
// Allow only if points to a PPU executable instruction
if (!ppu_form_branch_to_code(out_branch, dest))
{
continue;
}
resval = out_branch & -4;
break;
}
case patch_type::byte:
{
*ptr = static_cast<u8>(p.value.long_value);
@ -721,7 +819,7 @@ static std::basic_string<u32> apply_modification(const patch_engine::patch_info&
applied.push_back(resval);
}
return applied;
return old_applied_size;
}
std::basic_string<u32> patch_engine::apply(const std::string& name, u8* dst, u32 filesz, u32 min_addr)
@ -812,11 +910,12 @@ std::basic_string<u32> patch_engine::apply(const std::string& name, u8* dst, u32
// Apply modifications sequentially
auto apply_func = [&](const patch_info& patch)
{
auto applied = apply_modification(patch, dst, filesz, min_addr);
const usz old_size = apply_modification(applied_total, patch, dst, filesz, min_addr);
applied_total += applied;
patch_log.success("Applied patch (hash='%s', description='%s', author='%s', patch_version='%s', file_version='%s') (<- %u)", patch.hash, patch.description, patch.author, patch.patch_version, patch.version, applied.size());
if (applied_total.size() != old_size)
{
patch_log.success("Applied patch (hash='%s', description='%s', author='%s', patch_version='%s', file_version='%s') (<- %u)", patch.hash, patch.description, patch.author, patch.patch_version, patch.version, applied_total.size() - old_size);
}
};
// Sort specific patches after global patches
@ -858,6 +957,34 @@ std::basic_string<u32> patch_engine::apply(const std::string& name, u8* dst, u32
return applied_total;
}
// Deallocates memory that was allocated while applying the named patch group.
// Safe to call for unknown names (no-op) and safe to call repeatedly: each
// entry's alloc_addr is cleared on first release, so later passes do nothing.
void patch_engine::unload(const std::string& name)
{
	// Single lookup instead of find() followed by at() (one hash/compare pass)
	const auto found = m_map.find(name);

	if (found == m_map.cend())
	{
		return;
	}

	const auto& container = found->second;

	for (const auto& [description, patch] : container.patch_info_map)
	{
		// NOTE(review): entries are only released when the patch has at least
		// one title; a patch with an empty titles map is skipped — confirm
		// this gating is intended.
		for (const auto& [title, serials] : patch.titles)
		{
			for (auto& entry : patch.data_list)
			{
				// Deallocate used memory; exchange() clears alloc_addr so the
				// same entry is never freed twice
				if (u32 addr = std::exchange(entry.alloc_addr, 0))
				{
					vm::dealloc(addr);

					// Also unmap the dynamically created vm area, if any
					auto alloc_map = vm::get(vm::any, addr);
					unmap_vm_area(alloc_map);
				}
			}
		}
	}
}
void patch_engine::save_config(const patch_map& patches_map)
{
const std::string path = get_patch_config_path();

View File

@ -27,6 +27,8 @@ enum class patch_type
invalid,
load,
alloc, // Allocate memory at address (zeroized executable memory)
code_alloc,// Allocate memory somewhere, saves branch to memory at specfied address (filled with PPU NOP and branch for returning)
jump, // Install special 32-bit jump instruction (PPU only atm)
byte,
le16,
le32,
@ -56,6 +58,7 @@ public:
u64 long_value;
f64 double_value;
} value{0};
mutable u32 alloc_addr = 0; // Used to save optional allocation address (if occured)
};
using patch_app_versions = std::unordered_map<std::string /*app_version*/, bool /*enabled*/>;
@ -148,6 +151,9 @@ public:
// Apply patch (returns the number of entries applied)
std::basic_string<u32> apply(const std::string& name, u8* dst, u32 filesz = -1, u32 min_addr = 0);
// Deallocate memory used by patches
void unload(const std::string& name);
private:
// Database
patch_map m_map{};

View File

@ -37,6 +37,8 @@ void fmt_class_string<bs_t<ppu_attr>>::format(std::string& out, u64 arg)
format_bitset(out, arg, "[", ",", "]", &fmt_class_string<ppu_attr>::format);
}
u32 ppu_get_far_jump(u32 pc);
void ppu_module::validate(u32 reloc)
{
// Load custom PRX configuration if available
@ -1199,6 +1201,12 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (ppu_get_far_jump(iaddr))
{
block.second = _ptr.addr() - block.first;
break;
}
if (type == ppu_itype::UNK)
{
// Invalid blocks will remain empty
@ -1388,6 +1396,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (ppu_get_far_jump(iaddr))
{
break;
}
if (type == ppu_itype::B || type == ppu_itype::BC)
{
const u32 target = (op.aa ? 0 : iaddr) + (type == ppu_itype::B ? +op.bt24 : +op.bt14);
@ -1462,7 +1475,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (type == ppu_itype::UNK)
if (ppu_get_far_jump(addr))
{
_ptr.set(next);
}
else if (type == ppu_itype::UNK)
{
break;
}
@ -1674,6 +1691,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
for (; i_pos < lim; i_pos += 4)
{
if (ppu_get_far_jump(i_pos))
{
continue;
}
const u32 opc = vm::_ref<u32>(i_pos);
switch (auto type = s_ppu_itype.decode(opc))

View File

@ -528,6 +528,8 @@ struct ppu_prx_module_info
be_t<u32> unk5;
};
bool ppu_form_branch_to_code(u32 entry, u32 target);
// Load and register exports; return special exports found (nameless module)
static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 exports_end)
{
@ -612,20 +614,7 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
// Set exported function
flink.export_addr = target - 4;
if ((target <= _entry && _entry - target <= 0x2000000) || (target > _entry && target - _entry < 0x2000000))
{
// Use relative branch
vm::write32(_entry, ppu_instructions::B(target - _entry));
}
else if (target < 0x2000000)
{
// Use absolute branch if possible
vm::write32(_entry, ppu_instructions::B(target, true));
}
else
{
ppu_loader.fatal("Failed to patch function at 0x%x (0x%x)", _entry, target);
}
ppu_form_branch_to_code(faddr, target);
}
else
{
@ -1267,9 +1256,25 @@ void ppu_unload_prx(const lv2_prx& prx)
// }
//}
// Format patch name
std::string hash = fmt::format("PRX-%s", fmt::base57(prx.sha1));
for (auto& seg : prx.segs)
{
if (!seg.size) continue;
vm::dealloc(seg.addr, vm::main);
const std::string hash_seg = fmt::format("%s-%u", hash, &seg - prx.segs.data());
// Deallocate memory used for patches
g_fxo->get<patch_engine>().unload(hash_seg);
if (!Emu.GetTitleID().empty())
{
// Alternative patch
g_fxo->get<patch_engine>().unload(Emu.GetTitleID() + '-' + hash_seg);
}
}
}

View File

@ -319,6 +319,12 @@ void ppu_recompiler_fallback(ppu_thread& ppu)
while (true)
{
if (uptr func = ppu_ref(ppu.cia); (func << 17 >> 17) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
{
// We found a recompiler function at cia, return
break;
}
// Run instructions in interpreter
if (const u32 op = vm::read32(ppu.cia); ctr++, table[ppu_decode(op)](ppu, {op})) [[likely]]
{
@ -326,12 +332,6 @@ void ppu_recompiler_fallback(ppu_thread& ppu)
continue;
}
if (uptr func = ppu_ref(ppu.cia); (func << 17 >> 17) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
{
// We found a recompiler function at cia, return
break;
}
if (ppu.test_stopped())
{
break;
@ -411,7 +411,7 @@ extern void ppu_register_range(u32 addr, u32 size)
// Register executable range at
utils::memory_commit(&ppu_ref(addr), u64{size} * 2, utils::protection::rw);
vm::page_protect(addr, size, 0, vm::page_executable);
ensure(vm::page_protect(addr, size, 0, vm::page_executable));
if (g_cfg.core.ppu_debug)
{
@ -438,7 +438,9 @@ extern void ppu_register_range(u32 addr, u32 size)
}
}
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr)
static bool ppu_far_jump(ppu_thread& ppu);
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr = nullptr)
{
// Initialize specific function
if (ptr)
@ -464,10 +466,11 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr)
// Initialize interpreter cache
const u64 _break = reinterpret_cast<uptr>(ppu_break);
const u64 far_jump = reinterpret_cast<uptr>(ppu_far_jump);
while (size)
{
if (ppu_ref(addr) != _break)
if (ppu_ref(addr) != _break && ppu_ref(addr) != far_jump)
{
ppu_ref(addr) = ppu_cache(addr);
}
@ -477,6 +480,80 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr)
}
}
// Thin adapter: accepts the function pointer as a raw u64 (the form stored by
// the patch engine) and forwards to the typed ppu_function_t overload.
extern void ppu_register_function_at(u32 addr, u32 size, u64 ptr)
{
	ppu_register_function_at(addr, size, reinterpret_cast<ppu_function_t>(ptr));
}
// Registry of "far jump" targets installed by patches: maps a guest entry PC
// to the absolute address execution should continue at.
struct ppu_far_jumps_t
{
	std::unordered_map<u32, u32> vals;

	// Guards vals; mutable so const readers can still take the lock
	mutable shared_mutex mutex;

	// Returns the registered target for pc, or 0 when none exists.
	u32 get_target(u32 pc) const
	{
		reader_lock lock(mutex);

		const auto found = vals.find(pc);
		return found == vals.end() ? 0 : found->second;
	}
};
// Looks up the far-jump target registered for the given PC (0 if none).
u32 ppu_get_far_jump(u32 pc)
{
	const auto& jumps = g_fxo->get<const ppu_far_jumps_t>();
	return jumps.get_target(pc);
}
// Interpreter stub installed at patched entries: redirects the thread's
// current instruction address to the registered far-jump target.
// NOTE(review): assumes a target is registered for ppu.cia (get_target would
// yield 0 otherwise) — stub appears to be installed only at such entries; confirm.
static bool ppu_far_jump(ppu_thread& ppu)
{
	const u32 redirected_cia = g_fxo->get<const ppu_far_jumps_t>().get_target(ppu.cia);
	ppu.cia = redirected_cia;
	return false;
}
// Registers a "far jump" so that execution reaching 'entry' continues at
// 'target' (both aligned down to instruction boundaries).
// Returns false for invalid pairs: identical addresses, or either address not
// pointing at PPU-executable memory.
bool ppu_form_branch_to_code(u32 entry, u32 target)
{
	entry &= -4;
	target &= -4;

	// Fix: both endpoints must be executable. The previous condition was
	// missing the '!' on the entry check, so every *valid* (executable) entry
	// was rejected — contradicting callers that test
	// "if (!ppu_form_branch_to_code(...)) continue;" after verifying the
	// branch source is an executable PPU instruction.
	if (entry == target || !vm::check_addr(entry, vm::page_executable) || !vm::check_addr(target, vm::page_executable))
	{
		return false;
	}

	g_fxo->init<ppu_far_jumps_t>();

	// Register branch target in host memory, not guest memory
	auto& jumps = g_fxo->get<ppu_far_jumps_t>();

	std::lock_guard lock(jumps.mutex);
	jumps.vals.insert_or_assign(entry, target);

	return true;
}
// Drops every registered far-jump whose entry PC lies within
// [addr, addr + size). Called when guest memory is unmapped so stale patch
// entries do not survive the mapping.
void ppu_remove_hle_instructions(u32 addr, u32 size)
{
	auto& jumps = g_fxo->get<ppu_far_jumps_t>();

	std::lock_guard lock(jumps.mutex);

	if (!size)
	{
		// Empty range: nothing can match (original loop erased nothing too)
		return;
	}

	const u32 range_last = addr + size - 1;

	for (auto it = jumps.vals.begin(); it != jumps.vals.end();)
	{
		if (it->first >= addr && it->first <= range_last)
		{
			it = jumps.vals.erase(it);
		}
		else
		{
			++it;
		}
	}
}
atomic_t<bool> g_debugger_pause_all_threads_on_bp = true;
// Breakpoint entry point
@ -608,6 +685,14 @@ std::array<u32, 2> op_branch_targets(u32 pc, ppu_opcode_t op)
{
std::array<u32, 2> res{pc + 4, umax};
g_fxo->need<ppu_far_jumps_t>();
if (u32 target = g_fxo->get<const ppu_far_jumps_t>().get_target(pc))
{
res[0] = target;
return res;
}
switch (const auto type = g_ppu_itype.decode(op.opcode))
{
case ppu_itype::B:
@ -1409,10 +1494,10 @@ void ppu_trap(ppu_thread& ppu, u64 addr)
ppu.cia += add; // Skip instructions, hope for valid code (interprter may be invoked temporarily)
}
[[noreturn]] static void ppu_error(ppu_thread& ppu, u64 addr, u32 op)
static void ppu_error(ppu_thread& ppu, u64 addr, u32 op)
{
ppu.cia = ::narrow<u32>(addr);
fmt::throw_exception("Unknown/Illegal opcode 0x08x (0x%llx)", op, addr);
ppu_recompiler_fallback(ppu);
}
static void ppu_check(ppu_thread& ppu, u64 addr)
@ -2707,7 +2792,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
{
for (auto& block : func.blocks)
{
ppu_register_function_at(block.first, block.second, nullptr);
ppu_register_function_at(block.first, block.second);
}
if (g_cfg.core.ppu_debug && func.size && func.toc != umax)
@ -3166,7 +3251,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
const auto name = fmt::format("__0x%x", func.addr - reloc);
const u64 addr = ensure(jit->get(name));
jit_mod.funcs.emplace_back(reinterpret_cast<ppu_function_t>(addr));
ppu_ref(func.addr) = (addr & 0x7fff'ffff'ffffu) | (ppu_ref(func.addr) & ~0x7fff'ffff'ffffu);
ppu_register_function_at(func.addr, 4, jit_mod.funcs.back());
if (g_cfg.core.ppu_debug)
ppu_log.notice("Installing function %s at 0x%x: %p (reloc = 0x%x)", name, func.addr, ppu_ref(func.addr), reloc);

View File

@ -126,6 +126,8 @@ Type* PPUTranslator::GetContextType()
return m_thread_type;
}
u32 ppu_get_far_jump(u32 pc);
Function* PPUTranslator::Translate(const ppu_function& info)
{
m_function = m_module->getFunction(info.name);
@ -232,7 +234,15 @@ Function* PPUTranslator::Translate(const ppu_function& info)
m_rel = nullptr;
}
if (u32 target = ppu_get_far_jump(m_addr + base))
{
FlushRegisters();
CallFunction(0, m_ir->getInt64(target));
continue;
}
const u32 op = vm::read32(vm::cast(m_addr + base));
(this->*(s_ppu_decoder.decode(op)))({op});
if (m_rel)
@ -352,6 +362,12 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
if (_target >= caddr && _target <= cend)
{
if (target == m_addr + 4)
{
// Branch to next (ignored)
return;
}
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target), type);
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
}

View File

@ -20,6 +20,8 @@
LOG_CHANNEL(vm_log, "VM");
void ppu_remove_hle_instructions(u32 addr, u32 size);
namespace vm
{
static u8* memory_reserve_4GiB(void* _addr, u64 size = 0x100000000)
@ -914,6 +916,9 @@ namespace vm
rsxthr.on_notify_memory_unmapped(addr, size);
}
// Deregister PPU related data
ppu_remove_hle_instructions(addr, size);
// Actually unmap memory
if (!shm)
{
@ -1160,8 +1165,8 @@ namespace vm
{
if (!src)
{
// Use the block's flags
flags = this->flags;
// Use the block's flags (except for protection)
flags = (this->flags & ~page_prot_mask) | (flags & page_prot_mask);
}
// Determine minimal alignment
@ -1182,7 +1187,7 @@ namespace vm
return 0;
}
u8 pflags = flags & page_hidden ? 0 : page_readable | page_writable;
u8 pflags = flags & page_hidden ? 0 : (~flags & (page_readable | page_writable));
if ((flags & page_size_64k) == page_size_64k)
{
@ -1237,8 +1242,8 @@ namespace vm
{
if (!src)
{
// Use the block's flags
flags = this->flags;
// Use the block's flags (except for protection)
flags = (this->flags & ~page_prot_mask) | (flags & page_prot_mask);
}
// Determine minimal alignment
@ -1266,7 +1271,7 @@ namespace vm
// Force aligned address
addr -= addr % min_page_size;
u8 pflags = flags & page_hidden ? 0 : page_readable | page_writable;
u8 pflags = flags & page_hidden ? 0 : (~flags & (page_readable | page_writable));
if ((flags & page_size_64k) == page_size_64k)
{

View File

@ -43,7 +43,8 @@ namespace vm
page_readable = (1 << 0),
page_writable = (1 << 1),
page_executable = (1 << 2),
page_prot_mask = page_readable | page_writable | page_executable,
page_fault_notification = (1 << 3),
page_no_reservations = (1 << 4),
page_64k_size = (1 << 5),