diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index d94c5da789..609d7def83 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -299,12 +299,11 @@ namespace ppu_patterns }; } -std::vector ppu_analyse(const std::vector>& segs, const std::vector>& secs, u32 entry, u32 lib_toc) +std::vector ppu_analyse(const std::vector>& segs, const std::vector>& secs, u32 lib_toc) { // Assume first segment is executable const u32 start = segs[0].first; const u32 end = segs[0].first + segs[0].second; - const u32 start_toc = entry && !lib_toc ? +vm::read32(entry + 4) : lib_toc; // Known TOCs (usually only 1) std::unordered_set TOCs; @@ -384,28 +383,49 @@ std::vector ppu_analyse(const std::vector>& se // Find OPD section for (const auto& sec : secs) { - const u32 sec_end = sec.first + sec.second; + u32 sec_end = sec.first + sec.second; - if (entry >= sec.first && entry < sec_end) + // Probe + for (vm::cptr ptr = vm::cast(sec.first); ptr.addr() < sec_end; ptr += 2) { - for (vm::cptr ptr = vm::cast(sec.first); ptr.addr() < sec_end; ptr += 2) + const u32 addr = ptr[0]; + const u32 _toc = ptr[1]; + + // TODO: improve TOC constraints + if (_toc % 4 || _toc == 0 || _toc >= 0x40000000 || (_toc >= start && _toc < end)) { - // Add function and TOC - const u32 addr = ptr[0]; - const u32 toc = ptr[1]; - LOG_NOTICE(PPU, "OPD: [0x%x] 0x%x (TOC=0x%x)", ptr, addr, toc); - TOCs.emplace(toc); - - auto& func = add_func(addr, toc, ptr.addr()); - func.attr += ppu_attr::known_addr; + sec_end = 0; + break; } - break; + if (addr % 4 || addr < start || addr >= end || addr == _toc) + { + sec_end = 0; + break; + } + } + + if (sec_end) LOG_NOTICE(PPU, "Reading OPD section at 0x%x...", sec.first); + + // Mine + for (vm::cptr ptr = vm::cast(sec.first); ptr.addr() < sec_end; ptr += 2) + { + // Add function and TOC + const u32 addr = ptr[0]; + const u32 toc = ptr[1]; + LOG_NOTICE(PPU, "OPD: [0x%x] 0x%x (TOC=0x%x)", ptr, addr, toc); + + TOCs.emplace(toc); + auto& func = add_func(addr, toc, ptr.addr()); + func.attr += ppu_attr::known_addr; } } - // Otherwise, register initial set of functions (likely including the entry point) - add_toc(start_toc); + // Secondary attempt (TODO) + if (secs.empty() && lib_toc) + { + add_toc(lib_toc); + } // Find .eh_frame section for (const auto& sec : secs) @@ -449,6 +469,8 @@ std::vector ppu_analyse(const std::vector>& se ptr = vm::cast(ptr.addr() + size); } + if (sec_end && sec.second > 4) LOG_NOTICE(PPU, "Reading .eh_frame section at 0x%x...", sec.first); + // Mine for (vm::cptr ptr = vm::cast(sec.first); ptr.addr() < sec_end; ptr = vm::cast(ptr.addr() + ptr[0] + 4)) { @@ -471,16 +493,17 @@ std::vector ppu_analyse(const std::vector>& se u32 addr = 0; u32 size = 0; + // TODO: 64 bit or 32 bit values (approximation) if (ptr[2] == 0 && ptr[3] == 0) { size = ptr[5]; } - else if ((ptr[2] == -1 || ptr[2] == 0) && ptr[4] == 0) + else if ((ptr[2] == -1 || ptr[2] == 0) && ptr[4] == 0 && ptr[5]) { - addr = ptr[3] + ptr.addr() + 8; + addr = ptr[3]; size = ptr[5]; } - else if (ptr[2] != -1 && ptr[4]) + else if (ptr[2] != -1 && ptr[3]) { addr = ptr[2]; size = ptr[3]; @@ -491,13 +514,18 @@ std::vector ppu_analyse(const std::vector>& se continue; } + // TODO: absolute/relative offset (approximation) + if (addr > 0xc0000000) + { + addr += ptr.addr() + 8; + } + LOG_NOTICE(PPU, ".eh_frame: [0x%x] FDE 0x%x (cie=*0x%x, addr=0x%x, size=0x%x)", ptr, ptr[0], cie, addr, size); - if (!addr) continue; // TODO (some entries have zero offset) - - if (addr % 4 || addr < start || addr >= end) + // TODO: invalid offsets, zero offsets (removed functions?) + if (addr % 4 || size % 4 || size > (end - start) || addr < start || addr + size > end) { - LOG_ERROR(PPU, ".eh_frame: Invalid function 0x%x", addr); + if (addr) LOG_ERROR(PPU, ".eh_frame: Invalid function 0x%x", addr); continue; } @@ -525,13 +553,30 @@ std::vector ppu_analyse(const std::vector>& se if (ptr + 1 <= fend && (ptr[0] & 0xfc000001) == B({}, {})) { // Simple gate - const u32 target = ppu_branch_target(ptr[0] & 0x2 ? 0 : ptr.addr(), s32(ptr[0]) << 6 >> 6); + const u32 target = (ptr[0] & 0x2 ? 0 : ptr.addr()) + ppu_opcode_t{ptr[0]}.bt24; + + if (target == func.addr) + { + // Special case + func.size = 0x4; + func.blocks.emplace(func.addr, func.size); + func.attr += ppu_attr::no_return; + continue; + } if (target >= start && target < end) { + auto& new_func = add_func(target, func.toc, func.addr); + + if (new_func.blocks.empty()) + { + func_queue.emplace_back(func); + continue; + } + func.size = 0x4; func.blocks.emplace(func.addr, func.size); - add_func(target, func.toc, func.addr); + func.attr += new_func.attr & ppu_attr::no_return; continue; } } @@ -544,14 +589,23 @@ std::vector ppu_analyse(const std::vector>& se { // TOC change gate const u32 new_toc = func.toc && func.toc != -1 ? func.toc + (ptr[1] << 16) + s16(ptr[2]) : 0; - const u32 target = ppu_branch_target(ptr[3] & 0x2 ? 0 : (ptr + 3).addr(), s32(ptr[3]) << 6 >> 6); + const u32 target = (ptr[3] & 0x2 ? 0 : (ptr + 3).addr()) + ppu_opcode_t{ptr[3]}.bt24; if (target >= start && target < end) { + add_toc(new_toc); + + auto& new_func = add_func(target, new_toc, func.addr); + + if (new_func.blocks.empty()) + { + func_queue.emplace_back(func); + continue; + } + func.size = 0x10; func.blocks.emplace(func.addr, func.size); - add_func(target, new_toc, func.addr); - add_toc(new_toc); + func.attr += new_func.attr & ppu_attr::no_return; continue; } } @@ -576,7 +630,7 @@ std::vector ppu_analyse(const std::vector>& se if (const u32 len = ppu_test(ptr, fend, ppu_patterns::abort)) { - // Function .abort + // Function "abort" LOG_NOTICE(PPU, "Function [0x%x]: 'abort'", func.addr); func.attr += ppu_attr::no_return; func.attr += ppu_attr::known_size; @@ -599,7 +653,7 @@ std::vector ppu_analyse(const std::vector>& se } // Get function limit - const u32 func_end = get_limit(func.addr + 1); + const u32 func_end = std::min(get_limit(func.addr + 1), func.attr & ppu_attr::known_size ? func.addr + func.size : end); // Block analysis workload std::vector>> block_queue; @@ -675,7 +729,7 @@ std::vector ppu_analyse(const std::vector>& se } else if (type == ppu_itype::B || type == ppu_itype::BC) { - const u32 target = ppu_branch_target(op.aa ? 0 : iaddr, type == ppu_itype::B ? +op.ll : +op.simm16); + const u32 target = (op.aa ? 0 : iaddr) + (type == ppu_itype::B ? +op.bt24 : +op.bt14); if (target < start || target >= end) { @@ -760,10 +814,18 @@ std::vector ppu_analyse(const std::vector>& se if (jt_addr != jt_end && _ptr.addr() == jt_addr) { // Acknowledge jumptable detection failure - func.attr += ppu_attr::no_size; + if (!func.attr.test_and_set(ppu_attr::no_size)) + { + LOG_WARNING(PPU, "[0x%x] Jump table not found! 0x%x-0x%x", func.addr, jt_addr, jt_end); + } + add_block(iaddr); block_queue.clear(); } + else + { + LOG_TRACE(PPU, "[0x%x] Jump table found: 0x%x-0x%x", func.addr, jt_addr, _ptr); + } } block.second = _ptr.addr() - block.first; @@ -780,60 +842,13 @@ std::vector ppu_analyse(const std::vector>& se } // Finalization: determine function size - for (const auto& block : func.blocks) + if (!func.attr.test(ppu_attr::known_size)) { - const u32 expected = func.addr + func.size; + const auto last = func.blocks.crbegin(); - if (func.attr & ppu_attr::known_size) + if (last != func.blocks.crend()) { - continue; - } - - // Just set the max - func.size = std::max(func.size, block.first + block.second - func.addr); - continue; - - // Disabled (TODO) - if (expected == block.first) - { - func.size += block.second; - } - else if (expected + 4 == block.first && vm::read32(expected) == ppu_instructions::NOP()) - { - func.size += block.second + 4; - } - else if (expected < block.first) - { - //block.second = 0; - continue; - } - - // Function min size constraint (TODO) - for (vm::cptr _ptr = vm::cast(block.first); _ptr.addr() < block.first + block.second;) - { - const u32 iaddr = _ptr.addr(); - const ppu_opcode_t op{*_ptr++}; - const ppu_itype::type type = s_ppu_itype.decode(op.opcode); - - if (type == ppu_itype::BCCTR && !op.lk) - { - const u32 jt_base = _ptr.addr() - func.addr; - - for (; _ptr.addr() < block.first + block.second; _ptr++) - { - func.size = std::max(func.size, jt_base + *_ptr); - } - - break; - } - else if (type == ppu_itype::BC && !op.lk) - { - const u32 target = ppu_branch_target(op.aa ? 0 : iaddr, +op.simm16); - - func.size = std::max(func.size, target - func.addr); - - break; - } + func.size = last->first + last->second - func.addr; } } @@ -873,7 +888,7 @@ std::vector ppu_analyse(const std::vector>& se if (type == ppu_itype::B || type == ppu_itype::BC) { - const u32 target = ppu_branch_target(op.aa ? 0 : iaddr, type == ppu_itype::B ? +op.ll : +op.simm16); + const u32 target = (op.aa ? 0 : iaddr) + (type == ppu_itype::B ? +op.bt24 : +op.bt14); if (target >= start && target < end) { @@ -927,5 +942,7 @@ std::vector ppu_analyse(const std::vector>& se result.emplace_back(std::move(func.second)); } + LOG_NOTICE(PPU, "Function analysis: %zu functions (%zu enqueued)", result.size(), func_queue.size()); + return result; } diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index 1c7165107d..ce0c48e7b6 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -50,7 +50,7 @@ struct ppu_pattern extern void ppu_validate(const std::string& fname, const std::vector& funcs, u32 reloc); -extern std::vector ppu_analyse(const std::vector>& segs, const std::vector>& secs, u32 entry, u32 lib_toc); +extern std::vector ppu_analyse(const std::vector>& segs, const std::vector>& secs, u32 lib_toc); // PPU Instruction Type struct ppu_itype diff --git a/rpcs3/Emu/Cell/PPUDisAsm.h b/rpcs3/Emu/Cell/PPUDisAsm.h index 37d969a728..134738d65d 100644 --- a/rpcs3/Emu/Cell/PPUDisAsm.h +++ b/rpcs3/Emu/Cell/PPUDisAsm.h @@ -13,7 +13,7 @@ public: private: u32 DisAsmBranchTarget(const s32 imm) { - return ppu_branch_target(dump_pc, imm); + return dump_pc + (imm & ~3); } private: diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index e2c2acf35e..daf0c2fdc9 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -1897,7 +1897,7 @@ bool ppu_interpreter::BC(PPUThread& ppu, ppu_opcode_t op) if (ctr_ok && cond_ok) { const u32 nextLR = ppu.pc + 4; - ppu.pc = ppu_branch_target((op.aa ? 0 : ppu.pc), op.simm16); + ppu.pc = (op.aa ? 0 : ppu.pc) + op.bt14; if (op.lk) ppu.LR = nextLR; return false; } @@ -1927,7 +1927,7 @@ bool ppu_interpreter::SC(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::B(PPUThread& ppu, ppu_opcode_t op) { const u32 nextLR = ppu.pc + 4; - ppu.pc = ppu_branch_target(op.aa ? 0 : ppu.pc, op.ll); + ppu.pc = (op.aa ? 0 : ppu.pc) + op.bt24; if (op.lk) ppu.LR = nextLR; return false; } @@ -1954,7 +1954,7 @@ bool ppu_interpreter::BCLR(PPUThread& ppu, ppu_opcode_t op) if (ctr_ok && cond_ok) { const u32 nextLR = ppu.pc + 4; - ppu.pc = ppu_branch_target(0, (u32)ppu.LR); + ppu.pc = (u32)ppu.LR & ~3; if (op.lk) ppu.LR = nextLR; return false; } @@ -2023,7 +2023,7 @@ bool ppu_interpreter::BCCTR(PPUThread& ppu, ppu_opcode_t op) if (op.bo & 0x10 || ppu.CR[op.bi] == ((op.bo & 0x8) != 0)) { const u32 nextLR = ppu.pc + 4; - ppu.pc = ppu_branch_target(0, (u32)ppu.CTR); + ppu.pc = (u32)ppu.CTR & ~3; if (op.lk) ppu.LR = nextLR; return false; } diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index a717b2eaaa..94239bcc43 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -911,7 +911,7 @@ std::shared_ptr ppu_load_prx(const ppu_prx_object& elf) ppu_load_imports(link, lib_info->imports_start, lib_info->imports_end); - prx->funcs = ppu_analyse(segments, sections, prx->specials[0xbc9a0086], lib_info->toc); + prx->funcs = ppu_analyse(segments, sections, lib_info->toc); } else { @@ -1259,7 +1259,7 @@ void ppu_load_exec(const ppu_exec_object& elf) } // Analyse executable - const auto funcs = ppu_analyse(segments, sections, static_cast(elf.header.e_entry), 0); + const auto funcs = ppu_analyse(segments, sections, 0); ppu_validate(vfs::get(Emu.GetPath()), funcs, 0); diff --git a/rpcs3/Emu/Cell/PPUOpcodes.h b/rpcs3/Emu/Cell/PPUOpcodes.h index a310db22a4..45bf429c1a 100644 --- a/rpcs3/Emu/Cell/PPUOpcodes.h +++ b/rpcs3/Emu/Cell/PPUOpcodes.h @@ -56,18 +56,10 @@ union ppu_opcode_t ppu_bf_t flm; // 7..14 ppu_bf_t l6; // 6 ppu_bf_t l15; // 15 + cf_t, ff_t> bt14; + cf_t, ff_t> bt24; }; -inline u32 ppu_branch_target(u32 pc, u32 imm) -{ - return pc + (imm & ~0x3u); -} - -inline u64 ppu_branch_target(u64 pc, u64 imm) -{ - return pc + (imm & ~0x3ull); -} - inline u64 ppu_rotate_mask(u32 mb, u32 me) { const u64 mask = ~0ull << (63 ^ (me - mb)); diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 35fd7a34a5..42e5059474 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -1734,7 +1734,7 @@ void PPUTranslator::ADDIS(ppu_opcode_t op) void PPUTranslator::BC(ppu_opcode_t op) { - const u64 target = ppu_branch_target(op.aa ? 0 : m_current_addr, op.simm16); + const u64 target = (op.aa ? 0 : m_current_addr) + op.bt14; const auto cond = CheckBranchCondition(op.bo, op.bi); @@ -1742,7 +1742,7 @@ void PPUTranslator::BC(ppu_opcode_t op) { // Local branch - if (op.lk) + if (op.lk && target != m_current_addr) { CompilationError("BCL: local branch"); Call(GetType(), "__trace", m_ir->getInt64(m_current_addr)); @@ -1779,13 +1779,13 @@ void PPUTranslator::SC(ppu_opcode_t op) void PPUTranslator::B(ppu_opcode_t op) { - const u64 target = ppu_branch_target(op.aa ? 0 : m_current_addr, op.ll); + const u64 target = (op.aa ? 0 : m_current_addr) + op.bt24; if ((target > m_start_addr && target < m_end_addr) || (target == m_start_addr && !op.lk)) { // Local branch - if (op.lk) + if (op.lk && target != m_current_addr) { CompilationError("BL: local branch"); Call(GetType(), "__trace", m_ir->getInt64(m_current_addr)); @@ -1880,58 +1880,56 @@ void PPUTranslator::BCCTR(ppu_opcode_t op) { UseCondition(CheckBranchCondition(op.bo | 0x4, op.bi)); - const auto jt_addr = m_current_addr + 4; - const auto jt_data = m_bin + 1; + // Jumptable: sorted set of possible targets + std::set targets; // Detect a possible jumptable - for (u64 i = 0, addr = jt_addr; addr < m_end_addr; i++, addr += sizeof(u32)) + for (u64 jt_addr = (m_current_addr += sizeof(u32)); m_current_addr < m_end_addr; m_current_addr += sizeof(u32)) { - const u64 target = jt_addr + static_cast(jt_data[i]); + const u64 target = jt_addr + static_cast(*++m_bin); - // Check jumptable entry conditions - if (target % 4 || target < m_start_addr || target >= m_end_addr) + if (target == jt_addr) { - if (i >= 2) - { - // Fix next instruction address - m_current_addr = addr; - - if (!op.lk) - { - // Get sorted set of possible targets - const std::set cases(jt_data, jt_data + i); - - // Create switch with special default case - const auto _default = BasicBlock::Create(m_context, fmt::format("loc_%llx.def", m_current_addr/* - m_start_addr*/), m_function); - const auto _switch = m_ir->CreateSwitch(m_ir->CreateLoad(m_reg_ctr), _default, ::size32(cases)); - - for (const s32 offset : cases) - { - const u64 target = jt_addr + offset; - _switch->addCase(m_ir->getInt64(target), GetBasicBlock(target)); - } - - m_ir->SetInsertPoint(_default); - Trap(m_current_addr); - return; - } - else - { - CompilationError("BCCTRL with a jt"); - } - } - break; } + + if (target % 4 || target < m_start_addr || target >= m_end_addr) + { + break; + } + + targets.emplace(target); } if (!op.lk) { - // Indirect branch - m_ir->CreateBr(m_jtr); + if (!targets.empty()) + { + // Create switch with special default case + const auto _default = BasicBlock::Create(m_context, fmt::format("loc_%llx.def", m_current_addr/* - m_start_addr*/), m_function); + const auto _switch = m_ir->CreateSwitch(m_ir->CreateLoad(m_reg_ctr), _default, ::size32(targets)); + + for (const u64 target : targets) + { + _switch->addCase(m_ir->getInt64(target), GetBasicBlock(target)); + } + + m_ir->SetInsertPoint(_default); + Trap(m_current_addr); + } + else + { + // Indirect branch + m_ir->CreateBr(m_jtr); + } } else { + if (!targets.empty()) + { + CompilationError("BCCTRL with a jumptable"); + } + // Indirect call CallFunction(0, false, m_ir->CreateLoad(m_reg_ctr)); }