diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index bede99956c..7f4dbc3ab5 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -633,7 +633,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) { if (!_seg.addr) continue; - if (value >= _seg.addr && value < _seg.addr + _seg.size) + if (value >= start && value < end) { addr_heap.emplace(value); break; @@ -1527,6 +1527,25 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) // Decompose functions to basic blocks for (auto&& [_, func] : as_rvalue(std::move(fmap))) { + if (func.attr & ppu_attr::no_size && entry) + { + // Disabled for PRX for now + const u32 lim = get_limit(func.addr); + + ppu_log.warning("Function 0x%x will be compiled on per-instruction basis (next=0x%x)", func.addr, lim); + + for (u32 addr = func.addr; addr < lim; addr += 4) + { + auto& block = fmap[addr]; + block.addr = addr; + block.size = 4; + block.toc = func.toc; + block.attr = ppu_attr::no_size; + } + + continue; + } + for (auto [addr, size] : func.blocks) { if (!size) @@ -1583,7 +1602,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) case 109: case 110: { - ppu_log.notice("Added block from reloc: 0x%x (0x%x, %u)", target, rel.addr, rel.type); + ppu_log.trace("Added block from reloc: 0x%x (0x%x, %u) (heap=%d)", target, rel.addr, rel.type, addr_heap.count(target)); block_queue.emplace_back(target, 0); block_set.emplace(target); continue; @@ -1598,8 +1617,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) u32 exp = start; u32 lim = end; - // Start with full scan - block_queue.emplace_back(exp, lim); + // Start with full scan (disabled for PRX for now) + if (entry) + { + block_queue.emplace_back(exp, lim); + } // block_queue may grow for (usz i = 0; i < block_queue.size(); i++) @@ -1731,6 +1753,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) block.addr = exp; block.size = i_pos - exp; ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size); + + if (get_limit(exp) == end) + { + block.attr += ppu_attr::no_size; + } } } @@ -1750,9 +1777,26 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) } // Convert map to vector (destructive) - for (auto&& pair : as_rvalue(std::move(fmap))) + for (auto&& [_, block] : as_rvalue(std::move(fmap))) { - funcs.emplace_back(std::move(pair.second)); + if (block.attr & ppu_attr::no_size && block.size > 4 && entry) + { + // Disabled for PRX for now + ppu_log.warning("Block 0x%x will be compiled on per-instruction basis (size=0x%x)", block.addr, block.size); + + for (u32 addr = block.addr; addr < block.addr + block.size; addr += 4) + { + auto& i = funcs.emplace_back(); + i.addr = addr; + i.size = 4; + i.toc = block.toc; + i.attr = ppu_attr::no_size; + } + + continue; + } + + funcs.emplace_back(std::move(block)); } ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size()); diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index b41e04654d..d49ff07e11 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -15,6 +15,7 @@ using namespace llvm; const ppu_decoder s_ppu_decoder; +const ppu_decoder s_ppu_itype; const ppu_decoder s_ppu_iname; PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_module& info, ExecutionEngine& engine) @@ -161,20 +162,60 @@ Function* PPUTranslator::Translate(const ppu_function& info) const u64 base = m_reloc ? m_reloc->addr : 0; m_addr = info.addr - base; + // Don't emit check in small blocks without terminator + bool need_check = info.size >= 16; + + for (u32 addr = m_addr; addr < m_addr + info.size; addr += 4) + { + const u32 op = vm::read32(vm::cast(addr + base)); + + switch (s_ppu_itype.decode(op)) + { + case ppu_itype::UNK: + case ppu_itype::ECIWX: + case ppu_itype::ECOWX: + case ppu_itype::TD: + case ppu_itype::TDI: + case ppu_itype::TW: + case ppu_itype::TWI: + case ppu_itype::B: + case ppu_itype::BC: + case ppu_itype::BCCTR: + case ppu_itype::BCLR: + case ppu_itype::SC: + { + need_check = true; + break; + } + default: + { + break; + } + } + } + m_thread = &*m_function->arg_begin(); m_base_loaded = m_ir->CreateLoad(m_base); const auto body = BasicBlock::Create(m_context, "__body", m_function); - // Check status register in the entry block - const auto vstate = m_ir->CreateLoad(m_ir->CreateStructGEP(nullptr, m_thread, 1), true); - const auto vcheck = BasicBlock::Create(m_context, "__test", m_function); - m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), body, vcheck, m_md_likely); + if (need_check) + { + // Check status register in the entry block + const auto vstate = m_ir->CreateLoad(m_ir->CreateStructGEP(nullptr, m_thread, 1), true); + const auto vcheck = BasicBlock::Create(m_context, "__test", m_function); + m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), body, vcheck, m_md_likely); + + // Create tail call to the check function + m_ir->SetInsertPoint(vcheck); + Call(GetType(), "__check", m_thread, GetAddr())->setTailCallKind(llvm::CallInst::TCK_Tail); + m_ir->CreateRetVoid(); + } + else + { + m_ir->CreateBr(body); + } - // Create tail call to the check function - m_ir->SetInsertPoint(vcheck); - Call(GetType(), "__check", m_thread, GetAddr())->setTailCallKind(llvm::CallInst::TCK_Tail); - m_ir->CreateRetVoid(); m_ir->SetInsertPoint(body); // Process blocks @@ -2990,7 +3031,7 @@ void PPUTranslator::EQV(ppu_opcode_t op) void PPUTranslator::ECIWX(ppu_opcode_t op) { - SetGpr(op.rd, Call(GetType(), "__eciwx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); + UNK(op); } void PPUTranslator::LHZUX(ppu_opcode_t op) @@ -3111,7 +3152,7 @@ void PPUTranslator::ORC(ppu_opcode_t op) void PPUTranslator::ECOWX(ppu_opcode_t op) { - Call(GetType(), "__ecowx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32)); + UNK(op); } void PPUTranslator::STHUX(ppu_opcode_t op)