diff --git a/rpcs3/Emu/Cell/PPUOpcodes.h b/rpcs3/Emu/Cell/PPUOpcodes.h index ff6f3b11b2..de517014b5 100644 --- a/rpcs3/Emu/Cell/PPUOpcodes.h +++ b/rpcs3/Emu/Cell/PPUOpcodes.h @@ -674,6 +674,7 @@ namespace ppu_instructions inline u32 ADDI(u32 rt, u32 ra, s32 si) { ppu_opcode_t op{ 0x0eu << 26 }; op.rd = rt; op.ra = ra; op.simm16 = si; return op.opcode; } inline u32 ADDIS(u32 rt, u32 ra, s32 si) { ppu_opcode_t op{ 0x0fu << 26 }; op.rd = rt; op.ra = ra; op.simm16 = si; return op.opcode; } + inline u32 XORIS(u32 rt, u32 ra, s32 si) { ppu_opcode_t op{ 0x1bu << 26 }; op.rd = rt; op.ra = ra; op.simm16 = si; return op.opcode; } inline u32 ORI(u32 rt, u32 ra, u32 ui) { ppu_opcode_t op{ 0x18u << 26 }; op.rd = rt; op.ra = ra; op.uimm16 = ui; return op.opcode; } inline u32 ORIS(u32 rt, u32 ra, u32 ui) { ppu_opcode_t op{ 0x19u << 26 }; op.rd = rt; op.ra = ra; op.uimm16 = ui; return op.opcode; } inline u32 OR(u32 ra, u32 rs, u32 rb, bool rc = false) { ppu_opcode_t op{ 0x1fu << 26 | 0x1bcu << 1 }; op.rs = rs; op.ra = ra; op.rb = rb; op.rc = rc; return op.opcode; } @@ -685,6 +686,7 @@ namespace ppu_instructions inline u32 MFSPR(u32 rt, u32 spr) { ppu_opcode_t op{ 0x1fu << 26 | 0x153u << 1 }; op.rd = rt; op.spr = spr; return op.opcode; } inline u32 MTSPR(u32 spr, u32 rs) { ppu_opcode_t op{ 0x1fu << 26 | 0x1d3u << 1 }; op.rs = rs; op.spr = spr; return op.opcode; } inline u32 LWZ(u32 rt, u32 ra, s32 si) { ppu_opcode_t op{ 0x20u << 26 }; op.rd = rt; op.ra = ra; op.simm16 = si; return op.opcode; } + inline u32 STW(u32 rt, u32 ra, s32 si) { ppu_opcode_t op{ 0x24u << 26 }; op.rd = rt; op.ra = ra; op.simm16 = si; return op.opcode; } inline u32 STD(u32 rs, u32 ra, s32 si) { ppu_opcode_t op{ 0x3eu << 26 }; op.rs = rs; op.ra = ra; op.ds = si / 4; return op.opcode; } inline u32 STDU(u32 rs, u32 ra, s32 si) { ppu_opcode_t op{ 0x3eu << 26 | 1 }; op.rs = rs; op.ra = ra; op.ds = si / 4; return op.opcode; } inline u32 LD(u32 rt, u32 ra, s32 si) { ppu_opcode_t op{ 0x3au << 26 }; op.rd = rt; op.ra = ra; op.ds = si / 4; return op.opcode; } @@ -697,6 +699,7 @@ namespace ppu_instructions inline u32 STVX(u32 vs, u32 ra, u32 rb) { ppu_opcode_t op{ 31 << 26 | 231 << 1 }; op.vs = vs; op.ra = ra; op.rb = rb; return op.opcode; } inline u32 LFD(u32 frd, u32 ra, s32 si) { ppu_opcode_t op{ 50u << 26 }; op.frd = frd; op.ra = ra; op.simm16 = si; return op.opcode; } inline u32 LVX(u32 vd, u32 ra, u32 rb) { ppu_opcode_t op{ 31 << 26 | 103 << 1 }; op.vd = vd; op.ra = ra; op.rb = rb; return op.opcode; } + inline constexpr u32 EIEIO() { return 0x7c0006ac; } namespace implicts { @@ -734,7 +737,7 @@ namespace ppu_instructions inline u32 CLRLDI(u32 x, u32 y, u32 n) { return RLDICL(x, y, 0, n, false); } inline u32 CLRRDI(u32 x, u32 y, u32 n) { return RLDICR(x, y, 0, 63 - n, false); } - inline u32 TRAP() { return 0x7FE00008; } // tw 31,r0,r0 + inline constexpr u32 TRAP() { return 0x7FE00008; } // tw 31,r0,r0 } using namespace implicts; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 92cd029779..9160697bfd 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -62,7 +62,9 @@ #include #include #include +#include #include + #include "util/asm.hpp" #include "util/vm.hpp" #include "util/v128.hpp" @@ -485,6 +487,167 @@ void ppu_reservation_fallback(ppu_thread& ppu) } } +u32 ppu_read_mmio_aware_u32(u8* vm_base, u32 eal) +{ + if (eal >= RAW_SPU_BASE_ADDR) + { + // RawSPU MMIO + auto thread = idm::get>(spu_thread::find_raw_spu((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET)); + + if (!thread) + { + // Access Violation + } + else if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE) // LS access + { + } + else if (u32 value{}; thread->read_reg(eal, value)) + { + return std::bit_cast>(value); + } + else + { + fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal); + } + } + + // Value is assumed to be swapped + return read_from_ptr(vm_base + eal); +} + +void ppu_write_mmio_aware_u32(u8* vm_base, u32 eal, u32 value) +{ + if (eal >= RAW_SPU_BASE_ADDR) + { + // RawSPU MMIO + auto thread = idm::get>(spu_thread::find_raw_spu((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET)); + + if (!thread) + { + // Access Violation + } + else if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE) // LS access + { + } + else if (thread->write_reg(eal, std::bit_cast>(value))) + { + return; + } + else + { + fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal); + } + } + + // Value is assumed swapped + write_to_ptr(vm_base + eal, value); +} + +extern bool ppu_test_address_may_be_mmio(std::span> insts) +{ + std::set reg_offsets; + bool found_raw_spu_base = false; + bool found_spu_area_offset_element = false; + + for (u32 inst : insts) + { + // Common around MMIO (orders IO) + if (inst == ppu_instructions::EIEIO()) + { + return true; + } + + const u32 op_imm16 = (inst & 0xfc00ffff); + + // RawSPU MMIO base + // 0xe00000000 is a common constant so try to find an ORIS 0x10 or ADDIS 0x10 nearby (for multiplying SPU ID by it) + if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x2000) || op_imm16 == ppu_instructions::ORIS({}, {}, 0xe000) || op_imm16 == ppu_instructions::XORIS({}, {}, 0xe000)) + { + found_raw_spu_base = true; + + if (found_spu_area_offset_element) + { + // Found both + return true; + } + } + else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0x10) || op_imm16 == ppu_instructions::ADDIS({}, {}, 0x10)) + { + found_spu_area_offset_element = true; + + if (found_raw_spu_base) + { + // Found both + return true; + } + } + // RawSPU MMIO base + problem state offset + else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffc)) + { + return true; + } + else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe004)) + { + return true; + } + else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe004)) + { + return true; + } + // RawSPU MMIO base + problem state offset + 64k of SNR1 offset + else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffb)) + { + return true; + } + else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe005)) + { + return true; + } + else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe005)) + { + return true; + } + // RawSPU MMIO base + problem state offset + 264k of SNR2 offset (STW allows 32K+- offset so in order to access SNR2 it needs to first add another 64k) + // SNR2 is the only register currently implemented that has its 0x80000 bit is set so its the only one its hardcoded access is done this way + else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffa)) + { + return true; + } + else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe006)) + { + return true; + } + else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe006)) + { + return true; + } + // Try to detect a function that receives RawSPU problem state base pointer as an argument + else if ((op_imm16 & ~0xffff) == ppu_instructions::LWZ({}, {}, 0) || + (op_imm16 & ~0xffff) == ppu_instructions::STW({}, {}, 0) || + (op_imm16 & ~0xffff) == ppu_instructions::ADDI({}, {}, 0)) + { + const bool is_load = (op_imm16 & ~0xffff) == ppu_instructions::LWZ({}, {}, 0); + const bool is_store = (op_imm16 & ~0xffff) == ppu_instructions::STW({}, {}, 0); + const bool is_neither = !is_store && !is_load; + const bool is_snr = (is_store || is_neither) && ((op_imm16 & 0xffff) == (SPU_RdSigNotify2_offs & 0xffff) || (op_imm16 & 0xffff) == (SPU_RdSigNotify1_offs & 0xffff)); + + if (is_snr || spu_thread::test_is_problem_state_register_offset(op_imm16 & 0xffff, is_load || is_neither, is_store || is_neither)) + { + reg_offsets.insert(op_imm16 & 0xffff); + + if (reg_offsets.size() >= 2) + { + // Assume high MMIO likelyhood if more than one offset appears in nearby code + // Such as common IN_MBOX + OUT_MBOX + return true; + } + } + } + } + + return false; +} + struct ppu_toc_manager { std::unordered_map toc_map; @@ -3529,6 +3692,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only) { "__resupdate", reinterpret_cast(vm::reservation_update) }, { "__resinterp", reinterpret_cast(ppu_reservation_fallback) }, { "__escape", reinterpret_cast(+ppu_escape) }, + { "__read_maybe_mmio32", reinterpret_cast(+ppu_read_mmio_aware_u32) }, + { "__write_maybe_mmio32", reinterpret_cast(+ppu_write_mmio_aware_u32) }, }; for (u64 index = 0; index < 1024; index++) diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 884f409db8..5e8d866dea 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -5,6 +5,7 @@ #include "Emu/Cell/Common.h" #include "PPUTranslator.h" #include "PPUThread.h" +#include "SPUThread.h" #include "util/types.hpp" #include "util/endian.hpp" @@ -12,6 +13,7 @@ #include "util/v128.hpp" #include "util/simd.hpp" #include +#include using namespace llvm; @@ -129,6 +131,7 @@ Type* PPUTranslator::GetContextType() } u32 ppu_get_far_jump(u32 pc); +bool ppu_test_address_may_be_mmio(std::span> insts); Function* PPUTranslator::Translate(const ppu_function& info) { @@ -239,6 +242,9 @@ Function* PPUTranslator::Translate(const ppu_function& info) m_rel = nullptr; } + // Reset MMIO hint + m_may_be_mmio = true; + const u32 op = *ensure(m_info.get_ptr(m_addr + base)); (this->*(s_ppu_decoder.decode(op)))({op}); @@ -600,15 +606,53 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align) { const u32 size = ::narrow(+type->getPrimitiveSizeInBits()); + if (m_may_be_mmio && size == 32) + { + // Test for MMIO patterns + struct instructions_to_test + { + be_t insts[128]; + }; + + m_may_be_mmio = false; + + if (auto ptr = m_info.get_ptr(std::max(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate(m_addr, sizeof(instructions_to_test) / 2)))) + { + if (ppu_test_address_may_be_mmio(std::span(ptr->insts))) + { + m_may_be_mmio = true; + } + } + } + if (is_be ^ m_is_be && size > 8) { + llvm::Value* value{}; + // Read, byteswap, bitcast const auto int_type = m_ir->getIntNTy(size); - const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr), llvm::MaybeAlign{align}); - value->setVolatile(true); + + if (m_may_be_mmio && size == 32) + { + ppu_log.notice("LLVM: Detected potential MMIO32 read at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0)); + value = Call(GetType(), "__read_maybe_mmio32", m_base, addr); + } + else + { + const auto inst = m_ir->CreateAlignedLoad(int_type, GetMemory(addr), llvm::MaybeAlign{align}); + inst->setVolatile(true); + value = inst; + } + return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type); } + if (m_may_be_mmio && size == 32) + { + ppu_log.notice("LLVM: Detected potential MMIO32 read at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0)); + return Call(GetType(), "__read_maybe_mmio32", m_base, addr); + } + // Read normally const auto r = m_ir->CreateAlignedLoad(type, GetMemory(addr), llvm::MaybeAlign{align}); r->setVolatile(true); @@ -627,6 +671,25 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align value = Call(int_type, fmt::format("llvm.bswap.i%u", size), bitcast(value, int_type)); } + if (m_may_be_mmio && size == 32) + { + // Test for MMIO patterns + struct instructions_to_test + { + be_t insts[128]; + }; + + if (auto ptr = m_info.get_ptr(std::max(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate(m_addr, sizeof(instructions_to_test) / 2)))) + { + if (ppu_test_address_may_be_mmio(std::span(ptr->insts))) + { + ppu_log.notice("LLVM: Detected potential MMIO32 write at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0)); + Call(GetType(), "__write_maybe_mmio32", m_base, addr, value); + return; + } + } + } + // Write m_ir->CreateAlignedStore(value, GetMemory(addr), llvm::MaybeAlign{align})->setVolatile(true); } @@ -2527,6 +2590,7 @@ void PPUTranslator::LDX(ppu_opcode_t op) void PPUTranslator::LWZX(ppu_opcode_t op) { + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u && op.rb != 1u && op.rb != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType())); } @@ -2601,6 +2665,9 @@ void PPUTranslator::DCBST(ppu_opcode_t) void PPUTranslator::LWZUX(ppu_opcode_t op) { + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u && op.rb != 1u && op.rb != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); SetGpr(op.rd, ReadMemory(addr, GetType())); SetGpr(op.ra, addr); @@ -2811,6 +2878,7 @@ void PPUTranslator::STWCX(ppu_opcode_t op) void PPUTranslator::STWX(ppu_opcode_t op) { + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u && op.rb != 1u && op.rb != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32)); } @@ -2830,6 +2898,7 @@ void PPUTranslator::STDUX(ppu_opcode_t op) void PPUTranslator::STWUX(ppu_opcode_t op) { const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u && op.rb != 1u && op.rb != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation WriteMemory(addr, GetGpr(op.rs, 32)); SetGpr(op.ra, addr); } @@ -3213,6 +3282,7 @@ void PPUTranslator::LWBRX(ppu_opcode_t op) void PPUTranslator::LFSX(ppu_opcode_t op) { + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u && op.rb != 1u && op.rb != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType())); } @@ -3541,6 +3611,44 @@ void PPUTranslator::LWZ(ppu_opcode_t op) m_rel = nullptr; } + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset + + if (m_may_be_mmio && !op.simm16) + { + struct instructions_data + { + be_t insts[2]; + }; + + // Quick invalidation: expect exact MMIO address, so if the register is being reused with different offset than it's likely not MMIO + if (auto ptr = m_info.get_ptr(m_addr + 4 + (m_reloc ? m_reloc->addr : 0))) + { + for (u32 inst : ptr->insts) + { + ppu_opcode_t test_op{inst}; + + if (test_op.simm16 == op.simm16 || test_op.ra != op.ra) + { + // Same offset (at least according to this test) or different register + continue; + } + + switch (g_ppu_itype.decode(inst)) + { + case ppu_itype::LWZ: + case ppu_itype::STW: + { + // Not MMIO + m_may_be_mmio = false; + break; + } + default: break; + } + } + } + } + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType())); } @@ -3554,6 +3662,9 @@ void PPUTranslator::LWZU(ppu_opcode_t op) m_rel = nullptr; } + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm); SetGpr(op.rd, ReadMemory(addr, GetType())); SetGpr(op.ra, addr); @@ -3597,6 +3708,44 @@ void PPUTranslator::STW(ppu_opcode_t op) m_rel = nullptr; } + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset + + if (m_may_be_mmio && !op.simm16) + { + struct instructions_data + { + be_t insts[3]; + }; + + // Quick invalidation: expect exact MMIO address, so if the register is being reused with different offset than it's likely not MMIO + if (auto ptr = m_info.get_ptr(m_addr + 4 + (m_reloc ? m_reloc->addr : 0))) + { + for (u32 inst : ptr->insts) + { + ppu_opcode_t test_op{inst}; + + if (test_op.simm16 == op.simm16 || test_op.ra != op.ra) + { + // Same offset (at least according to this test) or different register + continue; + } + + switch (g_ppu_itype.decode(inst)) + { + case ppu_itype::LWZ: + case ppu_itype::STW: + { + // Not MMIO + m_may_be_mmio = false; + break; + } + default: break; + } + } + } + } + const auto value = GetGpr(op.rs, 32); const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm; WriteMemory(addr, value); @@ -3621,6 +3770,9 @@ void PPUTranslator::STWU(ppu_opcode_t op) m_rel = nullptr; } + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u);// Stack register and TLS address register are unlikely to be used in MMIO address calculatio + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm); WriteMemory(addr, GetGpr(op.rs, 32)); SetGpr(op.ra, addr); @@ -3740,6 +3892,8 @@ void PPUTranslator::STHU(ppu_opcode_t op) void PPUTranslator::LMW(ppu_opcode_t op) { + m_may_be_mmio &= op.rd == 31u && (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculatio + for (u32 i = 0; i < 32 - op.rd; i++) { SetGpr(i + op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetType())); @@ -3748,6 +3902,8 @@ void PPUTranslator::LMW(ppu_opcode_t op) void PPUTranslator::STMW(ppu_opcode_t op) { + m_may_be_mmio &= op.rs == 31u && (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculatio + for (u32 i = 0; i < 32 - op.rs; i++) { WriteMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetGpr(i + op.rs, 32)); @@ -3764,6 +3920,9 @@ void PPUTranslator::LFS(ppu_opcode_t op) m_rel = nullptr; } + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculatio + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset + SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType())); } @@ -3777,6 +3936,9 @@ void PPUTranslator::LFSU(ppu_opcode_t op) m_rel = nullptr; } + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculatio + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm); SetFpr(op.frd, ReadMemory(addr, GetType())); SetGpr(op.ra, addr); @@ -3819,7 +3981,12 @@ void PPUTranslator::STFS(ppu_opcode_t op) imm = SExt(ReadMemory(GetAddr(+2), GetType()), GetType()); m_rel = nullptr; } + else + { + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset + } + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetFpr(op.frs, 32)); } @@ -3832,6 +3999,12 @@ void PPUTranslator::STFSU(ppu_opcode_t op) imm = SExt(ReadMemory(GetAddr(+2), GetType()), GetType()); m_rel = nullptr; } + else + { + m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset + } + + m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm); WriteMemory(addr, GetFpr(op.frs, 32)); diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 947d818a1c..d72dc1de20 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -68,6 +68,7 @@ class PPUTranslator final : public cpu_translator llvm::Value** const m_fc = m_locals + 131; // FPSCR bits (used partially) llvm::Value* nan_vec4; + bool m_may_be_mmio = false; #define DEF_VALUE(loc, glb, pos)\ llvm::Value*& loc = m_locals[pos];\ diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp index b97a1dd629..580ee319a9 100644 --- a/rpcs3/Emu/Cell/RawSPUThread.cpp +++ b/rpcs3/Emu/Cell/RawSPUThread.cpp @@ -318,6 +318,50 @@ bool spu_thread::write_reg(const u32 addr, const u32 value) return false; } +bool spu_thread::test_is_problem_state_register_offset(u32 offset, bool for_read, bool for_write) noexcept +{ + if (for_read) + { + switch (offset) + { + case MFC_CMDStatus_offs: + case MFC_QStatus_offs: + case SPU_Out_MBox_offs: + case SPU_MBox_Status_offs: + case SPU_Status_offs: + case Prxy_TagStatus_offs: + case SPU_NPC_offs: + case SPU_RunCntl_offs: + return true; + default: break; + } + } + + if (for_write) + { + switch (offset) + { + case MFC_LSA_offs: + case MFC_EAH_offs: + case MFC_EAL_offs: + case MFC_Size_Tag_offs: + case MFC_Class_CMD_offs: + case Prxy_QueryType_offs: + case Prxy_QueryMask_offs: + case SPU_In_MBox_offs: + case SPU_RunCntl_offs: + case SPU_NPC_offs: + case SPU_RdSigNotify1_offs: + case SPU_RdSigNotify2_offs: + case (SPU_RdSigNotify2_offs & 0xffff): // Fow now accept both (this is used for an optimization so it can be imperfect) + return true; + default: break; + } + } + + return false; +} + void spu_load_exec(const spu_exec_object& elf) { spu_thread::g_raw_spu_ctr++; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index b7ed915307..700c0c6165 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -865,6 +865,7 @@ public: bool read_reg(const u32 addr, u32& value); bool write_reg(const u32 addr, const u32 value); + static bool test_is_problem_state_register_offset(u32 offset, bool for_read, bool for_write) noexcept; static atomic_t g_raw_spu_ctr; static atomic_t g_raw_spu_id[5];