PPU LLVM/RawSPU: Implement MMIO violation elimination pass

This commit is contained in:
Eladash 2023-07-05 13:52:16 +03:00 committed by Ivan
parent e7ac84fca3
commit 598fe85243
6 changed files with 390 additions and 3 deletions

View File

@ -674,6 +674,7 @@ namespace ppu_instructions
// addi encoder: primary opcode 0x0e; rt goes to the rd field, si to the signed 16-bit immediate field
inline u32 ADDI(u32 rt, u32 ra, s32 si) { ppu_opcode_t insn{ 0x0eu << 26 }; insn.rd = rt; insn.ra = ra; insn.simm16 = si; return insn.opcode; }
// addis encoder: primary opcode 0x0f; same field layout as ADDI
inline u32 ADDIS(u32 rt, u32 ra, s32 si) { ppu_opcode_t insn{ 0x0fu << 26 }; insn.rd = rt; insn.ra = ra; insn.simm16 = si; return insn.opcode; }
// xoris encoder: primary opcode 0x1b; rt goes to the rd field.
// The 16-bit immediate is taken as unsigned (like ORI/ORIS), so constants such as 0xe000 fit the field
// without passing through a signed 16-bit value. The encoded bits are the same as with a signed parameter.
inline u32 XORIS(u32 rt, u32 ra, u32 ui) { ppu_opcode_t op{ 0x1bu << 26 }; op.rd = rt; op.ra = ra; op.uimm16 = ui; return op.opcode; }
// ori encoder: primary opcode 0x18; rt goes to the rd field, ui to the unsigned 16-bit immediate field
inline u32 ORI(u32 rt, u32 ra, u32 ui) { ppu_opcode_t insn{ 0x18u << 26 }; insn.rd = rt; insn.ra = ra; insn.uimm16 = ui; return insn.opcode; }
// oris encoder: primary opcode 0x19; same field layout as ORI
inline u32 ORIS(u32 rt, u32 ra, u32 ui) { ppu_opcode_t insn{ 0x19u << 26 }; insn.rd = rt; insn.ra = ra; insn.uimm16 = ui; return insn.opcode; }
// or encoder: primary opcode 0x1f with 0x1bc in the extended opcode position; rc is stored in the rc field
inline u32 OR(u32 ra, u32 rs, u32 rb, bool rc = false) { ppu_opcode_t insn{ 0x1fu << 26 | 0x1bcu << 1 }; insn.rs = rs; insn.ra = ra; insn.rb = rb; insn.rc = rc; return insn.opcode; }
@ -685,6 +686,7 @@ namespace ppu_instructions
// mfspr encoder: primary opcode 0x1f with 0x153 in the extended opcode position; spr is stored in the spr field as given
inline u32 MFSPR(u32 rt, u32 spr) { ppu_opcode_t insn{ 0x1fu << 26 | 0x153u << 1 }; insn.rd = rt; insn.spr = spr; return insn.opcode; }
// mtspr encoder: primary opcode 0x1f with 0x1d3 in the extended opcode position
inline u32 MTSPR(u32 spr, u32 rs) { ppu_opcode_t insn{ 0x1fu << 26 | 0x1d3u << 1 }; insn.rs = rs; insn.spr = spr; return insn.opcode; }
// lwz encoder: primary opcode 0x20; si is the signed 16-bit displacement
inline u32 LWZ(u32 rt, u32 ra, s32 si) { ppu_opcode_t insn{ 0x20u << 26 }; insn.rd = rt; insn.ra = ra; insn.simm16 = si; return insn.opcode; }
// stw encoder: primary opcode 0x24; the stored register is written through the rd field
inline u32 STW(u32 rt, u32 ra, s32 si) { ppu_opcode_t insn{ 0x24u << 26 }; insn.rd = rt; insn.ra = ra; insn.simm16 = si; return insn.opcode; }
// std encoder: primary opcode 0x3e; the ds field receives si / 4, so si is expected to be a multiple of 4
inline u32 STD(u32 rs, u32 ra, s32 si) { ppu_opcode_t insn{ 0x3eu << 26 }; insn.rs = rs; insn.ra = ra; insn.ds = si / 4; return insn.opcode; }
// stdu encoder: same as STD but with the lowest opcode bit set
inline u32 STDU(u32 rs, u32 ra, s32 si) { ppu_opcode_t insn{ 0x3eu << 26 | 1 }; insn.rs = rs; insn.ra = ra; insn.ds = si / 4; return insn.opcode; }
// ld encoder: primary opcode 0x3a; the ds field receives si / 4, so si is expected to be a multiple of 4
inline u32 LD(u32 rt, u32 ra, s32 si) { ppu_opcode_t insn{ 0x3au << 26 }; insn.rd = rt; insn.ra = ra; insn.ds = si / 4; return insn.opcode; }
@ -697,6 +699,7 @@ namespace ppu_instructions
// stvx encoder: primary opcode 31 with 231 in the extended opcode position; vs selects the vector register
inline u32 STVX(u32 vs, u32 ra, u32 rb) { ppu_opcode_t insn{ 31 << 26 | 231 << 1 }; insn.vs = vs; insn.ra = ra; insn.rb = rb; return insn.opcode; }
// lfd encoder: primary opcode 50; frd selects the floating point register, si is the signed 16-bit displacement
inline u32 LFD(u32 frd, u32 ra, s32 si) { ppu_opcode_t insn{ 50u << 26 }; insn.frd = frd; insn.ra = ra; insn.simm16 = si; return insn.opcode; }
// lvx encoder: primary opcode 31 with 103 in the extended opcode position; vd selects the vector register
inline u32 LVX(u32 vd, u32 ra, u32 rb) { ppu_opcode_t insn{ 31 << 26 | 103 << 1 }; insn.vd = vd; insn.ra = ra; insn.rb = rb; return insn.opcode; }
// Returns the fixed instruction word 0x7c0006ac (eieio has no operands to encode).
// constexpr so that callers can compare raw instruction words against it at compile time or in a tight scan loop.
inline constexpr u32 EIEIO() { return 0x7c0006ac; }
namespace implicts
{
@ -734,7 +737,7 @@ namespace ppu_instructions
// Convenience form built on RLDICL: forwards x and y, passes 0 and n as the third and fourth arguments
// (presumably shift amount and mask begin - confirm against the RLDICL definition) and false as the last one.
inline u32 CLRLDI(u32 x, u32 y, u32 n) { return RLDICL(x, y, 0, n, false); }
// Convenience form built on RLDICR: same as above but the fourth argument is 63 - n.
// NOTE(review): n is unsigned, so n > 63 wraps around in `63 - n`; callers are expected to pass n <= 63.
inline u32 CLRRDI(u32 x, u32 y, u32 n) { return RLDICR(x, y, 0, 63 - n, false); }
// Returns the fixed instruction word of the unconditional trap: tw 31,r0,r0
// Only the constexpr definition is kept: two definitions of TRAP() in the same namespace are a redefinition error.
inline constexpr u32 TRAP() { return 0x7FE00008; }
}
using namespace implicts;

View File

@ -62,7 +62,9 @@
#include <thread>
#include <cfenv>
#include <cctype>
#include <span>
#include <optional>
#include "util/asm.hpp"
#include "util/vm.hpp"
#include "util/v128.hpp"
@ -485,6 +487,167 @@ void ppu_reservation_fallback(ppu_thread& ppu)
}
}
// 32-bit load helper for addresses that may point into the RawSPU MMIO area.
// vm_base: base pointer that eal is added to for plain memory accesses
// eal: effective address of the access
// Returns the loaded word in memory byte order (callers are assumed to swap it themselves).
// Addresses at or above RAW_SPU_BASE_ADDR that resolve to an existing RawSPU thread and lie past its LS range
// are served by spu_thread::read_reg; if read_reg rejects the address, an exception is thrown.
// Every other case is a plain read from vm_base + eal.
u32 ppu_read_mmio_aware_u32(u8* vm_base, u32 eal)
{
	// Plain memory read (value is assumed to be swapped)
	const auto read_plain = [&]() -> u32
	{
		return read_from_ptr<u32>(vm_base + eal);
	};

	if (eal < RAW_SPU_BASE_ADDR)
	{
		return read_plain();
	}

	// RawSPU MMIO
	const auto area_offset = eal - RAW_SPU_BASE_ADDR;
	const auto spu = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(area_offset / RAW_SPU_OFFSET));

	if (!spu)
	{
		// Access Violation: no special handling here, the plain read is performed as is
		return read_plain();
	}

	if (area_offset % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE)
	{
		// LS access
		return read_plain();
	}

	u32 reg_value{};

	if (!spu->read_reg(eal, reg_value))
	{
		fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
	}

	// Reinterpret the register value as big-endian storage so that the result matches the memory byte order
	return std::bit_cast<be_t<u32>>(reg_value);
}
// 32-bit store helper for addresses that may point into the RawSPU MMIO area.
// vm_base: base pointer that eal is added to for plain memory accesses
// eal: effective address of the access
// value: word to store, assumed to be already swapped to memory byte order
// Addresses at or above RAW_SPU_BASE_ADDR that resolve to an existing RawSPU thread and lie past its LS range
// are forwarded to spu_thread::write_reg; if write_reg rejects the address, an exception is thrown.
// Every other case is a plain write to vm_base + eal.
void ppu_write_mmio_aware_u32(u8* vm_base, u32 eal, u32 value)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// RawSPU MMIO
		const auto area_offset = eal - RAW_SPU_BASE_ADDR;
		const auto spu = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(area_offset / RAW_SPU_OFFSET));

		// Accesses fully inside the LS range are ordinary memory accesses
		const bool targets_ls = area_offset % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE;

		// A missing thread (access violation) and LS accesses both fall through to the plain write below
		if (spu && !targets_ls)
		{
			// Undo the swap before handing the value to the register handler
			if (!spu->write_reg(eal, std::bit_cast<be_t<u32>>(value)))
			{
				fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
			}

			return;
		}
	}

	// Value is assumed swapped
	write_to_ptr<u32>(vm_base + eal, value);
}
// Heuristic scan of a window of PPU instructions for patterns that suggest RawSPU MMIO access nearby.
// insts: instruction words to inspect, in order
// Returns true as soon as one of the following has been seen:
//  - an EIEIO instruction
//  - both an upper-half constant 0xe000 (ADDIS/ORIS/XORIS) and an upper-half constant 0x10 (ORIS/ADDIS), in any order
//  - an upper-half constant 0xe004, 0xe005 or 0xe006 (ADDIS/ORIS/XORIS)
//  - two different LWZ/STW/ADDI immediates that are accepted as problem state register offsets
// Register fields are masked out before any comparison, so only opcode and immediate are matched.
// Returns false if the window ends without a match. This is a hint for an optimization, not an exact analysis.
extern bool ppu_test_address_may_be_mmio(std::span<const be_t<u32>> insts)
{
	// Checks a masked instruction word against ADDIS/ORIS/XORIS carrying the same 16 immediate bits
	// (signed_imm and unsigned_imm are two spellings of the same low 16 bits, e.g. -0x2000 and 0xe000)
	const auto builds_upper_half = [](u32 masked_inst, s32 signed_imm, u32 unsigned_imm) -> bool
	{
		return masked_inst == ppu_instructions::ADDIS({}, {}, signed_imm)
			|| masked_inst == ppu_instructions::ORIS({}, {}, unsigned_imm)
			|| masked_inst == ppu_instructions::XORIS({}, {}, unsigned_imm);
	};

	// First accepted register offset seen so far; a second, different one is enough to report MMIO.
	// A full set is not needed for that and would cost a heap allocation per scan.
	std::optional<u32> first_reg_offset;

	bool found_raw_spu_base = false;
	bool found_spu_area_offset_element = false;

	for (const u32 inst : insts)
	{
		// Common around MMIO (orders IO)
		if (inst == ppu_instructions::EIEIO())
		{
			return true;
		}

		// Keep primary opcode and 16-bit immediate, drop both register fields
		const u32 op_imm16 = (inst & 0xfc00ffff);

		// RawSPU MMIO base
		// 0xe0000000 is a common constant so try to find an ORIS 0x10 or ADDIS 0x10 nearby (for multiplying SPU ID by it)
		if (builds_upper_half(op_imm16, -0x2000, 0xe000))
		{
			if (found_spu_area_offset_element)
			{
				// Found both
				return true;
			}

			found_raw_spu_base = true;
			continue;
		}

		if (op_imm16 == ppu_instructions::ORIS({}, {}, 0x10) || op_imm16 == ppu_instructions::ADDIS({}, {}, 0x10))
		{
			if (found_raw_spu_base)
			{
				// Found both
				return true;
			}

			found_spu_area_offset_element = true;
			continue;
		}

		// 0xe004: RawSPU MMIO base + problem state offset
		// 0xe005: RawSPU MMIO base + problem state offset + 64k of SNR1 offset
		// 0xe006: RawSPU MMIO base + problem state offset + 64k, which is needed to reach SNR2
		//         (STW only allows a +-32K offset; SNR2 is the only currently implemented register with the 0x80000 bit set,
		//         so it is the only one whose hardcoded access is done this way)
		if (builds_upper_half(op_imm16, -0x1ffc, 0xe004) ||
			builds_upper_half(op_imm16, -0x1ffb, 0xe005) ||
			builds_upper_half(op_imm16, -0x1ffa, 0xe006))
		{
			return true;
		}

		// Try to detect a function that receives RawSPU problem state base pointer as an argument
		const u32 opcode_only = op_imm16 & 0xffff0000;
		const bool is_load = opcode_only == ppu_instructions::LWZ({}, {}, 0);
		const bool is_store = opcode_only == ppu_instructions::STW({}, {}, 0);
		const bool is_addi = opcode_only == ppu_instructions::ADDI({}, {}, 0);

		if (!is_load && !is_store && !is_addi)
		{
			continue;
		}

		const u32 imm16 = op_imm16 & 0xffff;

		// ADDI only forms an address, so it is tested as both a potential read and a potential write
		const bool may_read = is_load || is_addi;
		const bool may_write = is_store || is_addi;

		// Signal notification registers are matched by the low 16 bits of their offsets
		const bool is_snr = may_write && (imm16 == (SPU_RdSigNotify2_offs & 0xffff) || imm16 == (SPU_RdSigNotify1_offs & 0xffff));

		if (!is_snr && !spu_thread::test_is_problem_state_register_offset(imm16, may_read, may_write))
		{
			continue;
		}

		if (!first_reg_offset)
		{
			first_reg_offset = imm16;
		}
		else if (*first_reg_offset != imm16)
		{
			// Assume high MMIO likelihood if more than one offset appears in nearby code
			// Such as common IN_MBOX + OUT_MBOX
			return true;
		}
	}

	return false;
}
struct ppu_toc_manager
{
std::unordered_map<u32, u32> toc_map;
@ -3529,6 +3692,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
{ "__resupdate", reinterpret_cast<u64>(vm::reservation_update) },
{ "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) },
{ "__escape", reinterpret_cast<u64>(+ppu_escape) },
{ "__read_maybe_mmio32", reinterpret_cast<u64>(+ppu_read_mmio_aware_u32) },
{ "__write_maybe_mmio32", reinterpret_cast<u64>(+ppu_write_mmio_aware_u32) },
};
for (u64 index = 0; index < 1024; index++)

View File

@ -5,6 +5,7 @@
#include "Emu/Cell/Common.h"
#include "PPUTranslator.h"
#include "PPUThread.h"
#include "SPUThread.h"
#include "util/types.hpp"
#include "util/endian.hpp"
@ -12,6 +13,7 @@
#include "util/v128.hpp"
#include "util/simd.hpp"
#include <algorithm>
#include <span>
using namespace llvm;
@ -129,6 +131,7 @@ Type* PPUTranslator::GetContextType()
}
u32 ppu_get_far_jump(u32 pc);
bool ppu_test_address_may_be_mmio(std::span<const be_t<u32>> insts);
Function* PPUTranslator::Translate(const ppu_function& info)
{
@ -239,6 +242,9 @@ Function* PPUTranslator::Translate(const ppu_function& info)
m_rel = nullptr;
}
// Reset MMIO hint
m_may_be_mmio = true;
const u32 op = *ensure(m_info.get_ptr<u32>(m_addr + base));
(this->*(s_ppu_decoder.decode(op)))({op});
@ -600,15 +606,53 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
{
const u32 size = ::narrow<u32>(+type->getPrimitiveSizeInBits());
if (m_may_be_mmio && size == 32)
{
// Test for MMIO patterns
struct instructions_to_test
{
be_t<u32> insts[128];
};
m_may_be_mmio = false;
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate<u32>(m_addr, sizeof(instructions_to_test) / 2))))
{
if (ppu_test_address_may_be_mmio(std::span(ptr->insts)))
{
m_may_be_mmio = true;
}
}
}
if (is_be ^ m_is_be && size > 8)
{
llvm::Value* value{};
// Read, byteswap, bitcast
const auto int_type = m_ir->getIntNTy(size);
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr), llvm::MaybeAlign{align});
value->setVolatile(true);
if (m_may_be_mmio && size == 32)
{
ppu_log.notice("LLVM: Detected potential MMIO32 read at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
value = Call(GetType<u32>(), "__read_maybe_mmio32", m_base, addr);
}
else
{
const auto inst = m_ir->CreateAlignedLoad(int_type, GetMemory(addr), llvm::MaybeAlign{align});
inst->setVolatile(true);
value = inst;
}
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
}
if (m_may_be_mmio && size == 32)
{
ppu_log.notice("LLVM: Detected potential MMIO32 read at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
return Call(GetType<u32>(), "__read_maybe_mmio32", m_base, addr);
}
// Read normally
const auto r = m_ir->CreateAlignedLoad(type, GetMemory(addr), llvm::MaybeAlign{align});
r->setVolatile(true);
@ -627,6 +671,25 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
value = Call(int_type, fmt::format("llvm.bswap.i%u", size), bitcast(value, int_type));
}
if (m_may_be_mmio && size == 32)
{
// Test for MMIO patterns
struct instructions_to_test
{
be_t<u32> insts[128];
};
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate<u32>(m_addr, sizeof(instructions_to_test) / 2))))
{
if (ppu_test_address_may_be_mmio(std::span(ptr->insts)))
{
ppu_log.notice("LLVM: Detected potential MMIO32 write at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
Call(GetType<void>(), "__write_maybe_mmio32", m_base, addr, value);
return;
}
}
}
// Write
m_ir->CreateAlignedStore(value, GetMemory(addr), llvm::MaybeAlign{align})->setVolatile(true);
}
@ -2527,6 +2590,7 @@ void PPUTranslator::LDX(ppu_opcode_t op)
void PPUTranslator::LWZX(ppu_opcode_t op)
{
	// Stack register and TLS address register are unlikely to be used in MMIO address calculation:
	// drop the MMIO hint if either address operand is r1 or r13
	if (op.ra == 1u || op.ra == 13u || op.rb == 1u || op.rb == 13u)
	{
		m_may_be_mmio = false;
	}

	// With ra == 0 only rb contributes to the address
	const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
	SetGpr(op.rd, ReadMemory(addr, GetType<u32>()));
}
@ -2601,6 +2665,9 @@ void PPUTranslator::DCBST(ppu_opcode_t)
void PPUTranslator::LWZUX(ppu_opcode_t op)
{
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u && op.rb != 1u && op.rb != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetGpr(op.rd, ReadMemory(addr, GetType<u32>()));
SetGpr(op.ra, addr);
@ -2811,6 +2878,7 @@ void PPUTranslator::STWCX(ppu_opcode_t op)
void PPUTranslator::STWX(ppu_opcode_t op)
{
	// Stack register and TLS address register are unlikely to be used in MMIO address calculation:
	// drop the MMIO hint if either address operand is r1 or r13
	const bool uses_sp_or_tls = op.ra == 1u || op.ra == 13u || op.rb == 1u || op.rb == 13u;

	if (uses_sp_or_tls)
	{
		m_may_be_mmio = false;
	}

	// With ra == 0 only rb contributes to the address; the low 32 bits of rs are stored
	WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32));
}
@ -2830,6 +2898,7 @@ void PPUTranslator::STDUX(ppu_opcode_t op)
void PPUTranslator::STWUX(ppu_opcode_t op)
{
	// The address is always ra + rb here and is written back to ra after the store
	const auto updated_addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));

	// Stack register and TLS address register are unlikely to be used in MMIO address calculation:
	// drop the MMIO hint if either address operand is r1 or r13
	if (op.ra == 1u || op.ra == 13u || op.rb == 1u || op.rb == 13u)
	{
		m_may_be_mmio = false;
	}

	WriteMemory(updated_addr, GetGpr(op.rs, 32));
	SetGpr(op.ra, updated_addr);
}
@ -3213,6 +3282,7 @@ void PPUTranslator::LWBRX(ppu_opcode_t op)
void PPUTranslator::LFSX(ppu_opcode_t op)
{
	// Stack register and TLS address register are unlikely to be used in MMIO address calculation:
	// drop the MMIO hint if either address operand is r1 or r13
	const bool uses_sp_or_tls = op.ra == 1u || op.ra == 13u || op.rb == 1u || op.rb == 13u;

	if (uses_sp_or_tls)
	{
		m_may_be_mmio = false;
	}

	// With ra == 0 only rb contributes to the address; the value is read as f32
	const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
	SetFpr(op.frd, ReadMemory(addr, GetType<f32>()));
}
@ -3541,6 +3611,44 @@ void PPUTranslator::LWZ(ppu_opcode_t op)
m_rel = nullptr;
}
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
if (m_may_be_mmio && !op.simm16)
{
struct instructions_data
{
be_t<u32> insts[2];
};
// Quick invalidation: expect exact MMIO address, so if the register is being reused with different offset than it's likely not MMIO
if (auto ptr = m_info.get_ptr<instructions_data>(m_addr + 4 + (m_reloc ? m_reloc->addr : 0)))
{
for (u32 inst : ptr->insts)
{
ppu_opcode_t test_op{inst};
if (test_op.simm16 == op.simm16 || test_op.ra != op.ra)
{
// Same offset (at least according to this test) or different register
continue;
}
switch (g_ppu_itype.decode(inst))
{
case ppu_itype::LWZ:
case ppu_itype::STW:
{
// Not MMIO
m_may_be_mmio = false;
break;
}
default: break;
}
}
}
}
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<u32>()));
}
@ -3554,6 +3662,9 @@ void PPUTranslator::LWZU(ppu_opcode_t op)
m_rel = nullptr;
}
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetGpr(op.rd, ReadMemory(addr, GetType<u32>()));
SetGpr(op.ra, addr);
@ -3597,6 +3708,44 @@ void PPUTranslator::STW(ppu_opcode_t op)
m_rel = nullptr;
}
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset
if (m_may_be_mmio && !op.simm16)
{
struct instructions_data
{
be_t<u32> insts[3];
};
// Quick invalidation: expect exact MMIO address, so if the register is being reused with different offset than it's likely not MMIO
if (auto ptr = m_info.get_ptr<instructions_data>(m_addr + 4 + (m_reloc ? m_reloc->addr : 0)))
{
for (u32 inst : ptr->insts)
{
ppu_opcode_t test_op{inst};
if (test_op.simm16 == op.simm16 || test_op.ra != op.ra)
{
// Same offset (at least according to this test) or different register
continue;
}
switch (g_ppu_itype.decode(inst))
{
case ppu_itype::LWZ:
case ppu_itype::STW:
{
// Not MMIO
m_may_be_mmio = false;
break;
}
default: break;
}
}
}
}
const auto value = GetGpr(op.rs, 32);
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm;
WriteMemory(addr, value);
@ -3621,6 +3770,9 @@ void PPUTranslator::STWU(ppu_opcode_t op)
m_rel = nullptr;
}
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u);// Stack register and TLS address register are unlikely to be used in MMIO address calculatio
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
WriteMemory(addr, GetGpr(op.rs, 32));
SetGpr(op.ra, addr);
@ -3740,6 +3892,8 @@ void PPUTranslator::STHU(ppu_opcode_t op)
void PPUTranslator::LMW(ppu_opcode_t op)
{
m_may_be_mmio &= op.rd == 31u && (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculatio
for (u32 i = 0; i < 32 - op.rd; i++)
{
SetGpr(i + op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetType<u32>()));
@ -3748,6 +3902,8 @@ void PPUTranslator::LMW(ppu_opcode_t op)
void PPUTranslator::STMW(ppu_opcode_t op)
{
m_may_be_mmio &= op.rs == 31u && (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculatio
for (u32 i = 0; i < 32 - op.rs; i++)
{
WriteMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetGpr(i + op.rs, 32));
@ -3764,6 +3920,9 @@ void PPUTranslator::LFS(ppu_opcode_t op)
m_rel = nullptr;
}
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculatio
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<f32>()));
}
@ -3777,6 +3936,9 @@ void PPUTranslator::LFSU(ppu_opcode_t op)
m_rel = nullptr;
}
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculatio
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetFpr(op.frd, ReadMemory(addr, GetType<f32>()));
SetGpr(op.ra, addr);
@ -3819,7 +3981,12 @@ void PPUTranslator::STFS(ppu_opcode_t op)
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
else
{
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset
}
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetFpr(op.frs, 32));
}
@ -3832,6 +3999,12 @@ void PPUTranslator::STFSU(ppu_opcode_t op)
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
else
{
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset
}
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
WriteMemory(addr, GetFpr(op.frs, 32));

View File

@ -68,6 +68,7 @@ class PPUTranslator final : public cpu_translator
llvm::Value** const m_fc = m_locals + 131; // FPSCR bits (used partially)
llvm::Value* nan_vec4;
bool m_may_be_mmio = false;
#define DEF_VALUE(loc, glb, pos)\
llvm::Value*& loc = m_locals[pos];\

View File

@ -318,6 +318,50 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
return false;
}
// Tests whether `offset` equals one of the listed problem state register offsets.
// offset: candidate offset, compared against the *_offs constants as is
// for_read: accept offsets from the read list
// for_write: accept offsets from the write list
// Returns true if the offset is in at least one of the requested lists, false otherwise
// (including when neither for_read nor for_write is set).
bool spu_thread::test_is_problem_state_register_offset(u32 offset, bool for_read, bool for_write) noexcept
{
	// Offsets accepted for read accesses
	const auto in_read_list = [](u32 reg) -> bool
	{
		switch (reg)
		{
		case MFC_CMDStatus_offs:
		case MFC_QStatus_offs:
		case SPU_Out_MBox_offs:
		case SPU_MBox_Status_offs:
		case SPU_Status_offs:
		case Prxy_TagStatus_offs:
		case SPU_NPC_offs:
		case SPU_RunCntl_offs:
			return true;
		default:
			return false;
		}
	};

	// Offsets accepted for write accesses
	const auto in_write_list = [](u32 reg) -> bool
	{
		switch (reg)
		{
		case MFC_LSA_offs:
		case MFC_EAH_offs:
		case MFC_EAL_offs:
		case MFC_Size_Tag_offs:
		case MFC_Class_CMD_offs:
		case Prxy_QueryType_offs:
		case Prxy_QueryMask_offs:
		case SPU_In_MBox_offs:
		case SPU_RunCntl_offs:
		case SPU_NPC_offs:
		case SPU_RdSigNotify1_offs:
		case SPU_RdSigNotify2_offs:
		case (SPU_RdSigNotify2_offs & 0xffff): // For now accept both (this is used for an optimization so it can be imperfect)
			return true;
		default:
			return false;
		}
	};

	return (for_read && in_read_list(offset)) || (for_write && in_write_list(offset));
}
void spu_load_exec(const spu_exec_object& elf)
{
spu_thread::g_raw_spu_ctr++;

View File

@ -865,6 +865,7 @@ public:
bool read_reg(const u32 addr, u32& value);
bool write_reg(const u32 addr, const u32 value);
static bool test_is_problem_state_register_offset(u32 offset, bool for_read, bool for_write) noexcept;
static atomic_t<u32> g_raw_spu_ctr;
static atomic_t<u32> g_raw_spu_id[5];