SPU Recompiler fixed

This commit is contained in:
Nekotekina 2015-09-04 02:23:31 +03:00
parent ab1c05aa71
commit 7e01c81154
10 changed files with 430 additions and 334 deletions

File diff suppressed because it is too large Load Diff

View File

@ -30,32 +30,28 @@ private:
asmjit::X86GpVar* cpu;
asmjit::X86GpVar* ls;
// output:
asmjit::X86GpVar* pos;
// temporary:
asmjit::X86GpVar* addr;
asmjit::X86GpVar* qw0;
asmjit::X86GpVar* qw1;
asmjit::X86GpVar* qw2;
std::array<asmjit::X86XmmVar*, 10> vec;
std::array<asmjit::X86XmmVar*, 6> vec;
// labels:
asmjit::Label* labels; // array[0x10000]
asmjit::Label* jt; // jump table resolver
asmjit::Label* jt; // jump table resolver (uses *addr)
asmjit::Label* end; // function end (return *addr)
class XmmLink
{
friend class spu_recompiler;
asmjit::X86XmmVar*& m_alloc;
asmjit::X86XmmVar* xmm_var;
asmjit::X86XmmVar* const m_var;
XmmLink(asmjit::X86XmmVar*& xmm_var)
: m_alloc(xmm_var)
, xmm_var(xmm_var)
: m_var(xmm_var)
{
m_alloc = nullptr;
xmm_var = nullptr;
}
public:
@ -64,24 +60,13 @@ private:
XmmLink(const XmmLink&) = delete;
XmmLink(XmmLink&& right)
: m_alloc(right.m_alloc)
, xmm_var(right.xmm_var)
: m_var(right.m_var)
{
right.xmm_var = nullptr;
}
XmmLink& operator =(const XmmLink&) = delete;
XmmLink& operator =(XmmLink&& right) = delete;
~XmmLink()
{
if (xmm_var) m_alloc = xmm_var;
}
inline operator const asmjit::X86XmmVar&() const
{
return *xmm_var;
return *m_var;
}
};

View File

@ -7,6 +7,25 @@
const spu_opcode_table_t<spu_itype_t> g_spu_itype{ DEFINE_SPU_OPCODES(spu_itype::), spu_itype::UNK };
// Look up an already registered function stored under `key` whose recorded
// contents are identical to the words starting at `data`.
//   data     - first word of the candidate function
//   key      - database key (analyse() combines the entry address and the first instruction word)
//   max_size - entries whose `size` exceeds this bound are skipped
// Returns the matching function, or nullptr if no entry matches.
std::shared_ptr<spu_function_t> SPUDatabase::find(const be_t<u32>* data, u64 key, u32 max_size)
{
	// Only entries registered under `key` are candidates. Iterating from find(key)
	// up to end() would also walk over entries that belong to unrelated keys.
	const auto range = m_db.equal_range(key);

	for (auto found = range.first; found != range.second; ++found)
	{
		const auto& func = found->second;

		// Skip entries larger than the caller's size bound
		if (func->size > max_size)
		{
			continue;
		}

		// Compare binary data explicitly (TODO: optimize)
		if (std::equal(func->data.begin(), func->data.end(), data))
		{
			return func;
		}
	}

	return nullptr;
}
SPUDatabase::SPUDatabase()
{
// TODO: load existing database associated with currently running executable
@ -33,13 +52,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
const u64 key = entry | u64{ ls[entry / 4] } << 32;
// Try to find existing function in the database
for (auto found = m_db.find(key); found != m_db.end(); found++)
if (auto func = find(ls + entry / 4, key, max_limit - entry))
{
// Compare binary data explicitly (TODO: optimize)
if (std::equal(found->second->data.begin(), found->second->data.end(), ls + entry / 4))
{
return found->second;
}
return func;
}
// Initialize block entries with the function entry point
@ -51,6 +66,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
// Set initial limit which will be narrowed later
u32 limit = max_limit;
// Minimal position of ila $SP,* instruction
u32 ila_sp_pos = max_limit;
// Find preliminary set of possible block entries (first pass), `start` is the current block address
for (u32 start = entry, pos = entry; pos < limit; pos += 4)
{
@ -60,7 +78,15 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
using namespace spu_itype;
if (start == pos) // Additional analysis at the beginning of the block (questionable)
// Find existing function
if (pos != entry && find(ls + pos / 4, pos | u64{ op.opcode } << 32, limit - pos))
{
limit = pos;
break;
}
// Additional analysis at the beginning of the block
if (start != entry && start == pos)
{
// Possible jump table
std::vector<u32> jt_abs, jt_rel;
@ -128,17 +154,19 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
{
// Discard current block and abort the operation
limit = start;
break;
}
else if (op.opcode == 0) // Hack: special case (STOP 0)
if (op.opcode == 0) // Hack: special case (STOP 0)
{
limit = pos + 4;
break;
}
else if (type == BI) // Branch Indirect
if (type == BI || type == IRET) // Branch Indirect
{
if (type == IRET) LOG_ERROR(SPU, "[0x%05x] Interrupt Return", pos);
blocks.emplace(start); start = pos + 4;
}
else if (type == BR || type == BRA) // Branch Relative/Absolute
@ -163,7 +191,7 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
{
// Branch to the next instruction and set link ("get next instruction address" idiom)
if (op.rt == 0) LOG_ERROR(SPU, "Suspicious instruction at [0x%05x]", pos);
if (op.rt == 0) LOG_ERROR(SPU, "[0x%05x] Branch-to-next with $LR", pos);
}
else
{
@ -174,13 +202,15 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
{
limit = std::min<u32>(limit, target);
}
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Function call without $LR", pos);
}
}
else if (type == BISL) // Branch Indirect and Set Link
else if (type == BISL || type == BISLED) // Branch Indirect and Set Link
{
// Nothing
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Indirect function call without $LR", pos);
}
else if (type == BRNZ || type == BRZ || type == BRHNZ || type == BRHZ) // Branch Relative if (Not) Zero Word/Halfword
else if (type == BRNZ || type == BRZ || type == BRHNZ || type == BRHZ) // Branch Relative if (Not) Zero (Half)word
{
const u32 target = spu_branch_target(pos, op.i16);
@ -192,6 +222,40 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
blocks.emplace(target);
}
}
else if (type == BINZ || type == BIZ || type == BIHNZ || type == BIHZ) // Branch Indirect if (Not) Zero (Half)word
{
}
else if (type == HBR || type == HBRA || type == HBRR) // Hint for Branch
{
}
else if (type == STQA || type == STQD || type == STQR || type == STQX || type == FSCRWR || type == MTSPR || type == WRCH) // Store
{
}
else if (type == HEQ || type == HEQI || type == HGT || type == HGTI || type == HLGT || type == HLGTI) // Halt
{
}
else if (type == STOP || type == STOPD || type == NOP || type == LNOP || type == SYNC || type == DSYNC) // Miscellaneous
{
}
else // Other instructions (writing rt reg)
{
const u32 rt = type == SELB || type == SHUFB || type == MPYA || type == FNMS || type == FMA || type == FMS ? op.rc : op.rt;
// Analyse link register access
if (rt == 0)
{
}
// Analyse stack pointer access
if (rt == 1)
{
if (type == ILA && pos < ila_sp_pos)
{
// set minimal ila $SP,* instruction position
ila_sp_pos = pos;
}
}
}
}
// Find more function calls (second pass, questionable)
@ -228,6 +292,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
// Prepare new function (set addr and size)
auto func = std::make_shared<spu_function_t>(entry, limit - entry);
// Copy function contents
func->data = { ls + entry / 4, ls + limit / 4 };
// Fill function block info
for (auto i = blocks.crbegin(); i != blocks.crend(); i++)
{
@ -255,8 +322,8 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
}
}
// Copy function contents
func->data = { ls + entry / 4, ls + limit / 4 };
// Set whether the function can reset stack
func->does_reset_stack = ila_sp_pos < limit;
// Add function to the database
m_db.emplace(key, func);

View File

@ -242,6 +242,9 @@ struct spu_function_t
// jump table values (start addresses)
std::set<u32> jtable;
// whether ila $SP,* instruction found
bool does_reset_stack;
// pointer to the compiled function
spu_jit_func_t compiled = nullptr;
@ -260,6 +263,9 @@ class SPUDatabase final
// All registered functions (uses addr and first instruction as a key)
std::unordered_multimap<u64, std::shared_ptr<spu_function_t>> m_db;
// For internal use
std::shared_ptr<spu_function_t> find(const be_t<u32>* data, u64 key, u32 max_size);
public:
SPUDatabase();
~SPUDatabase();

View File

@ -298,7 +298,7 @@ void spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op)
if (spu.gpr[op.rt]._u32[3] == 0)
{
set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
}
}
@ -307,7 +307,7 @@ void spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op)
if (spu.gpr[op.rt]._u32[3] != 0)
{
set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
}
}
@ -316,7 +316,7 @@ void spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op)
if (spu.gpr[op.rt]._u16[6] == 0)
{
set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
}
}
@ -325,13 +325,13 @@ void spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op)
if (spu.gpr[op.rt]._u16[6] != 0)
{
set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
}
}
// STOPD has no interpreter implementation: executing it always throws.
void spu_interpreter::STOPD(SPUThread& spu, spu_opcode_t op)
{
	// Single throw only; a second throw after this one could never be reached
	throw EXCEPTION("Unimplemented instruction");
}
void spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op)
@ -342,25 +342,25 @@ void spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op)
// Branch Indirect: continue execution at the address read from word 3 of register ra.
void spu_interpreter::BI(SPUThread& spu, spu_opcode_t op)
{
	set_interrupt_status(spu, op);

	// pc is left 4 bytes before the target
	// NOTE(review): presumably the caller advances pc by 4 after each instruction - confirm
	spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
}
// Branch Indirect and Set Link: store the address of the next instruction in rt,
// then continue execution at the address read from word 3 of register ra.
void spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op)
{
	set_interrupt_status(spu, op);

	// Read the target before writing rt, in case rt and ra are the same register
	const u32 target = spu_branch_target(spu.gpr[op.ra]._u32[3]);

	// The link address goes through spu_branch_target so it is masked like any other branch address
	spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));

	// pc is left 4 bytes before the target
	spu.pc = target - 4;
}
// IRET has no interpreter implementation: executing it always throws.
void spu_interpreter::IRET(SPUThread& spu, spu_opcode_t op)
{
	// Single throw only; a second throw after this one could never be reached
	throw EXCEPTION("Unimplemented instruction");
}
// BISLED has no interpreter implementation: executing it always throws.
void spu_interpreter::BISLED(SPUThread& spu, spu_opcode_t op)
{
	// Single throw only; a second throw after this one could never be reached
	throw EXCEPTION("Unimplemented instruction");
}
void spu_interpreter::HBR(SPUThread& spu, spu_opcode_t op)
@ -1022,7 +1022,7 @@ void spu_interpreter::LQA(SPUThread& spu, spu_opcode_t op)
// Branch Absolute and Set Link: store the address of the next instruction in rt,
// then continue execution at the absolute address encoded in i16.
void spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op)
{
	// Absolute target: the base address is 0
	const u32 target = spu_branch_target(0, op.i16);

	// The link address goes through spu_branch_target so it is masked like any other branch address
	spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));

	// pc is left 4 bytes before the target
	spu.pc = target - 4;
}
@ -1039,7 +1039,7 @@ void spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op)
// Branch Relative and Set Link: store the address of the next instruction in rt,
// then continue execution at pc plus the offset encoded in i16.
void spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op)
{
	// Relative target: the base address is the current pc
	const u32 target = spu_branch_target(spu.pc, op.i16);

	// The link address goes through spu_branch_target so it is masked like any other branch address
	spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));

	// pc is left 4 bytes before the target
	spu.pc = target - 4;
}

View File

@ -316,7 +316,7 @@ public:
}
};
// Compute a branch address: `pc` plus `imm` scaled by 4, masked with 0x3fffc
// (only bits 2..17 of the sum are kept).
//   pc  - base address
//   imm - signed offset, multiplied by 4 before it is added; defaults to 0 so the
//         function can also be used just to mask an address
inline u32 spu_branch_target(u32 pc, s32 imm = 0)
{
	// Shift the unsigned representation: left-shifting a negative signed value is
	// undefined behaviour before C++20, while unsigned arithmetic wraps as intended
	return (pc + (static_cast<u32>(imm) << 2)) & 0x3fffc;
}

View File

@ -23,22 +23,28 @@ u32 SPURecompilerDecoder::DecodeMemory(const u32 address)
throw EXCEPTION("Invalid address or PC (address=0x%x, PC=0x%05x)", address, spu.pc);
}
// get SPU LS pointer
const auto _ls = vm::get_ptr<be_t<u32>>(spu.offset);
const u32 index = spu.pc / 4;
if (!m_entries.at(index) || true) // always validate (TODO)
{
// always validate (TODO)
const auto func = db->analyse(_ls, spu.pc);
if (!func->compiled) rec->compile(*func);
// reset callstack if necessary
if (func->does_reset_stack && spu.recursion_level)
{
spu.m_state |= CPU_STATE_RETURN;
if (!func->compiled) throw EXCEPTION("Compilation failed");
m_entries[index] = func->compiled;
return 0;
}
const u32 res = m_entries[index](&spu, _ls);
if (!func->compiled)
{
rec->compile(*func);
if (!func->compiled) throw EXCEPTION("Compilation failed");
}
const u32 res = func->compiled(&spu, _ls);
if (const auto exception = spu.pending_exception)
{

View File

@ -23,8 +23,6 @@ public:
// SPU Decoder instance (created per SPU thread)
class SPURecompilerDecoder final : public CPUDecoder
{
std::array<spu_jit_func_t, 0x10000> m_entries = {}; // currently useless
public:
const std::shared_ptr<SPUDatabase> db; // associated SPU Analyser instance

View File

@ -1361,7 +1361,7 @@ void SPUThread::stop_and_signal(u32 code)
throw EXCEPTION("Invalid SPU Thread Group");
}
for (auto thread : group->threads)
for (auto& thread : group->threads)
{
if (thread && thread.get() != this)
{

View File

@ -524,6 +524,7 @@ public:
class SPUThread : public CPUThread
{
friend class SPURecompilerDecoder;
friend class spu_recompiler;
public:
@ -658,6 +659,7 @@ public:
std::function<void(SPUThread&)> custom_task;
std::exception_ptr pending_exception;
u32 recursion_level = 0;
protected:
SPUThread(CPUThreadType type, const std::string& name, std::function<std::string()> thread_name, u32 index, u32 offset);