SPU Recompiler fixed

This commit is contained in:
Nekotekina 2015-09-04 02:23:31 +03:00
parent ab1c05aa71
commit 7e01c81154
10 changed files with 430 additions and 334 deletions

File diff suppressed because it is too large Load Diff

View File

@ -30,32 +30,28 @@ private:
asmjit::X86GpVar* cpu; asmjit::X86GpVar* cpu;
asmjit::X86GpVar* ls; asmjit::X86GpVar* ls;
// output:
asmjit::X86GpVar* pos;
// temporary: // temporary:
asmjit::X86GpVar* addr; asmjit::X86GpVar* addr;
asmjit::X86GpVar* qw0; asmjit::X86GpVar* qw0;
asmjit::X86GpVar* qw1; asmjit::X86GpVar* qw1;
asmjit::X86GpVar* qw2; asmjit::X86GpVar* qw2;
std::array<asmjit::X86XmmVar*, 10> vec; std::array<asmjit::X86XmmVar*, 6> vec;
// labels: // labels:
asmjit::Label* labels; // array[0x10000] asmjit::Label* labels; // array[0x10000]
asmjit::Label* jt; // jump table resolver asmjit::Label* jt; // jump table resolver (uses *addr)
asmjit::Label* end; // function end (return *addr)
class XmmLink class XmmLink
{ {
friend class spu_recompiler; friend class spu_recompiler;
asmjit::X86XmmVar*& m_alloc; asmjit::X86XmmVar* const m_var;
asmjit::X86XmmVar* xmm_var;
XmmLink(asmjit::X86XmmVar*& xmm_var) XmmLink(asmjit::X86XmmVar*& xmm_var)
: m_alloc(xmm_var) : m_var(xmm_var)
, xmm_var(xmm_var)
{ {
m_alloc = nullptr; xmm_var = nullptr;
} }
public: public:
@ -64,24 +60,13 @@ private:
XmmLink(const XmmLink&) = delete; XmmLink(const XmmLink&) = delete;
XmmLink(XmmLink&& right) XmmLink(XmmLink&& right)
: m_alloc(right.m_alloc) : m_var(right.m_var)
, xmm_var(right.xmm_var)
{ {
right.xmm_var = nullptr;
}
XmmLink& operator =(const XmmLink&) = delete;
XmmLink& operator =(XmmLink&& right) = delete;
~XmmLink()
{
if (xmm_var) m_alloc = xmm_var;
} }
inline operator const asmjit::X86XmmVar&() const inline operator const asmjit::X86XmmVar&() const
{ {
return *xmm_var; return *m_var;
} }
}; };

View File

@ -7,6 +7,25 @@
const spu_opcode_table_t<spu_itype_t> g_spu_itype{ DEFINE_SPU_OPCODES(spu_itype::), spu_itype::UNK }; const spu_opcode_table_t<spu_itype_t> g_spu_itype{ DEFINE_SPU_OPCODES(spu_itype::), spu_itype::UNK };
// Look up an already registered function stored under `key` whose recorded contents match `data`.
//
// data     - pointer to the first instruction word of the candidate code; it is compared
//            element-wise against the stored `spu_function_t::data` of each candidate
// key      - database key (analyse() builds it from the entry address in the low 32 bits
//            and the first instruction word in the high 32 bits)
// max_size - upper bound for `spu_function_t::size`; larger candidates are skipped
//            (presumably in bytes, matching `limit - pos` in the caller; confirm)
//
// Returns the matching function, or nullptr if no candidate under `key` matches.
std::shared_ptr<spu_function_t> SPUDatabase::find(const be_t<u32>* data, u64 key, u32 max_size)
{
	// Visit only the entries registered under this exact key. Iterating from find(key)
	// up to end() would also walk over entries belonging to unrelated keys, which could
	// return a function registered for a different entry point and scans far more
	// elements than necessary.
	const auto range = m_db.equal_range(key);

	for (auto found = range.first; found != range.second; ++found)
	{
		const auto& func = found->second;

		// Skip candidates that do not fit into the region the caller allows to be compared
		if (func->size > max_size)
		{
			continue;
		}

		// Compare binary data explicitly (TODO: optimize)
		if (std::equal(func->data.begin(), func->data.end(), data))
		{
			return func;
		}
	}

	return nullptr;
}
SPUDatabase::SPUDatabase() SPUDatabase::SPUDatabase()
{ {
// TODO: load existing database associated with currently running executable // TODO: load existing database associated with currently running executable
@ -33,13 +52,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
const u64 key = entry | u64{ ls[entry / 4] } << 32; const u64 key = entry | u64{ ls[entry / 4] } << 32;
// Try to find existing function in the database // Try to find existing function in the database
for (auto found = m_db.find(key); found != m_db.end(); found++) if (auto func = find(ls + entry / 4, key, max_limit - entry))
{ {
// Compare binary data explicitly (TODO: optimize) return func;
if (std::equal(found->second->data.begin(), found->second->data.end(), ls + entry / 4))
{
return found->second;
}
} }
// Initialize block entries with the function entry point // Initialize block entries with the function entry point
@ -51,6 +66,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
// Set initial limit which will be narrowed later // Set initial limit which will be narrowed later
u32 limit = max_limit; u32 limit = max_limit;
// Minimal position of ila $SP,* instruction
u32 ila_sp_pos = max_limit;
// Find preliminary set of possible block entries (first pass), `start` is the current block address // Find preliminary set of possible block entries (first pass), `start` is the current block address
for (u32 start = entry, pos = entry; pos < limit; pos += 4) for (u32 start = entry, pos = entry; pos < limit; pos += 4)
{ {
@ -60,7 +78,15 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
using namespace spu_itype; using namespace spu_itype;
if (start == pos) // Additional analysis at the beginning of the block (questionable) // Find existing function
if (pos != entry && find(ls + pos / 4, pos | u64{ op.opcode } << 32, limit - pos))
{
limit = pos;
break;
}
// Additional analysis at the beginning of the block
if (start != entry && start == pos)
{ {
// Possible jump table // Possible jump table
std::vector<u32> jt_abs, jt_rel; std::vector<u32> jt_abs, jt_rel;
@ -128,17 +154,19 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
{ {
// Discard current block and abort the operation // Discard current block and abort the operation
limit = start; limit = start;
break; break;
} }
else if (op.opcode == 0) // Hack: special case (STOP 0)
if (op.opcode == 0) // Hack: special case (STOP 0)
{ {
limit = pos + 4; limit = pos + 4;
break; break;
} }
else if (type == BI) // Branch Indirect
if (type == BI || type == IRET) // Branch Indirect
{ {
if (type == IRET) LOG_ERROR(SPU, "[0x%05x] Interrupt Return", pos);
blocks.emplace(start); start = pos + 4; blocks.emplace(start); start = pos + 4;
} }
else if (type == BR || type == BRA) // Branch Relative/Absolute else if (type == BR || type == BRA) // Branch Relative/Absolute
@ -163,7 +191,7 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
{ {
// Branch to the next instruction and set link ("get next instruction address" idiom) // Branch to the next instruction and set link ("get next instruction address" idiom)
if (op.rt == 0) LOG_ERROR(SPU, "Suspicious instruction at [0x%05x]", pos); if (op.rt == 0) LOG_ERROR(SPU, "[0x%05x] Branch-to-next with $LR", pos);
} }
else else
{ {
@ -174,13 +202,15 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
{ {
limit = std::min<u32>(limit, target); limit = std::min<u32>(limit, target);
} }
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Function call without $LR", pos);
} }
} }
else if (type == BISL) // Branch Indirect and Set Link else if (type == BISL || type == BISLED) // Branch Indirect and Set Link
{ {
// Nothing if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Indirect function call without $LR", pos);
} }
else if (type == BRNZ || type == BRZ || type == BRHNZ || type == BRHZ) // Branch Relative if (Not) Zero Word/Halfword else if (type == BRNZ || type == BRZ || type == BRHNZ || type == BRHZ) // Branch Relative if (Not) Zero (Half)word
{ {
const u32 target = spu_branch_target(pos, op.i16); const u32 target = spu_branch_target(pos, op.i16);
@ -192,6 +222,40 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
blocks.emplace(target); blocks.emplace(target);
} }
} }
else if (type == BINZ || type == BIZ || type == BIHNZ || type == BIHZ) // Branch Indirect if (Not) Zero (Half)word
{
}
else if (type == HBR || type == HBRA || type == HBRR) // Hint for Branch
{
}
else if (type == STQA || type == STQD || type == STQR || type == STQX || type == FSCRWR || type == MTSPR || type == WRCH) // Store
{
}
else if (type == HEQ || type == HEQI || type == HGT || type == HGTI || type == HLGT || type == HLGTI) // Halt
{
}
else if (type == STOP || type == STOPD || type == NOP || type == LNOP || type == SYNC || type == DSYNC) // Miscellaneous
{
}
else // Other instructions (writing rt reg)
{
const u32 rt = type == SELB || type == SHUFB || type == MPYA || type == FNMS || type == FMA || type == FMS ? op.rc : op.rt;
// Analyse link register access
if (rt == 0)
{
}
// Analyse stack pointer access
if (rt == 1)
{
if (type == ILA && pos < ila_sp_pos)
{
// set minimal ila $SP,* instruction position
ila_sp_pos = pos;
}
}
}
} }
// Find more function calls (second pass, questionable) // Find more function calls (second pass, questionable)
@ -228,6 +292,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
// Prepare new function (set addr and size) // Prepare new function (set addr and size)
auto func = std::make_shared<spu_function_t>(entry, limit - entry); auto func = std::make_shared<spu_function_t>(entry, limit - entry);
// Copy function contents
func->data = { ls + entry / 4, ls + limit / 4 };
// Fill function block info // Fill function block info
for (auto i = blocks.crbegin(); i != blocks.crend(); i++) for (auto i = blocks.crbegin(); i != blocks.crend(); i++)
{ {
@ -255,8 +322,8 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
} }
} }
// Copy function contents // Set whether the function can reset stack
func->data = { ls + entry / 4, ls + limit / 4 }; func->does_reset_stack = ila_sp_pos < limit;
// Add function to the database // Add function to the database
m_db.emplace(key, func); m_db.emplace(key, func);

View File

@ -242,6 +242,9 @@ struct spu_function_t
// jump table values (start addresses) // jump table values (start addresses)
std::set<u32> jtable; std::set<u32> jtable;
// whether ila $SP,* instruction found
bool does_reset_stack;
// pointer to the compiled function // pointer to the compiled function
spu_jit_func_t compiled = nullptr; spu_jit_func_t compiled = nullptr;
@ -260,6 +263,9 @@ class SPUDatabase final
// All registered functions (uses addr and first instruction as a key) // All registered functions (uses addr and first instruction as a key)
std::unordered_multimap<u64, std::shared_ptr<spu_function_t>> m_db; std::unordered_multimap<u64, std::shared_ptr<spu_function_t>> m_db;
// For internal use
std::shared_ptr<spu_function_t> find(const be_t<u32>* data, u64 key, u32 max_size);
public: public:
SPUDatabase(); SPUDatabase();
~SPUDatabase(); ~SPUDatabase();

View File

@ -298,7 +298,7 @@ void spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op)
if (spu.gpr[op.rt]._u32[3] == 0) if (spu.gpr[op.rt]._u32[3] == 0)
{ {
set_interrupt_status(spu, op); set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4; spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
} }
} }
@ -307,7 +307,7 @@ void spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op)
if (spu.gpr[op.rt]._u32[3] != 0) if (spu.gpr[op.rt]._u32[3] != 0)
{ {
set_interrupt_status(spu, op); set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4; spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
} }
} }
@ -316,7 +316,7 @@ void spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op)
if (spu.gpr[op.rt]._u16[6] == 0) if (spu.gpr[op.rt]._u16[6] == 0)
{ {
set_interrupt_status(spu, op); set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4; spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
} }
} }
@ -325,13 +325,13 @@ void spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op)
if (spu.gpr[op.rt]._u16[6] != 0) if (spu.gpr[op.rt]._u16[6] != 0)
{ {
set_interrupt_status(spu, op); set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4; spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
} }
} }
void spu_interpreter::STOPD(SPUThread& spu, spu_opcode_t op) void spu_interpreter::STOPD(SPUThread& spu, spu_opcode_t op)
{ {
throw EXCEPTION("Unexpected instruction"); throw EXCEPTION("Unimplemented instruction");
} }
void spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op) void spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op)
@ -342,25 +342,25 @@ void spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op)
void spu_interpreter::BI(SPUThread& spu, spu_opcode_t op) void spu_interpreter::BI(SPUThread& spu, spu_opcode_t op)
{ {
set_interrupt_status(spu, op); set_interrupt_status(spu, op);
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4; spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
} }
void spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op) void spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op)
{ {
set_interrupt_status(spu, op); set_interrupt_status(spu, op);
const u32 target = spu_branch_target(spu.gpr[op.ra]._u32[3], 0); const u32 target = spu_branch_target(spu.gpr[op.ra]._u32[3]);
spu.gpr[op.rt] = v128::from32r(spu.pc + 4); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
spu.pc = target - 4; spu.pc = target - 4;
} }
void spu_interpreter::IRET(SPUThread& spu, spu_opcode_t op) void spu_interpreter::IRET(SPUThread& spu, spu_opcode_t op)
{ {
throw EXCEPTION("Unexpected instruction"); throw EXCEPTION("Unimplemented instruction");
} }
void spu_interpreter::BISLED(SPUThread& spu, spu_opcode_t op) void spu_interpreter::BISLED(SPUThread& spu, spu_opcode_t op)
{ {
throw EXCEPTION("Unexpected instruction"); throw EXCEPTION("Unimplemented instruction");
} }
void spu_interpreter::HBR(SPUThread& spu, spu_opcode_t op) void spu_interpreter::HBR(SPUThread& spu, spu_opcode_t op)
@ -1022,7 +1022,7 @@ void spu_interpreter::LQA(SPUThread& spu, spu_opcode_t op)
void spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op) void spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op)
{ {
const u32 target = spu_branch_target(0, op.i16); const u32 target = spu_branch_target(0, op.i16);
spu.gpr[op.rt] = v128::from32r(spu.pc + 4); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
spu.pc = target - 4; spu.pc = target - 4;
} }
@ -1039,7 +1039,7 @@ void spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op)
void spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op) void spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op)
{ {
const u32 target = spu_branch_target(spu.pc, op.i16); const u32 target = spu_branch_target(spu.pc, op.i16);
spu.gpr[op.rt] = v128::from32r(spu.pc + 4); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
spu.pc = target - 4; spu.pc = target - 4;
} }

View File

@ -316,7 +316,7 @@ public:
} }
}; };
inline u32 spu_branch_target(u32 pc, s32 imm) inline u32 spu_branch_target(u32 pc, s32 imm = 0)
{ {
return (pc + (imm << 2)) & 0x3fffc; return (pc + (imm << 2)) & 0x3fffc;
} }

View File

@ -23,22 +23,28 @@ u32 SPURecompilerDecoder::DecodeMemory(const u32 address)
throw EXCEPTION("Invalid address or PC (address=0x%x, PC=0x%05x)", address, spu.pc); throw EXCEPTION("Invalid address or PC (address=0x%x, PC=0x%05x)", address, spu.pc);
} }
// get SPU LS pointer
const auto _ls = vm::get_ptr<be_t<u32>>(spu.offset); const auto _ls = vm::get_ptr<be_t<u32>>(spu.offset);
const u32 index = spu.pc / 4; // always validate (TODO)
const auto func = db->analyse(_ls, spu.pc);
if (!m_entries.at(index) || true) // always validate (TODO) // reset callstack if necessary
if (func->does_reset_stack && spu.recursion_level)
{ {
const auto func = db->analyse(_ls, spu.pc); spu.m_state |= CPU_STATE_RETURN;
if (!func->compiled) rec->compile(*func); return 0;
if (!func->compiled) throw EXCEPTION("Compilation failed");
m_entries[index] = func->compiled;
} }
const u32 res = m_entries[index](&spu, _ls); if (!func->compiled)
{
rec->compile(*func);
if (!func->compiled) throw EXCEPTION("Compilation failed");
}
const u32 res = func->compiled(&spu, _ls);
if (const auto exception = spu.pending_exception) if (const auto exception = spu.pending_exception)
{ {

View File

@ -23,8 +23,6 @@ public:
// SPU Decoder instance (created per SPU thread) // SPU Decoder instance (created per SPU thread)
class SPURecompilerDecoder final : public CPUDecoder class SPURecompilerDecoder final : public CPUDecoder
{ {
std::array<spu_jit_func_t, 0x10000> m_entries = {}; // currently useless
public: public:
const std::shared_ptr<SPUDatabase> db; // associated SPU Analyser instance const std::shared_ptr<SPUDatabase> db; // associated SPU Analyser instance

View File

@ -1361,7 +1361,7 @@ void SPUThread::stop_and_signal(u32 code)
throw EXCEPTION("Invalid SPU Thread Group"); throw EXCEPTION("Invalid SPU Thread Group");
} }
for (auto thread : group->threads) for (auto& thread : group->threads)
{ {
if (thread && thread.get() != this) if (thread && thread.get() != this)
{ {

View File

@ -524,6 +524,7 @@ public:
class SPUThread : public CPUThread class SPUThread : public CPUThread
{ {
friend class SPURecompilerDecoder;
friend class spu_recompiler; friend class spu_recompiler;
public: public:
@ -658,6 +659,7 @@ public:
std::function<void(SPUThread&)> custom_task; std::function<void(SPUThread&)> custom_task;
std::exception_ptr pending_exception; std::exception_ptr pending_exception;
u32 recursion_level = 0;
protected: protected:
SPUThread(CPUThreadType type, const std::string& name, std::function<std::string()> thread_name, u32 index, u32 offset); SPUThread(CPUThreadType type, const std::string& name, std::function<std::string()> thread_name, u32 index, u32 offset);