Remove spu_thread::jit_dispatcher

Use a global array instead to save memory
Move the array to JIT memory
This commit is contained in:
Nekotekina 2019-01-28 16:14:01 +03:00
parent 4292997a01
commit 50922faac9
6 changed files with 30 additions and 57 deletions

View File

@ -46,14 +46,6 @@ void spu_recompiler::init()
}
}
// Return the dispatcher entry stored for the given local storage address.
// The table is indexed by lsa / 4.
spu_function_t spu_recompiler::get(u32 lsa)
{
	// Run init() first, as every entry point of this recompiler shown here does
	init();

	// Single read of the atomic table slot
	const spu_function_t entry = m_spurt->m_dispatcher[lsa / 4];
	return entry;
}
spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
{
init();
@ -955,7 +947,9 @@ void spu_recompiler::branch_fixed(u32 target)
return;
}
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&spu_thread::jit_dispatcher) + target * 2));
c->mov(x86::rax, imm_ptr(spu_runtime::g_dispatcher + target / 4));
c->mov(x86::rax, x86::qword_ptr(x86::rax));
c->mov(SPU_OFF_32(pc), target);
c->cmp(SPU_OFF_32(state), 0);
c->jnz(label_stop);
@ -1038,7 +1032,8 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
if (!jt && g_cfg.core.spu_block_size != spu_block_size_type::giga)
{
// Simply external call (return or indirect call)
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher)));
c->mov(x86::r10, imm_ptr(spu_runtime::g_dispatcher));
c->mov(x86::r10, x86::qword_ptr(x86::r10, addr->r64(), 1, 0));
}
else
{
@ -1057,7 +1052,8 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->lea(x86::r10, x86::qword_ptr(instr_table));
c->cmp(qw1->r32(), end - start);
c->lea(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher)));
c->mov(*qw1, imm_ptr(spu_runtime::g_dispatcher));
c->lea(*qw1, x86::qword_ptr(*qw1, addr->r64(), 1, 0));
c->cmovae(x86::r10, *qw1);
c->mov(x86::r10, x86::qword_ptr(x86::r10));
}

View File

@ -15,8 +15,6 @@ public:
virtual void init() override;
virtual spu_function_t get(u32 lsa) override;
virtual spu_function_t compile(std::vector<u32>&&) override;
private:

View File

@ -23,6 +23,19 @@ const spu_decoder<spu_iname> s_spu_iname;
extern u64 get_timebased_time();
// Definition of the global dispatcher table: 0x10000 entries obtained from
// jit_runtime::alloc, each initially pointing at spu_recompiler_base::dispatch.
// NOTE(review): the allocation is sized with sizeof(void*) while the element type is
// decltype(*g_dispatcher) - assumes both have the same size; confirm.
DECLARE(spu_runtime::g_dispatcher) = []
{
	using table_t = decltype(spu_runtime::g_dispatcher);

	// Reserve room for the whole lookup table (8-byte alignment requested)
	void* const mem = jit_runtime::alloc(0x10000 * sizeof(void*), 8, false);
	const auto table = reinterpret_cast<table_t>(mem);

	// Fill every slot with the generic dispatcher
	for (auto slot = table; slot != table + 0x10000; ++slot)
	{
		slot->raw() = &spu_recompiler_base::dispatch;
	}

	return table;
}();
spu_cache::spu_cache(const std::string& loc)
: m_file(loc, fs::read + fs::write + fs::create + fs::append)
{
@ -231,12 +244,6 @@ void spu_cache::initialize()
spu_runtime::spu_runtime()
{
// Initialize lookup table
for (auto& v : m_dispatcher)
{
v.raw() = &spu_recompiler_base::dispatch;
}
// Initialize "empty" block
m_map[std::vector<u32>()] = &spu_recompiler_base::dispatch;
@ -275,7 +282,7 @@ void spu_runtime::add(std::pair<const std::vector<u32>, spu_function_t>& where,
if (size0 == 1)
{
m_dispatcher[func[0] / 4] = compiled;
g_dispatcher[func[0] / 4] = compiled;
}
else
{
@ -516,7 +523,7 @@ void spu_runtime::add(std::pair<const std::vector<u32>, spu_function_t>& where,
}
}
m_dispatcher[func[0] / 4] = reinterpret_cast<spu_function_t>(reinterpret_cast<u64>(wxptr));
g_dispatcher[func[0] / 4] = reinterpret_cast<spu_function_t>(reinterpret_cast<u64>(wxptr));
}
lock.unlock();
@ -543,17 +550,8 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
#endif
}
const auto func = spu.jit->get(spu.pc);
// First attempt (load new trampoline and retry)
if (func != spu.jit_dispatcher[spu.pc / 4])
{
spu.jit_dispatcher[spu.pc / 4] = func;
return;
}
// Second attempt (recover from the recursion after repeated unsuccessful trampoline call)
if (spu.block_counter != spu.block_recover && func != &dispatch)
if (spu.block_counter != spu.block_recover && &dispatch != spu_runtime::g_dispatcher[spu.pc / 4])
{
spu.block_recover = spu.block_counter;
return;
@ -561,7 +559,6 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
// Compile
verify(HERE), spu.jit->compile(spu.jit->block(spu._ptr<u32>(0), spu.pc));
spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc);
// Diagnostic
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
@ -579,7 +576,6 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
{
// Compile (TODO: optimize search of the existing functions)
const auto func = verify(HERE, spu.jit->compile(spu.jit->block(spu._ptr<u32>(0), spu.pc)));
spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc);
// Overwrite jump to this function with jump to the compiled function
const s64 rel = reinterpret_cast<u64>(func) - reinterpret_cast<u64>(rip) - 5;
@ -1989,9 +1985,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->SetInsertPoint(result);
m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&spu_thread::pc));
const auto addr = m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher) + target * 2));
const auto type = llvm::FunctionType::get(get_type<void>(), {get_type<u8*>(), get_type<u8*>(), get_type<u32>()}, false)->getPointerTo()->getPointerTo();
tail(m_ir->CreateLoad(m_ir->CreateBitCast(addr, type)));
tail(m_ir->CreateLoad(m_ir->CreateIntToPtr(m_ir->getInt64((u64)(spu_runtime::g_dispatcher + target / 4)), type)));
m_ir->SetInsertPoint(cblock);
return result;
}
@ -2534,14 +2529,6 @@ public:
}
}
// Return the dispatcher entry stored for the given local storage address.
// The table is indexed by lsa / 4.
virtual spu_function_t get(u32 lsa) override
{
	// Run init() before the runtime's table is accessed
	init();

	// Single read of the atomic table slot
	const spu_function_t entry = m_spurt->m_dispatcher[lsa / 4];
	return entry;
}
virtual spu_function_t compile(std::vector<u32>&& func_rv) override
{
init();
@ -5663,7 +5650,7 @@ public:
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
const auto type = llvm::FunctionType::get(get_type<void>(), {get_type<u8*>(), get_type<u8*>(), get_type<u32>()}, false)->getPointerTo()->getPointerTo();
const auto disp = m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher))), type);
const auto disp = m_ir->CreateIntToPtr(m_ir->getInt64((u64)spu_runtime::g_dispatcher), type);
const auto ad64 = m_ir->CreateZExt(addr.value, get_type<u64>());
if (ret && g_cfg.core.spu_block_size != spu_block_size_type::safe)

View File

@ -44,9 +44,6 @@ public:
// All functions
std::map<std::vector<u32>, spu_function_t> m_map;
// All dispatchers
std::array<atomic_t<spu_function_t>, 0x10000> m_dispatcher;
// Debug module output location
std::string m_cache_path;
@ -59,6 +56,9 @@ public:
// Add compiled function and generate trampoline if necessary
void add(std::pair<const std::vector<u32>, spu_function_t>& where, spu_function_t compiled);
// All dispatchers (array allocated in jit memory)
static atomic_t<spu_function_t>* const g_dispatcher;
};
// SPU Recompiler instance base class
@ -100,9 +100,6 @@ public:
// Initialize
virtual void init() = 0;
// Get pointer to the trampoline at given position
virtual spu_function_t get(u32 lsa) = 0;
// Compile function
virtual spu_function_t compile(std::vector<u32>&&) = 0;

View File

@ -564,7 +564,7 @@ void spu_thread::cpu_task()
{
while (LIKELY(!state || !check_state()))
{
jit_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
spu_runtime::g_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
}
// Print some stats
@ -696,9 +696,6 @@ spu_thread::spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::stri
if (g_cfg.core.spu_decoder != spu_decoder_type::fast && g_cfg.core.spu_decoder != spu_decoder_type::precise)
{
// Initialize lookup table
jit_dispatcher.fill(&spu_recompiler_base::dispatch);
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
{
// Initialize stack mirror
@ -1340,7 +1337,7 @@ bool spu_thread::process_mfc_cmd()
while (true)
{
result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data());
if (result < 2)
{
break;

View File

@ -584,8 +584,6 @@ public:
u64 block_recover = 0;
u64 block_failure = 0;
std::array<spu_function_t, 0x10000> jit_dispatcher; // Dispatch table for indirect calls
std::array<v128, 0x4000> stack_mirror; // Return address information
void push_snr(u32 number, u32 value);