Remove SPUThread::jit_dispatcher

Use a global array to save memory. Move the array to JIT memory.
2019-01-28 16:14:01 +03:00 · 2019-01-28 16:14:01 +03:00 · 50922faac9
parent 4292997a01
commit 50922faac9
6 changed files with 30 additions and 57 deletions
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
@ -46,14 +46,6 @@ void spu_recompiler::init()
 	}
 }

-spu_function_t spu_recompiler::get(u32 lsa)
-{
-	init();
-
-	// Simple atomic read
-	return m_spurt->m_dispatcher[lsa / 4];
-}
-
 spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
 {
 	init();
@ -955,7 +947,9 @@ void spu_recompiler::branch_fixed(u32 target)
 		return;
 	}

-	c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&spu_thread::jit_dispatcher) + target * 2));
+	c->mov(x86::rax, imm_ptr(spu_runtime::g_dispatcher + target / 4));
+	c->mov(x86::rax, x86::qword_ptr(x86::rax));
+
 	c->mov(SPU_OFF_32(pc), target);
 	c->cmp(SPU_OFF_32(state), 0);
 	c->jnz(label_stop);
@ -1038,7 +1032,8 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
 	if (!jt && g_cfg.core.spu_block_size != spu_block_size_type::giga)
 	{
 		// Simply external call (return or indirect call)
-		c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher)));
+		c->mov(x86::r10, imm_ptr(spu_runtime::g_dispatcher));
+		c->mov(x86::r10, x86::qword_ptr(x86::r10, addr->r64(), 1, 0));
 	}
 	else
 	{
@ -1057,7 +1052,8 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
 		c->lea(x86::r10, x86::qword_ptr(instr_table));
 		c->cmp(qw1->r32(), end - start);
 		c->lea(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
-		c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher)));
+		c->mov(*qw1, imm_ptr(spu_runtime::g_dispatcher));
+		c->lea(*qw1, x86::qword_ptr(*qw1, addr->r64(), 1, 0));
 		c->cmovae(x86::r10, *qw1);
 		c->mov(x86::r10, x86::qword_ptr(x86::r10));
 	}
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h
@ -15,8 +15,6 @@ public:

 	virtual void init() override;

-	virtual spu_function_t get(u32 lsa) override;
-
 	virtual spu_function_t compile(std::vector<u32>&&) override;

 private:
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@ -23,6 +23,19 @@ const spu_decoder<spu_iname> s_spu_iname;

 extern u64 get_timebased_time();

+DECLARE(spu_runtime::g_dispatcher) = []
+{
+	const auto ptr = reinterpret_cast<decltype(spu_runtime::g_dispatcher)>(jit_runtime::alloc(0x10000 * sizeof(void*), 8, false));
+
+	// Initialize lookup table
+	for (u32 i = 0; i < 0x10000; i++)
+	{
+		ptr[i].raw() = &spu_recompiler_base::dispatch;
+	}
+
+	return ptr;
+}();
+
 spu_cache::spu_cache(const std::string& loc)
 	: m_file(loc, fs::read + fs::write + fs::create + fs::append)
 {
@ -231,12 +244,6 @@ void spu_cache::initialize()

 spu_runtime::spu_runtime()
 {
-	// Initialize lookup table
-	for (auto& v : m_dispatcher)
-	{
-		v.raw() = &spu_recompiler_base::dispatch;
-	}
-
 	// Initialize "empty" block
 	m_map[std::vector<u32>()] = &spu_recompiler_base::dispatch;

@ -275,7 +282,7 @@ void spu_runtime::add(std::pair<const std::vector<u32>, spu_function_t>& where,

 	if (size0 == 1)
 	{
-		m_dispatcher[func[0] / 4] = compiled;
+		g_dispatcher[func[0] / 4] = compiled;
 	}
 	else
 	{
@ -516,7 +523,7 @@ void spu_runtime::add(std::pair<const std::vector<u32>, spu_function_t>& where,
 			}
 		}

-		m_dispatcher[func[0] / 4] = reinterpret_cast<spu_function_t>(reinterpret_cast<u64>(wxptr));
+		g_dispatcher[func[0] / 4] = reinterpret_cast<spu_function_t>(reinterpret_cast<u64>(wxptr));
 	}

 	lock.unlock();
@ -543,17 +550,8 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
 #endif
 	}

-	const auto func = spu.jit->get(spu.pc);
-
-	// First attempt (load new trampoline and retry)
-	if (func != spu.jit_dispatcher[spu.pc / 4])
-	{
-		spu.jit_dispatcher[spu.pc / 4] = func;
-		return;
-	}
-
 	// Second attempt (recover from the recursion after repeated unsuccessful trampoline call)
-	if (spu.block_counter != spu.block_recover && func != &dispatch)
+	if (spu.block_counter != spu.block_recover && &dispatch != spu_runtime::g_dispatcher[spu.pc / 4])
 	{
 		spu.block_recover = spu.block_counter;
 		return;
@ -561,7 +559,6 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)

 	// Compile
 	verify(HERE), spu.jit->compile(spu.jit->block(spu._ptr<u32>(0), spu.pc));
-	spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc);

 	// Diagnostic
 	if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
@ -579,7 +576,6 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
 {
 	// Compile (TODO: optimize search of the existing functions)
 	const auto func = verify(HERE, spu.jit->compile(spu.jit->block(spu._ptr<u32>(0), spu.pc)));
-	spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc);

 	// Overwrite jump to this function with jump to the compiled function
 	const s64 rel = reinterpret_cast<u64>(func) - reinterpret_cast<u64>(rip) - 5;
@ -1989,9 +1985,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
 			const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
 			m_ir->SetInsertPoint(result);
 			m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&spu_thread::pc));
-			const auto addr = m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher) + target * 2));
 			const auto type = llvm::FunctionType::get(get_type<void>(), {get_type<u8*>(), get_type<u8*>(), get_type<u32>()}, false)->getPointerTo()->getPointerTo();
-			tail(m_ir->CreateLoad(m_ir->CreateBitCast(addr, type)));
+			tail(m_ir->CreateLoad(m_ir->CreateIntToPtr(m_ir->getInt64((u64)(spu_runtime::g_dispatcher + target / 4)), type)));
 			m_ir->SetInsertPoint(cblock);
 			return result;
 		}
@ -2534,14 +2529,6 @@ public:
 		}
 	}

-	virtual spu_function_t get(u32 lsa) override
-	{
-		init();
-
-		// Simple atomic read
-		return m_spurt->m_dispatcher[lsa / 4];
-	}
-
 	virtual spu_function_t compile(std::vector<u32>&& func_rv) override
 	{
 		init();
@ -5663,7 +5650,7 @@ public:

 		m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
 		const auto type = llvm::FunctionType::get(get_type<void>(), {get_type<u8*>(), get_type<u8*>(), get_type<u32>()}, false)->getPointerTo()->getPointerTo();
-		const auto disp = m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher))), type);
+		const auto disp = m_ir->CreateIntToPtr(m_ir->getInt64((u64)spu_runtime::g_dispatcher), type);
 		const auto ad64 = m_ir->CreateZExt(addr.value, get_type<u64>());

 		if (ret && g_cfg.core.spu_block_size != spu_block_size_type::safe)
--- a/rpcs3/Emu/Cell/SPURecompiler.h
+++ b/rpcs3/Emu/Cell/SPURecompiler.h
@ -44,9 +44,6 @@ public:
 	// All functions
 	std::map<std::vector<u32>, spu_function_t> m_map;

-	// All dispatchers
-	std::array<atomic_t<spu_function_t>, 0x10000> m_dispatcher;
-
 	// Debug module output location
 	std::string m_cache_path;

@ -59,6 +56,9 @@ public:

 	// Add compiled function and generate trampoline if necessary
 	void add(std::pair<const std::vector<u32>, spu_function_t>& where, spu_function_t compiled);
+
+	// All dispatchers (array allocated in jit memory)
+	static atomic_t<spu_function_t>* const g_dispatcher;
 };

 // SPU Recompiler instance base class
@ -100,9 +100,6 @@ public:
 	// Initialize
 	virtual void init() = 0;

-	// Get pointer to the trampoline at given position
-	virtual spu_function_t get(u32 lsa) = 0;
-
 	// Compile function
 	virtual spu_function_t compile(std::vector<u32>&&) = 0;

--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@ -564,7 +564,7 @@ void spu_thread::cpu_task()
 	{
 		while (LIKELY(!state || !check_state()))
 		{
-			jit_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
+			spu_runtime::g_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
 		}

 		// Print some stats
@ -696,9 +696,6 @@ spu_thread::spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::stri

 	if (g_cfg.core.spu_decoder != spu_decoder_type::fast && g_cfg.core.spu_decoder != spu_decoder_type::precise)
 	{
-		// Initialize lookup table
-		jit_dispatcher.fill(&spu_recompiler_base::dispatch);
-
 		if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
 		{
 			// Initialize stack mirror
@ -1340,7 +1337,7 @@ bool spu_thread::process_mfc_cmd()
 				while (true)
 				{
 					result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data());
-					
+
 					if (result < 2)
 					{
 						break;
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@ -584,8 +584,6 @@ public:
 	u64 block_recover = 0;
 	u64 block_failure = 0;

-	std::array<spu_function_t, 0x10000> jit_dispatcher; // Dispatch table for indirect calls
-
 	std::array<v128, 0x4000> stack_mirror; // Return address information

 	void push_snr(u32 number, u32 value);