Implement "built_function" utility (runtime-generated assembly)

Similar to build_function_asm, but links without indirection.
Achieved by emitting code directly into a byte array.
This commit is contained in:
Nekotekina 2021-12-22 19:27:20 +03:00
parent 12e3c9e08b
commit dcd011048d
6 changed files with 126 additions and 7 deletions

View File

@ -268,6 +268,44 @@ asmjit::Runtime& asmjit::get_global_runtime()
return g_rt;
}
asmjit::Error asmjit::inline_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept
{
usz codeSize = code->getCodeSize();
if (!codeSize) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorNoCodeGenerated;
}
if (utils::align(codeSize, 4096) > m_size) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorNoVirtualMemory;
}
usz relocSize = code->relocate(m_data);
if (!relocSize) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorInvalidState;
}
flush(m_data, relocSize);
*dst = m_data;
return asmjit::kErrorOk;
}
asmjit::Error asmjit::inline_runtime::_release(void*) noexcept
{
return asmjit::kErrorOk;
}
asmjit::inline_runtime::~inline_runtime()
{
utils::memory_protect(m_data, m_size, utils::protection::rx);
}
#ifdef LLVM_AVAILABLE
#include <unordered_map>

View File

@ -63,6 +63,30 @@ namespace asmjit
// Should only be used to build global functions
asmjit::Runtime& get_global_runtime();
// Don't use directly
class inline_runtime : public HostRuntime
{
uchar* m_data;
usz m_size;
public:
inline_runtime(const inline_runtime&) = delete;
inline_runtime& operator=(const inline_runtime&) = delete;
inline_runtime(uchar* data, usz size)
: m_data(data)
, m_size(size)
{
}
asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override;
asmjit::Error _release(void*) noexcept override;
~inline_runtime();
};
// Emit xbegin and adjacent loop, return label at xbegin (don't use xabort please)
template <typename F>
[[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, F func)
@ -168,6 +192,63 @@ inline FT build_function_asm(F&& builder)
return result;
}
template <typename FT, usz Size = 4096>
class built_function
{
alignas(4096) uchar m_data[Size];
public:
built_function(const built_function&) = delete;
built_function& operator=(const built_function&) = delete;
template <typename F>
built_function(F&& builder)
{
using namespace asmjit;
inline_runtime rt(m_data, Size);
CodeHolder code;
code.init(rt.getCodeInfo());
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
std::array<X86Gp, 4> args;
#ifdef _WIN32
args[0] = x86::rcx;
args[1] = x86::rdx;
args[2] = x86::r8;
args[3] = x86::r9;
#else
args[0] = x86::rdi;
args[1] = x86::rsi;
args[2] = x86::rdx;
args[3] = x86::rcx;
#endif
X86Assembler compiler(&code);
builder(std::ref(compiler), args);
FT result;
if (compiler.getLastError() || rt.add(&result, &code))
{
ensure(false);
}
}
operator FT() const noexcept
{
return FT(+m_data);
}
template <typename... Args>
auto operator()(Args&&... args) const noexcept
{
return FT(+m_data)(std::forward<Args>(args)...);
}
};
#ifdef LLVM_AVAILABLE
#include <memory>

View File

@ -147,7 +147,7 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op);
extern void do_cell_atomic_128_store(u32 addr, const void* to_write);
const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>([](asmjit::X86Assembler& c, auto& args)
const auto ppu_gateway = built_function<void(*)(ppu_thread*)>([](asmjit::X86Assembler& c, auto& args)
{
// Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape
using namespace asmjit;
@ -1816,7 +1816,7 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
return ppu_load_acquire_reservation<u64>(ppu, addr);
}
const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>([](asmjit::X86Assembler& c, auto& args)
const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>([](asmjit::X86Assembler& c, auto& args)
{
using namespace asmjit;

View File

@ -160,7 +160,7 @@ DECLARE(spu_runtime::tr_all) = []
return reinterpret_cast<spu_function_t>(trptr);
}();
DECLARE(spu_runtime::g_gateway) = build_function_asm<spu_function_t>([](asmjit::X86Assembler& c, auto& args)
DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>([](asmjit::X86Assembler& c, auto& args)
{
// Gateway for SPU dispatcher, converts from native to GHC calling convention, also saves RSP value for spu_escape
using namespace asmjit;

View File

@ -133,7 +133,7 @@ public:
static std::array<atomic_t<spu_function_t>, (1 << 20)>* const g_dispatcher;
// Recompiler entry point
static const spu_function_t g_gateway;
static const built_function<spu_function_t> g_gateway;
// Longjmp to the end of the gateway function (native CC)
static void(*const g_escape)(spu_thread*);

View File

@ -405,7 +405,7 @@ std::array<u32, 2> op_branch_targets(u32 pc, spu_opcode_t op)
return res;
}
const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
{
using namespace asmjit;
@ -667,7 +667,7 @@ const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void*
c.ret();
});
const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>([](asmjit::X86Assembler& c, auto& args)
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>([](asmjit::X86Assembler& c, auto& args)
{
using namespace asmjit;
@ -801,7 +801,7 @@ const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rda
c.ret();
});
const extern auto spu_getllar_tx = build_function_asm<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>([](asmjit::X86Assembler& c, auto& args)
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>([](asmjit::X86Assembler& c, auto& args)
{
using namespace asmjit;