LLVM: Added JIT.cpp

Refactoring, used -fno-rtti option
This commit is contained in:
Nekotekina 2016-06-22 16:37:51 +03:00
parent 73ea020525
commit a19b721c26
7 changed files with 595 additions and 598 deletions

327
Utilities/JIT.cpp Normal file
View File

@ -0,0 +1,327 @@
#ifdef LLVM_AVAILABLE
#include <unordered_map>
#include <map>
#include <unordered_set>
#include <set>
#include <array>
#include "types.h"
#include "Macro.h"
#include "StrFmt.h"
#include "File.h"
#include "Log.h"
#ifdef _MSC_VER
#pragma warning(push, 0)
#endif
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#ifdef _WIN32
#include <Windows.h>
#else
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#endif
#include "JIT.h"
// Global LLVM context (thread-unsafe)
llvm::LLVMContext g_llvm_ctx;
// Size of virtual memory area reserved: 512 MB
static const u64 s_memory_size = 0x20000000;
// Try to reserve a portion of virtual memory in the first 2 GB address space beforehand, if possible.
static void* const s_memory = []() -> void*
{
#ifdef _WIN32
for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000)
{
if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS))
{
return (void*)addr;
}
}
return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS);
#else
return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif
}();
// EH frames
static u8* s_unwind_info;
static u64 s_unwind_size;
#ifdef _WIN32
static std::vector<RUNTIME_FUNCTION> s_unwind; // Custom .pdata section replacement
#endif
// Helper class
struct MemoryManager final : llvm::RTDyldMemoryManager
{
std::unordered_map<std::string, std::uintptr_t> table;
MemoryManager(std::unordered_map<std::string, std::uintptr_t>&& table)
: table(std::move(table))
{
}
[[noreturn]] static void null()
{
throw std::runtime_error("Null function" HERE);
}
virtual u64 getSymbolAddress(const std::string& name) override
{
if (u64 addr = RTDyldMemoryManager::getSymbolAddress(name))
{
// This may be bad if LLVM requests some built-in functions like fma.
LOG_ERROR(GENERAL, "LLVM: Symbol requested %s -> 0x%016llx", name, addr);
return addr;
}
const auto found = table.find(name);
if (found != table.end())
{
return found->second;
}
// It's fine if some function is never called, for example.
LOG_ERROR(GENERAL, "LLVM: Linkage failed for %s", name);
return (u64)null;
}
virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
{
// Simple allocation
const u64 next = ::align((u64)m_next + size, 4096);
if (next > (u64)s_memory + s_memory_size)
{
LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align);
return nullptr;
}
#ifdef _WIN32
if (!VirtualAlloc(m_next, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE))
#else
if (::mprotect(m_next, size, PROT_READ | PROT_WRITE | PROT_EXEC))
#endif
{
LOG_FATAL(GENERAL, "LLVM: Failed to allocate memory at 0x%p", m_next);
return nullptr;
}
LOG_SUCCESS(GENERAL, "LLVM: Code section %u '%s' allocated -> 0x%p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), m_next, size, align);
return (u8*)std::exchange(m_next, (void*)next);
}
virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
{
// Simple allocation
const u64 next = ::align((u64)m_next + size, 4096);
if (next > (u64)s_memory + s_memory_size)
{
LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align);
return nullptr;
}
#ifdef _WIN32
if (!VirtualAlloc(m_next, size, MEM_COMMIT, PAGE_READWRITE))
#else
if (::mprotect(m_next, size, PROT_READ | PROT_WRITE))
#endif
{
LOG_FATAL(GENERAL, "LLVM: Failed to allocate memory at 0x%p", m_next);
return nullptr;
}
LOG_SUCCESS(GENERAL, "LLVM: Data section %u '%s' allocated -> 0x%p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), m_next, size, align, is_ro ? "ro" : "rw");
return (u8*)std::exchange(m_next, (void*)next);
}
virtual bool finalizeMemory(std::string* = nullptr) override
{
// TODO: make sections read-only when necessary
return false;
}
virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override
{
s_unwind_info = addr;
s_unwind_size = size;
return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size);
}
virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override
{
LOG_ERROR(GENERAL, "deregisterEHFrames() called"); // Not expected
return RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size);
}
~MemoryManager()
{
#ifdef _WIN32
if (!RtlDeleteFunctionTable(s_unwind.data()))
{
LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError());
}
if (!VirtualFree(s_memory, 0, MEM_DECOMMIT))
{
LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError());
}
#else
if (::mprotect(s_memory, s_memory_size, PROT_NONE))
{
LOG_FATAL(GENERAL, "mprotect(0x%p) failed! Error %d", s_memory, errno);
}
// TODO: unregister EH frames if necessary
#endif
}
private:
void* m_next = s_memory;
};
// Helper class
struct EventListener final : llvm::JITEventListener
{
virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
{
const llvm::StringRef elf = obj.getData();
fs::file(fs::get_config_dir() + "LLVM.obj", fs::rewrite)
.write(elf.data(), elf.size());
}
};
static EventListener s_listener;
jit_compiler::jit_compiler(std::unique_ptr<llvm::Module>&& _module, std::unordered_map<std::string, std::uintptr_t>&& table)
{
EXPECTS(s_memory);
std::string result;
const auto module_ptr = _module.get();
// Initialization
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
LLVMLinkInMCJIT();
m_engine.reset(llvm::EngineBuilder(std::move(_module))
.setErrorStr(&result)
.setMCJITMemoryManager(std::make_unique<MemoryManager>(std::move(table)))
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setRelocationModel(llvm::Reloc::PIC_)
.setCodeModel((u64)s_memory <= 0x60000000 ? llvm::CodeModel::Medium : llvm::CodeModel::Large) // TODO
.setMCPU(llvm::sys::getHostCPUName())
.create());
if (!m_engine)
{
throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result);
}
m_engine->setProcessAllSections(true); // ???
m_engine->RegisterJITEventListener(&s_listener);
m_engine->finalizeObject();
for (auto& func : module_ptr->functions())
{
if (!func.empty())
{
const std::string& name = func.getName();
// Register compiled function
m_map[name] = m_engine->getFunctionAddress(name);
}
// Delete IR to lower memory consumption
func.deleteBody();
}
#ifdef _WIN32
// Register .xdata UNWIND_INFO (.pdata section is empty for some reason)
std::set<u64> func_set;
for (const auto& pair : m_map)
{
func_set.emplace(pair.second);
}
// Hack (cannot obtain last function size)
func_set.emplace(::align(*--func_set.end() + 4096, 4096));
const u64 base = (u64)s_memory;
const u8* bits = s_unwind_info;
s_unwind.clear();
s_unwind.reserve(m_map.size());
for (auto it = func_set.begin(), end = --func_set.end(); it != end; it++)
{
const u64 addr = *it;
const u64 next = *func_set.upper_bound(addr);
// Generate RUNTIME_FUNCTION record
RUNTIME_FUNCTION uw;
uw.BeginAddress = static_cast<u32>(addr - base);
uw.EndAddress = static_cast<u32>(next - base);
uw.UnwindData = static_cast<u32>((u64)bits - base);
s_unwind.emplace_back(uw);
// Parse .xdata UNWIND_INFO record
const u8 flags = *bits++; // Version and flags
const u8 prolog = *bits++; // Size of prolog
const u8 count = *bits++; // Count of unwind codes
const u8 frame = *bits++; // Frame Reg + Off
bits += ::align(std::max<u8>(1, count), 2) * sizeof(u16); // UNWIND_CODE array
if (flags != 1)
{
LOG_ERROR(GENERAL, "LLVM: unsupported UNWIND_INFO version/flags (0x%02x)", flags);
break;
}
LOG_TRACE(GENERAL, "LLVM: .xdata at 0x%llx: function 0x%x..0x%x: p0x%02x, c0x%02x, f0x%02x", uw.UnwindData + base, uw.BeginAddress + base, uw.EndAddress + base, prolog, count, frame);
}
if (s_unwind_info + s_unwind_size != bits)
{
LOG_FATAL(GENERAL, "LLVM: .xdata analysis failed! (0x%p != 0x%p)", s_unwind_info + s_unwind_size, bits);
}
else if (!RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base))
{
LOG_FATAL(GENERAL, "RtlAddFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError());
}
else
{
LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size);
}
#endif
}
jit_compiler::~jit_compiler()
{
}
#endif

50
Utilities/JIT.h Normal file
View File

@ -0,0 +1,50 @@
#pragma once
#ifdef LLVM_AVAILABLE
#include <memory>
#include <string>
#include <unordered_map>
#include "types.h"
#ifdef _MSC_VER
#pragma warning(push, 0)
#endif
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
extern llvm::LLVMContext g_llvm_ctx;
// Temporary compiler interface
class jit_compiler final
{
// Execution instance
std::unique_ptr<llvm::ExecutionEngine> m_engine;
// Compiled functions
std::unordered_map<std::string, std::uintptr_t> m_map;
public:
jit_compiler(std::unique_ptr<llvm::Module>&&, std::unordered_map<std::string, std::uintptr_t>&&);
~jit_compiler();
// Get compiled function address
std::uintptr_t get(const std::string& name) const
{
const auto found = m_map.find(name);
if (found != m_map.end())
{
return found->second;
}
return 0;
}
};
#endif

View File

@ -146,6 +146,9 @@ else()
else()
llvm_map_components_to_libnames(LLVM_LIBS mcjit vectorize ipo x86codegen x86disassembler mcdisassembler)
endif()
if (NOT MSVC)
set_source_files_properties(${RPCS3_SRC_DIR}/../Utilities/JIT.cpp PROPERTIES COMPILE_FLAGS -fno-rtti)
endif()
endif()
link_directories(

View File

@ -8,6 +8,36 @@
#include "PPUAnalyser.h"
#include "PPUModule.h"
#ifdef LLVM_AVAILABLE
#ifdef _MSC_VER
#pragma warning(push, 0)
#endif
#include "llvm/Support/FormattedStream.h"
#include "llvm/IR/LLVMContext.h"
//#include "llvm/IR/Dominators.h"
#include "llvm/IR/Verifier.h"
//#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LegacyPassManager.h"
//#include "llvm/IR/Module.h"
//#include "llvm/IR/Function.h"
//#include "llvm/Analysis/Passes.h"
//#include "llvm/Analysis/BasicAliasAnalysis.h"
//#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
//#include "llvm/Analysis/LoopInfo.h"
//#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Vectorize.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#include "Utilities/JIT.h"
#include "PPUTranslator.h"
#endif
enum class ppu_decoder_type
{
precise,
@ -322,54 +352,6 @@ void PPUThread::fast_call(u32 addr, u32 rtoc)
//}
}
#ifdef LLVM_AVAILABLE
#ifdef _MSC_VER
#pragma warning(push, 0)
#endif
//#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
//#include "llvm/Support/Host.h"
#include "llvm/Support/FormattedStream.h"
//#include "llvm/Support/Debug.h"
//#include "llvm/CodeGen/CommandFlags.h"
//#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LLVMContext.h"
//#include "llvm/IR/Dominators.h"
#include "llvm/IR/Verifier.h"
//#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LegacyPassManager.h"
//#include "llvm/IR/Module.h"
//#include "llvm/IR/Function.h"
//#include "llvm/Analysis/Passes.h"
//#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
//#include "llvm/Analysis/LoopInfo.h"
//#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
//#include "llvm/Object/ObjectFile.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#include "PPUTranslator.h"
#ifdef _WIN32
#include <Windows.h>
#else
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#endif
const ppu_decoder<ppu_itype> s_ppu_itype;
extern u64 get_timebased_time();
@ -386,326 +368,159 @@ extern __m128i sse_cellbe_lvrx(u64 addr);
extern void sse_cellbe_stvlx(u64 addr, __m128i a);
extern void sse_cellbe_stvrx(u64 addr, __m128i a);
struct Listener final : llvm::JITEventListener
[[noreturn]] static void ppu_trap(u64 addr)
{
virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
{
const llvm::StringRef elf = obj.getData();
fs::file(fs::get_config_dir() + "LLVM.obj", fs::rewrite)
.write(elf.data(), elf.size());
}
};
throw fmt::exception("Trap! (0x%llx)", addr);
}
static Listener s_listener;
// Memory size: 512 MB
static const u64 s_memory_size = 0x20000000;
// Try to reserve a portion of virtual memory in the first 2 GB address space, if possible.
static void* const s_memory = []() -> void*
static void ppu_trace(u64 addr)
{
#ifdef _WIN32
for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000)
LOG_NOTICE(PPU, "Trace: 0x%llx", addr);
}
static void ppu_hlecall(PPUThread& ppu, u32 index)
{
ppu_execute_function(ppu, index);
if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily
}
static void ppu_syscall(PPUThread& ppu, u64 code)
{
ppu_execute_syscall(ppu, code);
if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily
}
static u32 ppu_tbl()
{
return (u32)get_timebased_time();
}
static void ppu_call(PPUThread& ppu, u32 addr)
{
const auto found = s_ppu_compiled.find(addr);
if (found != s_ppu_compiled.end())
{
if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS))
{
return (void*)addr;
}
return found->second(ppu);
}
return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS);
const auto op = vm::read32(addr).value();
const auto itype = s_ppu_itype.decode(op);
// Allow HLE callbacks without compiling them
if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR())
{
return ppu_hlecall(ppu, op & 0x3ffffff);
}
ppu_trap(addr);
}
static __m128 sse_rcp_ps(__m128 A)
{
return _mm_rcp_ps(A);
}
static __m128 sse_rsqrt_ps(__m128 A)
{
return _mm_rsqrt_ps(A);
}
static float sse_rcp_ss(float A)
{
_mm_store_ss(&A, _mm_rcp_ss(_mm_load_ss(&A)));
return A;
}
static float sse_rsqrt_ss(float A)
{
_mm_store_ss(&A, _mm_rsqrt_ss(_mm_load_ss(&A)));
return A;
}
static u32 ppu_lwarx(u32 addr)
{
be_t<u32> reg_value;
vm::reservation_acquire(&reg_value, addr, sizeof(reg_value));
return reg_value;
}
static u64 ppu_ldarx(u32 addr)
{
be_t<u64> reg_value;
vm::reservation_acquire(&reg_value, addr, sizeof(reg_value));
return reg_value;
}
static bool ppu_stwcx(u32 addr, u32 reg_value)
{
const be_t<u32> data = reg_value;
return vm::reservation_update(addr, &data, sizeof(data));
}
static bool ppu_stdcx(u32 addr, u64 reg_value)
{
const be_t<u64> data = reg_value;
return vm::reservation_update(addr, &data, sizeof(data));
}
static bool adde_carry(u64 a, u64 b, bool c)
{
#ifdef _MSC_VER
return _addcarry_u64(c, a, b, nullptr) != 0;
#else
return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
bool result;
__asm__("addb $0xff, %[c] \n adcq %[a], %[b] \n setb %[result]" : [a] "+&r" (a), [b] "+&r" (b), [c] "+&r" (c), [result] "=r" (result));
return result;
#endif
}();
}
// EH frames
static u8* s_unwind_info;
static u64 s_unwind_size;
#ifdef _WIN32
// Custom .pdata section replacement
static std::vector<RUNTIME_FUNCTION> s_unwind;
#endif
struct MemoryManager final : llvm::RTDyldMemoryManager
// Interpreter call for simple vector instructions
static __m128i ppu_vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c)
{
static PPUThread* context(u64 addr) // Unused
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
ppu.VR[21].vi = _a;
ppu.VR[22].vi = _b;
ppu.VR[23].vi = _c;
ppu_opcode_t op{};
op.vd = 20;
op.va = 21;
op.vb = 22;
op.vc = 23;
func(ppu, op);
return ppu.VR[20].vi;
}
extern void ppu_initialize(const std::string& name, const std::vector<std::pair<u32, u32>>& funcs, u32 entry)
{
if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty())
{
//trace(addr);
return static_cast<PPUThread*>(get_current_cpu_thread());
return;
}
[[noreturn]] static void trap(u64 addr)
std::unordered_map<std::string, std::uintptr_t> link_table
{
LOG_ERROR(PPU, "Trap! (0x%llx)", addr);
throw fmt::exception("Trap! (0x%llx)", addr);
}
static void trace(u64 addr)
{
LOG_NOTICE(PPU, "Trace: 0x%llx", addr);
}
static void hlecall(PPUThread& ppu, u32 index)
{
ppu_execute_function(ppu, index);
if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily
}
static void syscall(PPUThread& ppu, u64 code)
{
ppu_execute_syscall(ppu, code);
if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily
}
static u32 tbl()
{
return (u32)get_timebased_time();
}
static void call(PPUThread& ppu, u32 addr)
{
const auto found = s_ppu_compiled.find(addr);
if (found != s_ppu_compiled.end())
{
return found->second(ppu);
}
const auto op = vm::read32(addr).value();
const auto itype = s_ppu_itype.decode(op);
// Allow HLE callbacks without compiling them
if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR())
{
return hlecall(ppu, op & 0x3ffffff);
}
trap(addr);
}
static __m128 sse_rcp_ps(__m128 A)
{
return _mm_rcp_ps(A);
}
static __m128 sse_rsqrt_ps(__m128 A)
{
return _mm_rsqrt_ps(A);
}
static float sse_rcp_ss(float A)
{
_mm_store_ss(&A, _mm_rcp_ss(_mm_load_ss(&A)));
return A;
}
static float sse_rsqrt_ss(float A)
{
_mm_store_ss(&A, _mm_rsqrt_ss(_mm_load_ss(&A)));
return A;
}
static u32 lwarx(u32 addr)
{
be_t<u32> reg_value;
vm::reservation_acquire(&reg_value, addr, sizeof(reg_value));
return reg_value;
}
static u64 ldarx(u32 addr)
{
be_t<u64> reg_value;
vm::reservation_acquire(&reg_value, addr, sizeof(reg_value));
return reg_value;
}
static bool stwcx(u32 addr, u32 reg_value)
{
const be_t<u32> data = reg_value;
return vm::reservation_update(addr, &data, sizeof(data));
}
static bool stdcx(u32 addr, u64 reg_value)
{
const be_t<u64> data = reg_value;
return vm::reservation_update(addr, &data, sizeof(data));
}
static bool adde_carry(u64 a, u64 b, bool c)
{
return _addcarry_u64(c, a, b, nullptr) != 0;
}
// Interpreter call for simple vector instructions
static __m128i vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
ppu.VR[21].vi = _a;
ppu.VR[22].vi = _b;
ppu.VR[23].vi = _c;
ppu_opcode_t op{};
op.vd = 20;
op.va = 21;
op.vb = 22;
op.vc = 23;
func(ppu, op);
return ppu.VR[20].vi;
}
// Interpreter call for simple vector instructions with immediate
static __m128i veciop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, __m128i _b)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
ppu.VR[22].vi = _b;
op.vd = 20;
op.vb = 22;
func(ppu, op);
return ppu.VR[20].vi;
}
// Interpreter call for FP instructions
static f64 fpop(decltype(&ppu_interpreter::UNK) func, f64 _a, f64 _b, f64 _c)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
ppu.FPR[21] = _a;
ppu.FPR[22] = _b;
ppu.FPR[23] = _c;
ppu_opcode_t op{};
op.frd = 20;
op.fra = 21;
op.frb = 22;
op.frc = 23;
func(ppu, op);
return ppu.FPR[20];
}
// Interpreter call for GPR instructions writing result to RA
static u64 aimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
const u64 a = ppu.GPR[op.ra];
const u64 s = ppu.GPR[op.rs];
ppu.GPR[op.rs] = _s;
func(ppu, op);
const u64 r = ppu.GPR[op.ra];
ppu.GPR[op.ra] = a;
ppu.GPR[op.rs] = s;
return r;
}
// Interpreter call for GPR instructions writing result to RA
static u64 aimmbop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _b)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
const u64 a = ppu.GPR[op.ra];
const u64 s = ppu.GPR[op.rs];
const u64 b = ppu.GPR[op.rb];
ppu.GPR[op.rs] = _s;
ppu.GPR[op.rb] = _b;
func(ppu, op);
const u64 r = ppu.GPR[op.ra];
ppu.GPR[op.ra] = a;
ppu.GPR[op.rs] = s;
ppu.GPR[op.rb] = b;
return r;
}
// Interpreter call for GPR instructions writing result to RA (destructive)
static u64 aaimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _a)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
const u64 s = ppu.GPR[op.rs];
const u64 a = ppu.GPR[op.ra];
ppu.GPR[op.rs] = _s;
ppu.GPR[op.ra] = _a;
func(ppu, op);
const u64 r = ppu.GPR[op.ra];
ppu.GPR[op.rs] = s;
ppu.GPR[op.ra] = a;
return r;
}
static u64 immaop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
const u64 a = ppu.GPR[op.ra];
const u64 d = ppu.GPR[op.rd];
ppu.GPR[op.ra] = _a;
func(ppu, op);
const u64 r = ppu.GPR[op.rd];
ppu.GPR[op.ra] = a;
ppu.GPR[op.rd] = d;
return r;
}
static u64 immabop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a, u64 _b)
{
PPUThread& ppu = static_cast<PPUThread&>(*get_current_cpu_thread());
const u64 a = ppu.GPR[op.ra];
const u64 b = ppu.GPR[op.rb];
const u64 d = ppu.GPR[op.rd];
ppu.GPR[op.ra] = _a;
ppu.GPR[op.rb] = _b;
func(ppu, op);
const u64 r = ppu.GPR[op.rd];
ppu.GPR[op.ra] = a;
ppu.GPR[op.rb] = b;
ppu.GPR[op.rd] = d;
return r;
}
// No operation on specific u64 value (silly optimization barrier)
static u64 nop64(u64 value)
{
return value;
}
std::unordered_map<std::string, u64> table
{
{ "__memory", (u64)vm::base(0) },
{ "__trap", (u64)&trap },
{ "__trace", (u64)&trace },
{ "__hlecall", (u64)&hlecall },
{ "__syscall", (u64)&syscall },
{ "__get_tbl", (u64)&tbl },
{ "__call", (u64)&call },
{ "__lwarx", (u64)&lwarx },
{ "__ldarx", (u64)&ldarx },
{ "__stwcx", (u64)&stwcx },
{ "__stdcx", (u64)&stdcx },
{ "__memory", (u64)vm::g_base_addr },
{ "__memptr", (u64)&vm::g_base_addr },
{ "__trap", (u64)&ppu_trap },
{ "__trace", (u64)&ppu_trace },
{ "__hlecall", (u64)&ppu_hlecall },
{ "__syscall", (u64)&ppu_syscall },
{ "__get_tbl", (u64)&ppu_tbl },
{ "__call", (u64)&ppu_call },
{ "__lwarx", (u64)&ppu_lwarx },
{ "__ldarx", (u64)&ppu_ldarx },
{ "__stwcx", (u64)&ppu_stwcx },
{ "__stdcx", (u64)&ppu_stdcx },
{ "__vec3op", (u64)&ppu_vec3op },
{ "__adde_get_ca", (u64)&adde_carry },
{ "__vexptefp", (u64)&sse_exp2_ps },
{ "__vlogefp", (u64)&sse_log2_ps },
{ "__vperm", (u64)&sse_altivec_vperm },
{ "__vrefp", (u64)&sse_rcp_ps },
{ "__vrsqrtefp", (u64)&sse_rsqrt_ps },
{ "__vec3op", (u64)&vec3op },
{ "__veciop", (u64)&veciop },
{ "__aimmop", (u64)&aimmop },
{ "__aimmbop", (u64)&aimmbop },
{ "__aaimmop", (u64)&aaimmop },
{ "__immaop", (u64)&immaop },
{ "__immabop", (u64)&immabop },
{ "__fpop", (u64)&fpop },
{ "__nop64", (u64)&nop64 },
{ "__lvsl", (u64)&sse_altivec_lvsl },
{ "__lvsr", (u64)&sse_altivec_lvsr },
{ "__lvlx", (u64)&sse_cellbe_lvlx },
@ -716,142 +531,20 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
{ "__frsqrte", (u64)&sse_rsqrt_ss },
};
virtual u64 getSymbolAddress(const std::string& name) override
{
if (u64 addr = RTDyldMemoryManager::getSymbolAddress(name))
{
LOG_ERROR(GENERAL, "LLVM: Linkage requested %s -> 0x%016llx", name, addr);
return addr;
}
const auto found = table.find(name);
if (found != table.end())
{
return found->second;
}
LOG_FATAL(GENERAL, "LLVM: Linkage failed for %s", name);
return (u64)trap;
}
virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
{
// Simple allocation (TODO)
const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096);
#ifdef _WIN32
if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE))
#else
if (::mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC))
#endif
{
LOG_FATAL(GENERAL, "LLVM: Failed to allocate code section '%s'", sec_name.data());
return nullptr;
}
LOG_SUCCESS(GENERAL, "LLVM: Code section '%s' allocated -> 0x%p", sec_name.data(), ptr);
return (u8*)ptr;
}
virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
{
// Simple allocation (TODO)
const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096);
#ifdef _WIN32
if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE))
#else
if (::mprotect(ptr, size, PROT_READ | PROT_WRITE))
#endif
{
LOG_FATAL(GENERAL, "LLVM: Failed to allocate data section '%s'", sec_name.data());
return nullptr;
}
LOG_SUCCESS(GENERAL, "LLVM: Data section '%s' allocated -> 0x%p", sec_name.data(), ptr);
return (u8*)ptr;
}
virtual bool finalizeMemory(std::string* = nullptr) override
{
// TODO: make sections read-only when necessary
return false;
}
virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override
{
s_unwind_info = addr;
s_unwind_size = size;
return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size);
}
virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override
{
LOG_ERROR(GENERAL, "deregisterEHFrames() called"); // Not expected
return RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size);
}
~MemoryManager()
{
#ifdef _WIN32
if (!RtlDeleteFunctionTable(s_unwind.data()))
{
LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError());
}
if (!VirtualFree(s_memory, 0, MEM_DECOMMIT))
{
LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError());
}
#else
if (::mprotect(s_memory, s_memory_size, PROT_NONE))
{
LOG_FATAL(GENERAL, "mprotect(0x%p) failed! Error %d", s_memory, errno);
}
// TODO: unregister EH frames if necessary
#endif
}
private:
void* m_next = s_memory;
};
llvm::LLVMContext g_context;
extern void ppu_initialize(const std::string& name, const std::vector<std::pair<u32, u32>>& funcs, u32 entry)
{
if (!s_memory)
{
throw std::runtime_error("LLVM: Memory not allocated, report to the developers." HERE);
}
if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty())
{
return;
}
#ifdef LLVM_AVAILABLE
using namespace llvm;
// Initialization
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
LLVMLinkInMCJIT();
// Create LLVM module
std::unique_ptr<Module> module = std::make_unique<Module>(name, g_context);
std::unique_ptr<Module> module = std::make_unique<Module>(name, g_llvm_ctx);
// Initialize target
module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
// Initialize translator
std::unique_ptr<PPUTranslator> translator = std::make_unique<PPUTranslator>(g_context, module.get(), 0, entry);
std::unique_ptr<PPUTranslator> translator = std::make_unique<PPUTranslator>(g_llvm_ctx, module.get(), 0, entry);
// Define some types
const auto _void = Type::getVoidTy(g_context);
const auto _void = Type::getVoidTy(g_llvm_ctx);
const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false);
// Initialize function list
@ -859,7 +552,9 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
{
if (info.second)
{
translator->AddFunction(info.first, cast<Function>(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func)));
const auto f = cast<Function>(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func));
f->addAttribute(1, Attribute::NoAlias);
translator->AddFunction(info.first, f);
}
translator->AddBlockInfo(info.first);
@ -874,18 +569,16 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
pm.add(createTailCallEliminationPass());
pm.add(createReassociatePass());
pm.add(createInstructionCombiningPass());
//pm.add(new DominatorTreeWrapperPass());
//pm.add(createInstructionCombiningPass());
//pm.add(new MemoryDependenceAnalysis());
//pm.add(createBasicAAWrapperPass());
pm.add(new MemoryDependenceAnalysis());
pm.add(createLICMPass());
pm.add(createLoopInstSimplifyPass());
pm.add(createGVNPass());
pm.add(createDeadStoreEliminationPass());
//pm.add(createGVNPass());
//pm.add(createBBVectorizePass());
//pm.add(new LoopInfo());
//pm.add(new ScalarEvolution());
pm.add(createSCCPPass());
//pm.addPass(new SyscallAnalysisPass()); // Requires constant propagation
pm.add(createInstructionCombiningPass());
pm.add(createInstructionSimplifierPass());
pm.add(createAggressiveDCEPass());
pm.add(createCFGSimplificationPass());
//pm.add(createLintPass()); // Check
@ -899,28 +592,16 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
}
}
//static auto s_current = &PPUTranslator::UNK;
//for (const auto& info : s_test)
//{
// const u64 pseudo_addr = (u64)&info.first + INT64_MIN;
// s_current = info.second;
// const auto func = translator->TranslateToIR(pseudo_addr, pseudo_addr, nullptr, [](PPUTranslator* _this)
// {
// (_this->*s_current)(op);
// _this->ReturnFromFunction();
// });
// pm.run(*func);
//}
legacy::PassManager mpm;
// Remove unused functions, structs, global variables, etc
mpm.add(createStripDeadPrototypesPass());
//mpm.add(createFunctionInliningPass());
mpm.run(*module);
// TODO: replacing __syscall/__hlecall
// TODO: improve __call and s_ppu_compiled
std::string result;
raw_string_ostream out(result);
@ -941,25 +622,14 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
Module* module_ptr = module.get();
std::shared_ptr<ExecutionEngine> engine(EngineBuilder(std::move(module))
.setErrorStr(&result)
.setMCJITMemoryManager(std::make_unique<MemoryManager>())
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setRelocationModel(Reloc::PIC_)
.setCodeModel((u64)s_memory <= 0x60000000 ? CodeModel::Medium : CodeModel::Large)
.setMCPU(sys::getHostCPUName())
.create());
const auto jit = fxm::make<jit_compiler>(std::move(module), std::move(link_table));
if (!engine)
if (!jit)
{
throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result);
LOG_FATAL(PPU, "LLVM: Multiple modules are not yet supported");
return;
}
engine->setProcessAllSections(true);
//engine->setVerifyModules(true);
engine->RegisterJITEventListener(&s_listener);
engine->finalizeObject();
s_ppu_compiled.clear();
// Get function addresses
@ -967,72 +637,13 @@ extern void ppu_initialize(const std::string& name, const std::vector<std::pair<
{
if (info.second)
{
const std::uintptr_t link = engine->getFunctionAddress(fmt::format("__sub_%x", info.first));
const std::uintptr_t link = jit->get(fmt::format("__sub_%x", info.first));
s_ppu_compiled.emplace(info.first, (void(*)(PPUThread&))link);
LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", info.first, link, info.first, info.second);
}
}
// Delete IR to lower memory consumption
for (auto& func : module_ptr->functions())
{
func.deleteBody();
}
#ifdef _WIN32
// Register .xdata UNWIND_INFO (.pdata section is empty for some reason)
std::set<u64> func_set;
for (const auto& pair : s_ppu_compiled)
{
// Get addresses
func_set.emplace((u64)pair.second);
}
func_set.emplace(::align(*--func_set.end() + 4096, 4096));
const u64 base = (u64)s_memory;
const u8* bits = s_unwind_info;
s_unwind.clear();
s_unwind.reserve(s_ppu_compiled.size());
for (auto it = func_set.begin(), end = --func_set.end(); it != end; it++)
{
const u64 addr = *it;
const u64 next = *func_set.upper_bound(addr);
// Generate RUNTIME_FUNCTION record
RUNTIME_FUNCTION uw;
uw.BeginAddress = static_cast<u32>(addr - base);
uw.EndAddress = static_cast<u32>(next - base);
uw.UnwindData = static_cast<u32>((u64)bits - base);
s_unwind.emplace_back(uw);
// Parse .xdata record
VERIFY(*bits++ == 1); // Version and flags
bits++; // Size of prolog
const u8 count = *bits++; // Count of unwind codes
bits++; // Frame Reg + Off
bits += ::align(count, 2) * sizeof(u16); // UNWIND_CODE array
while (!*bits && bits < s_unwind_info + s_unwind_size) bits++; // Skip strange zero padding (???)
}
VERIFY(bits == s_unwind_info + s_unwind_size);
VERIFY(RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base));
LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size);
#endif
fxm::import<ExecutionEngine>(WRAP_EXPR(engine));
LOG_SUCCESS(PPU, "LLVM: Compilation finished (%s)", sys::getHostCPUName().data());
}
#else
extern void ppu_initialize(const std::string& name, const std::vector<std::pair<u32, u32>>& funcs, u32 entry)
{
}
#endif
}

View File

@ -1,5 +1,7 @@
#pragma once
#ifdef LLVM_AVAILABLE
#include <unordered_map>
#include <map>
#include <unordered_set>
@ -830,3 +832,5 @@ public:
void UNK(ppu_opcode_t op);
};
#endif

View File

@ -64,36 +64,19 @@
<AdditionalIncludeDirectories>..\llvm\include;..\llvm_build\include;</AdditionalIncludeDirectories>
</ClCompile>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug - MemLeak|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug - MemLeak|x64'">Updating git-version.h</Message>
</PreBuildEvent>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">Updating git-version.h</Message>
</PreBuildEvent>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Updating git-version.h</Message>
</PreBuildEvent>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Updating git-version.h</Message>
</PreBuildEvent>
<PreBuildEvent>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug - LLVM|x64'">%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug - LLVM|x64'">Updating git-version.h</Message>
<Command>%windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd"</Command>
<Message>Updating git-version.h</Message>
</PreBuildEvent>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\ps3emu_api\ps3emu_api.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug - LLVM|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug - MemLeak|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\AutoPause.cpp" />
<ClCompile Include="..\Utilities\dynamic_library.cpp" />
<ClCompile Include="..\Utilities\JIT.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\Log.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
@ -404,6 +387,7 @@
<ClInclude Include="..\Utilities\event.h" />
<ClInclude Include="..\Utilities\geometry.h" />
<ClInclude Include="..\Utilities\GSL.h" />
<ClInclude Include="..\Utilities\JIT.h" />
<ClInclude Include="..\Utilities\lockless.h" />
<ClInclude Include="..\Utilities\sync.h" />
<ClInclude Include="..\Utilities\Platform.h" />

View File

@ -872,6 +872,15 @@
<ClCompile Include="..\ps3emu_api\ps3emu_api.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Utilities\version.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="rpcs3_version.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Utilities\JIT.cpp">
<Filter>Utilities</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -1666,5 +1675,14 @@
<ClInclude Include="..\ps3emu_api\ps3emu_api.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Utilities\version.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="rpcs3_version.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Utilities\JIT.h">
<Filter>Utilities</Filter>
</ClInclude>
</ItemGroup>
</Project>