From d873802b9c7253239c291f0589559eaf7ca3db64 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 29 Mar 2019 16:35:00 +0300 Subject: [PATCH] Use LLVM 9 Use new add/sub with saturation intrinsics --- 3rdparty/llvm.cmake | 8 +-- Utilities/JIT.cpp | 2 +- llvm | 2 +- rpcs3/Emu/CPU/CPUTranslator.h | 118 +++++++++++++++++++++++++------ rpcs3/Emu/Cell/PPUThread.cpp | 2 +- rpcs3/Emu/Cell/PPUTranslator.cpp | 92 ++++++++++-------------- rpcs3/Emu/Cell/PPUTranslator.h | 2 +- rpcs3/Emu/Cell/SPURecompiler.cpp | 12 ++-- rpcs3_llvm.props | 2 +- 9 files changed, 148 insertions(+), 92 deletions(-) diff --git a/3rdparty/llvm.cmake b/3rdparty/llvm.cmake index 320b07f27c..847ca6eb2e 100644 --- a/3rdparty/llvm.cmake +++ b/3rdparty/llvm.cmake @@ -26,7 +26,7 @@ if(WITH_LLVM) set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD}) # now tries to find LLVM again - find_package(LLVM 8.0 CONFIG) + find_package(LLVM 9.0 CONFIG) if(NOT LLVM_FOUND) message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`") endif() @@ -39,11 +39,11 @@ if(WITH_LLVM) set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR}) endif() - find_package(LLVM 8.0 CONFIG) + find_package(LLVM 9.0 CONFIG) if (NOT LLVM_FOUND) - if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 8) - message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 8.0. \ + if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 9) + message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 9.0. \ Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.") endif() diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 1e876b5f68..8de280bc4f 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -570,7 +570,7 @@ struct EventListener : llvm::JITEventListener { } - void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override + void notifyObjectLoaded(ObjectKey K, const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override { #ifdef _WIN32 for (auto it = obj.section_begin(), end = obj.section_end(); it != end; ++it) diff --git a/llvm b/llvm index 5c906fd169..71ca0f4f29 160000 --- a/llvm +++ b/llvm @@ -1 +1 @@ -Subproject commit 5c906fd1694e3c8f0b9548581d275ef01dc0972a +Subproject commit 71ca0f4f293dbfda4e73cc17ae5e60a9070e43a0 diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index f1488e1132..d574a096d6 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -992,13 +992,6 @@ public: return (b ^ s) & ~(a ^ b); } - // Get signed subtraction overflow into the sign bit (d = a - b) - template - static inline auto sborrow(T a, T b, T d) - { - return (a ^ b) & (a ^ d); - } - // Bitwise select (c ? a : b) template static inline auto merge(T c, T a, T b) @@ -1014,12 +1007,96 @@ public: return a << (b & mask) | a >> (-b & mask); } - // Rotate left + // Add with saturation template - static inline auto rol(T a, u64 b) + inline auto add_sat(T a, T b) { - static constexpr u64 mask = value_t::esize - 1; - return a << (b & mask) | a >> ((0 - b) & mask); + value_t result; + const auto eva = a.eval(m_ir); + const auto evb = b.eval(m_ir); + + // Compute constant result immediately if possible + if (llvm::isa(eva) && llvm::isa(evb)) + { + static_assert(result.is_sint || result.is_uint); + + if constexpr (result.is_sint) + { + llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2); + if constexpr (result.is_vector != 0) + cast_to = llvm::VectorType::get(cast_to, result.is_vector); + + const auto axt = m_ir->CreateSExt(eva, cast_to); + const auto bxt = m_ir->CreateSExt(evb, cast_to); + result.value = m_ir->CreateAdd(axt, bxt); + const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize)); + const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize)); + const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max; + const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min; + result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value); + result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value); + result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context)); + } + else + { + const auto _max = m_ir->getInt(llvm::APInt::getMaxValue(result.esize)); + const auto ones = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max; + result.value = m_ir->CreateAdd(eva, evb); + result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(result.value, eva), ones, result.value); + } + } + else + { + result.value = m_ir->CreateCall(get_intrinsic(result.is_sint ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat), {eva, evb}); + } + + return result; + } + + // Subtract with saturation + template + inline auto sub_sat(T a, T b) + { + value_t result; + const auto eva = a.eval(m_ir); + const auto evb = b.eval(m_ir); + + // Compute constant result immediately if possible + if (llvm::isa(eva) && llvm::isa(evb)) + { + static_assert(result.is_sint || result.is_uint); + + if constexpr (result.is_sint) + { + llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2); + if constexpr (result.is_vector != 0) + cast_to = llvm::VectorType::get(cast_to, result.is_vector); + + const auto axt = m_ir->CreateSExt(eva, cast_to); + const auto bxt = m_ir->CreateSExt(evb, cast_to); + result.value = m_ir->CreateSub(axt, bxt); + const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize)); + const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize)); + const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max; + const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min; + result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value); + result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value); + result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context)); + } + else + { + const auto _min = m_ir->getInt(llvm::APInt::getMinValue(result.esize)); + const auto zero = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min; + result.value = m_ir->CreateSub(eva, evb); + result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(eva, evb), zero, result.value); + } + } + else + { + result.value = m_ir->CreateCall(get_intrinsic(result.is_sint ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat), {eva, evb}); + } + + return result; } // Average: (a + b + 1) >> 1 @@ -1029,18 +1106,15 @@ public: //return (a >> 1) + (b >> 1) + ((a | b) & 1); value_t result; - llvm::Instruction::CastOps cast_op = llvm::Instruction::BitCast; - if (result.is_sint) - cast_op = llvm::Instruction::SExt; - if (result.is_uint) - cast_op = llvm::Instruction::ZExt; - llvm::Type* cast_t = m_ir->getIntNTy(result.esize * 2); - if (result.is_vector != 0) - cast_t = llvm::VectorType::get(cast_t, result.is_vector); + static_assert(result.is_sint || result.is_uint); + const auto cast_op = result.is_sint ? llvm::Instruction::SExt : llvm::Instruction::ZExt; + llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2); + if constexpr (result.is_vector != 0) + cast_to = llvm::VectorType::get(cast_to, result.is_vector); - const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_t); - const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_t); - const auto cxt = llvm::ConstantInt::get(cast_t, 1, false); + const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_to); + const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_to); + const auto cxt = llvm::ConstantInt::get(cast_to, 1, false); const auto abc = m_ir->CreateAdd(m_ir->CreateAdd(axt, bxt), cxt); result.value = m_ir->CreateTrunc(m_ir->CreateLShr(abc, 1), result.get_type(m_context)); return result; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 8b1d806cba..fa8319d5b1 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1690,7 +1690,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co { if (func.size) { - const auto f = cast(module->getOrInsertFunction(func.name, _func)); + const auto f = cast(module->getOrInsertFunction(func.name, _func).getCallee()); f->addAttribute(1, Attribute::NoAlias); } } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 285edb123f..2046637a21 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -267,7 +267,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) return; } - indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type); + indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type).getCallee(); } else { @@ -597,33 +597,27 @@ void PPUTranslator::VADDSBS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto s = eval(a + b); - const auto z = eval((a >> 7) ^ 0x7f); - const auto x = eval(scarry(a, b, s) >> 7); - set_vr(op.vd, eval(merge(x, z, s))); - SetSat(IsNotZero(x.value)); + const auto r = add_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a + b)).value)); } void PPUTranslator::VADDSHS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto s = eval(a + b); - const auto z = eval((a >> 15) ^ 0x7fff); - const auto x = eval(scarry(a, b, s) >> 15); - set_vr(op.vd, eval(merge(x, z, s))); - SetSat(IsNotZero(x.value)); + const auto r = add_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a + b)).value)); } void PPUTranslator::VADDSWS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto s = eval(a + b); - const auto z = eval((a >> 31) ^ 0x7fffffff); - const auto x = eval(scarry(a, b, s) >> 31); - set_vr(op.vd, eval(merge(x, z, s))); - SetSat(IsNotZero(x.value)); + const auto r = add_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a + b)).value)); } void PPUTranslator::VADDUBM(ppu_opcode_t op) @@ -637,10 +631,9 @@ void PPUTranslator::VADDUBS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto s = eval(a + b); - const auto x = eval(s < a); - set_vr(op.vd, select(x, splat(-1), s)); - SetSat(IsNotZero(x.value)); + const auto r = add_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a + b)).value)); } void PPUTranslator::VADDUHM(ppu_opcode_t op) @@ -654,10 +647,9 @@ void PPUTranslator::VADDUHS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto s = eval(a + b); - const auto x = eval(s < a); - set_vr(op.vd, select(x, splat(-1), s)); - SetSat(IsNotZero(x.value)); + const auto r = add_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a + b)).value)); } void PPUTranslator::VADDUWM(ppu_opcode_t op) @@ -671,10 +663,9 @@ void PPUTranslator::VADDUWS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto s = eval(a + b); - const auto x = eval(s < a); - set_vr(op.vd, select(x, splat(-1), s)); - SetSat(IsNotZero(x.value)); + const auto r = add_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a + b)).value)); } void PPUTranslator::VAND(ppu_opcode_t op) @@ -1491,33 +1482,27 @@ void PPUTranslator::VSUBSBS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto d = eval(a - b); - const auto z = eval((a >> 7) ^ 0x7f); - const auto x = eval(sborrow(a, b, d) >> 7); - set_vr(op.vd, eval(merge(x, z, d))); - SetSat(IsNotZero(x.value)); + const auto r = sub_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a - b)).value)); } void PPUTranslator::VSUBSHS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto d = eval(a - b); - const auto z = eval((a >> 15) ^ 0x7fff); - const auto x = eval(sborrow(a, b, d) >> 15); - set_vr(op.vd, eval(merge(x, z, d))); - SetSat(IsNotZero(x.value)); + const auto r = sub_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a - b)).value)); } void PPUTranslator::VSUBSWS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto d = eval(a - b); - const auto z = eval((a >> 31) ^ 0x7fffffff); - const auto x = eval(sborrow(a, b, d) >> 31); - set_vr(op.vd, eval(merge(x, z, d))); - SetSat(IsNotZero(x.value)); + const auto r = sub_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a - b)).value)); } void PPUTranslator::VSUBUBM(ppu_opcode_t op) @@ -1531,10 +1516,9 @@ void PPUTranslator::VSUBUBS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto d = eval(a - b); - const auto x = eval(a < b); - set_vr(op.vd, select(x, splat(0), d)); - SetSat(IsNotZero(x.value)); + const auto r = sub_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a - b)).value)); } void PPUTranslator::VSUBUHM(ppu_opcode_t op) @@ -1548,10 +1532,9 @@ void PPUTranslator::VSUBUHS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto d = eval(a - b); - const auto x = eval(a < b); - set_vr(op.vd, select(x, splat(0), d)); - SetSat(IsNotZero(x.value)); + const auto r = sub_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a - b)).value)); } void PPUTranslator::VSUBUWM(ppu_opcode_t op) @@ -1565,10 +1548,9 @@ void PPUTranslator::VSUBUWS(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - const auto d = eval(a - b); - const auto x = eval(a < b); - set_vr(op.vd, select(x, splat(0), d)); - SetSat(IsNotZero(x.value)); + const auto r = sub_sat(a, b); + set_vr(op.vd, r); + SetSat(IsNotZero(eval(r != (a - b)).value)); } void PPUTranslator::VSUMSWS(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index c22ecb9658..88f3edbe21 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -302,7 +302,7 @@ public: llvm::CallInst* Call(llvm::Type* ret, llvm::AttributeList attr, llvm::StringRef name, Args... args) { // Call the function - return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...), {args...}); + return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...).getCallee(), {args...}); } // Call a function diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 8e666e5201..bd4c71f1d4 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -2193,7 +2193,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { // Get function chunk name const std::string name = fmt::format("spu-chunk-0x%05x", addr); - llvm::Function* result = llvm::cast(m_module->getOrInsertFunction(name, get_type(), get_type(), get_type(), get_type())); + llvm::Function* result = llvm::cast(m_module->getOrInsertFunction(name, get_ftype()).getCallee()); // Set parameters result->setLinkage(llvm::GlobalValue::InternalLinkage); @@ -3089,7 +3089,7 @@ public: m_ir = &irb; // Add entry function (contains only state/code check) - const auto main_func = llvm::cast(m_module->getOrInsertFunction(hash, get_type(), get_type(), get_type(), get_type())); + const auto main_func = llvm::cast(m_module->getOrInsertFunction(hash, get_ftype()).getCallee()); const auto main_arg2 = &*(main_func->arg_begin() + 2); set_function(main_func); @@ -3423,7 +3423,7 @@ public: std::vector chunks; chunks.reserve(m_size / 4); - const auto null = cast(module->getOrInsertFunction("spu-null", get_type(), get_type(), get_type(), get_type())); + const auto null = cast(module->getOrInsertFunction("spu-null", get_ftype()).getCallee()); null->setLinkage(llvm::GlobalValue::InternalLinkage); set_function(null); m_ir->CreateRetVoid(); @@ -3599,7 +3599,7 @@ public: m_function_table = new GlobalVariable(*m_module, ArrayType::get(if_type->getPointerTo(), 1u << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr); // Add return function - const auto ret_func = cast(module->getOrInsertFunction("spu_ret", if_type)); + const auto ret_func = cast(module->getOrInsertFunction("spu_ret", if_type).getCallee()); ret_func->setCallingConv(CallingConv::GHC); ret_func->setLinkage(GlobalValue::InternalLinkage); m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", ret_func)); @@ -3609,7 +3609,7 @@ public: m_ir->CreateRetVoid(); // Add entry function, serves as a trampoline - const auto main_func = llvm::cast(m_module->getOrInsertFunction("spu_interpreter", get_ftype())); + const auto main_func = llvm::cast(m_module->getOrInsertFunction("spu_interpreter", get_ftype()).getCallee()); set_function(main_func); // Load pc and opcode @@ -3681,7 +3681,7 @@ public: } // Decode instruction name, access function - const auto f = cast(module->getOrInsertFunction(fname, if_type)); + const auto f = cast(module->getOrInsertFunction(fname, if_type).getCallee()); // Build if necessary if (f->empty()) diff --git a/rpcs3_llvm.props b/rpcs3_llvm.props index edca3c4b0d..e5317f5008 100644 --- a/rpcs3_llvm.props +++ b/rpcs3_llvm.props @@ -10,7 +10,7 @@ ..\llvm_build\Debug\lib ..\llvm_build\Release\lib - LLVMProfileData.lib;LLVMDebugInfoCodeView.lib;LLVMDebugInfoMSF.lib;LLVMInstrumentation.lib;LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMGlobalISel.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib;LLVMipo.lib;LLVMBinaryFormat.lib;LLVMPasses.lib;LLVMIRReader.lib;LLVMLinker.lib;LLVMAsmParser.lib + LLVMProfileData.lib;LLVMDebugInfoCodeView.lib;LLVMDebugInfoMSF.lib;LLVMInstrumentation.lib;LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMGlobalISel.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib;LLVMipo.lib;LLVMBinaryFormat.lib;LLVMPasses.lib;LLVMIRReader.lib;LLVMLinker.lib;LLVMAsmParser.lib;LLVMDemangle.lib;LLVMDebugInfoDWARF.lib