From 92f821aeb101beee9b6ce092384ba0ce69e75181 Mon Sep 17 00:00:00 2001 From: Eladash Date: Tue, 31 Mar 2020 20:01:10 +0300 Subject: [PATCH] PPU LLVM: Add FMA accuracy setting (#7874) * PPU LLVM : Match PS3 for the instructions fmadd, fmadds, fmsub, fmsubs, fnmadd, fnmadds, fnmsub, fnmsubs Co-authored-by: doesthisusername --- rpcs3/Emu/Cell/PPUThread.cpp | 5 ++ rpcs3/Emu/Cell/PPUTranslator.cpp | 109 +++++++++++++++++++++++++++---- rpcs3/Emu/Cell/PPUTranslator.h | 2 +- rpcs3/Emu/system_config.h | 1 + 4 files changed, 102 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 3a5e1065f3..439f55e8d9 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1531,6 +1531,7 @@ extern void ppu_initialize(const ppu_module& info) enum class ppu_settings : u32 { non_win32, + accurate_fma, __bitset_enum_max }; @@ -1540,6 +1541,10 @@ extern void ppu_initialize(const ppu_module& info) #ifndef _WIN32 settings += ppu_settings::non_win32; #endif + if (g_cfg.core.ppu_accurate_fma) + { + settings += ppu_settings::accurate_fma; + } // Write version, hash, CPU, settings fmt::append(obj_name, "v3-tane-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 551e479323..c428c652f0 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -1,5 +1,6 @@ #ifdef LLVM_AVAILABLE +#include "Emu/system_config.h" #include "PPUTranslator.h" #include "PPUThread.h" #include "PPUInterpreter.h" @@ -3878,8 +3879,18 @@ void PPUTranslator::FMADDS(ppu_opcode_t op) const auto a = GetFpr(op.fra); const auto b = GetFpr(op.frb); const auto c = GetFpr(op.frc); - const auto result = m_ir->CreateFPTrunc(m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b), GetType()); - SetFpr(op.frd, result); + + llvm::Value* result; + if (g_cfg.core.ppu_accurate_fma) + { + result = 
m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, b}); + } + else + { + result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b); + } + + SetFpr(op.frd, m_ir->CreateFPTrunc(result, GetType())); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadds_get_fr", a, b, c)); //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadds_get_fi", a, b, c)); @@ -3896,8 +3907,18 @@ void PPUTranslator::FMSUBS(ppu_opcode_t op) const auto a = GetFpr(op.fra); const auto b = GetFpr(op.frb); const auto c = GetFpr(op.frc); - const auto result = m_ir->CreateFPTrunc(m_ir->CreateFSub(m_ir->CreateFMul(a, c), b), GetType()); - SetFpr(op.frd, result); + + llvm::Value* result; + if (g_cfg.core.ppu_accurate_fma) + { + result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); + } + else + { + result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b); + } + + SetFpr(op.frd, m_ir->CreateFPTrunc(result, GetType())); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ??? //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadds_get_fi", a, b, c)); @@ -3914,8 +3935,18 @@ void PPUTranslator::FNMSUBS(ppu_opcode_t op) const auto a = GetFpr(op.fra); const auto b = GetFpr(op.frb); const auto c = GetFpr(op.frc); - const auto result = m_ir->CreateFPTrunc(m_ir->CreateFNeg(m_ir->CreateFSub(m_ir->CreateFMul(a, c), b)), GetType()); - SetFpr(op.frd, result); + + llvm::Value* result; + if (g_cfg.core.ppu_accurate_fma) + { + result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); + } + else + { + result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b); + } + + SetFpr(op.frd, m_ir->CreateFNeg(m_ir->CreateFPTrunc(result, GetType()))); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ??? 
//SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadds_get_fi", a, b, c)); @@ -3932,8 +3963,18 @@ void PPUTranslator::FNMADDS(ppu_opcode_t op) const auto a = GetFpr(op.fra); const auto b = GetFpr(op.frb); const auto c = GetFpr(op.frc); - const auto result = m_ir->CreateFPTrunc(m_ir->CreateFNeg(m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b)), GetType()); - SetFpr(op.frd, result); + + llvm::Value* result; + if (g_cfg.core.ppu_accurate_fma) + { + result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, b}); + } + else + { + result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b); + } + + SetFpr(op.frd, m_ir->CreateFNeg(m_ir->CreateFPTrunc(result, GetType()))); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ??? //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadds_get_fi", a, b, c)); @@ -4182,7 +4223,17 @@ void PPUTranslator::FMSUB(ppu_opcode_t op) const auto a = GetFpr(op.fra); const auto b = GetFpr(op.frb); const auto c = GetFpr(op.frc); - const auto result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b); + + llvm::Value* result; + if (g_cfg.core.ppu_accurate_fma) + { + result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); + } + else + { + result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b); + } + SetFpr(op.frd, result); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ??? 
@@ -4200,7 +4251,17 @@ void PPUTranslator::FMADD(ppu_opcode_t op) const auto a = GetFpr(op.fra); const auto b = GetFpr(op.frb); const auto c = GetFpr(op.frc); - const auto result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b); + + llvm::Value* result; + if (g_cfg.core.ppu_accurate_fma) + { + result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), { a, c, b }); + } + else + { + result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b); + } + SetFpr(op.frd, result); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadd_get_fr", a, b, c)); @@ -4218,8 +4279,18 @@ void PPUTranslator::FNMSUB(ppu_opcode_t op) const auto a = GetFpr(op.fra); const auto b = GetFpr(op.frb); const auto c = GetFpr(op.frc); - const auto result = m_ir->CreateFNeg(m_ir->CreateFSub(m_ir->CreateFMul(a, c), b)); - SetFpr(op.frd, result); + + llvm::Value* result; + if (g_cfg.core.ppu_accurate_fma) + { + result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); + } + else + { + result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b); + } + + SetFpr(op.frd, m_ir->CreateFNeg(result)); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ??? //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadd_get_fi", a, b, c)); @@ -4236,8 +4307,18 @@ void PPUTranslator::FNMADD(ppu_opcode_t op) const auto a = GetFpr(op.fra); const auto b = GetFpr(op.frb); const auto c = GetFpr(op.frc); - const auto result = m_ir->CreateFNeg(m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b)); - SetFpr(op.frd, result); + + llvm::Value* result; + if (g_cfg.core.ppu_accurate_fma) + { + result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, b}); + } + else + { + result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b); + } + + SetFpr(op.frd, m_ir->CreateFNeg(result)); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ??? 
//SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadd_get_fi", a, b, c)); diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index a459cb4aef..6e7e6104d3 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #ifdef LLVM_AVAILABLE diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index a7265a1637..68547d7f93 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -45,6 +45,7 @@ struct cfg_root : cfg::node cfg::_enum enable_TSX{ this, "Enable TSX", tsx_usage::enabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false }; cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true }; + cfg::_bool ppu_accurate_fma{ this, "PPU Accurate FMA", true }; // Enable accurate FMA for CPUs which do not support it natively (can't be disabled for CPUs which do support it) cfg::_bool debug_console_mode{ this, "Debug Console Mode", false }; // Debug console emulation, not recommended cfg::_enum lib_loading{ this, "Lib Loader", lib_loading_type::liblv2only };