From ac28b89fa54dd01ef09c647492a508e9dec30fc2 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 9 Jun 2021 20:16:41 +0200 Subject: [PATCH] NetPlay/Jit64: Avoid using software FMA When I added the software FMA path in 2c38d64 and made us use it when determinism is enabled, I was assuming that either the performance impact of software FMA wouldn't be too large or CPUs that were too old to have FMA instructions were too slow to run Dolphin well anyway. This was wrong. To give an example, the netplay performance went from 60 FPS to 30 FPS in one case. This change makes netplay clients negotiate whether FMA should be used. If all clients use an x64 CPU that supports FMA, or AArch64, then FMA is enabled, and otherwise FMA is disabled. In other words, we sacrifice accuracy if needed to avoid massive slowdown, but not otherwise. When not using netplay, whether to enable FMA is simply based on whether the host CPU supports it. The only remaining case where the software FMA path gets used under normal circumstances is when an input recording is created on a CPU with FMA support and then played back on a CPU without. This is not an especially common scenario (though it can happen), and TASers are generally less picky about performance and more picky about accuracy than other users anyway. With this change, FMA desyncs are avoided between AArch64 and modern x64 CPUs (unlike before 2c38d64), but we do get FMA desyncs between AArch64 and old x64 CPUs (like before 2c38d64). This desync can be avoided by adding a non-FMA path to JitArm64 as an option, which I will wait with for another pull request so that we can get the performance regression fixed as quickly as possible. https://bugs.dolphin-emu.org/issues/12542 --- Source/Core/Common/Config/Config.cpp | 4 ++- Source/Core/Common/Config/Enums.h | 1 + Source/Core/Core/CMakeLists.txt | 2 ++ Source/Core/Core/Config/SessionSettings.cpp | 14 +++++++++ Source/Core/Core/Config/SessionSettings.h | 13 ++++++++ .../Core/ConfigLoaders/BaseConfigLoader.cpp | 4 +++ .../Core/ConfigLoaders/GameConfigLoader.cpp | 5 +++- .../Core/ConfigLoaders/MovieConfigLoader.cpp | 7 ++++- .../ConfigLoaders/NetPlayConfigLoader.cpp | 4 +++ Source/Core/Core/Movie.h | 3 +- Source/Core/Core/NetPlayClient.cpp | 13 +++++--- Source/Core/Core/NetPlayProto.h | 3 +- Source/Core/Core/NetPlayServer.cpp | 22 ++++++++++---- Source/Core/Core/NetPlayServer.h | 2 ++ .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 30 ++++++++++++------- Source/Core/DolphinLib.props | 2 ++ 16 files changed, 105 insertions(+), 24 deletions(-) create mode 100644 Source/Core/Core/Config/SessionSettings.cpp create mode 100644 Source/Core/Core/Config/SessionSettings.h diff --git a/Source/Core/Common/Config/Config.cpp b/Source/Core/Common/Config/Config.cpp index c75b6ee5e9..17e65ea234 100644 --- a/Source/Core/Common/Config/Config.cpp +++ b/Source/Core/Common/Config/Config.cpp @@ -141,7 +141,9 @@ static const std::map system_to_name = { {System::Logger, "Logger"}, {System::Debugger, "Debugger"}, {System::SYSCONF, "SYSCONF"}, - {System::DualShockUDPClient, "DualShockUDPClient"}}; + {System::DualShockUDPClient, "DualShockUDPClient"}, + {System::FreeLook, "FreeLook"}, + {System::Session, "Session"}}; const std::string& GetSystemName(System system) { diff --git a/Source/Core/Common/Config/Enums.h b/Source/Core/Common/Config/Enums.h index 1999005401..c25858b098 100644 --- a/Source/Core/Common/Config/Enums.h +++ b/Source/Core/Common/Config/Enums.h @@ -33,6 +33,7 @@ enum class System Debugger, DualShockUDPClient, FreeLook, + Session, }; constexpr std::array SEARCH_ORDER{{ diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 5cf611024f..892960e05c 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -26,6 +26,8 @@ add_library(core Config/MainSettings.h Config/NetplaySettings.cpp Config/NetplaySettings.h + Config/SessionSettings.cpp + Config/SessionSettings.h Config/SYSCONFSettings.cpp Config/SYSCONFSettings.h Config/UISettings.cpp diff --git a/Source/Core/Core/Config/SessionSettings.cpp b/Source/Core/Core/Config/SessionSettings.cpp new file mode 100644 index 0000000000..0a1f822a4f --- /dev/null +++ b/Source/Core/Core/Config/SessionSettings.cpp @@ -0,0 +1,14 @@ +// Copyright 2021 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Core/Config/SessionSettings.h" + +#include "Common/CPUDetect.h" +#include "Common/CommonTypes.h" +#include "Common/Config/Config.h" + +namespace Config +{ +const Info SESSION_USE_FMA{{System::Session, "Core", "UseFMA"}, CPUInfo().bFMA}; +} // namespace Config diff --git a/Source/Core/Core/Config/SessionSettings.h b/Source/Core/Core/Config/SessionSettings.h new file mode 100644 index 0000000000..904824d135 --- /dev/null +++ b/Source/Core/Core/Config/SessionSettings.h @@ -0,0 +1,13 @@ +// Copyright 2021 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "Common/CommonTypes.h" +#include "Common/Config/Config.h" + +namespace Config +{ +extern const Info SESSION_USE_FMA; +} // namespace Config diff --git a/Source/Core/Core/ConfigLoaders/BaseConfigLoader.cpp b/Source/Core/Core/ConfigLoaders/BaseConfigLoader.cpp index 254237df9a..f375a806fd 100644 --- a/Source/Core/Core/ConfigLoaders/BaseConfigLoader.cpp +++ b/Source/Core/Core/ConfigLoaders/BaseConfigLoader.cpp @@ -94,6 +94,7 @@ const std::map system_to_ini = { {Config::System::Debugger, F_DEBUGGERCONFIG_IDX}, {Config::System::DualShockUDPClient, F_DUALSHOCKUDPCLIENTCONFIG_IDX}, {Config::System::FreeLook, F_FREELOOKCONFIG_IDX}, + // Config::System::Session should not be added to this list }; // INI layer configuration loader @@ -144,6 +145,9 @@ public: if (location.system == Config::System::SYSCONF) continue; + if (location.system == Config::System::Session) + continue; + auto ini = inis.find(location.system); if (ini == inis.end()) { diff --git a/Source/Core/Core/ConfigLoaders/GameConfigLoader.cpp b/Source/Core/Core/ConfigLoaders/GameConfigLoader.cpp index eb4e71a352..86280e1e45 100644 --- a/Source/Core/Core/ConfigLoaders/GameConfigLoader.cpp +++ b/Source/Core/Core/ConfigLoaders/GameConfigLoader.cpp @@ -250,6 +250,9 @@ private: if (location.section.empty() && location.key.empty()) continue; + if (location.system == Config::System::Session) + continue; + layer->Set(location, value.second); } } @@ -272,7 +275,7 @@ void INIGameConfigLayerLoader::Save(Config::Layer* layer) const Config::Location& location = config.first; const std::optional& value = config.second; - if (!IsSettingSaveable(location)) + if (!IsSettingSaveable(location) || location.system == Config::System::Session) continue; const auto ini_location = GetINILocationFromConfig(location); diff --git a/Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp b/Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp index 948e93d257..be8cee619a 100644 --- a/Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp +++ b/Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp @@ -14,6 +14,7 @@ #include "Core/Config/GraphicsSettings.h" #include "Core/Config/MainSettings.h" #include "Core/Config/SYSCONFSettings.h" +#include "Core/Config/SessionSettings.h" #include "Core/ConfigManager.h" #include "Core/Movie.h" #include "VideoCommon/VideoConfig.h" @@ -46,6 +47,8 @@ static void LoadFromDTM(Config::Layer* config_layer, Movie::DTMHeader* dtm) config_layer->Set(Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES, dtm->bEFBEmulateFormatChanges); config_layer->Set(Config::GFX_HACK_IMMEDIATE_XFB, dtm->bImmediateXFB); config_layer->Set(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM, dtm->bSkipXFBCopyToRam); + + config_layer->Set(Config::SESSION_USE_FMA, dtm->bUseFMA); } void SaveToDTM(Movie::DTMHeader* dtm) @@ -70,7 +73,9 @@ void SaveToDTM(Movie::DTMHeader* dtm) dtm->bImmediateXFB = Config::Get(Config::GFX_HACK_IMMEDIATE_XFB); dtm->bSkipXFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM); - // This never used the regular config + dtm->bUseFMA = Config::Get(Config::SESSION_USE_FMA); + + // Settings which only existed in old Dolphin versions dtm->bSkipIdle = true; dtm->bEFBCopyEnable = true; dtm->bEFBCopyCacheEnable = false; diff --git a/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp b/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp index f720649cdc..c379cfb488 100644 --- a/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp +++ b/Source/Core/Core/ConfigLoaders/NetPlayConfigLoader.cpp @@ -11,9 +11,11 @@ #include "Common/CommonPaths.h" #include "Common/Config/Config.h" #include "Common/FileUtil.h" + #include "Core/Config/GraphicsSettings.h" #include "Core/Config/MainSettings.h" #include "Core/Config/SYSCONFSettings.h" +#include "Core/Config/SessionSettings.h" #include "Core/NetPlayProto.h" namespace ConfigLoaders @@ -87,6 +89,8 @@ public: layer->Set(Config::GFX_HACK_EFB_ACCESS_TILE_SIZE, m_settings.m_EFBAccessTileSize); layer->Set(Config::GFX_HACK_EFB_DEFER_INVALIDATION, m_settings.m_EFBAccessDeferInvalidation); + layer->Set(Config::SESSION_USE_FMA, m_settings.m_UseFMA); + if (m_settings.m_StrictSettingsSync) { layer->Set(Config::GFX_HACK_VERTEX_ROUDING, m_settings.m_VertexRounding); diff --git a/Source/Core/Core/Movie.h b/Source/Core/Core/Movie.h index e76ea6a10e..e025e55134 100644 --- a/Source/Core/Core/Movie.h +++ b/Source/Core/Core/Movie.h @@ -116,7 +116,8 @@ struct DTMHeader u8 language; u8 reserved3; bool bFollowBranch; - std::array reserved; // Padding for any new config options + bool bUseFMA; + std::array reserved; // Padding for any new config options std::array discChange; // Name of iso file to switch to, for two disc games. std::array revision; // Git hash u32 DSPiromHash; diff --git a/Source/Core/Core/NetPlayClient.cpp b/Source/Core/Core/NetPlayClient.cpp index 065afc801e..cb826600fb 100644 --- a/Source/Core/Core/NetPlayClient.cpp +++ b/Source/Core/Core/NetPlayClient.cpp @@ -34,8 +34,10 @@ #include "Common/StringUtil.h" #include "Common/Timer.h" #include "Common/Version.h" + #include "Core/ActionReplay.h" #include "Core/Config/NetplaySettings.h" +#include "Core/Config/SessionSettings.h" #include "Core/ConfigManager.h" #include "Core/GeckoCode.h" #include "Core/HW/EXI/EXI_DeviceIPL.h" @@ -54,6 +56,7 @@ #include "Core/Movie.h" #include "Core/PowerPC/PowerPC.h" #include "Core/SyncIdentifier.h" + #include "InputCommon/ControllerEmu/ControlGroup/Attachments.h" #include "InputCommon/GCAdapter.h" #include "InputCommon/InputConfig.h" @@ -609,10 +612,11 @@ unsigned int NetPlayClient::OnData(sf::Packet& packet) game_status_packet << static_cast(result); Send(game_status_packet); - sf::Packet ipl_status_packet; - ipl_status_packet << static_cast(NP_MSG_IPL_STATUS); - ipl_status_packet << ExpansionInterface::CEXIIPL::HasIPLDump(); - Send(ipl_status_packet); + sf::Packet client_capabilities_packet; + client_capabilities_packet << static_cast(NP_MSG_CLIENT_CAPABILITIES); + client_capabilities_packet << ExpansionInterface::CEXIIPL::HasIPLDump(); + client_capabilities_packet << Config::Get(Config::SESSION_USE_FMA); + Send(client_capabilities_packet); } break; @@ -735,6 +739,7 @@ unsigned int NetPlayClient::OnData(sf::Packet& packet) packet >> extension; packet >> m_net_settings.m_GolfMode; + packet >> m_net_settings.m_UseFMA; m_net_settings.m_IsHosting = m_local_player->IsHost(); m_net_settings.m_HostInputAuthority = m_host_input_authority; diff --git a/Source/Core/Core/NetPlayProto.h b/Source/Core/Core/NetPlayProto.h index 8983fe809a..640c0989bd 100644 --- a/Source/Core/Core/NetPlayProto.h +++ b/Source/Core/Core/NetPlayProto.h @@ -97,6 +97,7 @@ struct NetSettings bool m_SyncAllWiiSaves; std::array m_WiimoteExtension; bool m_GolfMode; + bool m_UseFMA; // These aren't sent over the network directly bool m_IsHosting; @@ -156,7 +157,7 @@ enum NP_MSG_STOP_GAME = 0xA2, NP_MSG_DISABLE_GAME = 0xA3, NP_MSG_GAME_STATUS = 0xA4, - NP_MSG_IPL_STATUS = 0xA5, + NP_MSG_CLIENT_CAPABILITIES = 0xA5, NP_MSG_HOST_INPUT_AUTHORITY = 0xA6, NP_MSG_POWER_BUTTON = 0xA7, diff --git a/Source/Core/Core/NetPlayServer.cpp b/Source/Core/Core/NetPlayServer.cpp index 2695703ca5..8aa891a2e6 100644 --- a/Source/Core/Core/NetPlayServer.cpp +++ b/Source/Core/Core/NetPlayServer.cpp @@ -32,11 +32,13 @@ #include "Common/StringUtil.h" #include "Common/UPnP.h" #include "Common/Version.h" + #include "Core/ActionReplay.h" #include "Core/Config/GraphicsSettings.h" #include "Core/Config/MainSettings.h" #include "Core/Config/NetplaySettings.h" #include "Core/Config/SYSCONFSettings.h" +#include "Core/Config/SessionSettings.h" #include "Core/ConfigLoaders/GameConfigLoader.h" #include "Core/ConfigManager.h" #include "Core/GeckoCode.h" @@ -54,10 +56,13 @@ #include "Core/IOS/Uids.h" #include "Core/NetPlayClient.h" //for NetPlayUI #include "Core/SyncIdentifier.h" + #include "DiscIO/Enums.h" + #include "InputCommon/ControllerEmu/ControlGroup/Attachments.h" #include "InputCommon/GCPadStatus.h" #include "InputCommon/InputConfig.h" + #include "UICommon/GameFile.h" #if !defined(_WIN32) @@ -943,12 +948,10 @@ unsigned int NetPlayServer::OnData(sf::Packet& packet, Client& player) } break; - case NP_MSG_IPL_STATUS: + case NP_MSG_CLIENT_CAPABILITIES: { - bool status; - packet >> status; - - m_players[player.pid].has_ipl_dump = status; + packet >> m_players[player.pid].has_ipl_dump; + packet >> m_players[player.pid].has_hardware_fma; } break; @@ -1306,12 +1309,14 @@ bool NetPlayServer::SetupNetSettings() settings.m_DeferEFBCopies = Config::Get(Config::GFX_HACK_DEFER_EFB_COPIES); settings.m_EFBAccessTileSize = Config::Get(Config::GFX_HACK_EFB_ACCESS_TILE_SIZE); settings.m_EFBAccessDeferInvalidation = Config::Get(Config::GFX_HACK_EFB_DEFER_INVALIDATION); + settings.m_StrictSettingsSync = Config::Get(Config::NETPLAY_STRICT_SETTINGS_SYNC); settings.m_SyncSaveData = Config::Get(Config::NETPLAY_SYNC_SAVES); settings.m_SyncCodes = Config::Get(Config::NETPLAY_SYNC_CODES); settings.m_SyncAllWiiSaves = Config::Get(Config::NETPLAY_SYNC_ALL_WII_SAVES) && Config::Get(Config::NETPLAY_SYNC_SAVES); settings.m_GolfMode = Config::Get(Config::NETPLAY_NETWORK_MODE) == "golf"; + settings.m_UseFMA = DoAllPlayersHaveHardwareFMA(); // Unload GameINI to restore things to normal Config::RemoveLayer(Config::LayerType::GlobalGame); @@ -1328,6 +1333,12 @@ bool NetPlayServer::DoAllPlayersHaveIPLDump() const [](const auto& p) { return p.second.has_ipl_dump; }); } +bool NetPlayServer::DoAllPlayersHaveHardwareFMA() const +{ + return std::all_of(m_players.begin(), m_players.end(), + [](const auto& p) { return p.second.has_hardware_fma; }); +} + // called from ---GUI--- thread bool NetPlayServer::RequestStartGame() { @@ -1490,6 +1501,7 @@ bool NetPlayServer::StartGame() } spac << m_settings.m_GolfMode; + spac << m_settings.m_UseFMA; SendAsyncToClients(std::move(spac)); diff --git a/Source/Core/Core/NetPlayServer.h b/Source/Core/Core/NetPlayServer.h index 4c18f11931..de83e8b2cb 100644 --- a/Source/Core/Core/NetPlayServer.h +++ b/Source/Core/Core/NetPlayServer.h @@ -51,6 +51,7 @@ public: void SendChatMessage(const std::string& msg); bool DoAllPlayersHaveIPLDump() const; + bool DoAllPlayersHaveHardwareFMA() const; bool StartGame(); bool RequestStartGame(); void AbortGameStart(); @@ -82,6 +83,7 @@ private: std::string revision; SyncIdentifierComparison game_status; bool has_ipl_dump; + bool has_hardware_fma; ENetPeer* socket; u32 ping; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index ead87a64df..9730d95d2e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -10,7 +10,9 @@ #include "Common/Assert.h" #include "Common/CPUDetect.h" #include "Common/CommonTypes.h" +#include "Common/Config/Config.h" #include "Common/x64Emitter.h" +#include "Core/Config/SessionSettings.h" #include "Core/ConfigManager.h" #include "Core/Core.h" #include "Core/PowerPC/Jit64/Jit.h" @@ -241,10 +243,21 @@ void Jit64::fmaddXX(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); - // While we don't know if any games are actually affected (replays seem to work with all the usual - // suspects for desyncing), netplay and other applications need absolute perfect determinism, so - // be extra careful and use software FMA on CPUs that don't have hardware FMA. - const bool software_fma = !cpu_info.bFMA && Core::WantsDeterminism(); + // We would like to emulate FMA instructions accurately without rounding error if possible, but + // unfortunately emulating FMA in software is just too slow on CPUs that are too old to have FMA + // instructions, so we have the Config::SESSION_USE_FMA setting to determine whether we should + // emulate FMA instructions accurately or by a performing a multiply followed by a separate add. + // + // Why have a setting instead of just checking cpu_info.bFMA, you might wonder? Because for + // netplay and TAS, it's important that everyone gets exactly the same results. The setting + // is not user configurable - Dolphin automatically sets it based on what is supported by the + // CPUs of everyone in the netplay room (or when not using netplay, simply the system's CPU). + // + // There is one circumstance where the software FMA path does get used: when an input recording + // is created on a CPU that has FMA instructions and then gets played back on a CPU that doesn't. + // (Or if the user just really wants to override the setting and knows how to do so.) + const bool use_fma = Config::Get(Config::SESSION_USE_FMA); + const bool software_fma = use_fma && !cpu_info.bFMA; int a = inst.FA; int b = inst.FB; @@ -272,11 +285,11 @@ void Jit64::fmaddXX(UGeckoInstruction inst) } else { - // For cpu_info.bFMA == true: + // For use_fma == true: // Statistics suggests b is a lot less likely to be unbound in practice, so // if we have to pick one of a or b to bind, let's make it b. Ra = fpr.Use(a, RCMode::Read); - Rb = cpu_info.bFMA ? fpr.Bind(b, RCMode::Read) : fpr.Use(b, RCMode::Read); + Rb = use_fma ? fpr.Bind(b, RCMode::Read) : fpr.Use(b, RCMode::Read); Rc = fpr.Use(c, RCMode::Read); Rd = fpr.Bind(d, single ? RCMode::Write : RCMode::ReadWrite); RegCache::Realize(Ra, Rb, Rc, Rd); @@ -357,7 +370,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst) break; } - if (cpu_info.bFMA) + if (use_fma) { switch (inst.SUBOP5) { @@ -395,9 +408,6 @@ void Jit64::fmaddXX(UGeckoInstruction inst) } else { - // No hardware support for FMA, and determinism is not enabled. In this case we inaccurately - // do the multiplication and addition/subtraction in two separate operations for performance. - if (inst.SUBOP5 == 30) // nmsub { // We implement nmsub a little differently ((b - a*c) instead of -(a*c - b)), diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index ecfcb078e5..8ad1af3b4a 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -169,6 +169,7 @@ + @@ -742,6 +743,7 @@ +