NetPlay/Jit64: Avoid using software FMA
When I added the software FMA path in2c38d64
and made us use it when determinism is enabled, I was assuming that either the performance impact of software FMA wouldn't be too large or CPUs that were too old to have FMA instructions were too slow to run Dolphin well anyway. This was wrong. To give an example, the netplay performance went from 60 FPS to 30 FPS in one case. This change makes netplay clients negotiate whether FMA should be used. If all clients use an x64 CPU that supports FMA, or AArch64, then FMA is enabled, and otherwise FMA is disabled. In other words, we sacrifice accuracy if needed to avoid massive slowdown, but not otherwise. When not using netplay, whether to enable FMA is simply based on whether the host CPU supports it. The only remaining case where the software FMA path gets used under normal circumstances is when an input recording is created on a CPU with FMA support and then played back on a CPU without. This is not an especially common scenario (though it can happen), and TASers are generally less picky about performance and more picky about accuracy than other users anyway. With this change, FMA desyncs are avoided between AArch64 and modern x64 CPUs (unlike before2c38d64
), but we do get FMA desyncs between AArch64 and old x64 CPUs (like before2c38d64
). This desync can be avoided by adding a non-FMA path to JitArm64 as an option, which I will wait with for another pull request so that we can get the performance regression fixed as quickly as possible. https://bugs.dolphin-emu.org/issues/12542
This commit is contained in:
parent
8f9bb5612a
commit
ac28b89fa5
|
@ -141,7 +141,9 @@ static const std::map<System, std::string> system_to_name = {
|
|||
{System::Logger, "Logger"},
|
||||
{System::Debugger, "Debugger"},
|
||||
{System::SYSCONF, "SYSCONF"},
|
||||
{System::DualShockUDPClient, "DualShockUDPClient"}};
|
||||
{System::DualShockUDPClient, "DualShockUDPClient"},
|
||||
{System::FreeLook, "FreeLook"},
|
||||
{System::Session, "Session"}};
|
||||
|
||||
const std::string& GetSystemName(System system)
|
||||
{
|
||||
|
|
|
@ -33,6 +33,7 @@ enum class System
|
|||
Debugger,
|
||||
DualShockUDPClient,
|
||||
FreeLook,
|
||||
Session,
|
||||
};
|
||||
|
||||
constexpr std::array<LayerType, 7> SEARCH_ORDER{{
|
||||
|
|
|
@ -26,6 +26,8 @@ add_library(core
|
|||
Config/MainSettings.h
|
||||
Config/NetplaySettings.cpp
|
||||
Config/NetplaySettings.h
|
||||
Config/SessionSettings.cpp
|
||||
Config/SessionSettings.h
|
||||
Config/SYSCONFSettings.cpp
|
||||
Config/SYSCONFSettings.h
|
||||
Config/UISettings.cpp
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
// Copyright 2021 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Core/Config/SessionSettings.h"
|
||||
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Config/Config.h"
|
||||
|
||||
namespace Config
|
||||
{
|
||||
const Info<bool> SESSION_USE_FMA{{System::Session, "Core", "UseFMA"}, CPUInfo().bFMA};
|
||||
} // namespace Config
|
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2021 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Config/Config.h"
|
||||
|
||||
namespace Config
|
||||
{
|
||||
extern const Info<bool> SESSION_USE_FMA;
|
||||
} // namespace Config
|
|
@ -94,6 +94,7 @@ const std::map<Config::System, int> system_to_ini = {
|
|||
{Config::System::Debugger, F_DEBUGGERCONFIG_IDX},
|
||||
{Config::System::DualShockUDPClient, F_DUALSHOCKUDPCLIENTCONFIG_IDX},
|
||||
{Config::System::FreeLook, F_FREELOOKCONFIG_IDX},
|
||||
// Config::System::Session should not be added to this list
|
||||
};
|
||||
|
||||
// INI layer configuration loader
|
||||
|
@ -144,6 +145,9 @@ public:
|
|||
if (location.system == Config::System::SYSCONF)
|
||||
continue;
|
||||
|
||||
if (location.system == Config::System::Session)
|
||||
continue;
|
||||
|
||||
auto ini = inis.find(location.system);
|
||||
if (ini == inis.end())
|
||||
{
|
||||
|
|
|
@ -250,6 +250,9 @@ private:
|
|||
if (location.section.empty() && location.key.empty())
|
||||
continue;
|
||||
|
||||
if (location.system == Config::System::Session)
|
||||
continue;
|
||||
|
||||
layer->Set(location, value.second);
|
||||
}
|
||||
}
|
||||
|
@ -272,7 +275,7 @@ void INIGameConfigLayerLoader::Save(Config::Layer* layer)
|
|||
const Config::Location& location = config.first;
|
||||
const std::optional<std::string>& value = config.second;
|
||||
|
||||
if (!IsSettingSaveable(location))
|
||||
if (!IsSettingSaveable(location) || location.system == Config::System::Session)
|
||||
continue;
|
||||
|
||||
const auto ini_location = GetINILocationFromConfig(location);
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "Core/Config/GraphicsSettings.h"
|
||||
#include "Core/Config/MainSettings.h"
|
||||
#include "Core/Config/SYSCONFSettings.h"
|
||||
#include "Core/Config/SessionSettings.h"
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/Movie.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
|
@ -46,6 +47,8 @@ static void LoadFromDTM(Config::Layer* config_layer, Movie::DTMHeader* dtm)
|
|||
config_layer->Set(Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES, dtm->bEFBEmulateFormatChanges);
|
||||
config_layer->Set(Config::GFX_HACK_IMMEDIATE_XFB, dtm->bImmediateXFB);
|
||||
config_layer->Set(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM, dtm->bSkipXFBCopyToRam);
|
||||
|
||||
config_layer->Set(Config::SESSION_USE_FMA, dtm->bUseFMA);
|
||||
}
|
||||
|
||||
void SaveToDTM(Movie::DTMHeader* dtm)
|
||||
|
@ -70,7 +73,9 @@ void SaveToDTM(Movie::DTMHeader* dtm)
|
|||
dtm->bImmediateXFB = Config::Get(Config::GFX_HACK_IMMEDIATE_XFB);
|
||||
dtm->bSkipXFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM);
|
||||
|
||||
// This never used the regular config
|
||||
dtm->bUseFMA = Config::Get(Config::SESSION_USE_FMA);
|
||||
|
||||
// Settings which only existed in old Dolphin versions
|
||||
dtm->bSkipIdle = true;
|
||||
dtm->bEFBCopyEnable = true;
|
||||
dtm->bEFBCopyCacheEnable = false;
|
||||
|
|
|
@ -11,9 +11,11 @@
|
|||
#include "Common/CommonPaths.h"
|
||||
#include "Common/Config/Config.h"
|
||||
#include "Common/FileUtil.h"
|
||||
|
||||
#include "Core/Config/GraphicsSettings.h"
|
||||
#include "Core/Config/MainSettings.h"
|
||||
#include "Core/Config/SYSCONFSettings.h"
|
||||
#include "Core/Config/SessionSettings.h"
|
||||
#include "Core/NetPlayProto.h"
|
||||
|
||||
namespace ConfigLoaders
|
||||
|
@ -87,6 +89,8 @@ public:
|
|||
layer->Set(Config::GFX_HACK_EFB_ACCESS_TILE_SIZE, m_settings.m_EFBAccessTileSize);
|
||||
layer->Set(Config::GFX_HACK_EFB_DEFER_INVALIDATION, m_settings.m_EFBAccessDeferInvalidation);
|
||||
|
||||
layer->Set(Config::SESSION_USE_FMA, m_settings.m_UseFMA);
|
||||
|
||||
if (m_settings.m_StrictSettingsSync)
|
||||
{
|
||||
layer->Set(Config::GFX_HACK_VERTEX_ROUDING, m_settings.m_VertexRounding);
|
||||
|
|
|
@ -116,7 +116,8 @@ struct DTMHeader
|
|||
u8 language;
|
||||
u8 reserved3;
|
||||
bool bFollowBranch;
|
||||
std::array<u8, 9> reserved; // Padding for any new config options
|
||||
bool bUseFMA;
|
||||
std::array<u8, 8> reserved; // Padding for any new config options
|
||||
std::array<char, 40> discChange; // Name of iso file to switch to, for two disc games.
|
||||
std::array<u8, 20> revision; // Git hash
|
||||
u32 DSPiromHash;
|
||||
|
|
|
@ -34,8 +34,10 @@
|
|||
#include "Common/StringUtil.h"
|
||||
#include "Common/Timer.h"
|
||||
#include "Common/Version.h"
|
||||
|
||||
#include "Core/ActionReplay.h"
|
||||
#include "Core/Config/NetplaySettings.h"
|
||||
#include "Core/Config/SessionSettings.h"
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/GeckoCode.h"
|
||||
#include "Core/HW/EXI/EXI_DeviceIPL.h"
|
||||
|
@ -54,6 +56,7 @@
|
|||
#include "Core/Movie.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
#include "Core/SyncIdentifier.h"
|
||||
|
||||
#include "InputCommon/ControllerEmu/ControlGroup/Attachments.h"
|
||||
#include "InputCommon/GCAdapter.h"
|
||||
#include "InputCommon/InputConfig.h"
|
||||
|
@ -609,10 +612,11 @@ unsigned int NetPlayClient::OnData(sf::Packet& packet)
|
|||
game_status_packet << static_cast<u32>(result);
|
||||
Send(game_status_packet);
|
||||
|
||||
sf::Packet ipl_status_packet;
|
||||
ipl_status_packet << static_cast<MessageId>(NP_MSG_IPL_STATUS);
|
||||
ipl_status_packet << ExpansionInterface::CEXIIPL::HasIPLDump();
|
||||
Send(ipl_status_packet);
|
||||
sf::Packet client_capabilities_packet;
|
||||
client_capabilities_packet << static_cast<MessageId>(NP_MSG_CLIENT_CAPABILITIES);
|
||||
client_capabilities_packet << ExpansionInterface::CEXIIPL::HasIPLDump();
|
||||
client_capabilities_packet << Config::Get(Config::SESSION_USE_FMA);
|
||||
Send(client_capabilities_packet);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -735,6 +739,7 @@ unsigned int NetPlayClient::OnData(sf::Packet& packet)
|
|||
packet >> extension;
|
||||
|
||||
packet >> m_net_settings.m_GolfMode;
|
||||
packet >> m_net_settings.m_UseFMA;
|
||||
|
||||
m_net_settings.m_IsHosting = m_local_player->IsHost();
|
||||
m_net_settings.m_HostInputAuthority = m_host_input_authority;
|
||||
|
|
|
@ -97,6 +97,7 @@ struct NetSettings
|
|||
bool m_SyncAllWiiSaves;
|
||||
std::array<int, 4> m_WiimoteExtension;
|
||||
bool m_GolfMode;
|
||||
bool m_UseFMA;
|
||||
|
||||
// These aren't sent over the network directly
|
||||
bool m_IsHosting;
|
||||
|
@ -156,7 +157,7 @@ enum
|
|||
NP_MSG_STOP_GAME = 0xA2,
|
||||
NP_MSG_DISABLE_GAME = 0xA3,
|
||||
NP_MSG_GAME_STATUS = 0xA4,
|
||||
NP_MSG_IPL_STATUS = 0xA5,
|
||||
NP_MSG_CLIENT_CAPABILITIES = 0xA5,
|
||||
NP_MSG_HOST_INPUT_AUTHORITY = 0xA6,
|
||||
NP_MSG_POWER_BUTTON = 0xA7,
|
||||
|
||||
|
|
|
@ -32,11 +32,13 @@
|
|||
#include "Common/StringUtil.h"
|
||||
#include "Common/UPnP.h"
|
||||
#include "Common/Version.h"
|
||||
|
||||
#include "Core/ActionReplay.h"
|
||||
#include "Core/Config/GraphicsSettings.h"
|
||||
#include "Core/Config/MainSettings.h"
|
||||
#include "Core/Config/NetplaySettings.h"
|
||||
#include "Core/Config/SYSCONFSettings.h"
|
||||
#include "Core/Config/SessionSettings.h"
|
||||
#include "Core/ConfigLoaders/GameConfigLoader.h"
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/GeckoCode.h"
|
||||
|
@ -54,10 +56,13 @@
|
|||
#include "Core/IOS/Uids.h"
|
||||
#include "Core/NetPlayClient.h" //for NetPlayUI
|
||||
#include "Core/SyncIdentifier.h"
|
||||
|
||||
#include "DiscIO/Enums.h"
|
||||
|
||||
#include "InputCommon/ControllerEmu/ControlGroup/Attachments.h"
|
||||
#include "InputCommon/GCPadStatus.h"
|
||||
#include "InputCommon/InputConfig.h"
|
||||
|
||||
#include "UICommon/GameFile.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
|
@ -943,12 +948,10 @@ unsigned int NetPlayServer::OnData(sf::Packet& packet, Client& player)
|
|||
}
|
||||
break;
|
||||
|
||||
case NP_MSG_IPL_STATUS:
|
||||
case NP_MSG_CLIENT_CAPABILITIES:
|
||||
{
|
||||
bool status;
|
||||
packet >> status;
|
||||
|
||||
m_players[player.pid].has_ipl_dump = status;
|
||||
packet >> m_players[player.pid].has_ipl_dump;
|
||||
packet >> m_players[player.pid].has_hardware_fma;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1306,12 +1309,14 @@ bool NetPlayServer::SetupNetSettings()
|
|||
settings.m_DeferEFBCopies = Config::Get(Config::GFX_HACK_DEFER_EFB_COPIES);
|
||||
settings.m_EFBAccessTileSize = Config::Get(Config::GFX_HACK_EFB_ACCESS_TILE_SIZE);
|
||||
settings.m_EFBAccessDeferInvalidation = Config::Get(Config::GFX_HACK_EFB_DEFER_INVALIDATION);
|
||||
|
||||
settings.m_StrictSettingsSync = Config::Get(Config::NETPLAY_STRICT_SETTINGS_SYNC);
|
||||
settings.m_SyncSaveData = Config::Get(Config::NETPLAY_SYNC_SAVES);
|
||||
settings.m_SyncCodes = Config::Get(Config::NETPLAY_SYNC_CODES);
|
||||
settings.m_SyncAllWiiSaves =
|
||||
Config::Get(Config::NETPLAY_SYNC_ALL_WII_SAVES) && Config::Get(Config::NETPLAY_SYNC_SAVES);
|
||||
settings.m_GolfMode = Config::Get(Config::NETPLAY_NETWORK_MODE) == "golf";
|
||||
settings.m_UseFMA = DoAllPlayersHaveHardwareFMA();
|
||||
|
||||
// Unload GameINI to restore things to normal
|
||||
Config::RemoveLayer(Config::LayerType::GlobalGame);
|
||||
|
@ -1328,6 +1333,12 @@ bool NetPlayServer::DoAllPlayersHaveIPLDump() const
|
|||
[](const auto& p) { return p.second.has_ipl_dump; });
|
||||
}
|
||||
|
||||
bool NetPlayServer::DoAllPlayersHaveHardwareFMA() const
|
||||
{
|
||||
return std::all_of(m_players.begin(), m_players.end(),
|
||||
[](const auto& p) { return p.second.has_hardware_fma; });
|
||||
}
|
||||
|
||||
// called from ---GUI--- thread
|
||||
bool NetPlayServer::RequestStartGame()
|
||||
{
|
||||
|
@ -1490,6 +1501,7 @@ bool NetPlayServer::StartGame()
|
|||
}
|
||||
|
||||
spac << m_settings.m_GolfMode;
|
||||
spac << m_settings.m_UseFMA;
|
||||
|
||||
SendAsyncToClients(std::move(spac));
|
||||
|
||||
|
|
|
@ -51,6 +51,7 @@ public:
|
|||
void SendChatMessage(const std::string& msg);
|
||||
|
||||
bool DoAllPlayersHaveIPLDump() const;
|
||||
bool DoAllPlayersHaveHardwareFMA() const;
|
||||
bool StartGame();
|
||||
bool RequestStartGame();
|
||||
void AbortGameStart();
|
||||
|
@ -82,6 +83,7 @@ private:
|
|||
std::string revision;
|
||||
SyncIdentifierComparison game_status;
|
||||
bool has_ipl_dump;
|
||||
bool has_hardware_fma;
|
||||
|
||||
ENetPeer* socket;
|
||||
u32 ping;
|
||||
|
|
|
@ -10,7 +10,9 @@
|
|||
#include "Common/Assert.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Config/Config.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/Config/SessionSettings.h"
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
|
@ -241,10 +243,21 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
JITDISABLE(bJITFloatingPointOff);
|
||||
FALLBACK_IF(inst.Rc);
|
||||
|
||||
// While we don't know if any games are actually affected (replays seem to work with all the usual
|
||||
// suspects for desyncing), netplay and other applications need absolute perfect determinism, so
|
||||
// be extra careful and use software FMA on CPUs that don't have hardware FMA.
|
||||
const bool software_fma = !cpu_info.bFMA && Core::WantsDeterminism();
|
||||
// We would like to emulate FMA instructions accurately without rounding error if possible, but
|
||||
// unfortunately emulating FMA in software is just too slow on CPUs that are too old to have FMA
|
||||
// instructions, so we have the Config::SESSION_USE_FMA setting to determine whether we should
|
||||
// emulate FMA instructions accurately or by a performing a multiply followed by a separate add.
|
||||
//
|
||||
// Why have a setting instead of just checking cpu_info.bFMA, you might wonder? Because for
|
||||
// netplay and TAS, it's important that everyone gets exactly the same results. The setting
|
||||
// is not user configurable - Dolphin automatically sets it based on what is supported by the
|
||||
// CPUs of everyone in the netplay room (or when not using netplay, simply the system's CPU).
|
||||
//
|
||||
// There is one circumstance where the software FMA path does get used: when an input recording
|
||||
// is created on a CPU that has FMA instructions and then gets played back on a CPU that doesn't.
|
||||
// (Or if the user just really wants to override the setting and knows how to do so.)
|
||||
const bool use_fma = Config::Get(Config::SESSION_USE_FMA);
|
||||
const bool software_fma = use_fma && !cpu_info.bFMA;
|
||||
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
|
@ -272,11 +285,11 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// For cpu_info.bFMA == true:
|
||||
// For use_fma == true:
|
||||
// Statistics suggests b is a lot less likely to be unbound in practice, so
|
||||
// if we have to pick one of a or b to bind, let's make it b.
|
||||
Ra = fpr.Use(a, RCMode::Read);
|
||||
Rb = cpu_info.bFMA ? fpr.Bind(b, RCMode::Read) : fpr.Use(b, RCMode::Read);
|
||||
Rb = use_fma ? fpr.Bind(b, RCMode::Read) : fpr.Use(b, RCMode::Read);
|
||||
Rc = fpr.Use(c, RCMode::Read);
|
||||
Rd = fpr.Bind(d, single ? RCMode::Write : RCMode::ReadWrite);
|
||||
RegCache::Realize(Ra, Rb, Rc, Rd);
|
||||
|
@ -357,7 +370,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
break;
|
||||
}
|
||||
|
||||
if (cpu_info.bFMA)
|
||||
if (use_fma)
|
||||
{
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
|
@ -395,9 +408,6 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// No hardware support for FMA, and determinism is not enabled. In this case we inaccurately
|
||||
// do the multiplication and addition/subtraction in two separate operations for performance.
|
||||
|
||||
if (inst.SUBOP5 == 30) // nmsub
|
||||
{
|
||||
// We implement nmsub a little differently ((b - a*c) instead of -(a*c - b)),
|
||||
|
|
|
@ -169,6 +169,7 @@
|
|||
<ClInclude Include="Core\Config\GraphicsSettings.h" />
|
||||
<ClInclude Include="Core\Config\MainSettings.h" />
|
||||
<ClInclude Include="Core\Config\NetplaySettings.h" />
|
||||
<ClInclude Include="Core\Config\SessionSettings.h" />
|
||||
<ClInclude Include="Core\Config\SYSCONFSettings.h" />
|
||||
<ClInclude Include="Core\Config\UISettings.h" />
|
||||
<ClInclude Include="Core\ConfigLoaders\BaseConfigLoader.h" />
|
||||
|
@ -742,6 +743,7 @@
|
|||
<ClCompile Include="Core\Config\GraphicsSettings.cpp" />
|
||||
<ClCompile Include="Core\Config\MainSettings.cpp" />
|
||||
<ClCompile Include="Core\Config\NetplaySettings.cpp" />
|
||||
<ClCompile Include="Core\Config\SessionSettings.cpp" />
|
||||
<ClCompile Include="Core\Config\SYSCONFSettings.cpp" />
|
||||
<ClCompile Include="Core\Config\UISettings.cpp" />
|
||||
<ClCompile Include="Core\ConfigLoaders\BaseConfigLoader.cpp" />
|
||||
|
|
Loading…
Reference in New Issue