diff --git a/src/common/intrin.h b/src/common/intrin.h index 364a0cc38..b04236719 100644 --- a/src/common/intrin.h +++ b/src/common/intrin.h @@ -14,9 +14,9 @@ #define CPU_ARCH_SIMD 1 #define CPU_ARCH_SSE 1 #include -#include -#include #include +#include +#include #if defined(__AVX2__) #define CPU_ARCH_AVX 1 @@ -96,3 +96,40 @@ ALWAYS_INLINE_RELEASE static void MemsetPtrs(T* ptr, T value, u32 count) for (u32 i = 0; i < remaining_count; i++) *(dest++) = value; } + +ALWAYS_INLINE static void MultiPause() +{ +#if defined(CPU_ARCH_X86) || defined(CPU_ARCH_X64) + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); +#elif defined(CPU_ARCH_ARM64) && defined(_MSC_VER) + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); +#elif defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_ARM32) + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); +#elif defined(CPU_ARCH_RISCV64) + // Probably wrong... 
pause is optional :/ + asm volatile("fence" ::: "memory"); +#else +#pragma warning("Missing implementation") +#endif +} diff --git a/src/common/log_channels.h b/src/common/log_channels.h index af5744eac..5d06d2398 100644 --- a/src/common/log_channels.h +++ b/src/common/log_channels.h @@ -27,6 +27,7 @@ X(GPU) \ X(GPUDevice) \ X(GPUDump) \ + X(GPUThread) \ X(GPU_SW) \ X(GPU_HW) \ X(GameDatabase) \ diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 758d9afc2..98ebc33e4 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -61,10 +61,11 @@ add_library(core gpu_shadergen.h gpu_sw.cpp gpu_sw.h - gpu_sw_backend.cpp - gpu_sw_backend.h gpu_sw_rasterizer.cpp gpu_sw_rasterizer.h + gpu_thread.cpp + gpu_thread.h + gpu_thread_commands.h gpu_types.h guncon.cpp guncon.h @@ -73,8 +74,6 @@ add_library(core gte_types.h host.cpp host.h - host_interface_progress_callback.cpp - host_interface_progress_callback.h hotkeys.cpp input_types.h imgui_overlays.cpp diff --git a/src/core/achievements.cpp b/src/core/achievements.cpp index 88eb58229..3efb850ce 100644 --- a/src/core/achievements.cpp +++ b/src/core/achievements.cpp @@ -9,7 +9,9 @@ #include "bus.h" #include "cpu_core.h" #include "fullscreen_ui.h" +#include "gpu_thread.h" #include "host.h" +#include "imgui_overlays.h" #include "system.h" #include "scmversion/scmversion.h" @@ -480,7 +482,9 @@ void Achievements::UpdateGlyphRanges() std::sort(sorted_codepoints.begin(), sorted_codepoints.end()); // Compact codepoints to ranges. 
- ImGuiManager::SetEmojiFontRange(ImGuiManager::CompactFontRange(sorted_codepoints)); + GPUThread::RunOnThread([sorted_codepoints = std::move(sorted_codepoints)]() { + ImGuiManager::SetEmojiFontRange(ImGuiManager::CompactFontRange(sorted_codepoints)); + }); } bool Achievements::IsActive() @@ -1173,7 +1177,7 @@ void Achievements::ClientLoadGameCallback(int result, const char* error_message, // ensure fullscreen UI is ready for notifications if (display_summary) - FullscreenUI::Initialize(); + GPUThread::RunOnThread(&FullscreenUI::Initialize); char url_buf[URL_BUFFER_SIZE]; if (int err = rc_client_game_get_image_url(info, url_buf, std::size(url_buf)); err == RC_OK) @@ -1229,7 +1233,7 @@ void Achievements::ClearGameHash() void Achievements::DisplayAchievementSummary() { - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title; if (IsHardcoreModeActive()) @@ -1254,8 +1258,14 @@ void Achievements::DisplayAchievementSummary() summary = TRANSLATE_STR("Achievements", "This game has no achievements."); } - ImGuiFullscreen::AddNotification("achievement_summary", ACHIEVEMENT_SUMMARY_NOTIFICATION_TIME, std::move(title), - std::move(summary), s_state.game_icon); + GPUThread::RunOnThread( + [title = std::move(title), summary = std::move(summary), icon = s_state.game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievement_summary", ACHIEVEMENT_SUMMARY_NOTIFICATION_TIME, std::move(title), + std::move(summary), std::move(icon)); + }); } // Technically not going through the resource API, but since we're passing this to something else, we can't. 
@@ -1265,12 +1275,16 @@ void Achievements::DisplayAchievementSummary() void Achievements::DisplayHardcoreDeferredMessage() { - if (g_settings.achievements_hardcore_mode && !s_state.hardcore_mode && System::IsValid() && - FullscreenUI::Initialize()) + if (g_settings.achievements_hardcore_mode && !s_state.hardcore_mode && System::IsValid()) { - ImGuiFullscreen::ShowToast(std::string(), - TRANSLATE_STR("Achievements", "Hardcore mode will be enabled on system reset."), - Host::OSD_WARNING_DURATION); + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::ShowToast(std::string(), + TRANSLATE_STR("Achievements", "Hardcore mode will be enabled on system reset."), + Host::OSD_WARNING_DURATION); + }); } } @@ -1292,7 +1306,7 @@ void Achievements::HandleUnlockEvent(const rc_client_event_t* event) INFO_LOG("Achievement {} ({}) for game {} unlocked", cheevo->title, cheevo->id, s_state.game_id); UpdateGameSummary(); - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title; if (cheevo->category == RC_CLIENT_ACHIEVEMENT_CATEGORY_UNOFFICIAL) @@ -1302,9 +1316,15 @@ void Achievements::HandleUnlockEvent(const rc_client_event_t* event) std::string badge_path = GetAchievementBadgePath(cheevo, cheevo->state); - ImGuiFullscreen::AddNotification(fmt::format("achievement_unlock_{}", cheevo->id), - static_cast(g_settings.achievements_notification_duration), - std::move(title), cheevo->description, std::move(badge_path)); + GPUThread::RunOnThread([id = cheevo->id, duration = g_settings.achievements_notification_duration, + title = std::move(title), description = std::string(cheevo->description), + badge_path = std::move(badge_path)]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("achievement_unlock_{}", id), static_cast(duration), + std::move(title), std::move(description), std::move(badge_path)); + }); } if 
(g_settings.achievements_sound_effects) @@ -1316,7 +1336,7 @@ void Achievements::HandleGameCompleteEvent(const rc_client_event_t* event) INFO_LOG("Game {} complete", s_state.game_id); UpdateGameSummary(); - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title = fmt::format(TRANSLATE_FS("Achievements", "Mastered {}"), s_state.game_title); std::string message = fmt::format( @@ -1325,8 +1345,14 @@ void Achievements::HandleGameCompleteEvent(const rc_client_event_t* event) s_state.game_summary.num_unlocked_achievements), TRANSLATE_PLURAL_STR("Achievements", "%n points", "Achievement points", s_state.game_summary.points_unlocked)); - ImGuiFullscreen::AddNotification("achievement_mastery", GAME_COMPLETE_NOTIFICATION_TIME, std::move(title), - std::move(message), s_state.game_icon); + GPUThread::RunOnThread( + [title = std::move(title), message = std::move(message), icon = s_state.game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievement_mastery", GAME_COMPLETE_NOTIFICATION_TIME, std::move(title), + std::move(message), std::move(icon)); + }); } } @@ -1334,14 +1360,19 @@ void Achievements::HandleLeaderboardStartedEvent(const rc_client_event_t* event) { DEV_LOG("Leaderboard {} ({}) started", event->leaderboard->id, event->leaderboard->title); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { std::string title = event->leaderboard->title; std::string message = TRANSLATE_STR("Achievements", "Leaderboard attempt started."); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - LEADERBOARD_STARTED_NOTIFICATION_TIME, std::move(title), std::move(message), - s_state.game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = 
s_state.game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), LEADERBOARD_STARTED_NOTIFICATION_TIME, + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1349,14 +1380,19 @@ void Achievements::HandleLeaderboardFailedEvent(const rc_client_event_t* event) { DEV_LOG("Leaderboard {} ({}) failed", event->leaderboard->id, event->leaderboard->title); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { std::string title = event->leaderboard->title; std::string message = TRANSLATE_STR("Achievements", "Leaderboard attempt failed."); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - LEADERBOARD_FAILED_NOTIFICATION_TIME, std::move(title), std::move(message), - s_state.game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_state.game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), LEADERBOARD_FAILED_NOTIFICATION_TIME, + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1364,7 +1400,7 @@ void Achievements::HandleLeaderboardSubmittedEvent(const rc_client_event_t* even { DEV_LOG("Leaderboard {} ({}) submitted", event->leaderboard->id, event->leaderboard->title); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { static const char* value_strings[NUM_RC_CLIENT_LEADERBOARD_FORMATS] = { TRANSLATE_NOOP("Achievements", "Your Time: {}{}"), @@ -1380,9 +1416,14 @@ void Achievements::HandleLeaderboardSubmittedEvent(const rc_client_event_t* even event->leaderboard->tracker_value ? event->leaderboard->tracker_value : "Unknown", g_settings.achievements_spectator_mode ? 
std::string_view() : TRANSLATE_SV("Achievements", " (Submitting)")); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - static_cast(g_settings.achievements_leaderboard_duration), std::move(title), - std::move(message), s_state.game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_state.game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), + static_cast(g_settings.achievements_leaderboard_duration), + std::move(title), std::move(message), std::move(icon)); + }); } if (g_settings.achievements_sound_effects) @@ -1394,7 +1435,7 @@ void Achievements::HandleLeaderboardScoreboardEvent(const rc_client_event_t* eve DEV_LOG("Leaderboard {} scoreboard rank {} of {}", event->leaderboard_scoreboard->leaderboard_id, event->leaderboard_scoreboard->new_rank, event->leaderboard_scoreboard->num_entries); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { static const char* value_strings[NUM_RC_CLIENT_LEADERBOARD_FORMATS] = { TRANSLATE_NOOP("Achievements", "Your Time: {} (Best: {})"), @@ -1411,9 +1452,15 @@ void Achievements::HandleLeaderboardScoreboardEvent(const rc_client_event_t* eve event->leaderboard_scoreboard->submitted_score, event->leaderboard_scoreboard->best_score), event->leaderboard_scoreboard->new_rank, event->leaderboard_scoreboard->num_entries); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - static_cast(g_settings.achievements_leaderboard_duration), std::move(title), - std::move(message), s_state.game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_state.game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + 
ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), + static_cast(g_settings.achievements_leaderboard_duration), + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1543,26 +1590,30 @@ void Achievements::HandleServerDisconnectedEvent(const rc_client_event_t* event) { WARNING_LOG("Server disconnected."); - if (FullscreenUI::Initialize()) - { + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::ShowToast( TRANSLATE_STR("Achievements", "Achievements Disconnected"), TRANSLATE_STR("Achievements", "An unlock request could not be completed. We will keep retrying to submit this request."), Host::OSD_ERROR_DURATION); - } + }); } void Achievements::HandleServerReconnectedEvent(const rc_client_event_t* event) { WARNING_LOG("Server reconnected."); - if (FullscreenUI::Initialize()) - { + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::ShowToast(TRANSLATE_STR("Achievements", "Achievements Reconnected"), TRANSLATE_STR("Achievements", "All pending unlock requests have completed."), Host::OSD_INFO_DURATION); - } + }); } void Achievements::ResetClient() @@ -1640,12 +1691,17 @@ void Achievements::SetHardcoreMode(bool enabled, bool force_display_message) // new mode s_state.hardcore_mode = enabled; - if (System::IsValid() && (HasActiveGame() || force_display_message) && FullscreenUI::Initialize()) + if (System::IsValid() && (HasActiveGame() || force_display_message)) { - ImGuiFullscreen::ShowToast(std::string(), - enabled ? TRANSLATE_STR("Achievements", "Hardcore mode is now enabled.") : - TRANSLATE_STR("Achievements", "Hardcore mode is now disabled."), - Host::OSD_INFO_DURATION); + GPUThread::RunOnThread([enabled]() { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::ShowToast(std::string(), + enabled ? 
TRANSLATE_STR("Achievements", "Hardcore mode is now enabled.") : + TRANSLATE_STR("Achievements", "Hardcore mode is now disabled."), + Host::OSD_INFO_DURATION); + }); } rc_client_set_hardcore_enabled(s_state.client, enabled); @@ -1687,8 +1743,15 @@ bool Achievements::DoState(StateWrapper& sw) // before deserializing, otherwise that state's going to get lost. if (!IsUsingRAIntegration() && s_state.load_game_request) { - Host::DisplayLoadingScreen("Downloading achievements data..."); + // Messy because GPU-thread, but at least it looks pretty. + GPUThread::RunOnThread([]() { + FullscreenUI::OpenLoadingScreen(ImGuiManager::LOGO_IMAGE_NAME, + TRANSLATE_SV("Achievements", "Downloading achievements data...")); + }); + s_state.http_downloader->WaitForAllRequests(); + + GPUThread::RunOnThread([]() { FullscreenUI::CloseLoadingScreen(); }); } u32 data_size = 0; @@ -1957,7 +2020,7 @@ void Achievements::ShowLoginNotification() if (!user) return; - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string badge_path = GetLoggedInUserBadgePath(); std::string title = user->display_name; @@ -1966,8 +2029,14 @@ void Achievements::ShowLoginNotification() std::string summary = fmt::format(TRANSLATE_FS("Achievements", "Score: {} ({} softcore)\nUnread messages: {}"), user->score, user->score_softcore, user->num_unread_messages); - ImGuiFullscreen::AddNotification("achievements_login", LOGIN_NOTIFICATION_TIME, std::move(title), - std::move(summary), std::move(badge_path)); + GPUThread::RunOnThread( + [title = std::move(title), summary = std::move(summary), badge_path = std::move(badge_path)]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievements_login", LOGIN_NOTIFICATION_TIME, std::move(title), + std::move(summary), std::move(badge_path)); + }); } } @@ -2066,6 +2135,15 @@ bool Achievements::ConfirmHardcoreModeDisable(const char* trigger) void 
Achievements::ConfirmHardcoreModeDisableAsync(const char* trigger, std::function callback) { + auto real_callback = [callback = std::move(callback)](bool res) mutable { + // don't run the callback in the middle of rendering the UI + Host::RunOnCPUThread([callback = std::move(callback), res]() { + if (res) + DisableHardcoreMode(); + callback(res); + }); + }; + #ifndef __ANDROID__ #ifdef ENABLE_RAINTEGRATION if (IsUsingRAIntegration()) @@ -2076,34 +2154,32 @@ void Achievements::ConfirmHardcoreModeDisableAsync(const char* trigger, std::fun } #endif - if (!FullscreenUI::Initialize()) - { - Host::AddOSDMessage(fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), - Host::OSD_WARNING_DURATION); - callback(false); - return; - } + GPUThread::RunOnThread([trigger = std::string(trigger), real_callback = std::move(real_callback)]() mutable { + if (!FullscreenUI::Initialize()) + { + Host::AddOSDMessage( + fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), + Host::OSD_WARNING_DURATION); + real_callback(false); + return; + } - auto real_callback = [callback = std::move(callback)](bool res) mutable { - // don't run the callback in the middle of rendering the UI - Host::RunOnCPUThread([callback = std::move(callback), res]() { - if (res) - DisableHardcoreMode(); - callback(res); - }); - }; - - ImGuiFullscreen::OpenConfirmMessageDialog( - TRANSLATE_STR("Achievements", "Confirm Hardcore Mode"), - fmt::format(TRANSLATE_FS("Achievements", "{0} cannot be performed while hardcore mode is active. Do you " - "want to disable hardcore mode? 
{0} will be cancelled if you select No."), - trigger), - std::move(real_callback), fmt::format(ICON_FA_CHECK " {}", TRANSLATE_SV("Achievements", "Yes")), - fmt::format(ICON_FA_TIMES " {}", TRANSLATE_SV("Achievements", "No"))); + ImGuiFullscreen::OpenConfirmMessageDialog( + TRANSLATE_STR("Achievements", "Confirm Hardcore Mode"), + fmt::format(TRANSLATE_FS("Achievements", + "{0} cannot be performed while hardcore mode is active. Do you " + "want to disable hardcore mode? {0} will be cancelled if you select No."), + trigger), + std::move(real_callback), fmt::format(ICON_FA_CHECK " {}", TRANSLATE_SV("Achievements", "Yes")), + fmt::format(ICON_FA_TIMES " {}", TRANSLATE_SV("Achievements", "No"))); + }); #else - Host::AddOSDMessage(fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), - Host::OSD_WARNING_DURATION); - callback(false); + Host::ConfirmMessageAsync( + TRANSLATE_STR("Achievements", "Confirm Hardcore Mode"), + fmt::format(TRANSLATE_FS("Achievements", "{0} cannot be performed while hardcore mode is active. Do you want to " + "disable hardcore mode? 
{0} will be cancelled if you select No."), + trigger), + std::move(real_callback)); #endif } diff --git a/src/core/cdrom.cpp b/src/core/cdrom.cpp index 1aadd966f..b4fd51cd4 100644 --- a/src/core/cdrom.cpp +++ b/src/core/cdrom.cpp @@ -5,8 +5,8 @@ #include "cdrom_async_reader.h" #include "cdrom_subq_replacement.h" #include "dma.h" +#include "fullscreen_ui.h" #include "host.h" -#include "host_interface_progress_callback.h" #include "interrupt_controller.h" #include "settings.h" #include "spu.h" @@ -999,7 +999,7 @@ bool CDROM::PrecacheMedia() return false; } - HostInterfaceProgressCallback callback; + LoadingScreenProgressCallback callback; if (!s_reader.Precache(&callback)) { Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Precaching CD image failed, it may be unreliable."), diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index 029dc81dc..5ea3ff393 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -40,14 +40,13 @@ - + - @@ -119,8 +118,9 @@ - + + @@ -129,7 +129,6 @@ - diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 32cdd1889..b279d9b70 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -33,13 +33,11 @@ - - @@ -67,6 +65,7 @@ + @@ -102,7 +101,6 @@ - @@ -111,7 +109,6 @@ - @@ -143,6 +140,8 @@ + + diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index 44e3fc9ae..7050fe5e0 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -8,7 +8,9 @@ #include "controller.h" #include "game_list.h" #include "gpu.h" +#include "gpu_thread.h" #include "host.h" +#include "imgui_overlays.h" #include "settings.h" #include "system.h" #include "system_private.h" @@ -203,6 +205,7 @@ struct PostProcessingStageInfo ////////////////////////////////////////////////////////////////////////// // Main ////////////////////////////////////////////////////////////////////////// +static void UpdateRunIdleState(); static void PauseForMenuOpen(bool set_pause_menu_open); static 
bool AreAnyDialogsOpen(); static void ClosePauseMenu(); @@ -602,12 +605,12 @@ bool FullscreenUI::Initialize() s_state.about_window_open = false; s_state.hotkey_list_cache = InputManager::GetHotkeyList(); - if (!System::IsValid()) + Host::RunOnCPUThread([]() { Host::OnFullscreenUIStartedOrStopped(true); }); + + if (!GPUThread::HasGPUBackend() && !GPUThread::IsGPUBackendRequested()) SwitchToLanding(); - if (!System::IsRunning()) - Host::OnIdleStateChanged(); - + UpdateRunIdleState(); ForceKeyNavEnabled(); return true; } @@ -631,6 +634,7 @@ bool FullscreenUI::AreAnyDialogsOpen() void FullscreenUI::CheckForConfigChanges(const Settings& old_settings) { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; @@ -638,55 +642,98 @@ void FullscreenUI::CheckForConfigChanges(const Settings& old_settings) // That means we're going to be reading achievement state. if (old_settings.achievements_enabled && !g_settings.achievements_enabled) { - if (s_state.current_main_window == MainWindowType::Achievements || - s_state.current_main_window == MainWindowType::Leaderboards) - ReturnToPreviousWindow(); + if (!IsInitialized()) + return; + + GPUThread::RunOnThread([]() { + if (s_state.current_main_window == MainWindowType::Achievements || + s_state.current_main_window == MainWindowType::Leaderboards) + { + ReturnToPreviousWindow(); + } + }); } } +void FullscreenUI::UpdateRunIdleState() +{ + const bool new_run_idle = HasActiveWindow(); + GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::FullscreenUIActive, new_run_idle); +} + void FullscreenUI::OnSystemStarted() { + // NOTE: Called on CPU thread. 
if (!IsInitialized()) return; - s_state.current_main_window = MainWindowType::None; - QueueResetFocus(FocusResetType::ViewChanged); -} + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; -void FullscreenUI::OnSystemPaused() -{ - // noop + s_state.current_main_window = MainWindowType::None; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + }); } void FullscreenUI::OnSystemResumed() { - // get rid of pause menu if we unpaused another way - if (s_state.current_main_window == MainWindowType::PauseMenu) - ClosePauseMenu(); + // NOTE: Called on CPU thread. + if (!IsInitialized()) + return; + + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; + + // get rid of pause menu if we unpaused another way + if (s_state.current_main_window == MainWindowType::PauseMenu) + ClosePauseMenu(); + + UpdateRunIdleState(); + }); } void FullscreenUI::OnSystemDestroyed() { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; - s_state.pause_menu_was_open = false; - s_state.was_paused_on_quick_menu_open = false; - s_state.current_pause_submenu = PauseSubMenu::None; - SwitchToLanding(); + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; + + s_state.pause_menu_was_open = false; + s_state.was_paused_on_quick_menu_open = false; + s_state.current_pause_submenu = PauseSubMenu::None; + SwitchToLanding(); + UpdateRunIdleState(); + }); } void FullscreenUI::OnRunningGameChanged() { + // NOTE: Called on CPU thread. 
if (!IsInitialized()) return; const std::string& path = System::GetDiscPath(); const std::string& serial = System::GetGameSerial(); + + std::string subtitle; if (!serial.empty()) - s_state.current_game_subtitle = fmt::format("{0} - {1}", serial, Path::GetFileName(path)); + subtitle = fmt::format("{0} - {1}", serial, Path::GetFileName(path)); else - s_state.current_game_subtitle = {}; + subtitle = {}; + + GPUThread::RunOnThread([subtitle = std::move(subtitle)]() mutable { + if (!IsInitialized()) + return; + + s_state.current_game_subtitle = std::move(subtitle); + }); } void FullscreenUI::PauseForMenuOpen(bool set_pause_menu_open) @@ -703,15 +750,18 @@ void FullscreenUI::OpenPauseMenu() if (!System::IsValid()) return; - if (!Initialize() || s_state.current_main_window != MainWindowType::None) - return; + GPUThread::RunOnThread([]() { + if (!Initialize() || s_state.current_main_window != MainWindowType::None) + return; - PauseForMenuOpen(true); - s_state.current_main_window = MainWindowType::PauseMenu; - s_state.current_pause_submenu = PauseSubMenu::None; - QueueResetFocus(FocusResetType::ViewChanged); - ForceKeyNavEnabled(); - FixStateIfPaused(); + PauseForMenuOpen(true); + s_state.current_main_window = MainWindowType::PauseMenu; + s_state.current_pause_submenu = PauseSubMenu::None; + QueueResetFocus(FocusResetType::ViewChanged); + ForceKeyNavEnabled(); + UpdateRunIdleState(); + FixStateIfPaused(); + }); } void FullscreenUI::OpenCheatsMenu() @@ -725,41 +775,39 @@ void FullscreenUI::OpenCheatsMenu() s_state.settings_page = SettingsPage::Cheats; PauseForMenuOpen(true); ForceKeyNavEnabled(); + UpdateRunIdleState(); FixStateIfPaused(); } void FullscreenUI::FixStateIfPaused() { - if (!System::IsValid() || System::IsRunning()) + if (!GPUThread::HasGPUBackend() || System::IsRunning()) return; // When we're paused, we won't have trickled the key up event for escape yet. Do it now. 
ImGui::UpdateInputEvents(false); - - Host::OnIdleStateChanged(); - Host::RunOnCPUThread([]() { - if (System::IsValid()) - { - // Why twice? To clear the "wants keyboard input" flag. - System::InvalidateDisplay(); - System::InvalidateDisplay(); - } - }); } void FullscreenUI::ClosePauseMenu() { - if (!IsInitialized() || !System::IsValid()) + if (!System::IsValid()) return; - if (System::GetState() == System::State::Paused && !s_state.was_paused_on_quick_menu_open) - Host::RunOnCPUThread([]() { System::PauseSystem(false); }); + const bool paused = System::IsPaused(); + GPUThread::RunOnThread([paused]() { + if (!IsInitialized()) + return; - s_state.current_main_window = MainWindowType::None; - s_state.current_pause_submenu = PauseSubMenu::None; - s_state.pause_menu_was_open = false; - QueueResetFocus(FocusResetType::ViewChanged); - FixStateIfPaused(); + if (paused && !s_state.was_paused_on_quick_menu_open) + Host::RunOnCPUThread([]() { System::PauseSystem(false); }); + + s_state.current_main_window = MainWindowType::None; + s_state.current_pause_submenu = PauseSubMenu::None; + s_state.pause_menu_was_open = false; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + FixStateIfPaused(); + }); } void FullscreenUI::OpenPauseSubMenu(PauseSubMenu submenu) @@ -790,14 +838,21 @@ void FullscreenUI::Shutdown() s_state.current_game_subtitle = {}; DestroyResources(); ImGuiFullscreen::Shutdown(); + if (s_state.initialized) + Host::RunOnCPUThread([]() { Host::OnFullscreenUIStartedOrStopped(false); }); + s_state.initialized = false; s_state.tried_to_initialize = false; + UpdateRunIdleState(); } void FullscreenUI::Render() { if (!s_state.initialized) + { + ImGuiFullscreen::RenderLoadingScreen(); return; + } ImGuiFullscreen::UploadAsyncTextures(); @@ -856,6 +911,8 @@ void FullscreenUI::Render() ImGuiFullscreen::EndLayout(); + ImGuiFullscreen::RenderLoadingScreen(); + if (s_state.settings_changed.load(std::memory_order_relaxed)) { Host::CommitBaseSettingChanges(); 
@@ -889,7 +946,7 @@ void FullscreenUI::Render() } } - if (System::IsValid()) + if (GPUThread::HasGPUBackend()) Host::RunOnCPUThread([]() { System::ReloadGameSettings(false); }); } s_state.game_settings_changed.store(false, std::memory_order_release); @@ -908,7 +965,7 @@ void FullscreenUI::InvalidateCoverCache() void FullscreenUI::ReturnToPreviousWindow() { - if (System::IsValid() && s_state.pause_menu_was_open) + if (GPUThread::HasGPUBackend() && s_state.pause_menu_was_open) { s_state.current_main_window = MainWindowType::PauseMenu; QueueResetFocus(FocusResetType::ViewChanged); @@ -922,7 +979,8 @@ void FullscreenUI::ReturnToPreviousWindow() void FullscreenUI::ReturnToMainWindow() { ClosePauseMenu(); - s_state.current_main_window = System::IsValid() ? MainWindowType::None : MainWindowType::Landing; + s_state.current_main_window = GPUThread::HasGPUBackend() ? MainWindowType::None : MainWindowType::Landing; + UpdateRunIdleState(); FixStateIfPaused(); } @@ -958,9 +1016,14 @@ ImGuiFullscreen::FileSelectorFilters FullscreenUI::GetDiscImageFilters() void FullscreenUI::DoStartPath(std::string path, std::string state, std::optional fast_boot) { - if (System::IsValid()) + if (GPUThread::HasGPUBackend()) return; + // Switch to nothing, we'll get called back via OnSystemDestroyed() if startup fails. 
+ s_state.current_main_window = MainWindowType::None; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + SystemBootParameters params; params.filename = std::move(path); params.save_state = std::move(state); @@ -1155,108 +1218,116 @@ void FullscreenUI::DoChangeDiscFromFile() void FullscreenUI::DoChangeDisc() { - ImGuiFullscreen::ChoiceDialogOptions options; + Host::RunOnCPUThread([]() { + ImGuiFullscreen::ChoiceDialogOptions options; - if (System::HasMediaSubImages()) - { - const u32 current_index = System::GetMediaSubImageIndex(); - const u32 count = System::GetMediaSubImageCount(); - options.reserve(count + 1); - options.emplace_back(FSUI_STR("From File..."), false); - - for (u32 i = 0; i < count; i++) - options.emplace_back(System::GetMediaSubImageTitle(i), i == current_index); - - auto callback = [](s32 index, const std::string& title, bool checked) { - if (index == 0) - { - CloseChoiceDialog(); - DoChangeDiscFromFile(); - return; - } - else if (index > 0) - { - System::SwitchMediaSubImage(static_cast(index - 1)); - } - - CloseChoiceDialog(); - ReturnToPreviousWindow(); - }; - - OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), - std::move(callback)); - - return; - } - - if (const GameDatabase::Entry* entry = System::GetGameDatabaseEntry(); entry && !entry->disc_set_serials.empty()) - { - const auto lock = GameList::GetLock(); - auto matches = GameList::GetMatchingEntriesForSerial(entry->disc_set_serials); - if (matches.size() > 1) + if (System::HasMediaSubImages()) { - options.reserve(matches.size() + 1); + const u32 current_index = System::GetMediaSubImageIndex(); + const u32 count = System::GetMediaSubImageCount(); + options.reserve(count + 1); options.emplace_back(FSUI_STR("From File..."), false); - std::vector paths; - paths.reserve(matches.size()); + for (u32 i = 0; i < count; i++) + options.emplace_back(System::GetMediaSubImageTitle(i), i == current_index); - const std::string& 
current_path = System::GetDiscPath(); - for (auto& [title, glentry] : matches) - { - options.emplace_back(std::move(title), current_path == glentry->path); - paths.push_back(glentry->path); - } + GPUThread::RunOnThread([options = std::move(options)]() mutable { + auto callback = [](s32 index, const std::string& title, bool checked) { + if (index == 0) + { + CloseChoiceDialog(); + DoChangeDiscFromFile(); + return; + } + else if (index > 0) + { + System::SwitchMediaSubImage(static_cast(index - 1)); + } - auto callback = [paths = std::move(paths)](s32 index, const std::string& title, bool checked) { - if (index == 0) - { CloseChoiceDialog(); - DoChangeDiscFromFile(); - return; - } - else if (index > 0) - { - System::InsertMedia(paths[index - 1].c_str()); - } + ReturnToPreviousWindow(); + }; - CloseChoiceDialog(); - ReturnToMainWindow(); - }; - - OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), - std::move(callback)); + OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), + std::move(callback)); + }); return; } - } - DoChangeDiscFromFile(); + if (const GameDatabase::Entry* entry = System::GetGameDatabaseEntry(); entry && !entry->disc_set_serials.empty()) + { + const auto lock = GameList::GetLock(); + auto matches = GameList::GetMatchingEntriesForSerial(entry->disc_set_serials); + if (matches.size() > 1) + { + options.reserve(matches.size() + 1); + options.emplace_back(FSUI_STR("From File..."), false); + + std::vector paths; + paths.reserve(matches.size()); + + const std::string& current_path = System::GetDiscPath(); + for (auto& [title, glentry] : matches) + { + options.emplace_back(std::move(title), current_path == glentry->path); + paths.push_back(glentry->path); + } + + GPUThread::RunOnThread([options = std::move(options), paths = std::move(paths)]() mutable { + auto callback = [paths = std::move(paths)](s32 index, const std::string& title, bool checked) { + if (index 
== 0) + { + CloseChoiceDialog(); + DoChangeDiscFromFile(); + return; + } + else if (index > 0) + { + System::InsertMedia(paths[index - 1].c_str()); + } + + CloseChoiceDialog(); + ReturnToMainWindow(); + }; + + OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), + std::move(callback)); + }); + + return; + } + } + + GPUThread::RunOnThread([]() { DoChangeDiscFromFile(); }); + }); } void FullscreenUI::DoToggleAnalogMode() { // hacky way to toggle analog mode - for (u32 i = 0; i < NUM_CONTROLLER_AND_CARD_PORTS; i++) - { - Controller* ctrl = System::GetController(i); - if (!ctrl) - continue; - - const Controller::ControllerInfo* cinfo = Controller::GetControllerInfo(ctrl->GetType()); - if (!cinfo) - continue; - - for (const Controller::ControllerBindingInfo& bi : cinfo->bindings) + Host::RunOnCPUThread([]() { + for (u32 i = 0; i < NUM_CONTROLLER_AND_CARD_PORTS; i++) { - if (std::strcmp(bi.name, "Analog") == 0) + Controller* ctrl = System::GetController(i); + if (!ctrl) + continue; + + const Controller::ControllerInfo* cinfo = Controller::GetControllerInfo(ctrl->GetType()); + if (!cinfo) + continue; + + for (const Controller::ControllerBindingInfo& bi : cinfo->bindings) { - ctrl->SetBindState(bi.bind_index, 1.0f); - ctrl->SetBindState(bi.bind_index, 0.0f); - break; + if (std::strcmp(bi.name, "Analog") == 0) + { + ctrl->SetBindState(bi.bind_index, 1.0f); + ctrl->SetBindState(bi.bind_index, 0.0f); + break; + } } } - } + }); } void FullscreenUI::DoRequestExit() @@ -2857,7 +2928,7 @@ void FullscreenUI::DrawSettingsWindow() (LayoutScale(LAYOUT_MENU_BUTTON_Y_PADDING) * 2.0f) + LayoutScale(2.0f)); const float bg_alpha = - System::IsValid() ? (s_state.settings_page == SettingsPage::PostProcessing ? 0.50f : 0.90f) : 1.0f; + GPUThread::HasGPUBackend() ? (s_state.settings_page == SettingsPage::PostProcessing ? 
0.50f : 0.90f) : 1.0f; if (BeginFullscreenWindow(ImVec2(0.0f, 0.0f), heading_size, "settings_category", ImVec4(UIStyle.PrimaryColor.x, UIStyle.PrimaryColor.y, UIStyle.PrimaryColor.z, bg_alpha))) @@ -3802,12 +3873,9 @@ void FullscreenUI::DrawControllerSettingsPage() &Settings::GetMultitapModeName, &Settings::GetMultitapModeDisplayName, MultitapMode::Count); // load mtap settings - MultitapMode mtap_mode = g_settings.multitap_mode; - if (IsEditingGameSettings(bsi)) - { - mtap_mode = Settings::ParseMultitapModeName(bsi->GetTinyStringValue("ControllerPorts", "MultitapMode", "").c_str()) - .value_or(g_settings.multitap_mode); - } + const MultitapMode mtap_mode = + Settings::ParseMultitapModeName(bsi->GetTinyStringValue("ControllerPorts", "MultitapMode", "").c_str()) + .value_or(Settings::DEFAULT_MULTITAP_MODE); const std::array mtap_enabled = { {(mtap_mode == MultitapMode::Port1Only || mtap_mode == MultitapMode::BothPorts), (mtap_mode == MultitapMode::Port2Only || mtap_mode == MultitapMode::BothPorts)}}; @@ -4674,7 +4742,7 @@ void FullscreenUI::DrawPostProcessingSettingsPage() FSUI_CSTR("Reloads the shaders from disk, applying any changes."), bsi->GetBoolValue("PostProcessing", "Enabled", false))) { - if (System::IsValid() && PostProcessing::ReloadShaders()) + if (GPUThread::HasGPUBackend() && PostProcessing::ReloadShaders()) ShowToast(std::string(), FSUI_STR("Post-processing shaders reloaded.")); } @@ -5110,7 +5178,7 @@ void FullscreenUI::DrawAchievementsSettingsPage() "cheats, and slowdown functions."), "Cheevos", "ChallengeMode", false, enabled)) { - if (System::IsValid() && bsi->GetBoolValue("Cheevos", "ChallengeMode", false)) + if (GPUThread::HasGPUBackend() && bsi->GetBoolValue("Cheevos", "ChallengeMode", false)) ShowToast(std::string(), FSUI_STR("Hardcore mode will be enabled on next game restart.")); } DrawToggleSetting( @@ -5272,28 +5340,30 @@ void FullscreenUI::DrawAchievementsLoginWindow() 0, 0, 0); Host::RunOnCPUThread([username = std::string(username), 
password = std::string(password)]() { - ImGuiFullscreen::CloseBackgroundProgressDialog(LOGIN_PROGRESS_NAME); - Error error; - if (Achievements::Login(username.c_str(), password.c_str(), &error)) - { - // TODO-GPU-THREAD: Synchronize access to s_achievements_login_window_open. - actually_close_popup(); - return; - } + const bool result = Achievements::Login(username.c_str(), password.c_str(), &error); + GPUThread::RunOnThread([result, error = std::move(error)]() { + ImGuiFullscreen::CloseBackgroundProgressDialog(LOGIN_PROGRESS_NAME); - // keep popup open on failure - // because of the whole popup stack thing, we need to hide the dialog while this popup is visible - s_state.achievements_login_window_open = false; - ImGuiFullscreen::OpenInfoMessageDialog( - FSUI_STR("Login Error"), - fmt::format(FSUI_FSTR("Login Failed.\nError: {}\nPlease check your username and password, and try again."), - error.GetDescription()), - []() { - s_state.achievements_login_window_open = true; - QueueResetFocus(FocusResetType::PopupOpened); - }, - FSUI_ICONSTR(ICON_FA_TIMES, "Close")); + if (result) + { + actually_close_popup(); + return; + } + + // keep popup open on failure + // because of the whole popup stack thing, we need to hide the dialog while this popup is visible + s_state.achievements_login_window_open = false; + ImGuiFullscreen::OpenInfoMessageDialog( + FSUI_STR("Login Error"), + fmt::format(FSUI_FSTR("Login Failed.\nError: {}\nPlease check your username and password, and try again."), + error.GetDescription()), + []() { + s_state.achievements_login_window_open = true; + QueueResetFocus(FocusResetType::PopupOpened); + }, + FSUI_ICONSTR(ICON_FA_TIMES, "Close")); + }); }); } @@ -5804,7 +5874,7 @@ void FullscreenUI::DrawPauseMenu() case PauseSubMenu::None: { // NOTE: Menu close must come first, because otherwise VM destruction options will race. 
- const bool has_game = System::IsValid() && !System::GetGameSerial().empty(); + const bool has_game = GPUThread::HasGPUBackend() && !System::GetGameSerial().empty(); if (DefaultActiveButton(FSUI_ICONSTR(ICON_FA_PLAY, "Resume Game"), false) || WantsToCloseMenu()) ClosePauseMenu(); @@ -6667,7 +6737,7 @@ void FullscreenUI::DrawGameListWindow() ImVec2(io.DisplaySize.x, LayoutScale(LAYOUT_MENU_BUTTON_HEIGHT_NO_SUMMARY) + (LayoutScale(LAYOUT_MENU_BUTTON_Y_PADDING) * 2.0f) + LayoutScale(2.0f)); - const float bg_alpha = System::IsValid() ? 0.90f : 1.0f; + const float bg_alpha = GPUThread::HasGPUBackend() ? 0.90f : 1.0f; if (BeginFullscreenWindow(ImVec2(0.0f, 0.0f), heading_size, "gamelist_view", MulAlpha(UIStyle.PrimaryColor, bg_alpha))) @@ -7217,7 +7287,7 @@ void FullscreenUI::DrawGameListSettingsWindow() ImVec2(io.DisplaySize.x, LayoutScale(LAYOUT_MENU_BUTTON_HEIGHT_NO_SUMMARY) + (LayoutScale(LAYOUT_MENU_BUTTON_Y_PADDING) * 2.0f) + LayoutScale(2.0f)); - const float bg_alpha = System::IsValid() ? 0.90f : 1.0f; + const float bg_alpha = GPUThread::HasGPUBackend() ? 
0.90f : 1.0f; if (BeginFullscreenWindow(ImVec2(0.0f, 0.0f), heading_size, "gamelist_view", MulAlpha(UIStyle.PrimaryColor, bg_alpha))) @@ -7533,31 +7603,36 @@ void FullscreenUI::DrawAboutWindow() void FullscreenUI::OpenAchievementsWindow() { + if (!System::IsValid()) + return; + if (!Achievements::IsActive()) { Host::AddKeyedOSDMessage("achievements_disabled", FSUI_STR("Achievements are not enabled."), Host::OSD_INFO_DURATION); return; } - - if (!System::IsValid() || !Initialize()) - return; - - if (!Achievements::HasAchievements() || !Achievements::PrepareAchievementsWindow()) + else if (!Achievements::HasAchievements()) { ShowToast(std::string(), FSUI_STR("This game has no achievements.")); return; } - if (s_state.current_main_window != MainWindowType::PauseMenu) - { - PauseForMenuOpen(false); - ForceKeyNavEnabled(); - } + GPUThread::RunOnThread([]() { + if (!Initialize() || !Achievements::PrepareAchievementsWindow()) + return; - s_state.current_main_window = MainWindowType::Achievements; - QueueResetFocus(FocusResetType::ViewChanged); - FixStateIfPaused(); + if (s_state.current_main_window != MainWindowType::PauseMenu) + { + PauseForMenuOpen(false); + ForceKeyNavEnabled(); + } + + s_state.current_main_window = MainWindowType::Achievements; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + FixStateIfPaused(); + }); } bool FullscreenUI::IsAchievementsWindowOpen() @@ -7567,31 +7642,36 @@ bool FullscreenUI::IsAchievementsWindowOpen() void FullscreenUI::OpenLeaderboardsWindow() { + if (!System::IsValid()) + return; + if (!Achievements::IsActive()) { Host::AddKeyedOSDMessage("achievements_disabled", FSUI_STR("Leaderboards are not enabled."), Host::OSD_INFO_DURATION); return; } - - if (!System::IsValid() || !Initialize()) - return; - - if (!Achievements::HasLeaderboards() || !Achievements::PrepareLeaderboardsWindow()) + else if (!Achievements::HasLeaderboards()) { ShowToast(std::string(), FSUI_STR("This game has no leaderboards.")); return; } - 
if (s_state.current_main_window != MainWindowType::PauseMenu) - { - PauseForMenuOpen(false); - ForceKeyNavEnabled(); - } + GPUThread::RunOnThread([]() { + if (!Initialize() || !Achievements::PrepareLeaderboardsWindow()) + return; - s_state.current_main_window = MainWindowType::Leaderboards; - QueueResetFocus(FocusResetType::ViewChanged); - FixStateIfPaused(); + if (s_state.current_main_window != MainWindowType::PauseMenu) + { + PauseForMenuOpen(false); + ForceKeyNavEnabled(); + } + + s_state.current_main_window = MainWindowType::Leaderboards; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + FixStateIfPaused(); + }); } bool FullscreenUI::IsLeaderboardsWindowOpen() @@ -7601,6 +7681,155 @@ bool FullscreenUI::IsLeaderboardsWindowOpen() #endif // __ANDROID__ +LoadingScreenProgressCallback::LoadingScreenProgressCallback() + : ProgressCallback(), m_open_time(Timer::GetCurrentValue()), m_on_gpu_thread(GPUThread::IsOnThread()) +{ +} + +LoadingScreenProgressCallback::~LoadingScreenProgressCallback() +{ + // Did we activate? + if (m_last_progress_percent < 0) + return; + + if (!m_on_gpu_thread) + { + GPUThread::RunOnThread([]() { + ImGuiFullscreen::CloseLoadingScreen(); + Assert(GPUThread::GetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive)); + GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive, false); + }); + } + else + { + // since this was pushing frames, we need to restore the context + GPUThread::Internal::RestoreContextAfterPresent(); + } +} + +void LoadingScreenProgressCallback::PushState() +{ + ProgressCallback::PushState(); +} + +void LoadingScreenProgressCallback::PopState() +{ + ProgressCallback::PopState(); + Redraw(true); +} + +void LoadingScreenProgressCallback::SetCancellable(bool cancellable) +{ + ProgressCallback::SetCancellable(cancellable); + Redraw(true); +} + +void LoadingScreenProgressCallback::SetTitle(const std::string_view title) +{ + // todo? 
+} + +void LoadingScreenProgressCallback::SetStatusText(const std::string_view text) +{ + ProgressCallback::SetStatusText(text); + Redraw(true); +} + +void LoadingScreenProgressCallback::SetProgressRange(u32 range) +{ + u32 last_range = m_progress_range; + + ProgressCallback::SetProgressRange(range); + + if (m_progress_range != last_range) + Redraw(false); +} + +void LoadingScreenProgressCallback::SetProgressValue(u32 value) +{ + u32 lastValue = m_progress_value; + + ProgressCallback::SetProgressValue(value); + + if (m_progress_value != lastValue) + Redraw(false); +} + +void LoadingScreenProgressCallback::Redraw(bool force) +{ + if (m_last_progress_percent < 0 && + Timer::ConvertValueToSeconds(Timer::GetCurrentValue() - m_open_time) < m_open_delay) + { + return; + } + + const int percent = + static_cast((static_cast(m_progress_value) / static_cast(m_progress_range)) * 100.0f); + DebugAssert(percent >= 0); + if (percent == m_last_progress_percent && !force) + return; + + // activation? + if (m_last_progress_percent < 0 && !m_on_gpu_thread) + { + GPUThread::RunOnThread([]() { + Assert(!GPUThread::GetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive)); + GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive, true); + }); + } + + m_last_progress_percent = percent; + if (m_on_gpu_thread) + { + ImGuiFullscreen::RenderLoadingScreen(ImGuiManager::LOGO_IMAGE_NAME, m_status_text, 0, + static_cast(m_progress_range), static_cast(m_progress_value)); + } + else + { + GPUThread::RunOnThread([status_text = SmallString(std::string_view(m_status_text)), + range = static_cast(m_progress_range), value = static_cast(m_progress_value)]() { + ImGuiFullscreen::OpenOrUpdateLoadingScreen(ImGuiManager::LOGO_IMAGE_NAME, status_text, 0, range, value); + }); + } +} + +void LoadingScreenProgressCallback::ModalError(const std::string_view message) +{ + ERROR_LOG(message); + Host::ReportErrorAsync("Error", message); +} + +bool 
LoadingScreenProgressCallback::ModalConfirmation(const std::string_view message) +{ + INFO_LOG(message); + return Host::ConfirmMessage("Confirm", message); +} + +void FullscreenUI::OpenLoadingScreen(std::string_view image, std::string_view message, s32 progress_min /*= -1*/, + s32 progress_max /*= -1*/, s32 progress_value /*= -1*/) +{ + Assert(GPUThread::IsOnThread()); + Assert(!GPUThread::GetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive)); + GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive, true); + ImGuiFullscreen::OpenOrUpdateLoadingScreen(image, message, progress_min, progress_max, progress_value); +} + +void FullscreenUI::UpdateLoadingScreen(std::string_view image, std::string_view message, s32 progress_min /*= -1*/, + s32 progress_max /*= -1*/, s32 progress_value /*= -1*/) +{ + Assert(GPUThread::IsOnThread()); + Assert(GPUThread::GetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive)); + ImGuiFullscreen::OpenOrUpdateLoadingScreen(image, message, progress_min, progress_max, progress_value); +} + +void FullscreenUI::CloseLoadingScreen() +{ + Assert(GPUThread::IsOnThread()); + Assert(GPUThread::GetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive)); + ImGuiFullscreen::CloseLoadingScreen(); + GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::LoadingScreenActive, false); +} + ///////////////////////////////////////////////////////////////////////////////////////////////////////////// // Translation String Area // To avoid having to type T_RANSLATE("FullscreenUI", ...) 
everywhere, we use the shorter macros at the top diff --git a/src/core/fullscreen_ui.h b/src/core/fullscreen_ui.h index 9e8405442..e84a00b81 100644 --- a/src/core/fullscreen_ui.h +++ b/src/core/fullscreen_ui.h @@ -21,7 +21,6 @@ bool IsInitialized(); bool HasActiveWindow(); void CheckForConfigChanges(const Settings& old_settings); void OnSystemStarted(); -void OnSystemPaused(); void OnSystemResumed(); void OnSystemDestroyed(); void OnRunningGameChanged(); @@ -43,13 +42,51 @@ void Render(); void InvalidateCoverCache(); void TimeToPrintableString(SmallStringBase* str, time_t t); +void OpenLoadingScreen(std::string_view image, std::string_view message, s32 progress_min = -1, s32 progress_max = -1, + s32 progress_value = -1); +void UpdateLoadingScreen(std::string_view image, std::string_view message, s32 progress_min = -1, s32 progress_max = -1, + s32 progress_value = -1); +void CloseLoadingScreen(); + } // namespace FullscreenUI +class LoadingScreenProgressCallback final : public ProgressCallback +{ +public: + LoadingScreenProgressCallback(); + ~LoadingScreenProgressCallback() override; + + ALWAYS_INLINE void SetOpenDelay(float delay) { m_open_delay = delay; } + + void PushState() override; + void PopState() override; + + void SetCancellable(bool cancellable) override; + void SetTitle(const std::string_view title) override; + void SetStatusText(const std::string_view text) override; + void SetProgressRange(u32 range) override; + void SetProgressValue(u32 value) override; + + void ModalError(const std::string_view message) override; + bool ModalConfirmation(const std::string_view message) override; + +private: + void Redraw(bool force); + + u64 m_open_time = 0; + float m_open_delay = 1.0f; + s32 m_last_progress_percent = -1; + bool m_on_gpu_thread = false; +}; + // Host UI triggers from Big Picture mode. namespace Host { #ifndef __ANDROID__ +/// Called whenever fullscreen UI starts/stops. 
+void OnFullscreenUIStartedOrStopped(bool started); + /// Requests shut down and exit of the hosting application. This may not actually exit, /// if the user cancels the shutdown confirmation. void RequestExitApplication(bool allow_confirm); diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 028b3313d..367d8282b 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -3,9 +3,12 @@ #include "gpu.h" #include "dma.h" +#include "gpu_backend.h" #include "gpu_dump.h" +#include "gpu_hw_texture_cache.h" #include "gpu_shadergen.h" #include "gpu_sw_rasterizer.h" +#include "gpu_thread.h" #include "host.h" #include "interrupt_controller.h" #include "performance_counters.h" @@ -72,16 +75,7 @@ static u64 s_active_gpu_cycles = 0; static u32 s_active_gpu_cycles_frames = 0; #endif -static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; - -static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string path, FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, Image image, std::string osd_key); - -GPU::GPU() -{ - GPU_SW_Rasterizer::SelectImplementation(); - ResetStatistics(); -} +GPU::GPU() = default; GPU::~GPU() { @@ -90,11 +84,9 @@ GPU::~GPU() s_frame_done_event.Deactivate(); StopRecordingGPUDump(); - DestroyDeinterlaceTextures(); - g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); } -bool GPU::Initialize(Error* error) +void GPU::Initialize() { if (!System::IsReplayingGPUDump()) s_crtc_tick_event.Activate(); @@ -106,21 +98,14 @@ bool GPU::Initialize(Error* error) m_console_is_pal = System::IsPALRegion(); UpdateCRTCConfig(); - if (!CompileDisplayPipelines(true, true, g_settings.display_24bit_chroma_smoothing, error)) - return false; - #ifdef PSX_GPU_STATS s_active_gpu_cycles = 0; s_active_gpu_cycles_frames = 0; #endif - - return true; } void GPU::UpdateSettings(const Settings& old_settings) { - FlushRender(); - m_force_progressive_scan = (g_settings.display_deinterlacing_mode == 
DisplayDeinterlacingMode::Progressive); m_fifo_size = g_settings.gpu_fifo_size; m_max_run_ahead = g_settings.gpu_max_run_ahead; @@ -136,23 +121,6 @@ void GPU::UpdateSettings(const Settings& old_settings) // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); } - - if (g_settings.display_scaling != old_settings.display_scaling || - g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || - g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing) - { - // Toss buffers on mode change. - if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) - DestroyDeinterlaceTextures(); - - if (!CompileDisplayPipelines( - g_settings.display_scaling != old_settings.display_scaling, - g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, - g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing, nullptr)) - { - Panic("Failed to compile display pipeline on settings change."); - } - } } void GPU::CPUClockChanged() @@ -160,15 +128,6 @@ void GPU::CPUClockChanged() UpdateCRTCConfig(); } -u32 GPU::GetResolutionScale() const -{ - return 1u; -} - -void GPU::UpdateResolutionScale() -{ -} - std::tuple GPU::GetFullDisplayResolution() const { u32 width, height; @@ -220,12 +179,6 @@ void GPU::Reset(bool clear_vram) m_crtc_state.interlaced_field = 0; m_crtc_state.interlaced_display_field = 0; - if (clear_vram) - { - std::memset(g_vram, 0, sizeof(g_vram)); - std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); - } - // Cancel VRAM writes. m_blitter_state = BlitterState::Idle; @@ -234,12 +187,14 @@ void GPU::Reset(bool clear_vram) s_command_tick_event.Deactivate(); SoftReset(); - UpdateDisplay(); + + // Can skip the VRAM clear if it's not a hardware reset. 
+ if (clear_vram) + GPUBackend::PushCommand(GPUBackend::NewClearVRAMCommand()); } void GPU::SoftReset() { - FlushRender(); if (m_blitter_state == BlitterState::WritingVRAM) FinishVRAMWrite(); @@ -287,6 +242,12 @@ void GPU::SoftReset() bool GPU::DoState(StateWrapper& sw, bool update_display) { + if (sw.IsWriting()) + { + // Need to ensure our copy of VRAM is good. + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + } + sw.Do(&m_GPUSTAT.bits); sw.Do(&m_draw_mode.mode_reg.bits); @@ -355,16 +316,20 @@ bool GPU::DoState(StateWrapper& sw, bool update_display) sw.Do(&m_command_total_words); sw.Do(&m_GPUREAD_latch); + u16 load_clut_data[GPU_CLUT_SIZE]; if (sw.GetVersion() < 64) [[unlikely]] { // Clear CLUT cache and let it populate later. InvalidateCLUT(); + std::memset(load_clut_data, 0, sizeof(load_clut_data)); } else { sw.Do(&m_current_clut_reg_bits); sw.Do(&m_current_clut_is_8bit); - sw.DoArray(g_gpu_clut, std::size(g_gpu_clut)); + + // I hate this extra copy... because I'm a moron and put it in the middle of the state data. + sw.DoArray(sw.IsReading() ? load_clut_data : g_gpu_clut, std::size(g_gpu_clut)); } sw.Do(&m_vram_transfer.x); @@ -385,11 +350,26 @@ bool GPU::DoState(StateWrapper& sw, bool update_display) if (!sw.DoMarker("GPU-VRAM")) return false; - sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - if (sw.IsReading()) { - m_draw_mode.texture_page_changed = true; + // Need to calculate the TC data size. But skip over VRAM first, we'll grab it later. + const size_t vram_start_pos = sw.GetPosition(); + sw.SkipBytes(VRAM_SIZE); + u32 tc_data_size; + if (!GPUTextureCache::GetStateSize(sw, &tc_data_size)) [[unlikely]] + return false; + + // Now we can actually allocate FIFO storage, and push it to the GPU thread. 
+ GPUBackendLoadStateCommand* cmd = static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::LoadState, sizeof(GPUBackendLoadStateCommand) + tc_data_size)); + std::memcpy(cmd->clut_data, load_clut_data, sizeof(cmd->clut_data)); + std::memcpy(cmd->vram_data, sw.GetData() + vram_start_pos, VRAM_SIZE); + cmd->texture_cache_state_version = sw.GetVersion(); + cmd->texture_cache_state_size = tc_data_size; + if (tc_data_size > 0) + std::memcpy(cmd->texture_cache_state, sw.GetData() + vram_start_pos + VRAM_SIZE, tc_data_size); + GPUThread::PushCommand(cmd); + m_drawing_area_changed = true; SetClampedDrawingArea(); UpdateDMARequest(); @@ -398,13 +378,21 @@ bool GPU::DoState(StateWrapper& sw, bool update_display) // If we're paused, need to update the display FB. if (update_display) - UpdateDisplay(); + UpdateDisplay(false); + } + else // if not memory state + { + // write vram + sw.DoBytes(g_vram, VRAM_SIZE); + + // write TC data, we have to be super careful here, since we're reading GPU thread state... + GPUTextureCache::DoState(sw, false); } return !sw.HasError(); } -bool GPU::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) +void GPU::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) { sw.Do(&m_GPUSTAT.bits); @@ -438,22 +426,25 @@ bool GPU::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool upd if (sw.IsReading()) { - m_draw_mode.texture_page_changed = true; m_drawing_area_changed = true; SetClampedDrawingArea(); UpdateDMARequest(); UpdateCRTCConfig(); UpdateCommandTickEvent(); - - if (update_display) - UpdateDisplay(); } - return true; -} + // Push to thread. + GPUBackendDoMemoryStateCommand* cmd = static_cast(GPUThread::AllocateCommand( + sw.IsReading() ? 
GPUBackendCommandType::LoadMemoryState : GPUBackendCommandType::SaveMemoryState, + sizeof(GPUBackendDoMemoryStateCommand))); + cmd->memory_save_state = &mss; + GPUThread::PushCommandAndWakeThread(cmd); -void GPU::RestoreDeviceContext() -{ + if (update_display) + { + DebugAssert(sw.IsReading()); + UpdateDisplay(false); + } } void GPU::UpdateDMARequest() @@ -659,9 +650,6 @@ float GPU::ComputeVerticalFrequency() const float GPU::ComputeDisplayAspectRatio() const { - if (g_settings.debugging.show_vram) - return static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT); - // Display off => Doesn't matter. if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0) return 4.0f / 3.0f; @@ -673,10 +661,11 @@ float GPU::ComputeDisplayAspectRatio() const float ar = 4.0f / 3.0f; if (!g_settings.display_force_4_3_for_24bit || !m_GPUSTAT.display_area_color_depth_24) { - if (g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow && g_gpu_device->HasMainSwapChain()) + if (g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow) { - ar = static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()) / - static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); + const WindowInfo& wi = GPUThread::GetRenderWindowInfo(); + if (!wi.IsSurfaceless()) + ar = static_cast(wi.surface_width) / static_cast(wi.surface_height); } else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Custom) { @@ -695,9 +684,7 @@ float GPU::ComputeDisplayAspectRatio() const float GPU::ComputeSourceAspectRatio() const { const float source_aspect_ratio = - (g_settings.debugging.show_vram ? - (static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)) : - static_cast(m_crtc_state.display_width) / static_cast(m_crtc_state.display_height)); + static_cast(m_crtc_state.display_width) / static_cast(m_crtc_state.display_height); // Correction is applied to the GTE for stretch to fit, that way it fills the window. 
const float source_aspect_ratio_correction = @@ -706,13 +693,20 @@ float GPU::ComputeSourceAspectRatio() const return source_aspect_ratio / source_aspect_ratio_correction; } +float GPU::ComputePixelAspectRatio() const +{ + const float dar = ComputeDisplayAspectRatio(); + const float sar = ComputeSourceAspectRatio(); + const float par = dar / sar; + return par; +} + float GPU::ComputeAspectRatioCorrection() const { const CRTCState& cs = m_crtc_state; float relative_width = static_cast(cs.horizontal_visible_end - cs.horizontal_visible_start); float relative_height = static_cast(cs.vertical_visible_end - cs.vertical_visible_start); - if (relative_width <= 0 || relative_height <= 0 || g_settings.debugging.show_vram || - g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1 || + if (relative_width <= 0 || relative_height <= 0 || g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1 || g_settings.display_crop_mode == DisplayCropMode::OverscanUncorrected || g_settings.display_crop_mode == DisplayCropMode::BordersUncorrected) { @@ -733,12 +727,8 @@ float GPU::ComputeAspectRatioCorrection() const return (relative_width / relative_height); } -void GPU::ApplyPixelAspectRatioToSize(float* width, float* height) const +void GPU::ApplyPixelAspectRatioToSize(float par, float* width, float* height) { - const float dar = ComputeDisplayAspectRatio(); - const float sar = ComputeSourceAspectRatio(); - const float par = dar / sar; - if (par < 1.0f) { // stretch height, preserve width @@ -1003,8 +993,11 @@ void GPU::UpdateCRTCDisplayParameters() System::UpdateGTEAspectRatio(); } - if (cs.display_vram_width != old_vram_width || cs.display_vram_height != old_vram_height) - UpdateResolutionScale(); + if ((cs.display_vram_width != old_vram_width || cs.display_vram_height != old_vram_height) && + g_settings.gpu_resolution_scale == 0) + { + GPUThread::RunOnBackend([](GPUBackend* backend) { backend->UpdateResolutionScale(); }, false, false); + } } TickCount 
GPU::GetPendingCRTCTicks() const @@ -1190,9 +1183,8 @@ void GPU::CRTCTickEvent(TickCount ticks) // flush any pending draws and "scan out" the image // TODO: move present in here I guess - FlushRender(); - UpdateDisplay(); System::IncrementFrameNumber(); + UpdateDisplay(!System::IsRunaheadActive()); frame_done = true; // switch fields early. this is needed so we draw to the correct one. @@ -1304,16 +1296,21 @@ void GPU::UpdateCommandTickEvent() void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, float* display_y) const { - if (!g_gpu_device->HasMainSwapChain()) [[unlikely]] + const WindowInfo& wi = GPUThread::GetRenderWindowInfo(); + if (wi.IsSurfaceless()) { - *display_x = 0.0f; - *display_y = 0.0f; + *display_x = *display_y = -1.0f; return; } GSVector4i display_rc, draw_rc; - CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(), true, - true, &display_rc, &draw_rc); + CalculateDrawRect(wi.surface_width, wi.surface_height, m_crtc_state.display_width, m_crtc_state.display_height, + m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, m_crtc_state.display_vram_width, + m_crtc_state.display_vram_height, g_settings.display_rotation, g_settings.display_alignment, + ComputePixelAspectRatio(), g_settings.display_stretch_vertically, + (g_settings.display_scaling == DisplayScalingMode::NearestInteger || + g_settings.display_scaling == DisplayScalingMode::BilinearInteger), + &display_rc, &draw_rc); // convert coordinates to active display region, then to full display region const float scaled_display_x = @@ -1328,7 +1325,7 @@ void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float win // TODO: apply rotation matrix DEV_LOG("win {:.0f},{:.0f} -> local {:.0f},{:.0f}, disp {:.2f},{:.2f} (size {},{} frac {},{})", window_x, window_y, - window_x - draw_rc.left, window_y - draw_rc.top, *display_x, *display_y, m_crtc_state.display_width, + 
window_x - display_rc.left, window_y - display_rc.top, *display_x, *display_y, m_crtc_state.display_width, m_crtc_state.display_height, *display_x / static_cast(m_crtc_state.display_width), *display_y / static_cast(m_crtc_state.display_height)); } @@ -1507,7 +1504,7 @@ void GPU::WriteGP1(u32 value) SynchronizeCRTC(); m_crtc_state.regs.display_address_start = new_value; UpdateCRTCDisplayParameters(); - OnBufferSwapped(); + GPUBackend::PushCommand(GPUBackend::NewBufferSwappedCommand()); } } break; @@ -1663,9 +1660,13 @@ void GPU::UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut) { DEBUG_LOG("Reloading CLUT from {},{}, {}", clut.GetXBase(), clut.GetYBase(), needs_8bit ? "8-bit" : "4-bit"); AddCommandTicks(needs_8bit ? 256 : 16); - UpdateCLUT(clut, needs_8bit); m_current_clut_reg_bits = clut.bits; m_current_clut_is_8bit = needs_8bit; + + GPUBackendUpdateCLUTCommand* cmd = GPUBackend::NewUpdateCLUTCommand(); + cmd->reg.bits = clut.bits; + cmd->clut_is_8bit = needs_8bit; + GPUBackend::PushCommand(cmd); } } @@ -1680,27 +1681,21 @@ bool GPU::IsCLUTValid() const return (m_current_clut_reg_bits != std::numeric_limits::max()); } -void GPU::ClearDisplay() -{ - ClearDisplayTexture(); - - // Just recycle the textures, it'll get re-fetched. 
- DestroyDeinterlaceTextures(); -} - void GPU::SetClampedDrawingArea() { - if (m_drawing_area.left > m_drawing_area.right || m_drawing_area.top > m_drawing_area.bottom) [[unlikely]] - { - m_clamped_drawing_area = GSVector4i::zero(); - return; - } + m_clamped_drawing_area = GetClampedDrawingArea(m_drawing_area); +} - const u32 right = std::min(m_drawing_area.right + 1, static_cast(VRAM_WIDTH)); - const u32 left = std::min(m_drawing_area.left, std::min(m_drawing_area.right, VRAM_WIDTH - 1)); - const u32 bottom = std::min(m_drawing_area.bottom + 1, static_cast(VRAM_HEIGHT)); - const u32 top = std::min(m_drawing_area.top, std::min(m_drawing_area.bottom, VRAM_HEIGHT - 1)); - m_clamped_drawing_area = GSVector4i(left, top, right, bottom); +GSVector4i GPU::GetClampedDrawingArea(const GPUDrawingArea& drawing_area) +{ + if (drawing_area.left > drawing_area.right || drawing_area.top > drawing_area.bottom) [[unlikely]] + return GSVector4i::zero(); + + const u32 right = std::min(drawing_area.right + 1, static_cast(VRAM_WIDTH)); + const u32 left = std::min(drawing_area.left, std::min(drawing_area.right, VRAM_WIDTH - 1)); + const u32 bottom = std::min(drawing_area.bottom + 1, static_cast(VRAM_HEIGHT)); + const u32 top = std::min(drawing_area.top, std::min(drawing_area.bottom, VRAM_HEIGHT - 1)); + return GSVector4i(left, top, right, bottom); } void GPU::SetDrawMode(u16 value) @@ -1709,16 +1704,8 @@ void GPU::SetDrawMode(u16 value) if (!m_set_texture_disable_mask) new_mode_reg.texture_disable = false; - if (new_mode_reg.bits == m_draw_mode.mode_reg.bits) - return; - - m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK) != - (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK)); m_draw_mode.mode_reg.bits = new_mode_reg.bits; - if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field) - FlushRender(); - // Bits 0..10 are returned in the GPU status register. 
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) | (ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK); @@ -1728,11 +1715,7 @@ void GPU::SetDrawMode(u16 value) void GPU::SetTexturePalette(u16 value) { value &= DrawMode::PALETTE_MASK; - if (m_draw_mode.palette_reg.bits == value) - return; - m_draw_mode.palette_reg.bits = value; - m_draw_mode.texture_page_changed = true; } void GPU::SetTextureWindow(u32 value) @@ -1754,713 +1737,21 @@ void GPU::SetTextureWindow(u32 value) m_draw_mode.texture_window_value = value; } -void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit) +void GPU::CalculateDrawRect(u32 window_width, u32 window_height, u32 crtc_display_width, u32 crtc_display_height, + s32 display_origin_left, s32 display_origin_top, u32 display_vram_width, + u32 display_vram_height, DisplayRotation rotation, DisplayAlignment alignment, + float pixel_aspect_ratio, bool stretch_vertically, bool integer_scale, + GSVector4i* display_rect, GSVector4i* draw_rect) { - const u16* src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH]; - const u32 start_x = reg.GetXBase(); - if (!clut_is_8bit) - { - // Wraparound can't happen in 4-bit mode. 
- std::memcpy(dest, &src_row[start_x], sizeof(u16) * 16); - } - else - { - if ((start_x + 256) > VRAM_WIDTH) [[unlikely]] - { - const u32 end = VRAM_WIDTH - start_x; - const u32 start = 256 - end; - std::memcpy(dest, &src_row[start_x], sizeof(u16) * end); - std::memcpy(dest + end, src_row, sizeof(u16) * start); - } - else - { - std::memcpy(dest, &src_row[start_x], sizeof(u16) * 256); - } - } -} - -bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error) -{ - GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, - g_gpu_device->GetFeatures().framebuffer_fetch); - - GPUPipeline::GraphicsConfig plconfig; - plconfig.input_layout.vertex_stride = 0; - plconfig.primitive = GPUPipeline::Primitive::Triangles; - plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); - plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); - plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - plconfig.geometry_shader = nullptr; - plconfig.depth_format = GPUTexture::Format::Unknown; - plconfig.samples = 1; - plconfig.per_sample_shading = false; - plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; - - if (display) - { - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.SetTargetFormats(g_gpu_device->HasMainSwapChain() ? 
g_gpu_device->GetMainSwapChain()->GetFormat() : - GPUTexture::Format::RGBA8); - - std::string vs = shadergen.GenerateDisplayVertexShader(); - std::string fs; - switch (g_settings.display_scaling) - { - case DisplayScalingMode::BilinearSharp: - fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); - break; - - case DisplayScalingMode::BilinearSmooth: - case DisplayScalingMode::BilinearInteger: - fs = shadergen.GenerateDisplayFragmentShader(true, false); - break; - - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - default: - fs = shadergen.GenerateDisplayFragmentShader(false, true); - break; - } - - std::unique_ptr vso = - g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error); - std::unique_ptr fso = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Display Vertex Shader"); - GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", - Settings::GetDisplayScalingName(g_settings.display_scaling)); - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", - Settings::GetDisplayScalingName(g_settings.display_scaling)); - } - - if (deinterlace) - { - plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); - - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), - shadergen.GenerateScreenQuadVertexShader(), error); - if (!vso) - return false; - GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); - - std::unique_ptr fso; - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateInterleavedFieldExtractFragmentShader(), error))) - { - return false; - } - - GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); - - plconfig.layout 
= GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); - - switch (g_settings.display_deinterlacing_mode) - { - case DisplayDeinterlacingMode::Disabled: - case DisplayDeinterlacingMode::Progressive: - break; - - case DisplayDeinterlacingMode::Weave: - { - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateDeinterlaceWeaveFragmentShader(), error))) - { - return false; - } - - GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); - } - break; - - case DisplayDeinterlacingMode::Blend: - { - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateDeinterlaceBlendFragmentShader(), error))) - { - return false; - } - - GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); - } - break; - - case DisplayDeinterlacingMode::Adaptive: - { - fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateFastMADReconstructFragmentShader(), error); - if (!fso) - return false; - - GL_OBJECT_NAME(fso, "FastMAD Reconstruct 
Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); - } - break; - - default: - UnreachableCode(); - } - } - - if (chroma_smoothing) - { - m_chroma_smoothing_pipeline.reset(); - g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); - - if (g_settings.display_24bit_chroma_smoothing) - { - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); - - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), - shadergen.GenerateScreenQuadVertexShader(), error); - std::unique_ptr fso = g_gpu_device->CreateShader( - GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateChromaSmoothingFragmentShader(), error); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); - GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); - - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); - } - } - - return true; -} - -void GPU::ClearDisplayTexture() -{ - m_display_texture = nullptr; - m_display_texture_view_x = 0; - m_display_texture_view_y = 0; - m_display_texture_view_width = 0; - m_display_texture_view_height = 0; -} - -void GPU::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, - s32 view_height) -{ - DebugAssert(texture); - - if (g_settings.display_auto_resize_window && - (view_width != m_display_texture_view_width || view_height != m_display_texture_view_height)) - { - System::RequestDisplaySize(); - } 
- - m_display_texture = texture; - m_display_depth_buffer = depth_buffer; - m_display_texture_view_x = view_x; - m_display_texture_view_y = view_y; - m_display_texture_view_width = view_width; - m_display_texture_view_height = view_height; -} - -GPUDevice::PresentResult GPU::PresentDisplay() -{ - FlushRender(); - - if (!g_gpu_device->HasMainSwapChain()) - return GPUDevice::PresentResult::SkipPresent; - - GSVector4i display_rect; - GSVector4i draw_rect; - CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(), - !g_settings.debugging.show_vram, true, &display_rect, &draw_rect); - return RenderDisplay(nullptr, display_rect, draw_rect, !g_settings.debugging.show_vram); -} - -GPUDevice::PresentResult GPU::RenderDisplay(GPUTexture* target, const GSVector4i display_rect, - const GSVector4i draw_rect, bool postfx) -{ - GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); - - if (m_display_texture) - m_display_texture->MakeReadyForSampling(); - - // Internal post-processing. - GPUTexture* display_texture = m_display_texture; - s32 display_texture_view_x = m_display_texture_view_x; - s32 display_texture_view_y = m_display_texture_view_y; - s32 display_texture_view_width = m_display_texture_view_width; - s32 display_texture_view_height = m_display_texture_view_height; - if (postfx && display_texture && PostProcessing::InternalChain.IsActive() && - PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width, - display_texture_view_height)) - { - DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 && - static_cast(display_texture->GetWidth()) == display_texture_view_width && - static_cast(display_texture->GetHeight()) == display_texture_view_height); - - // Now we can apply the post chain. 
- GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); - if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, - GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), - display_texture_view_width, display_texture_view_height, - m_crtc_state.display_width, - m_crtc_state.display_height) == GPUDevice::PresentResult::OK) - { - display_texture_view_x = 0; - display_texture_view_y = 0; - display_texture = post_output_texture; - display_texture->MakeReadyForSampling(); - } - } - - const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetMainSwapChain()->GetFormat(); - const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetMainSwapChain()->GetWidth(); - const u32 target_height = target ? target->GetHeight() : g_gpu_device->GetMainSwapChain()->GetHeight(); - const bool really_postfx = (postfx && PostProcessing::DisplayChain.IsActive() && g_gpu_device->HasMainSwapChain() && - hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && - PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); - GSVector4i real_draw_rect = target ? draw_rect : g_gpu_device->GetMainSwapChain()->PreRotateClipRect(draw_rect); - if (g_gpu_device->UsesLowerLeftOrigin()) - { - real_draw_rect = GPUDevice::FlipToLowerLeft( - real_draw_rect, - (target || really_postfx) ? 
target_height : g_gpu_device->GetMainSwapChain()->GetPostRotatedHeight()); - } - if (really_postfx) - { - g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR); - g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture()); - } - else - { - if (target) - { - g_gpu_device->SetRenderTarget(target); - } - else - { - const GPUDevice::PresentResult pres = g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain()); - if (pres != GPUDevice::PresentResult::OK) - return pres; - } - } - - if (display_texture) - { - bool texture_filter_linear = false; - - struct Uniforms - { - float src_rect[4]; - float src_size[4]; - float clamp_rect[4]; - float params[4]; - float rotation_matrix[2][2]; - } uniforms; - std::memset(uniforms.params, 0, sizeof(uniforms.params)); - - switch (g_settings.display_scaling) - { - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - break; - - case DisplayScalingMode::BilinearSmooth: - case DisplayScalingMode::BilinearInteger: - texture_filter_linear = true; - break; - - case DisplayScalingMode::BilinearSharp: - { - texture_filter_linear = true; - uniforms.params[0] = std::max( - std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f); - uniforms.params[1] = std::max( - std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f); - uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; - uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; - } - break; - - default: - UnreachableCode(); - break; - } - - g_gpu_device->SetPipeline(m_display_pipeline.get()); - g_gpu_device->SetTextureSampler( - 0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); - - // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. 
This is because - // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. - const float rcp_width = 1.0f / static_cast(display_texture->GetWidth()); - const float rcp_height = 1.0f / static_cast(display_texture->GetHeight()); - uniforms.src_rect[0] = static_cast(display_texture_view_x) * rcp_width; - uniforms.src_rect[1] = static_cast(display_texture_view_y) * rcp_height; - uniforms.src_rect[2] = static_cast(display_texture_view_width) * rcp_width; - uniforms.src_rect[3] = static_cast(display_texture_view_height) * rcp_height; - uniforms.clamp_rect[0] = (static_cast(display_texture_view_x) + 0.5f) * rcp_width; - uniforms.clamp_rect[1] = (static_cast(display_texture_view_y) + 0.5f) * rcp_height; - uniforms.clamp_rect[2] = - (static_cast(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width; - uniforms.clamp_rect[3] = - (static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height; - uniforms.src_size[0] = static_cast(display_texture->GetWidth()); - uniforms.src_size[1] = static_cast(display_texture->GetHeight()); - uniforms.src_size[2] = rcp_width; - uniforms.src_size[3] = rcp_height; - - const WindowInfo::PreRotation surface_prerotation = (target || really_postfx) ? 
- WindowInfo::PreRotation::Identity : - g_gpu_device->GetMainSwapChain()->GetPreRotation(); - if (g_settings.display_rotation != DisplayRotation::Normal || - surface_prerotation != WindowInfo::PreRotation::Identity) - { - static constexpr const std::array(DisplayRotation::Count)> rotation_radians = {{ - 0.0f, // Disabled - static_cast(std::numbers::pi * 1.5f), // Rotate90 - static_cast(std::numbers::pi), // Rotate180 - static_cast(std::numbers::pi / 2.0), // Rotate270 - }}; - - const u32 rotation_idx = (static_cast(g_settings.display_rotation) + static_cast(surface_prerotation)) % - static_cast(rotation_radians.size()); - GSMatrix2x2::Rotation(rotation_radians[rotation_idx]).store(uniforms.rotation_matrix); - } - else - { - GSMatrix2x2::Identity().store(uniforms.rotation_matrix); - } - - g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); - - g_gpu_device->SetViewportAndScissor(real_draw_rect); - g_gpu_device->Draw(3, 0); - } - - if (really_postfx) - { - DebugAssert(!g_settings.debugging.show_vram); - - // "original size" in postfx includes padding. - const float upscale_x = m_display_texture ? static_cast(m_display_texture_view_width) / - static_cast(m_crtc_state.display_vram_width) : - 1.0f; - const float upscale_y = m_display_texture ? 
static_cast(m_display_texture_view_height) / - static_cast(m_crtc_state.display_vram_height) : - 1.0f; - const s32 orig_width = static_cast(std::ceil(static_cast(m_crtc_state.display_width) * upscale_x)); - const s32 orig_height = static_cast(std::ceil(static_cast(m_crtc_state.display_height) * upscale_y)); - - return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, - display_rect, orig_width, orig_height, m_crtc_state.display_width, - m_crtc_state.display_height); - } - else - { - return GPUDevice::PresentResult::OK; - } -} - -bool GPU::SendDisplayToMediaCapture(MediaCapture* cap) -{ - GPUTexture* target = cap->GetRenderTexture(); - if (!target) [[unlikely]] - return false; - - const bool apply_aspect_ratio = - (g_settings.display_screenshot_mode != DisplayScreenshotMode::UncorrectedInternalResolution); - const bool postfx = (g_settings.display_screenshot_mode != DisplayScreenshotMode::InternalResolution); - GSVector4i display_rect, draw_rect; - CalculateDrawRect(target->GetWidth(), target->GetHeight(), !g_settings.debugging.show_vram, apply_aspect_ratio, - &display_rect, &draw_rect); - - // Not cleared by RenderDisplay(). 
- g_gpu_device->ClearRenderTarget(target, GPUDevice::DEFAULT_CLEAR_COLOR); - - if (RenderDisplay(target, display_rect, draw_rect, postfx) != GPUDevice::PresentResult::OK) [[unlikely]] - return false; - - return cap->DeliverVideoFrame(target); -} - -void GPU::DestroyDeinterlaceTextures() -{ - for (std::unique_ptr& tex : m_deinterlace_buffers) - g_gpu_device->RecycleTexture(std::move(tex)); - g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); - m_current_deinterlace_buffer = 0; -} - -bool GPU::Deinterlace(u32 field, u32 line_skip) -{ - GPUTexture* src = m_display_texture; - const u32 x = m_display_texture_view_x; - const u32 y = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; - - switch (g_settings.display_deinterlacing_mode) - { - case DisplayDeinterlacingMode::Disabled: - { - if (line_skip == 0) - return true; - - // Still have to extract the field. - if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] - return false; - - SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height); - return true; - } - - case DisplayDeinterlacingMode::Weave: - { - GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); - - const u32 full_height = height * 2; - if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - src->MakeReadyForSampling(); - - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, field, line_skip}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - 
SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); - return true; - } - - case DisplayDeinterlacingMode::Blend: - { - constexpr u32 NUM_BLEND_BUFFERS = 2; - - GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); - - const u32 this_buffer = m_current_deinterlace_buffer; - m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; - GL_INS_FMT("Current buffer: {}", this_buffer); - if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || - !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - // TODO: could be implemented with alpha blending instead.. - - g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height); - return true; - } - - case DisplayDeinterlacingMode::Adaptive: - { - GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, - line_skip); - - const u32 full_height = height * 2; - const u32 this_buffer = m_current_deinterlace_buffer; - m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; - GL_INS_FMT("Current buffer: {}", this_buffer); - if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || - 
!DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {field, full_height}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); - return true; - } - - default: - UnreachableCode(); - } -} - -bool GPU::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip) -{ - if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width || - m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false)) [[unlikely]] - { - return false; - } - - GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx); - } - - GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get(); - g_gpu_device->InvalidateRenderTarget(dst); - - // If we're not skipping lines, then we can simply copy 
the texture. - if (line_skip == 0 && src->GetFormat() == dst->GetFormat()) - { - GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip); - g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height); - } - else - { - GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height, - line_skip); - - // Otherwise, we need to extract every other line from the texture. - src->MakeReadyForSampling(); - g_gpu_device->SetRenderTarget(dst); - g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); - g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, line_skip}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - } - - dst->MakeReadyForSampling(); - return true; -} - -bool GPU::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) -{ - if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || - m_deinterlace_texture->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, GPUTexture::Flags::None, preserve)) [[unlikely]] - { - return false; - } - - GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); - } - - return true; -} - -bool GPU::ApplyChromaSmoothing() -{ - const u32 x = m_display_texture_view_x; - const u32 y = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; - if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || - m_chroma_smoothing_texture->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, 
GPUTexture::Flags::None, false)) - { - ClearDisplayTexture(); - return false; - } - - GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); - } - - GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); - - m_display_texture->MakeReadyForSampling(); - g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); - g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); - g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_display_texture, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, width - 1, height - 1}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - m_chroma_smoothing_texture->MakeReadyForSampling(); - SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); - return true; -} - -void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, - GSVector4i* display_rect, GSVector4i* draw_rect) const -{ - const bool integer_scale = (g_settings.display_scaling == DisplayScalingMode::NearestInteger || - g_settings.display_scaling == DisplayScalingMode::BilinearInteger); - const bool show_vram = g_settings.debugging.show_vram; const float window_ratio = static_cast(window_width) / static_cast(window_height); - const float crtc_display_width = static_cast(show_vram ? VRAM_WIDTH : m_crtc_state.display_width); - const float crtc_display_height = static_cast(show_vram ? VRAM_HEIGHT : m_crtc_state.display_height); - const float display_aspect_ratio = ComputeDisplayAspectRatio(); - const float source_aspect_ratio = ComputeSourceAspectRatio(); - const float pixel_aspect_ratio = display_aspect_ratio / source_aspect_ratio; - const float x_scale = apply_aspect_ratio ? 
pixel_aspect_ratio : 1.0f; - float display_width = crtc_display_width; - float display_height = crtc_display_height; - float active_left = static_cast(show_vram ? 0 : m_crtc_state.display_origin_left); - float active_top = static_cast(show_vram ? 0 : m_crtc_state.display_origin_top); - float active_width = static_cast(show_vram ? VRAM_WIDTH : m_crtc_state.display_vram_width); - float active_height = static_cast(show_vram ? VRAM_HEIGHT : m_crtc_state.display_vram_height); - - if (!g_settings.display_stretch_vertically) + const float x_scale = pixel_aspect_ratio; + float display_width = static_cast(crtc_display_width); + float display_height = static_cast(crtc_display_height); + float active_left = static_cast(display_origin_left); + float active_top = static_cast(display_origin_top); + float active_width = static_cast(display_vram_width); + float active_height = static_cast(display_vram_height); + if (!stretch_vertically) { display_width *= x_scale; active_left *= x_scale; @@ -2474,8 +1765,7 @@ void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rota } // swap width/height when rotated, the flipping of padding is taken care of in the shader with the rotation matrix - if (g_settings.display_rotation == DisplayRotation::Rotate90 || - g_settings.display_rotation == DisplayRotation::Rotate270) + if (rotation == DisplayRotation::Rotate90 || rotation == DisplayRotation::Rotate270) { std::swap(display_width, display_height); std::swap(active_width, active_height); @@ -2499,7 +1789,7 @@ void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rota left_padding = 0.0f; } - switch (g_settings.display_alignment) + switch (alignment) { case DisplayAlignment::RightOrBottom: top_padding = std::max(static_cast(window_height) - (display_height * scale), 0.0f); @@ -2529,7 +1819,7 @@ void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rota top_padding = 0.0f; } - switch (g_settings.display_alignment) + switch 
(alignment) { case DisplayAlignment::RightOrBottom: left_padding = std::max(static_cast(window_width) - (display_width * scale), 0.0f); @@ -2556,254 +1846,85 @@ void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rota GSVector4(left_padding, top_padding, left_padding + display_width * scale, top_padding + display_height * scale)); } -bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string path, FileSystem::ManagedCFilePtr fp, u8 quality, - bool clear_alpha, bool flip_y, Image image, std::string osd_key) +void GPU::ReadVRAM(u16 x, u16 y, u16 width, u16 height) { - Error error; - - if (flip_y) - image.FlipY(); - - if (image.GetFormat() != ImageFormat::RGBA8) + // If we're using the software renderer, we only need to sync the thread. + if (!GPUBackend::IsUsingHardwareBackend() || g_settings.gpu_use_software_renderer_for_readbacks) { - std::optional convert_image = image.ConvertToRGBA8(&error); - if (!convert_image.has_value()) - { - ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", Image::GetFormatName(image.GetFormat()), - error.GetDescription()); - image.Invalidate(); - } - else - { - image = std::move(convert_image.value()); - } + GPUBackend::SyncGPUThread(true); + return; } - bool result = false; - if (image.IsValid()) - { - if (clear_alpha) - image.SetAllPixelsOpaque(); - - result = image.SaveToFile(path.c_str(), fp.get(), quality, &error); - if (!result) - ERROR_LOG("Failed to save screenshot to '{}': '{}'", Path::GetFileName(path), error.GetDescription()); - } - - if (!osd_key.empty()) - { - Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, - fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : - TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), - Path::GetFileName(path), - result ? 
Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); - } - - return result; + GPUBackendReadVRAMCommand* cmd = GPUBackend::NewReadVRAMCommand(); + cmd->x = x; + cmd->y = y; + cmd->width = width; + cmd->height = height; + GPUBackend::PushCommandAndSync(cmd, true); } -bool GPU::WriteDisplayTextureToFile(std::string filename) +void GPU::UpdateVRAM(u16 x, u16 y, u16 width, u16 height, const void* data, bool set_mask, bool check_mask) { - if (!m_display_texture) - return false; - - const u32 read_x = static_cast(m_display_texture_view_x); - const u32 read_y = static_cast(m_display_texture_view_y); - const u32 read_width = static_cast(m_display_texture_view_width); - const u32 read_height = static_cast(m_display_texture_view_height); - const ImageFormat read_format = GPUTexture::GetImageFormatForTextureFormat(m_display_texture->GetFormat()); - if (read_format == ImageFormat::None) - return false; - - Image image(read_width, read_height, read_format); - std::unique_ptr dltex; - if (g_gpu_device->GetFeatures().memory_import) - { - dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), - image.GetPixels(), image.GetStorageSize(), image.GetPitch()); - } - if (!dltex) - { - if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) - { - ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, - GPUTexture::GetFormatName(m_display_texture->GetFormat())); - return false; - } - } - - dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); - if (!dltex->ReadTexels(0, 0, read_width, read_height, image.GetPixels(), image.GetPitch())) - { - RestoreDeviceContext(); - return false; - } - - RestoreDeviceContext(); - - Error error; - auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); - if (!fp) - { - ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), 
error.GetDescription()); - return false; - } - - constexpr bool clear_alpha = true; - const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); - - return CompressAndWriteTextureToFile(read_width, read_height, std::move(filename), std::move(fp), - g_settings.display_screenshot_quality, clear_alpha, flip_y, std::move(image), - std::string()); + const u32 num_words = width * height; + GPUBackendUpdateVRAMCommand* cmd = GPUBackend::NewUpdateVRAMCommand(num_words); + cmd->x = x; + cmd->y = y; + cmd->width = width; + cmd->height = height; + cmd->set_mask_while_drawing = set_mask; + cmd->check_mask_before_draw = check_mask; + std::memcpy(cmd->data, data, num_words * sizeof(u16)); + GPUBackend::PushCommand(cmd); } -bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx, Image* out_image) +void GPU::ClearDisplay() { - const GPUTexture::Format hdformat = - g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; - const ImageFormat image_format = GPUTexture::GetImageFormatForTextureFormat(hdformat); - if (image_format == ImageFormat::None) - return false; - - auto render_texture = g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, - hdformat, GPUTexture::Flags::None); - if (!render_texture) - return false; - - g_gpu_device->ClearRenderTarget(render_texture.get(), GPUDevice::DEFAULT_CLEAR_COLOR); - - // TODO: this should use copy shader instead. 
- RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx); - - Image image(width, height, image_format); - - Error error; - std::unique_ptr dltex; - if (g_gpu_device->GetFeatures().memory_import) - { - dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, image.GetPixels(), image.GetStorageSize(), - image.GetPitch(), &error); - } - if (!dltex) - { - if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, &error))) - { - ERROR_LOG("Failed to create {}x{} download texture: {}", width, height, error.GetDescription()); - return false; - } - } - - dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); - if (!dltex->ReadTexels(0, 0, width, height, image.GetPixels(), image.GetPitch())) - { - RestoreDeviceContext(); - return false; - } - - RestoreDeviceContext(); - *out_image = std::move(image); - return true; + GPUBackend::PushCommand(GPUBackend::NewClearDisplayCommand()); } -void GPU::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, - GSVector4i* draw_rect) const +void GPU::UpdateDisplay(bool submit_frame) { - const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_settings.debugging.show_vram); - if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) + GPUBackendUpdateDisplayCommand* cmd = GPUBackend::NewUpdateDisplayCommand(); + cmd->display_width = m_crtc_state.display_width; + cmd->display_height = m_crtc_state.display_height; + cmd->display_origin_left = m_crtc_state.display_origin_left; + cmd->display_origin_top = m_crtc_state.display_origin_top; + cmd->display_vram_left = m_crtc_state.display_vram_left; + cmd->display_vram_top = m_crtc_state.display_vram_top; + cmd->display_vram_width = m_crtc_state.display_vram_width; + cmd->display_vram_height = m_crtc_state.display_vram_height; + cmd->X = m_crtc_state.regs.X; + cmd->interlaced_display_enabled = 
IsInterlacedDisplayEnabled(); + cmd->interlaced_display_field = ConvertToBoolUnchecked(GetInterlacedDisplayField()); + cmd->interlaced_display_interleaved = cmd->interlaced_display_enabled && m_GPUSTAT.vertical_resolution; + cmd->display_24bit = m_GPUSTAT.display_area_color_depth_24; + cmd->display_disabled = IsDisplayDisabled(); + cmd->display_pixel_aspect_ratio = ComputePixelAspectRatio(); + if ((cmd->submit_frame = submit_frame && System::GetFramePresentationParameters(&cmd->frame))) { - if (mode == DisplayScreenshotMode::InternalResolution) - { - float f_width = static_cast(m_display_texture_view_width); - float f_height = static_cast(m_display_texture_view_height); - ApplyPixelAspectRatioToSize(&f_width, &f_height); - - // DX11 won't go past 16K texture size. - const float max_texture_size = static_cast(g_gpu_device->GetMaxTextureSize()); - if (f_width > max_texture_size) - { - f_height = f_height / (f_width / max_texture_size); - f_width = max_texture_size; - } - if (f_height > max_texture_size) - { - f_height = max_texture_size; - f_width = f_width / (f_height / max_texture_size); - } - - *width = static_cast(std::ceil(f_width)); - *height = static_cast(std::ceil(f_height)); - } - else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) - { - *width = m_display_texture_view_width; - *height = m_display_texture_view_height; - } - - // Remove padding, it's not part of the framebuffer. - *draw_rect = GSVector4i(0, 0, static_cast(*width), static_cast(*height)); - *display_rect = *draw_rect; + const bool drain_one = cmd->frame.present_frame && GPUBackend::BeginQueueFrame(); + GPUThread::PushCommandAndWakeThread(cmd); + if (drain_one) + GPUBackend::WaitForOneQueuedFrame(); } else { - *width = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetWidth() : 1; - *height = g_gpu_device->HasMainSwapChain() ? 
g_gpu_device->GetMainSwapChain()->GetHeight() : 1; - CalculateDrawRect(*width, *height, true, !g_settings.debugging.show_vram, display_rect, draw_rect); + GPUThread::PushCommand(cmd); } } -bool GPU::RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, - bool show_osd_message) +void GPU::QueuePresentCurrentFrame() { - u32 width, height; - GSVector4i display_rect, draw_rect; - CalculateScreenshotSize(mode, &width, &height, &display_rect, &draw_rect); + DebugAssert(g_settings.IsRunaheadEnabled()); - const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution); - if (width == 0 || height == 0) - return false; - - Image image; - if (!RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &image)) + // Submit can be skipped if it's a dupe frame and we're not dumping frames. + GPUBackendSubmitFrameCommand* cmd = GPUBackend::NewSubmitFrameCommand(); + if (System::GetFramePresentationParameters(&cmd->frame)) { - ERROR_LOG("Failed to render {}x{} screenshot", width, height); - return false; - } - - Error error; - auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error); - if (!fp) - { - ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription()); - return false; - } - - std::string osd_key; - if (show_osd_message) - { - // Use a 60 second timeout to give it plenty of time to actually save. 
- osd_key = fmt::format("ScreenshotSaver_{}", path); - Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH, - fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(path)), - 60.0f); - } - - if (compress_on_thread) - { - System::QueueTaskOnThread([width, height, path = std::move(path), fp = fp.release(), quality, - flip_y = g_gpu_device->UsesLowerLeftOrigin(), image = std::move(image), - osd_key = std::move(osd_key)]() mutable { - CompressAndWriteTextureToFile(width, height, std::move(path), FileSystem::ManagedCFilePtr(fp), quality, true, - flip_y, std::move(image), std::move(osd_key)); - System::RemoveSelfFromTaskThreads(); - }); - - return true; - } - else - { - return CompressAndWriteTextureToFile(width, height, std::move(path), std::move(fp), quality, true, - g_gpu_device->UsesLowerLeftOrigin(), std::move(image), std::move(osd_key)); + const bool drain_one = cmd->frame.present_frame && GPUBackend::BeginQueueFrame(); + GPUThread::PushCommandAndWakeThread(cmd); + if (drain_one) + GPUBackend::WaitForOneQueuedFrame(); } } @@ -2856,8 +1977,6 @@ bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride void GPU::DrawDebugStateWindow(float scale) { - DrawRendererStats(); - if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen)) { static constexpr std::array state_strings = { @@ -2912,76 +2031,6 @@ void GPU::DrawDebugStateWindow(float scale) } } -void GPU::DrawRendererStats() -{ -} - -void GPU::OnBufferSwapped() -{ -} - -void GPU::GetStatsString(SmallStringBase& str) -{ - if (IsHardwareRenderer()) - { - str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", - GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives, - m_stats.host_num_draws, m_stats.host_num_barriers, m_stats.host_num_render_passes, - m_stats.host_num_downloads, m_stats.num_copies, m_stats.num_writes); - } - else - { - str.format("{} SW | {} P | {} R | {} C | {} W", 
GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), - m_stats.num_primitives, m_stats.num_reads, m_stats.num_copies, m_stats.num_writes); - } -} - -void GPU::GetMemoryStatsString(SmallStringBase& str) -{ - const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); - const u32 stream_kb = static_cast((m_stats.host_buffer_streamed + (1024 - 1)) / 1024); - - str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, m_stats.host_num_copies, - m_stats.host_num_uploads); -} - -void GPU::ResetStatistics() -{ - m_counters = {}; - g_gpu_device->ResetStatistics(); -} - -void GPU::UpdateStatistics(u32 frame_count) -{ - const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); - const u32 round = (frame_count - 1); - -#define UPDATE_COUNTER(x) m_stats.x = (m_counters.x + round) / frame_count -#define UPDATE_GPU_STAT(x) m_stats.host_##x = (stats.x + round) / frame_count - - UPDATE_COUNTER(num_reads); - UPDATE_COUNTER(num_writes); - UPDATE_COUNTER(num_copies); - UPDATE_COUNTER(num_vertices); - UPDATE_COUNTER(num_primitives); - - // UPDATE_COUNTER(num_read_texture_updates); - // UPDATE_COUNTER(num_ubo_updates); - - UPDATE_GPU_STAT(buffer_streamed); - UPDATE_GPU_STAT(num_draws); - UPDATE_GPU_STAT(num_barriers); - UPDATE_GPU_STAT(num_render_passes); - UPDATE_GPU_STAT(num_copies); - UPDATE_GPU_STAT(num_downloads); - UPDATE_GPU_STAT(num_uploads); - -#undef UPDATE_GPU_STAT -#undef UPDATE_COUNTER - - ResetStatistics(); -} - bool GPU::StartRecordingGPUDump(const char* path, u32 num_frames /* = 1 */) { if (m_gpu_dump) @@ -3020,7 +2069,8 @@ bool GPU::StartRecordingGPUDump(const char* path, u32 num_frames /* = 1 */) Host::OSD_QUICK_DURATION); // save screenshot to same location to identify it - RenderScreenshotToFile(Path::ReplaceExtension(path, "png"), DisplayScreenshotMode::ScreenResolution, 85, true, false); + GPUBackend::RenderScreenshotToFile(Path::ReplaceExtension(path, "png"), 
DisplayScreenshotMode::ScreenResolution, 85, + true, false); return true; } @@ -3193,10 +2243,8 @@ void GPU::ProcessGPUDumpPacket(GPUDump::PacketType type, const std::span(system_ticks_per_frame)); - - FlushRender(); - UpdateDisplay(); System::IncrementFrameNumber(); + UpdateDisplay(true); System::FrameDone(); } break; diff --git a/src/core/gpu.h b/src/core/gpu.h index 8f73b5023..a1258f6b5 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -39,17 +39,18 @@ enum class PacketType : u8; class Recorder; class Player; } // namespace GPUDump -struct Settings; -namespace Threading { -class Thread; -} +class GPUBackend; +struct Settings; namespace System { struct MemorySaveState; } -class GPU +struct GPUBackendCommand; +struct GPUBackendDrawCommand; + +class GPU final { public: enum class BlitterState : u8 @@ -66,7 +67,6 @@ public: DOT_TIMER_INDEX = 0, HBLANK_TIMER_INDEX = 1, MAX_RESOLUTION_SCALE = 32, - DEINTERLACE_BUFFER_COUNT = 4, DRAWING_AREA_COORD_MASK = 1023, }; @@ -92,26 +92,15 @@ public: // Base class constructor. GPU(); - virtual ~GPU(); + ~GPU(); - virtual const Threading::Thread* GetSWThread() const = 0; - virtual bool IsHardwareRenderer() const = 0; - - virtual bool Initialize(Error* error); - virtual void Reset(bool clear_vram); - virtual bool DoState(StateWrapper& sw, bool update_display); - virtual bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display); - - // Graphics API state reset/restore - call when drawing the UI etc. - // TODO: replace with "invalidate cached state" - virtual void RestoreDeviceContext(); + void Initialize(); + void Reset(bool clear_vram); + bool DoState(StateWrapper& sw, bool update_display); + void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display); // Render statistics debug window. 
void DrawDebugStateWindow(float scale); - void GetStatsString(SmallStringBase& str); - void GetMemoryStatsString(SmallStringBase& str); - void ResetStatistics(); - void UpdateStatistics(u32 frame_count); void CPUClockChanged(); @@ -175,31 +164,26 @@ public: void SynchronizeCRTC(); /// Recompile shaders/recreate framebuffers when needed. - virtual void UpdateSettings(const Settings& old_settings); - - /// Returns the current resolution scale. - virtual u32 GetResolutionScale() const; - - /// Updates the resolution scale when it's set to automatic. - virtual void UpdateResolutionScale(); + void UpdateSettings(const Settings& old_settings); /// Returns the full display resolution of the GPU, including padding. std::tuple GetFullDisplayResolution() const; + /// Computes clamped drawing area. + static GSVector4i GetClampedDrawingArea(const GPUDrawingArea& drawing_area); + float ComputeHorizontalFrequency() const; float ComputeVerticalFrequency() const; float ComputeDisplayAspectRatio() const; float ComputeSourceAspectRatio() const; + float ComputePixelAspectRatio() const; /// Computes aspect ratio correction, i.e. the scale to apply to the source aspect ratio to preserve /// the original pixel aspect ratio regardless of how much cropping has been applied. float ComputeAspectRatioCorrection() const; /// Applies the pixel aspect ratio to a given size, preserving the larger dimension. - void ApplyPixelAspectRatioToSize(float* width, float* height) const; - - static std::unique_ptr CreateHardwareRenderer(); - static std::unique_ptr CreateSoftwareRenderer(); + static void ApplyPixelAspectRatioToSize(float par, float* width, float* height); // Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns. void ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, @@ -231,38 +215,17 @@ public: // Dumps raw VRAM to a file. 
bool DumpVRAMToFile(const char* filename); - // Ensures all buffered vertices are drawn. - virtual void FlushRender() = 0; + // Queues the current frame for presentation. Should only be used with runahead. + void QueuePresentCurrentFrame(); /// Helper function for computing the draw rectangle in a larger window. - void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, - GSVector4i* display_rect, GSVector4i* draw_rect) const; + static void CalculateDrawRect(u32 window_width, u32 window_height, u32 crtc_display_width, u32 crtc_display_height, + s32 display_origin_left, s32 display_origin_top, u32 display_vram_width, + u32 display_vram_height, DisplayRotation rotation, DisplayAlignment alignment, + float pixel_aspect_ratio, bool stretch_vertically, bool integer_scale, + GSVector4i* display_rect, GSVector4i* draw_rect); - /// Helper function for computing screenshot bounds. - void CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, - GSVector4i* draw_rect) const; - - /// Helper function to save current display texture to PNG. - bool WriteDisplayTextureToFile(std::string path); - - /// Renders the display, optionally with postprocessing to the specified image. - bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx, Image* out_image); - - /// Helper function to save screenshot to PNG. - bool RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, - bool show_osd_message); - - /// Draws the current display texture, with any post-processing. - GPUDevice::PresentResult PresentDisplay(); - - /// Sends the current frame to media capture. - bool SendDisplayToMediaCapture(MediaCapture* cap); - - /// Reads the CLUT from the specified coordinates, accounting for wrap-around. 
- static void ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit); - -protected: +private: TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const; TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const; @@ -273,16 +236,6 @@ protected: } ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; } - static constexpr std::tuple UnpackTexcoord(u16 texcoord) - { - return std::make_tuple(static_cast(texcoord), static_cast(texcoord >> 8)); - } - - static constexpr std::tuple UnpackColorRGB24(u32 rgb24) - { - return std::make_tuple(static_cast(rgb24), static_cast(rgb24 >> 8), static_cast(rgb24 >> 16)); - } - static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha); @@ -302,10 +255,10 @@ protected: void UpdateGPUIdle(); /// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...). - ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); } + ALWAYS_INLINE u8 GetInterlacedDisplayField() const { return m_crtc_state.interlaced_field; } /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. - ALWAYS_INLINE u32 GetActiveLineLSB() const { return ZeroExtend32(m_crtc_state.active_line_lsb); } + ALWAYS_INLINE u8 GetActiveLineLSB() const { return m_crtc_state.active_line_lsb; } /// Updates drawing area that's suitablef or clamping. 
void SetClampedDrawingArea(); @@ -340,16 +293,13 @@ protected: void InvalidateCLUT(); bool IsCLUTValid() const; - // Rendering in the backend - virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0; - virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) = 0; - virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) = 0; - virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) = 0; - virtual void DispatchRenderCommand() = 0; - virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; - virtual void UpdateDisplay() = 0; - virtual void DrawRendererStats(); - virtual void OnBufferSwapped(); + void ReadVRAM(u16 x, u16 y, u16 width, u16 height); + void UpdateVRAM(u16 x, u16 y, u16 width, u16 height, const void* data, bool set_mask, bool check_mask); + void UpdateDisplay(bool submit_frame); + + void PrepareForDraw(); + void FinishPolyline(); + void FillDrawCommand(GPUBackendDrawCommand* RESTRICT cmd, GPURenderCommand rc) const; ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(GSVector2i v1, GSVector2i v2, GSVector2i v3, bool shaded, bool textured, bool semitransparent) @@ -446,14 +396,10 @@ protected: u32 texture_window_value; // decoded values + // TODO: Make this a command GPUTextureWindow texture_window; bool texture_x_flip; bool texture_y_flip; - bool texture_page_changed; - - ALWAYS_INLINE bool IsTexturePageChanged() const { return texture_page_changed; } - ALWAYS_INLINE void SetTexturePageChanged() { texture_page_changed = true; } - ALWAYS_INLINE void ClearTexturePageChangedFlag() { texture_page_changed = false; } } m_draw_mode = {}; GPUDrawingArea m_drawing_area = {}; @@ -587,65 +533,7 @@ protected: TickCount m_max_run_ahead = 128; u32 m_fifo_size = 128; - void ClearDisplayTexture(); - void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_texture, s32 view_x, s32 view_y, s32 view_width, - s32 view_height); - - 
GPUDevice::PresentResult RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx); - - bool Deinterlace(u32 field, u32 line_skip); - bool DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip); - bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); - void DestroyDeinterlaceTextures(); - bool ApplyChromaSmoothing(); - - u32 m_current_deinterlace_buffer = 0; - std::unique_ptr m_deinterlace_pipeline; - std::unique_ptr m_deinterlace_extract_pipeline; - std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; - std::unique_ptr m_deinterlace_texture; - - std::unique_ptr m_chroma_smoothing_pipeline; - std::unique_ptr m_chroma_smoothing_texture; - - std::unique_ptr m_display_pipeline; - GPUTexture* m_display_texture = nullptr; - GPUTexture* m_display_depth_buffer = nullptr; - s32 m_display_texture_view_x = 0; - s32 m_display_texture_view_y = 0; - s32 m_display_texture_view_width = 0; - s32 m_display_texture_view_height = 0; - - struct Counters - { - u32 num_reads; - u32 num_writes; - u32 num_copies; - u32 num_vertices; - u32 num_primitives; - - // u32 num_read_texture_updates; - // u32 num_ubo_updates; - }; - - struct Stats : Counters - { - size_t host_buffer_streamed; - u32 host_num_draws; - u32 host_num_barriers; - u32 host_num_render_passes; - u32 host_num_copies; - u32 host_num_downloads; - u32 host_num_uploads; - }; - - Counters m_counters = {}; - Stats m_stats = {}; - private: - bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error); - using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandlerTable = std::array; static GP0CommandHandlerTable GenerateGP0CommandHandlerTable(); diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index 59d883dc5..087473f89 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -2,90 +2,234 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 
#include "gpu_backend.h" +#include "gpu.h" +#include "gpu_shadergen.h" +#include "gpu_sw_rasterizer.h" +#include "gpu_thread.h" +#include "host.h" +#include "performance_counters.h" +#include "save_state_version.h" +#include "settings.h" +#include "system.h" +#include "system_private.h" +#include "util/gpu_device.h" +#include "util/image.h" +#include "util/imgui_manager.h" +#include "util/media_capture.h" +#include "util/postprocessing.h" #include "util/state_wrapper.h" #include "common/align.h" +#include "common/error.h" +#include "common/file_system.h" +#include "common/gsvector_formatter.h" #include "common/log.h" +#include "common/path.h" +#include "common/small_string.h" +#include "common/string_util.h" #include "common/timer.h" +#include "IconsEmoji.h" +#include "IconsFontAwesome5.h" +#include "fmt/format.h" + +#include +#include + LOG_CHANNEL(GPU); -std::unique_ptr g_gpu_backend; +namespace { -GPUBackend::GPUBackend() = default; - -GPUBackend::~GPUBackend() = default; - -bool GPUBackend::Initialize(bool use_thread) +struct Counters { - if (use_thread) - StartGPUThread(); + u32 num_reads; + u32 num_writes; + u32 num_copies; + u32 num_vertices; + u32 num_primitives; +}; + +struct Stats : Counters +{ + size_t host_buffer_streamed; + u32 host_num_draws; + u32 host_num_barriers; + u32 host_num_render_passes; + u32 host_num_copies; + u32 host_num_downloads; + u32 host_num_uploads; +}; + +struct ALIGN_TO_CACHE_LINE CPUThreadState +{ + std::atomic queued_frames; + std::atomic_bool waiting_for_gpu_thread; + Threading::KernelSemaphore gpu_thread_wait; +}; + +} // namespace + +static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string path, FileSystem::ManagedCFilePtr fp, + u8 quality, bool clear_alpha, bool flip_y, Image image, std::string osd_key); + +static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; + +static Counters s_counters = {}; +static Stats s_stats = {}; +static CPUThreadState s_cpu_thread_state 
= {}; + +GPUBackend::GPUBackend() +{ + GPU_SW_Rasterizer::SelectImplementation(); + ResetStatistics(); + + // Should be zero. + Assert(s_cpu_thread_state.queued_frames.load(std::memory_order_acquire) == 0); + Assert(!s_cpu_thread_state.waiting_for_gpu_thread.load(std::memory_order_acquire)); +} + +GPUBackend::~GPUBackend() +{ + DestroyDeinterlaceTextures(); + g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); +} + +bool GPUBackend::Initialize(bool clear_vram, Error* error) +{ + if (!CompileDisplayPipelines(true, true, g_gpu_settings.display_24bit_chroma_smoothing, error)) + return false; return true; } -void GPUBackend::Reset() +void GPUBackend::UpdateSettings(const Settings& old_settings) { - Sync(true); - DrawingAreaChanged(GPUDrawingArea{0, 0, 0, 0}, GSVector4i::zero()); -} + FlushRender(); -void GPUBackend::SetThreadEnabled(bool use_thread) -{ - Sync(true); + if (g_gpu_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) + GPUBackend::ResetStatistics(); - if (m_use_gpu_thread != use_thread) + if (g_gpu_settings.display_scaling != old_settings.display_scaling || + g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || + g_gpu_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing) { - if (!use_thread) - StopGPUThread(); - else - StartGPUThread(); + // Toss buffers on mode change. 
+ if (g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) + DestroyDeinterlaceTextures(); + + if (!CompileDisplayPipelines( + g_gpu_settings.display_scaling != old_settings.display_scaling, + g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, + g_gpu_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing, nullptr)) + { + Panic("Failed to compile display pipeline on settings change."); + } } } -void GPUBackend::Shutdown() +void GPUBackend::UpdateResolutionScale() { - StopGPUThread(); +} + +u32 GPUBackend::GetResolutionScale() const +{ + return 1u; +} + +void GPUBackend::RestoreDeviceContext() +{ +} + +GPUThreadCommand* GPUBackend::NewClearVRAMCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearVRAM, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUBackend::NewClearDisplayCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearDisplay, sizeof(GPUThreadCommand))); +} + +GPUBackendUpdateDisplayCommand* GPUBackend::NewUpdateDisplayCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::UpdateDisplay, sizeof(GPUBackendUpdateDisplayCommand))); +} + +GPUBackendSubmitFrameCommand* GPUBackend::NewSubmitFrameCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::SubmitFrame, sizeof(GPUBackendUpdateDisplayCommand))); +} + +GPUThreadCommand* GPUBackend::NewClearCacheCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearCache, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUBackend::NewBufferSwappedCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::BufferSwapped, sizeof(GPUThreadCommand))); +} + +GPUBackendReadVRAMCommand* GPUBackend::NewReadVRAMCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ReadVRAM, 
// Allocates an UpdateVRAM command with room for `num_words` 16-bit words
// placed directly after the fixed-size command header.
GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words)
{
  const u32 total_bytes = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16));
  void* const storage = GPUThread::AllocateCommand(GPUBackendCommandType::UpdateVRAM, total_bytes);
  return static_cast<GPUBackendUpdateVRAMCommand*>(storage);
}
+} + +GPUBackendDrawPrecisePolygonCommand* GPUBackend::NewDrawPrecisePolygonCommand(u32 num_vertices) +{ + const u32 size = + sizeof(GPUBackendDrawPrecisePolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + GPUBackendDrawPrecisePolygonCommand* cmd = static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::DrawPrecisePolygon, size)); cmd->num_vertices = Truncate16(num_vertices); return cmd; } @@ -93,219 +237,205 @@ GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::DrawRectangle, sizeof(GPUBackendDrawRectangleCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::DrawRectangle, sizeof(GPUBackendDrawRectangleCommand))); } GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex)); GPUBackendDrawLineCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::DrawLine, size)); + static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::DrawLine, size)); cmd->num_vertices = Truncate16(num_vertices); return cmd; } -void* GPUBackend::AllocateCommand(GPUBackendCommandType command, u32 size) +void GPUBackend::PushCommand(GPUThreadCommand* cmd) { - // Ensure size is a multiple of 4 so we don't end up with an unaligned command. 
- size = Common::AlignUpPow2(size, 4); + GPUThread::PushCommand(cmd); +} - for (;;) +void GPUBackend::PushCommandAndWakeThread(GPUThreadCommand* cmd) +{ + GPUThread::PushCommandAndWakeThread(cmd); +} + +void GPUBackend::PushCommandAndSync(GPUThreadCommand* cmd, bool spin) +{ + GPUThread::PushCommandAndSync(cmd, spin); +} + +void GPUBackend::SyncGPUThread(bool spin) +{ + GPUThread::SyncGPUThread(spin); +} + +bool GPUBackend::IsUsingHardwareBackend() +{ + return (GPUThread::GetRequestedRenderer().value_or(GPURenderer::Software) != GPURenderer::Software); +} + +bool GPUBackend::BeginQueueFrame() +{ + const u32 queued_frames = s_cpu_thread_state.queued_frames.fetch_add(1, std::memory_order_acq_rel) + 1; + if (queued_frames <= g_settings.gpu_max_queued_frames) + return false; + + DEV_LOG("<-- {} queued frames, {} max, blocking CPU thread", queued_frames, g_settings.gpu_max_queued_frames); + s_cpu_thread_state.waiting_for_gpu_thread.store(true, std::memory_order_release); + return true; +} + +void GPUBackend::WaitForOneQueuedFrame() +{ + // Inbetween this and the post call, we may have finished the frame. Check. + if (s_cpu_thread_state.queued_frames.load(std::memory_order_acquire) <= g_settings.gpu_max_queued_frames) { - u32 read_ptr = m_command_fifo_read_ptr.load(); - u32 write_ptr = m_command_fifo_write_ptr.load(); - if (read_ptr > write_ptr) + // It's possible that the GPU thread has already signaled the semaphore. + // If so, then we still need to drain it, otherwise waits in the future will return prematurely. + bool expected = true; + if (s_cpu_thread_state.waiting_for_gpu_thread.compare_exchange_strong(expected, false, std::memory_order_acq_rel, + std::memory_order_relaxed)) { - u32 available_size = read_ptr - write_ptr; - while (available_size < (size + sizeof(GPUBackendCommandType))) - { - WakeGPUThread(); - read_ptr = m_command_fifo_read_ptr.load(); - available_size = (read_ptr > write_ptr) ? 
(read_ptr - write_ptr) : (COMMAND_QUEUE_SIZE - write_ptr); - } + return; } - else - { - const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr; - if ((size + sizeof(GPUBackendCommand)) > available_size) + } + + s_cpu_thread_state.gpu_thread_wait.Wait(); + + // Sanity check: queued frames should be in range now. If they're not, we fucked up the semaphore. + Assert(s_cpu_thread_state.queued_frames.load(std::memory_order_acquire) <= g_settings.gpu_max_queued_frames); +} + +u32 GPUBackend::GetQueuedFrameCount() +{ + return s_cpu_thread_state.queued_frames.load(std::memory_order_acquire); +} + +bool GPUBackend::AllocateMemorySaveStates(std::span states, Error* error) +{ + bool result; + GPUThread::RunOnBackend( + [states, error, &result](GPUBackend* backend) { + // Free old textures first. + for (size_t i = 0; i < states.size(); i++) + g_gpu_device->RecycleTexture(std::move(states[i].vram_texture)); + + for (size_t i = 0; i < states.size(); i++) { - // allocate a dummy command to wrap the buffer around - GPUBackendCommand* dummy_cmd = reinterpret_cast(&m_command_fifo_data[write_ptr]); - dummy_cmd->type = GPUBackendCommandType::Wraparound; - dummy_cmd->size = available_size; - dummy_cmd->params.bits = 0; - m_command_fifo_write_ptr.store(0); - continue; - } - } - - GPUBackendCommand* cmd = reinterpret_cast(&m_command_fifo_data[write_ptr]); - cmd->type = command; - cmd->size = size; - return cmd; - } -} - -u32 GPUBackend::GetPendingCommandSize() const -{ - const u32 read_ptr = m_command_fifo_read_ptr.load(); - const u32 write_ptr = m_command_fifo_write_ptr.load(); - return (write_ptr >= read_ptr) ? 
(write_ptr - read_ptr) : (COMMAND_QUEUE_SIZE - read_ptr + write_ptr); -} - -void GPUBackend::PushCommand(GPUBackendCommand* cmd) -{ - if (!m_use_gpu_thread) - { - // single-thread mode - if (cmd->type != GPUBackendCommandType::Sync) - HandleCommand(cmd); - } - else - { - const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size; - DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); - UNREFERENCED_VARIABLE(new_write_ptr); - if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU) - WakeGPUThread(); - } -} - -void GPUBackend::WakeGPUThread() -{ - std::unique_lock lock(m_sync_mutex); - if (!m_gpu_thread_sleeping.load()) - return; - - m_wake_gpu_thread_cv.notify_one(); -} - -void GPUBackend::StartGPUThread() -{ - m_gpu_loop_done.store(false); - m_use_gpu_thread = true; - m_gpu_thread.Start([this]() { RunGPULoop(); }); - INFO_LOG("GPU thread started."); -} - -void GPUBackend::StopGPUThread() -{ - if (!m_use_gpu_thread) - return; - - m_gpu_loop_done.store(true); - WakeGPUThread(); - m_gpu_thread.Join(); - m_use_gpu_thread = false; - INFO_LOG("GPU thread stopped."); -} - -void GPUBackend::Sync(bool allow_sleep) -{ - if (!m_use_gpu_thread) - return; - - GPUBackendSyncCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::Sync, sizeof(GPUBackendSyncCommand))); - cmd->allow_sleep = allow_sleep; - PushCommand(cmd); - WakeGPUThread(); - - m_sync_semaphore.Wait(); -} - -void GPUBackend::RunGPULoop() -{ - static constexpr double SPIN_TIME_NS = 1 * 1000000; - Timer::Value last_command_time = 0; - - for (;;) - { - u32 write_ptr = m_command_fifo_write_ptr.load(); - u32 read_ptr = m_command_fifo_read_ptr.load(); - if (read_ptr == write_ptr) - { - const Timer::Value current_time = Timer::GetCurrentValue(); - if (Timer::ConvertValueToNanoseconds(current_time - last_command_time) < SPIN_TIME_NS) - continue; - - std::unique_lock lock(m_sync_mutex); - m_gpu_thread_sleeping.store(true); - m_wake_gpu_thread_cv.wait(lock, [this]() { return 
m_gpu_loop_done.load() || GetPendingCommandSize() > 0; }); - m_gpu_thread_sleeping.store(false); - - if (m_gpu_loop_done.load()) - break; - else - continue; - } - - if (write_ptr < read_ptr) - write_ptr = COMMAND_QUEUE_SIZE; - - bool allow_sleep = false; - while (read_ptr < write_ptr) - { - const GPUBackendCommand* cmd = reinterpret_cast(&m_command_fifo_data[read_ptr]); - read_ptr += cmd->size; - - switch (cmd->type) - { - case GPUBackendCommandType::Wraparound: + if (!backend->AllocateMemorySaveState(states[i], error)) { - DebugAssert(read_ptr == COMMAND_QUEUE_SIZE); - write_ptr = m_command_fifo_write_ptr.load(); - read_ptr = 0; + // Free anything that was allocated. + for (size_t j = 0; j <= i; i++) + { + states[j].state_data.deallocate(); + states[j].vram_texture.reset(); + result = false; + return; + } } - break; - - case GPUBackendCommandType::Sync: - { - DebugAssert(read_ptr == write_ptr); - m_sync_semaphore.Post(); - allow_sleep = static_cast(cmd)->allow_sleep; - } - break; - - default: - HandleCommand(cmd); - break; } - } - last_command_time = allow_sleep ? 
0 : Timer::GetCurrentValue(); - m_command_fifo_read_ptr.store(read_ptr); - } + result = true; + }, + true, false); + return result; } -void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) +void GPUBackend::HandleCommand(const GPUThreadCommand* cmd) { switch (cmd->type) { + case GPUBackendCommandType::ClearVRAM: + { + ClearVRAM(); + } + break; + + case GPUBackendCommandType::LoadState: + { + LoadState(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::LoadMemoryState: + { + System::MemorySaveState& mss = *static_cast(cmd)->memory_save_state; + StateWrapper sw(mss.gpu_state_data.span(mss.gpu_state_size), StateWrapper::Mode::Read, SAVE_STATE_VERSION); + DoMemoryState(sw, mss); + } + break; + + case GPUBackendCommandType::SaveMemoryState: + { + System::MemorySaveState& mss = *static_cast(cmd)->memory_save_state; + StateWrapper sw(mss.gpu_state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); + DoMemoryState(sw, mss); + mss.gpu_state_size = static_cast(sw.GetPosition()); + } + break; + + case GPUBackendCommandType::ClearDisplay: + { + ClearDisplay(); + } + break; + + case GPUBackendCommandType::UpdateDisplay: + { + HandleUpdateDisplayCommand(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::SubmitFrame: + { + HandleSubmitFrameCommand(&static_cast(cmd)->frame); + } + break; + + case GPUBackendCommandType::ClearCache: + { + ClearCache(); + } + break; + + case GPUBackendCommandType::BufferSwapped: + { + OnBufferSwapped(); + } + break; + + case GPUBackendCommandType::ReadVRAM: + { + const GPUBackendReadVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_reads++; + ReadVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height)); + } + break; + case GPUBackendCommandType::FillVRAM: { - FlushRender(); const GPUBackendFillVRAMCommand* ccmd = static_cast(cmd); FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), - ccmd->color, 
ccmd->params); + ccmd->color, ccmd->interlaced_rendering, ccmd->active_line_lsb); } break; case GPUBackendCommandType::UpdateVRAM: { - FlushRender(); const GPUBackendUpdateVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_writes++; UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), - ccmd->data, ccmd->params); + ccmd->data, ccmd->set_mask_while_drawing, ccmd->check_mask_before_draw); } break; case GPUBackendCommandType::CopyVRAM: { - FlushRender(); const GPUBackendCopyVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_copies++; CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x), - ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params); + ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), + ccmd->set_mask_while_drawing, ccmd->check_mask_before_draw); } break; @@ -313,7 +443,8 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) { FlushRender(); const GPUBackendSetDrawingAreaCommand* ccmd = static_cast(cmd); - DrawingAreaChanged(ccmd->new_area, GSVector4i::load(ccmd->new_clamped_area)); + GPU_SW_Rasterizer::g_drawing_area = ccmd->new_area; + DrawingAreaChanged(); } break; @@ -326,23 +457,1134 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) case GPUBackendCommandType::DrawPolygon: { - DrawPolygon(static_cast(cmd)); + const GPUBackendDrawPolygonCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives++; + DrawPolygon(ccmd); + } + break; + + case GPUBackendCommandType::DrawPrecisePolygon: + { + const GPUBackendDrawPolygonCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives++; + DrawPrecisePolygon(static_cast(cmd)); } break; case GPUBackendCommandType::DrawRectangle: { - DrawRectangle(static_cast(cmd)); + const GPUBackendDrawRectangleCommand* ccmd = 
static_cast(cmd); + s_counters.num_vertices++; + s_counters.num_primitives++; + DrawSprite(ccmd); } break; case GPUBackendCommandType::DrawLine: { - DrawLine(static_cast(cmd)); + const GPUBackendDrawLineCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives += ccmd->num_vertices / 2; + DrawLine(ccmd); } break; + DefaultCaseIsUnreachable(); + } +} + +bool GPUBackend::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error) +{ + const GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, + g_gpu_device->GetFeatures().framebuffer_fetch); + + GPUPipeline::GraphicsConfig plconfig; + plconfig.input_layout.vertex_stride = 0; + plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.geometry_shader = nullptr; + plconfig.depth_format = GPUTexture::Format::Unknown; + plconfig.samples = 1; + plconfig.per_sample_shading = false; + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; + + if (display) + { + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.SetTargetFormats(g_gpu_device->HasMainSwapChain() ? 
g_gpu_device->GetMainSwapChain()->GetFormat() : + GPUTexture::Format::RGBA8); + + std::string vs = shadergen.GenerateDisplayVertexShader(); + std::string fs; + switch (g_gpu_settings.display_scaling) + { + case DisplayScalingMode::BilinearSharp: + fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); + break; + + case DisplayScalingMode::BilinearSmooth: + case DisplayScalingMode::BilinearInteger: + fs = shadergen.GenerateDisplayFragmentShader(true, false); + break; + + case DisplayScalingMode::Nearest: + case DisplayScalingMode::NearestInteger: + default: + fs = shadergen.GenerateDisplayFragmentShader(false, true); + break; + } + + std::unique_ptr vso = + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error); + std::unique_ptr fso = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error); + if (!vso || !fso) + return false; + GL_OBJECT_NAME(vso, "Display Vertex Shader"); + GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", + Settings::GetDisplayScalingName(g_gpu_settings.display_scaling)); + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", + Settings::GetDisplayScalingName(g_gpu_settings.display_scaling)); + } + + if (deinterlace) + { + plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); + + std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), + shadergen.GenerateScreenQuadVertexShader(), error); + if (!vso) + return false; + GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); + + std::unique_ptr fso; + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateInterleavedFieldExtractFragmentShader(), error))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); + + 
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + + GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); + + switch (g_gpu_settings.display_deinterlacing_mode) + { + case DisplayDeinterlacingMode::Disabled: + case DisplayDeinterlacingMode::Progressive: + break; + + case DisplayDeinterlacingMode::Weave: + { + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateDeinterlaceWeaveFragmentShader(), error))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); + } + break; + + case DisplayDeinterlacingMode::Blend: + { + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateDeinterlaceBlendFragmentShader(), error))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); + } + break; + + case DisplayDeinterlacingMode::Adaptive: + { + fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateFastMADReconstructFragmentShader(), error); + if (!fso) + return false; + + GL_OBJECT_NAME(fso, 
"FastMAD Reconstruct Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); + } + break; + + default: + UnreachableCode(); + } + } + + if (chroma_smoothing) + { + m_chroma_smoothing_pipeline.reset(); + g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); + + if (g_gpu_settings.display_24bit_chroma_smoothing) + { + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); + + std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), + shadergen.GenerateScreenQuadVertexShader(), error); + std::unique_ptr fso = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateChromaSmoothingFragmentShader(), error); + if (!vso || !fso) + return false; + GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); + GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); + + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); + } + } + + return true; +} + +void GPUBackend::HandleUpdateDisplayCommand(const GPUBackendUpdateDisplayCommand* cmd) +{ + const GPUBackendUpdateDisplayCommand* ccmd = static_cast(cmd); + m_display_width = ccmd->display_width; + m_display_height = ccmd->display_height; + m_display_origin_left = ccmd->display_origin_left; + m_display_origin_top = ccmd->display_origin_top; + m_display_vram_width = ccmd->display_vram_width; + m_display_vram_height = ccmd->display_vram_height; + m_display_pixel_aspect_ratio = ccmd->display_pixel_aspect_ratio; + + 
// Handles a submitted frame: notifies the host that the frame is done, optionally
// sends the display to media capture, optionally presents, and optionally updates the
// performance counters.
// When a frame is presented, the queued-frame counter is decremented and, if the
// waiting flag is currently set, the flag is cleared and the semaphore is posted.
void GPUBackend::HandleSubmitFrameCommand(const GPUBackendFramePresentationParameters* cmd)
{
  // For regtest.
  Host::FrameDoneOnGPUThread(this, cmd->frame_number);

  if (cmd->media_capture)
    SendDisplayToMediaCapture(cmd->media_capture);

  if (cmd->present_frame)
  {
    GPUThread::Internal::PresentFrame(cmd->allow_present_skip, cmd->present_time);

    // Decrement before testing the flag, so a waiter that re-reads the counter sees the new value.
    s_cpu_thread_state.queued_frames.fetch_sub(1, std::memory_order_acq_rel);

    // Only the side that flips the flag from true to false acts on it: here that means
    // posting the semaphore. If the exchange fails, the flag was not set and no post is made.
    bool expected = true;
    if (s_cpu_thread_state.waiting_for_gpu_thread.compare_exchange_strong(expected, false, std::memory_order_acq_rel,
                                                                          std::memory_order_relaxed))
    {
      DEV_LOG("--> Unblocking CPU thread");
      s_cpu_thread_state.gpu_thread_wait.Post();
    }
  }

  // Update perf counters *after* throttling, we want to measure from start-of-frame
  // to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different
  // amounts of computation happening in each frame).
  if (cmd->update_performance_counters)
    PerformanceCounters::Update(this, cmd->frame_number, cmd->internal_frame_number);
}
+ DestroyDeinterlaceTextures(); +} + +void GPUBackend::ClearDisplayTexture() +{ + m_display_texture = nullptr; + m_display_texture_view_x = 0; + m_display_texture_view_y = 0; + m_display_texture_view_width = 0; + m_display_texture_view_height = 0; +} + +void GPUBackend::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, + s32 view_width, s32 view_height) +{ + DebugAssert(texture); + + if (g_gpu_settings.display_auto_resize_window && + (view_width != m_display_texture_view_width || view_height != m_display_texture_view_height)) + { + Host::RunOnCPUThread([]() { System::RequestDisplaySize(); }); + } + + m_display_texture = texture; + m_display_depth_buffer = depth_buffer; + m_display_texture_view_x = view_x; + m_display_texture_view_y = view_y; + m_display_texture_view_width = view_width; + m_display_texture_view_height = view_height; +} + +GPUDevice::PresentResult GPUBackend::PresentDisplay() +{ + FlushRender(); + + if (!g_gpu_device->HasMainSwapChain()) + return GPUDevice::PresentResult::SkipPresent; + + GSVector4i display_rect; + GSVector4i draw_rect; + CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(), + !g_gpu_settings.debugging.show_vram, true, &display_rect, &draw_rect); + return RenderDisplay(nullptr, display_rect, draw_rect, !g_gpu_settings.debugging.show_vram); +} + +GPUDevice::PresentResult GPUBackend::RenderDisplay(GPUTexture* target, const GSVector4i display_rect, + const GSVector4i draw_rect, bool postfx) +{ + GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); + + if (m_display_texture) + m_display_texture->MakeReadyForSampling(); + + // Internal post-processing. 
+ GPUTexture* display_texture = m_display_texture; + s32 display_texture_view_x = m_display_texture_view_x; + s32 display_texture_view_y = m_display_texture_view_y; + s32 display_texture_view_width = m_display_texture_view_width; + s32 display_texture_view_height = m_display_texture_view_height; + if (postfx && display_texture && PostProcessing::InternalChain.IsActive() && + PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width, + display_texture_view_height)) + { + DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 && + static_cast(display_texture->GetWidth()) == display_texture_view_width && + static_cast(display_texture->GetHeight()) == display_texture_view_height); + + // Now we can apply the post chain. + GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); + if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, + GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), + display_texture_view_width, display_texture_view_height, m_display_width, + m_display_height) == GPUDevice::PresentResult::OK) + { + display_texture_view_x = 0; + display_texture_view_y = 0; + display_texture = post_output_texture; + display_texture->MakeReadyForSampling(); + } + } + + const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetMainSwapChain()->GetFormat(); + const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetMainSwapChain()->GetWidth(); + const u32 target_height = target ? 
target->GetHeight() : g_gpu_device->GetMainSwapChain()->GetHeight(); + const bool really_postfx = (postfx && PostProcessing::DisplayChain.IsActive() && g_gpu_device->HasMainSwapChain() && + hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && + PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); + GSVector4i real_draw_rect = + (target || really_postfx) ? draw_rect : g_gpu_device->GetMainSwapChain()->PreRotateClipRect(draw_rect); + if (g_gpu_device->UsesLowerLeftOrigin()) + { + real_draw_rect = GPUDevice::FlipToLowerLeft( + real_draw_rect, + (target || really_postfx) ? target_height : g_gpu_device->GetMainSwapChain()->GetPostRotatedHeight()); + } + if (really_postfx) + { + g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR); + g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture()); + } + else + { + if (target) + { + g_gpu_device->SetRenderTarget(target); + } + else + { + const GPUDevice::PresentResult pres = g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain()); + if (pres != GPUDevice::PresentResult::OK) + return pres; + } + } + + if (display_texture) + { + bool texture_filter_linear = false; + + struct Uniforms + { + float src_rect[4]; + float src_size[4]; + float clamp_rect[4]; + float params[4]; + float rotation_matrix[2][2]; + } uniforms; + std::memset(uniforms.params, 0, sizeof(uniforms.params)); + + switch (g_gpu_settings.display_scaling) + { + case DisplayScalingMode::Nearest: + case DisplayScalingMode::NearestInteger: + break; + + case DisplayScalingMode::BilinearSmooth: + case DisplayScalingMode::BilinearInteger: + texture_filter_linear = true; + break; + + case DisplayScalingMode::BilinearSharp: + { + texture_filter_linear = true; + uniforms.params[0] = std::max( + std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f); + uniforms.params[1] = std::max( + 
std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f); + uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; + uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; + } + break; + + default: + UnreachableCode(); + break; + } + + g_gpu_device->SetPipeline(m_display_pipeline.get()); + g_gpu_device->SetTextureSampler( + 0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); + + // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because + // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. + const float rcp_width = 1.0f / static_cast(display_texture->GetWidth()); + const float rcp_height = 1.0f / static_cast(display_texture->GetHeight()); + uniforms.src_rect[0] = static_cast(display_texture_view_x) * rcp_width; + uniforms.src_rect[1] = static_cast(display_texture_view_y) * rcp_height; + uniforms.src_rect[2] = static_cast(display_texture_view_width) * rcp_width; + uniforms.src_rect[3] = static_cast(display_texture_view_height) * rcp_height; + uniforms.clamp_rect[0] = (static_cast(display_texture_view_x) + 0.5f) * rcp_width; + uniforms.clamp_rect[1] = (static_cast(display_texture_view_y) + 0.5f) * rcp_height; + uniforms.clamp_rect[2] = + (static_cast(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width; + uniforms.clamp_rect[3] = + (static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height; + uniforms.src_size[0] = static_cast(display_texture->GetWidth()); + uniforms.src_size[1] = static_cast(display_texture->GetHeight()); + uniforms.src_size[2] = rcp_width; + uniforms.src_size[3] = rcp_height; + + const WindowInfo::PreRotation surface_prerotation = (target || really_postfx) ? 
+ WindowInfo::PreRotation::Identity : + g_gpu_device->GetMainSwapChain()->GetPreRotation(); + if (g_gpu_settings.display_rotation != DisplayRotation::Normal || + surface_prerotation != WindowInfo::PreRotation::Identity) + { + static constexpr const std::array(DisplayRotation::Count)> rotation_radians = {{ + 0.0f, + static_cast(std::numbers::pi * 1.5f), // Rotate90 + static_cast(std::numbers::pi), // Rotate180 + static_cast(std::numbers::pi / 2.0), // Rotate270 + }}; + + const u32 rotation_idx = + (static_cast(g_gpu_settings.display_rotation) + static_cast(surface_prerotation)) % + static_cast(rotation_radians.size()); + GSMatrix2x2::Rotation(rotation_radians[rotation_idx]).store(uniforms.rotation_matrix); + } + else + { + GSMatrix2x2::Identity().store(uniforms.rotation_matrix); + } + + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + + g_gpu_device->SetViewportAndScissor(real_draw_rect); + g_gpu_device->Draw(3, 0); + } + + if (really_postfx) + { + DebugAssert(!g_gpu_settings.debugging.show_vram); + + // "original size" in postfx includes padding. + const float upscale_x = + m_display_texture ? static_cast(m_display_texture_view_width) / static_cast(m_display_vram_width) : + 1.0f; + const float upscale_y = m_display_texture ? 
static_cast(m_display_texture_view_height) / + static_cast(m_display_vram_height) : + 1.0f; + const s32 orig_width = static_cast(std::ceil(static_cast(m_display_width) * upscale_x)); + const s32 orig_height = static_cast(std::ceil(static_cast(m_display_height) * upscale_y)); + + return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, + display_rect, orig_width, orig_height, m_display_width, m_display_height); + } + else + { + return GPUDevice::PresentResult::OK; + } +} + +void GPUBackend::SendDisplayToMediaCapture(MediaCapture* cap) +{ + GPUTexture* target = cap->GetRenderTexture(); + if (!target) [[unlikely]] + { + WARNING_LOG("Failed to get video capture render texture."); + Host::RunOnCPUThread(&System::StopMediaCapture); + return; + } + + const bool apply_aspect_ratio = + (g_gpu_settings.display_screenshot_mode != DisplayScreenshotMode::UncorrectedInternalResolution); + const bool postfx = (g_gpu_settings.display_screenshot_mode != DisplayScreenshotMode::InternalResolution); + GSVector4i display_rect, draw_rect; + CalculateDrawRect(target->GetWidth(), target->GetHeight(), !g_gpu_settings.debugging.show_vram, apply_aspect_ratio, + &display_rect, &draw_rect); + + // Not cleared by RenderDisplay(). 
/// Applies the configured deinterlacing mode to the current display texture.
///
/// Reads the current display texture and its view rectangle, runs the pass selected by
/// g_gpu_settings.display_deinterlacing_mode, and on success points the display at the result via
/// SetDisplayTexture().
///
/// @param field     Field index; forwarded to the Weave and Adaptive passes as a shader uniform.
/// @param line_skip Line skip value; forwarded to DeinterlaceExtractField() and to the Weave pass. In Disabled mode a
///                  value of zero means there is nothing to do.
/// @return true if the display texture was left as-is (Disabled, line_skip == 0) or replaced by the pass output;
///         false if a helper failed. Weave/Blend/Adaptive also clear the display texture on failure; Disabled does not.
bool GPUBackend::Deinterlace(u32 field, u32 line_skip)
{
  // Snapshot the source view up front; SetDisplayTexture() below overwrites these members.
  GPUTexture* src = m_display_texture;
  const u32 x = m_display_texture_view_x;
  const u32 y = m_display_texture_view_y;
  const u32 width = m_display_texture_view_width;
  const u32 height = m_display_texture_view_height;

  switch (g_gpu_settings.display_deinterlacing_mode)
  {
    case DisplayDeinterlacingMode::Disabled:
    {
      if (line_skip == 0)
        return true;

      // Still have to extract the field.
      if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]]
        return false;

      SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height);
      return true;
    }

    case DisplayDeinterlacingMode::Weave:
    {
      GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip);

      // Output is double height. This is the only mode that asks ResizeTexture() to preserve the old contents.
      // NOTE(review): presumably because each pass only writes the current field's lines - confirm in the shader.
      const u32 full_height = height * 2;
      if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]]
      {
        ClearDisplayTexture();
        return false;
      }

      src->MakeReadyForSampling();

      g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
      g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
      g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler());
      const u32 uniforms[] = {x, y, field, line_skip};
      g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
      g_gpu_device->SetViewportAndScissor(0, 0, width, full_height);
      g_gpu_device->Draw(3, 0);

      m_deinterlace_texture->MakeReadyForSampling();
      SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height);
      return true;
    }

    case DisplayDeinterlacingMode::Blend:
    {
      constexpr u32 NUM_BLEND_BUFFERS = 2;

      GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip);

      // Ring of two field buffers: write the current field into this_buffer and advance the ring index.
      const u32 this_buffer = m_current_deinterlace_buffer;
      m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS;
      GL_INS_FMT("Current buffer: {}", this_buffer);
      if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) ||
          !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]]
      {
        ClearDisplayTexture();
        return false;
      }

      // TODO: could be implemented with alpha blending instead..

      g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get());
      g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
      g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
      g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler());
      // (this_buffer - 1) is unsigned and wraps when this_buffer is 0; since the buffer count divides 2^32 the
      // modulo still yields the previous ring slot.
      g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(),
                                      g_gpu_device->GetNearestSampler());
      g_gpu_device->SetViewportAndScissor(0, 0, width, height);
      g_gpu_device->Draw(3, 0);

      m_deinterlace_texture->MakeReadyForSampling();
      SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height);
      return true;
    }

    case DisplayDeinterlacingMode::Adaptive:
    {
      GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field,
                   line_skip);

      // Double-height output built from the current field plus the three previous ones in the ring.
      const u32 full_height = height * 2;
      const u32 this_buffer = m_current_deinterlace_buffer;
      m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT;
      GL_INS_FMT("Current buffer: {}", this_buffer);
      if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) ||
          !DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]]
      {
        ClearDisplayTexture();
        return false;
      }

      g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
      g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
      // Samplers 0..3 are bound newest-to-oldest; the unsigned subtraction wraps and the modulo maps it back into
      // the ring (the buffer count divides 2^32).
      g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler());
      g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(),
                                      g_gpu_device->GetNearestSampler());
      g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(),
                                      g_gpu_device->GetNearestSampler());
      g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(),
                                      g_gpu_device->GetNearestSampler());
      const u32 uniforms[] = {field, full_height};
      g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
      g_gpu_device->SetViewportAndScissor(0, 0, width, full_height);
      g_gpu_device->Draw(3, 0);

      m_deinterlace_texture->MakeReadyForSampling();
      SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height);
      return true;
    }

    default:
      UnreachableCode();
  }
}
+ src->MakeReadyForSampling(); + g_gpu_device->SetRenderTarget(dst); + g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); + g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, line_skip}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + } + + dst->MakeReadyForSampling(); + return true; +} + +bool GPUBackend::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) +{ + if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || + m_deinterlace_texture->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, preserve)) [[unlikely]] + { + return false; + } + + GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); + } + + return true; +} + +bool GPUBackend::ApplyChromaSmoothing() +{ + const u32 x = m_display_texture_view_x; + const u32 y = m_display_texture_view_y; + const u32 width = m_display_texture_view_width; + const u32 height = m_display_texture_view_height; + if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || + m_chroma_smoothing_texture->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false)) + { + ClearDisplayTexture(); + return false; + } + + GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); + } + + GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); + + m_display_texture->MakeReadyForSampling(); + g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); + g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); + g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); + 
/// Base-class definition of the CLUT update hook; it does nothing with either argument.
/// NOTE(review): gpu_backend.h declares UpdateCLUT() as pure virtual, so this body is presumably only reached when
/// a derived backend calls it explicitly - confirm against the backends that override it.
void GPUBackend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit)
{
}
1.0f : m_display_pixel_aspect_ratio; + GPU::CalculateDrawRect(window_width, window_height, display_width, display_height, display_origin_left, + display_origin_top, display_vram_width, display_vram_height, g_gpu_settings.display_rotation, + g_gpu_settings.display_alignment, display_pixel_aspect_ratio, + g_gpu_settings.display_stretch_vertically, integer_scale, display_rect, draw_rect); +} + +bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string path, FileSystem::ManagedCFilePtr fp, u8 quality, + bool clear_alpha, bool flip_y, Image image, std::string osd_key) +{ + + Error error; + + if (flip_y) + image.FlipY(); + + if (image.GetFormat() != ImageFormat::RGBA8) + { + std::optional convert_image = image.ConvertToRGBA8(&error); + if (!convert_image.has_value()) + { + ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", Image::GetFormatName(image.GetFormat()), + error.GetDescription()); + image.Invalidate(); + } + else + { + image = std::move(convert_image.value()); + } + } + + bool result = false; + if (image.IsValid()) + { + if (clear_alpha) + image.SetAllPixelsOpaque(); + + result = image.SaveToFile(path.c_str(), fp.get(), quality, &error); + if (!result) + ERROR_LOG("Failed to save screenshot to '{}': '{}'", Path::GetFileName(path), error.GetDescription()); + } + + if (!osd_key.empty()) + { + Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, + fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : + TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), + Path::GetFileName(path), + result ? 
Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); + } + + return result; +} + +bool GPUBackend::WriteDisplayTextureToFile(std::string filename) +{ + if (!m_display_texture) + return false; + + const u32 read_x = static_cast(m_display_texture_view_x); + const u32 read_y = static_cast(m_display_texture_view_y); + const u32 read_width = static_cast(m_display_texture_view_width); + const u32 read_height = static_cast(m_display_texture_view_height); + const ImageFormat read_format = GPUTexture::GetImageFormatForTextureFormat(m_display_texture->GetFormat()); + if (read_format == ImageFormat::None) + return false; + + Image image(read_width, read_height, read_format); + std::unique_ptr dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), + image.GetPixels(), image.GetStorageSize(), image.GetPitch()); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) + { + ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, + GPUTexture::GetFormatName(m_display_texture->GetFormat())); + return false; + } + } + + dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); + if (!dltex->ReadTexels(0, 0, read_width, read_height, image.GetPixels(), image.GetPitch())) + { + RestoreDeviceContext(); + return false; + } + + RestoreDeviceContext(); + + Error error; + auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); + if (!fp) + { + ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); + return false; + } + + constexpr bool clear_alpha = true; + const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); + + return CompressAndWriteTextureToFile(read_width, read_height, std::move(filename), std::move(fp), + g_gpu_settings.display_screenshot_quality, clear_alpha, 
flip_y, std::move(image), + std::string()); +} + +bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, Image* out_image) +{ + bool result; + GPUThread::RunOnBackend( + [width, height, postfx, out_image, &result](GPUBackend* backend) { + if (!backend) + return; + + GSVector4i draw_rect, display_rect; + backend->CalculateDrawRect(static_cast(width), static_cast(height), true, true, &display_rect, + &draw_rect); + + // Crop it. + const u32 cropped_width = static_cast(display_rect.width()); + const u32 cropped_height = static_cast(display_rect.height()); + draw_rect = draw_rect.sub32(display_rect.xyxy()); + display_rect = display_rect.sub32(display_rect.xyxy()); + result = + backend->RenderScreenshotToBuffer(cropped_width, cropped_height, display_rect, draw_rect, postfx, out_image); + }, + true, false); + + return result; +} + +bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, + const GSVector4i draw_rect, bool postfx, Image* out_image) +{ + const GPUTexture::Format hdformat = + g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; + const ImageFormat image_format = GPUTexture::GetImageFormatForTextureFormat(hdformat); + if (image_format == ImageFormat::None) + return false; + + auto render_texture = g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, + hdformat, GPUTexture::Flags::None); + if (!render_texture) + return false; + + g_gpu_device->ClearRenderTarget(render_texture.get(), GPUDevice::DEFAULT_CLEAR_COLOR); + + // TODO: this should use copy shader instead. 
+ RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx); + + Image image(width, height, image_format); + + Error error; + std::unique_ptr dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, image.GetPixels(), image.GetStorageSize(), + image.GetPitch(), &error); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, &error))) + { + ERROR_LOG("Failed to create {}x{} download texture: {}", width, height, error.GetDescription()); + return false; + } + } + + dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); + if (!dltex->ReadTexels(0, 0, width, height, image.GetPixels(), image.GetPitch())) + { + RestoreDeviceContext(); + return false; + } + + RestoreDeviceContext(); + *out_image = std::move(image); + return true; +} + +void GPUBackend::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, + GSVector4i* draw_rect) const +{ + const bool internal_resolution = + (mode != DisplayScreenshotMode::ScreenResolution || g_gpu_settings.debugging.show_vram); + if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) + { + if (mode == DisplayScreenshotMode::InternalResolution) + { + float f_width = static_cast(m_display_texture_view_width); + float f_height = static_cast(m_display_texture_view_height); + if (!g_gpu_settings.debugging.show_vram) + GPU::ApplyPixelAspectRatioToSize(m_display_pixel_aspect_ratio, &f_width, &f_height); + + // DX11 won't go past 16K texture size. 
+ const float max_texture_size = static_cast(g_gpu_device->GetMaxTextureSize()); + if (f_width > max_texture_size) + { + f_height = f_height / (f_width / max_texture_size); + f_width = max_texture_size; + } + if (f_height > max_texture_size) + { + f_height = max_texture_size; + f_width = f_width / (f_height / max_texture_size); + } + + *width = static_cast(std::ceil(f_width)); + *height = static_cast(std::ceil(f_height)); + } + else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) + { + *width = m_display_texture_view_width; + *height = m_display_texture_view_height; + } + + // Remove padding, it's not part of the framebuffer. + *draw_rect = GSVector4i(0, 0, static_cast(*width), static_cast(*height)); + *display_rect = *draw_rect; + } + else + { + *width = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetWidth() : 1; + *height = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetHeight() : 1; + CalculateDrawRect(*width, *height, true, !g_settings.debugging.show_vram, display_rect, draw_rect); + } +} + +void GPUBackend::RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality, + bool compress_on_thread, bool show_osd_message) +{ + GPUThread::RunOnBackend( + [path = std::string(path), mode, quality, compress_on_thread, show_osd_message](GPUBackend* backend) mutable { + if (!backend) + return; + + u32 width, height; + GSVector4i display_rect, draw_rect; + backend->CalculateScreenshotSize(mode, &width, &height, &display_rect, &draw_rect); + + const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution); + if (width == 0 || height == 0) + return; + + Image image; + if (!backend->RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &image)) + { + ERROR_LOG("Failed to render {}x{} screenshot", width, height); + return; + } + + Error error; + auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error); + if (!fp) + 
{ + ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription()); + return; + } + + std::string osd_key; + if (show_osd_message) + { + // Use a 60 second timeout to give it plenty of time to actually save. + osd_key = fmt::format("ScreenshotSaver_{}", path); + Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH, + fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(path)), + 60.0f); + } + + if (compress_on_thread) + { + System::QueueTaskOnThread([width, height, path = std::move(path), fp = fp.release(), quality, + flip_y = g_gpu_device->UsesLowerLeftOrigin(), image = std::move(image), + osd_key = std::move(osd_key)]() mutable { + CompressAndWriteTextureToFile(width, height, std::move(path), FileSystem::ManagedCFilePtr(fp), quality, true, + flip_y, std::move(image), std::move(osd_key)); + System::RemoveSelfFromTaskThreads(); + }); + } + else + { + CompressAndWriteTextureToFile(width, height, std::move(path), std::move(fp), quality, true, + g_gpu_device->UsesLowerLeftOrigin(), std::move(image), std::move(osd_key)); + } + }, + false, false); +} + +void GPUBackend::GetStatsString(SmallStringBase& str) const +{ + if (IsUsingHardwareBackend()) + { + str.format("{}{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", + GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), g_gpu_settings.gpu_use_thread ? "-MT" : "", + s_stats.num_primitives, s_stats.host_num_draws, s_stats.host_num_barriers, + s_stats.host_num_render_passes, s_stats.host_num_downloads, s_stats.num_copies, s_stats.num_writes); + } + else + { + str.format("{}{} SW | {} P | {} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), + g_gpu_settings.gpu_use_thread ? 
"-MT" : "", s_stats.num_primitives, s_stats.num_reads, + s_stats.num_copies, s_stats.num_writes); + } +} + +void GPUBackend::GetMemoryStatsString(SmallStringBase& str) const +{ + const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); + const u32 stream_kb = static_cast((s_stats.host_buffer_streamed + (1024 - 1)) / 1024); + + str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, s_stats.host_num_copies, + s_stats.host_num_uploads); +} + +void GPUBackend::ResetStatistics() +{ + s_counters = {}; + g_gpu_device->ResetStatistics(); +} + +void GPUBackend::UpdateStatistics(u32 frame_count) +{ + const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); + const u32 round = (frame_count - 1); + +#define UPDATE_COUNTER(x) s_stats.x = (s_counters.x + round) / frame_count +#define UPDATE_GPU_STAT(x) s_stats.host_##x = (stats.x + round) / frame_count + + UPDATE_COUNTER(num_reads); + UPDATE_COUNTER(num_writes); + UPDATE_COUNTER(num_copies); + UPDATE_COUNTER(num_vertices); + UPDATE_COUNTER(num_primitives); + + // UPDATE_COUNTER(num_read_texture_updates); + // UPDATE_COUNTER(num_ubo_updates); + + UPDATE_GPU_STAT(buffer_streamed); + UPDATE_GPU_STAT(num_draws); + UPDATE_GPU_STAT(num_barriers); + UPDATE_GPU_STAT(num_render_passes); + UPDATE_GPU_STAT(num_copies); + UPDATE_GPU_STAT(num_downloads); + UPDATE_GPU_STAT(num_uploads); + +#undef UPDATE_GPU_STAT +#undef UPDATE_COUNTER + + ResetStatistics(); +} diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index be9701f95..e75cca310 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -3,7 +3,9 @@ #pragma once -#include "gpu_types.h" +#include "gpu_thread_commands.h" + +#include "util/gpu_device.h" #include "common/heap_array.h" #include "common/threading.h" @@ -12,85 +14,193 @@ #include #include #include +#include -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4324) // warning C4324: 'GPUBackend': structure was padded 
due to alignment specifier -#endif +class Error; +class SmallStringBase; + +class GPUFramebuffer; +class GPUPipeline; + +struct Settings; +class StateWrapper; + +namespace System { +struct MemorySaveState; +} + +// DESIGN NOTE: Only static methods should be called on the CPU thread. +// You specifically don't have a global pointer available for this reason. class GPUBackend { +public: + static GPUThreadCommand* NewClearVRAMCommand(); + static GPUThreadCommand* NewClearDisplayCommand(); + static GPUBackendUpdateDisplayCommand* NewUpdateDisplayCommand(); + static GPUBackendSubmitFrameCommand* NewSubmitFrameCommand(); + static GPUThreadCommand* NewClearCacheCommand(); + static GPUThreadCommand* NewBufferSwappedCommand(); + static GPUBackendReadVRAMCommand* NewReadVRAMCommand(); + static GPUBackendFillVRAMCommand* NewFillVRAMCommand(); + static GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); + static GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); + static GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); + static GPUBackendUpdateCLUTCommand* NewUpdateCLUTCommand(); + static GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); + static GPUBackendDrawPrecisePolygonCommand* NewDrawPrecisePolygonCommand(u32 num_vertices); + static GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); + static GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + static void PushCommand(GPUThreadCommand* cmd); + static void PushCommandAndWakeThread(GPUThreadCommand* cmd); + static void PushCommandAndSync(GPUThreadCommand* cmd, bool spin); + static void SyncGPUThread(bool spin); + + static bool IsUsingHardwareBackend(); + + static std::unique_ptr CreateHardwareBackend(); + static std::unique_ptr CreateSoftwareBackend(); + + static bool RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, Image* out_image); + static void RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality, + bool 
compress_on_thread, bool show_osd_message); + + static bool BeginQueueFrame(); + static void WaitForOneQueuedFrame(); + static u32 GetQueuedFrameCount(); + + static bool AllocateMemorySaveStates(std::span states, Error* error); + public: GPUBackend(); virtual ~GPUBackend(); - ALWAYS_INLINE const Threading::Thread* GetThread() const { return m_use_gpu_thread ? &m_gpu_thread : nullptr; } - ALWAYS_INLINE bool IsUsingThread() const { return m_use_gpu_thread; } + virtual bool Initialize(bool upload_vram, Error* error); - virtual bool Initialize(bool use_thread); - virtual void Reset(); - virtual void Shutdown(); + virtual void UpdateSettings(const Settings& old_settings); - void SetThreadEnabled(bool use_thread); + /// Returns the current resolution scale. + virtual u32 GetResolutionScale() const = 0; - GPUBackendFillVRAMCommand* NewFillVRAMCommand(); - GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); - GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); - GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); - GPUBackendUpdateCLUTCommand* NewUpdateCLUTCommand(); - GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); - GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); - GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + /// Updates the resolution scale when it's set to automatic. + virtual void UpdateResolutionScale() = 0; - void PushCommand(GPUBackendCommand* cmd); - void Sync(bool allow_sleep); + // Graphics API state reset/restore - call when drawing the UI etc. + // TODO: replace with "invalidate cached state" + virtual void RestoreDeviceContext() = 0; - /// Processes all pending GPU commands. - void RunGPULoop(); + /// Main command handler for GPU thread. + void HandleCommand(const GPUThreadCommand* cmd); + + /// Draws the current display texture, with any post-processing. + GPUDevice::PresentResult PresentDisplay(); + + /// Helper function to save current display texture to PNG. Used for regtest. 
+ bool WriteDisplayTextureToFile(std::string filename); + + /// Helper function for computing screenshot bounds. + void CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, + GSVector4i* draw_rect) const; + + void GetStatsString(SmallStringBase& str) const; + void GetMemoryStatsString(SmallStringBase& str) const; + + void ResetStatistics(); + void UpdateStatistics(u32 frame_count); protected: - void* AllocateCommand(GPUBackendCommandType command, u32 size); - u32 GetPendingCommandSize() const; - void WakeGPUThread(); - void StartGPUThread(); - void StopGPUThread(); - - virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0; - virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, - GPUBackendCommandParameters params) = 0; - virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, - GPUBackendCommandParameters params) = 0; - virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0; - virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0; - virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; - virtual void FlushRender() = 0; - virtual void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) = 0; - virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; - - void HandleCommand(const GPUBackendCommand* cmd); - - Threading::KernelSemaphore m_sync_semaphore; - std::atomic_bool m_gpu_thread_sleeping{false}; - std::atomic_bool m_gpu_loop_done{false}; - Threading::Thread m_gpu_thread; - bool m_use_gpu_thread = false; - - std::mutex m_sync_mutex; - std::condition_variable m_sync_cpu_thread_cv; - std::condition_variable m_wake_gpu_thread_cv; - bool m_sync_done = false; - enum : u32 { - COMMAND_QUEUE_SIZE = 4 * 1024 * 1024, - THRESHOLD_TO_WAKE_GPU = 256 + DEINTERLACE_BUFFER_COUNT = 4, }; - FixedHeapArray 
m_command_fifo_data; - alignas(HOST_CACHE_LINE_SIZE) std::atomic m_command_fifo_read_ptr{0}; - alignas(HOST_CACHE_LINE_SIZE) std::atomic m_command_fifo_write_ptr{0}; + virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0; + virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, + u8 interlaced_display_field) = 0; + virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) = 0; + virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask, + bool check_mask) = 0; + + virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0; + virtual void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) = 0; + virtual void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) = 0; + virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; + + virtual void DrawingAreaChanged() = 0; + virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; + virtual void ClearCache() = 0; + virtual void OnBufferSwapped() = 0; + virtual void ClearVRAM() = 0; + + virtual void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) = 0; + + virtual void LoadState(const GPUBackendLoadStateCommand* cmd) = 0; + + virtual bool AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) = 0; + virtual void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) = 0; + + /// Ensures all pending draws are flushed to the host GPU. + virtual void FlushRender() = 0; + + /// Helper function for computing the draw rectangle in a larger window. + void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, + GSVector4i* display_rect, GSVector4i* draw_rect) const; + + /// Renders the display, optionally with postprocessing to the specified image. 
+ bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, + bool postfx, Image* out_image); + + bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error); + + void HandleUpdateDisplayCommand(const GPUBackendUpdateDisplayCommand* cmd); + void HandleSubmitFrameCommand(const GPUBackendFramePresentationParameters* cmd); + + void ClearDisplay(); + void ClearDisplayTexture(); + void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, + s32 view_height); + + GPUDevice::PresentResult RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, + bool postfx); + + /// Sends the current frame to media capture. + void SendDisplayToMediaCapture(MediaCapture* cap); + + bool Deinterlace(u32 field, u32 line_skip); + bool DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip); + bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); + void DestroyDeinterlaceTextures(); + bool ApplyChromaSmoothing(); + + s32 m_display_width = 0; + s32 m_display_height = 0; + s32 m_display_origin_left = 0; + s32 m_display_origin_top = 0; + s32 m_display_vram_width = 0; + s32 m_display_vram_height = 0; + float m_display_pixel_aspect_ratio = 1.0f; + + u32 m_current_deinterlace_buffer = 0; + std::unique_ptr m_deinterlace_pipeline; + std::unique_ptr m_deinterlace_extract_pipeline; + std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; + std::unique_ptr m_deinterlace_texture; + + std::unique_ptr m_chroma_smoothing_pipeline; + std::unique_ptr m_chroma_smoothing_texture; + + std::unique_ptr m_display_pipeline; + GPUTexture* m_display_texture = nullptr; + GPUTexture* m_display_depth_buffer = nullptr; + s32 m_display_texture_view_x = 0; + s32 m_display_texture_view_y = 0; + s32 m_display_texture_view_width = 0; + s32 m_display_texture_view_height = 0; }; 
-#ifdef _MSC_VER -#pragma warning(pop) -#endif +namespace Host { + +/// Called at the end of the frame, before presentation. +void FrameDoneOnGPUThread(GPUBackend* gpu_backend, u32 frame_number); + +} // namespace Host diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 890cfa632..df7d46df6 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -1,13 +1,17 @@ // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 +#include "cpu_pgxp.h" #include "gpu.h" +#include "gpu_backend.h" #include "gpu_dump.h" #include "gpu_hw_texture_cache.h" +#include "gpu_thread_commands.h" #include "interrupt_controller.h" #include "system.h" #include "common/assert.h" +#include "common/gsvector_formatter.h" #include "common/log.h" #include "common/string_util.h" @@ -93,7 +97,7 @@ void GPU::TryExecuteCommands() // drop terminator m_fifo.RemoveOne(); DEBUG_LOG("Drawing poly-line with {} vertices", GetPolyLineVertexCount()); - DispatchRenderCommand(); + FinishPolyline(); m_blit_buffer.clear(); EndCommand(); continue; @@ -200,8 +204,8 @@ bool GPU::HandleNOPCommand() bool GPU::HandleClearCacheCommand() { DEBUG_LOG("GP0 clear cache"); - m_draw_mode.SetTexturePageChanged(); InvalidateCLUT(); + GPUBackend::PushCommand(GPUBackend::NewClearCacheCommand()); m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); @@ -248,8 +252,6 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand() DEBUG_LOG("Set drawing area top-left: ({}, {})", left, top); if (m_drawing_area.left != left || m_drawing_area.top != top) { - FlushRender(); - m_drawing_area.left = left; m_drawing_area.top = top; m_drawing_area_changed = true; @@ -270,8 +272,6 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand() DEBUG_LOG("Set drawing area bottom-right: ({}, {})", right, bottom); if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) { - FlushRender(); - m_drawing_area.right = right; m_drawing_area.bottom = bottom; m_drawing_area_changed = 
true; @@ -291,8 +291,6 @@ bool GPU::HandleSetDrawingOffsetCommand() DEBUG_LOG("Set drawing offset ({}, {})", x, y); if (m_drawing_offset.x != x || m_drawing_offset.y != y) { - FlushRender(); - m_drawing_offset.x = x; m_drawing_offset.y = y; } @@ -308,11 +306,7 @@ bool GPU::HandleSetMaskBitCommand() constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); const u32 gpustat_bits = (param & 0x03) << 11; - if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits) - { - FlushRender(); - m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; - } + m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; DEBUG_LOG("Set mask bit {} {}", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); @@ -321,6 +315,35 @@ bool GPU::HandleSetMaskBitCommand() return true; } +void GPU::PrepareForDraw() +{ + if (m_drawing_area_changed) + { + m_drawing_area_changed = false; + GPUBackendSetDrawingAreaCommand* cmd = GPUBackend::NewSetDrawingAreaCommand(); + cmd->new_area = m_drawing_area; + GPUBackend::PushCommand(cmd); + } +} + +void GPU::FillDrawCommand(GPUBackendDrawCommand* RESTRICT cmd, GPURenderCommand rc) const +{ + cmd->interlaced_rendering = IsInterlacedRenderingEnabled(); + cmd->active_line_lsb = ConvertToBoolUnchecked(m_crtc_state.active_line_lsb); + cmd->check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + cmd->set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + cmd->texture_enable = rc.IsTexturingEnabled(); + cmd->raw_texture_enable = rc.raw_texture_enable; + cmd->transparency_enable = rc.transparency_enable; + cmd->shading_enable = rc.shading_enable; + cmd->quad_polygon = rc.quad_polygon; + cmd->dither_enable = rc.IsDitheringEnabled() && m_draw_mode.mode_reg.dither_enable; + + cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; + cmd->palette.bits = m_draw_mode.palette_reg.bits; + cmd->window = m_draw_mode.texture_window; +} + bool GPU::HandleRenderPolygonCommand() { const GPURenderCommand rc{FifoPeek(0)}; @@ 
-346,6 +369,7 @@ bool GPU::HandleRenderPolygonCommand() words_per_vertex, setup_ticks); // set draw state up + // TODO: Get rid of SetTexturePalette() and just fill it as needed if (rc.texture_enable) { const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16); @@ -355,12 +379,233 @@ bool GPU::HandleRenderPolygonCommand() UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg); } - m_counters.num_vertices += num_vertices; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + + if (g_settings.gpu_pgxp_enable) + { + GPUBackendDrawPrecisePolygonCommand* RESTRICT cmd = GPUBackend::NewDrawPrecisePolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + cmd->num_vertices = Truncate16(num_vertices); + + const u32 first_color = rc.color_for_first_vertex; + const bool shaded = rc.shading_enable; + const bool textured = rc.texture_enable; + bool valid_w = g_settings.gpu_pgxp_texture_correction; + for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPrecisePolygonCommand::Vertex* RESTRICT vert = &cmd->vertices[i]; + vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + vert->native_x = m_drawing_offset.x + vp.x; + vert->native_y = m_drawing_offset.y + vp.y; + vert->texcoord = textured ? Truncate16(FifoPop()) : 0; + + valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->native_x, vert->native_y, + m_drawing_offset.x, m_drawing_offset.y, &vert->x, &vert->y, &vert->w); + } + + cmd->valid_w = valid_w; + if (!valid_w) + { + if (g_settings.gpu_pgxp_disable_2d) + { + // NOTE: This reads uninitialized data, but it's okay, it doesn't get used. 
+ for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPrecisePolygonCommand::Vertex& v = cmd->vertices[i]; + GSVector2::store(&v.x, GSVector2(GSVector2i::load(&v.native_x))); + v.w = 1.0f; + } + } + else + { + for (u32 i = 0; i < num_vertices; i++) + cmd->vertices[i].w = 1.0f; + } + } + + // Cull polygons which are too large. + const GSVector2 v0f = GSVector2::load(&cmd->vertices[0].x); + const GSVector2 v1f = GSVector2::load(&cmd->vertices[1].x); + const GSVector2 v2f = GSVector2::load(&cmd->vertices[2].x); + const GSVector2 min_pos_12 = v1f.min(v2f); + const GSVector2 max_pos_12 = v1f.max(v2f); + const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + const bool first_tri_culled = + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_012.rintersects(m_clamped_drawing_area)); + if (first_tri_culled) + { + // TODO: GPU events... somehow. + DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x, + cmd->vertices[0].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y, + cmd->vertices[2].native_x, cmd->vertices[2].native_y); + + if (!rc.quad_polygon) + { + EndCommand(); + return true; + } + } + else + { + AddDrawTriangleTicks(GSVector2i::load(&cmd->vertices[0].native_x), + GSVector2i::load(&cmd->vertices[1].native_x), + GSVector2i::load(&cmd->vertices[2].native_x), rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + } + + // quads + if (rc.quad_polygon) + { + const GSVector2 v3f = GSVector2::load(&cmd->vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + + // Cull polygons which are too large. 
+ const bool second_tri_culled = + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_123.rintersects(m_clamped_drawing_area)); + if (second_tri_culled) + { + DEBUG_LOG("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", + cmd->vertices[2].native_x, cmd->vertices[2].native_y, cmd->vertices[1].native_x, + cmd->vertices[1].native_y, cmd->vertices[0].native_x, cmd->vertices[0].native_y); + + if (first_tri_culled) + { + EndCommand(); + return true; + } + + // Remove second part of quad. + // NOTE: Culling this way results in subtle differences with UV clamping, since the fourth vertex is no + // longer considered in the range. This is mainly apparent when the UV gradient is zero. Seems like it + // generally looks better this way, so I'm keeping it. + cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPrecisePolygonCommand) + + 3 * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + cmd->num_vertices = 3; + } + else + { + AddDrawTriangleTicks(GSVector2i::load(&cmd->vertices[2].native_x), + GSVector2i::load(&cmd->vertices[1].native_x), + GSVector2i::load(&cmd->vertices[3].native_x), rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + + // If first part was culled, move the second part to the first. 
+ if (first_tri_culled) + { + std::memcpy(&cmd->vertices[0], &cmd->vertices[2], sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + std::memcpy(&cmd->vertices[2], &cmd->vertices[3], sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPrecisePolygonCommand) + + 3 * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + cmd->num_vertices = 3; + } + } + } + + GPUBackend::PushCommand(cmd); + } + else + { + GPUBackendDrawPolygonCommand* RESTRICT cmd = GPUBackend::NewDrawPolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + cmd->num_vertices = Truncate16(num_vertices); + + const u32 first_color = rc.color_for_first_vertex; + const bool shaded = rc.shading_enable; + const bool textured = rc.texture_enable; + for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPolygonCommand::Vertex* RESTRICT vert = &cmd->vertices[i]; + vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + vert->x = m_drawing_offset.x + vp.x; + vert->y = m_drawing_offset.y + vp.y; + vert->texcoord = textured ? Truncate16(FifoPop()) : 0; + } + + // Cull polygons which are too large. 
+ const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); + const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].x); + const GSVector2i v2 = GSVector2i::load(&cmd->vertices[2].x); + const GSVector2i min_pos_12 = v1.min_s32(v2); + const GSVector2i max_pos_12 = v1.max_s32(v2); + const GSVector4i draw_rect_012 = + GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const bool first_tri_culled = + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_012.rintersects(m_clamped_drawing_area)); + if (first_tri_culled) + { + DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); + + if (!rc.quad_polygon) + { + EndCommand(); + return true; + } + } + else + { + AddDrawTriangleTicks(v0, v1, v2, rc.shading_enable, rc.texture_enable, rc.transparency_enable); + } + + // quads + if (rc.quad_polygon) + { + const GSVector2i v3 = GSVector2i::load(&cmd->vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(v3)) + .upl64(GSVector4i(max_pos_12.max_s32(v3))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + + // Cull polygons which are too large. + const bool second_tri_culled = + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_123.rintersects(m_clamped_drawing_area)); + if (second_tri_culled) + { + DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, + cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y); + + if (first_tri_culled) + { + EndCommand(); + return true; + } + + // Remove second part of quad. 
+ cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPolygonCommand) + + 3 * sizeof(GPUBackendDrawPolygonCommand::Vertex)); + cmd->num_vertices = 3; + } + else + { + AddDrawTriangleTicks(v2, v1, v3, rc.shading_enable, rc.texture_enable, rc.transparency_enable); + + // If first part was culled, move the second part to the first. + if (first_tri_culled) + { + std::memcpy(&cmd->vertices[0], &cmd->vertices[2], sizeof(GPUBackendDrawPolygonCommand::Vertex)); + std::memcpy(&cmd->vertices[2], &cmd->vertices[3], sizeof(GPUBackendDrawPolygonCommand::Vertex)); + cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPolygonCommand) + + 3 * sizeof(GPUBackendDrawPolygonCommand::Vertex)); + cmd->num_vertices = 3; + } + } + } + + GPUBackend::PushCommand(cmd); + } + EndCommand(); return true; } @@ -389,12 +634,65 @@ bool GPU::HandleRenderRectangleCommand() rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? 
"shaded" : "monochrome", total_words, setup_ticks); - m_counters.num_vertices++; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + GPUBackendDrawRectangleCommand* cmd = GPUBackend::NewDrawRectangleCommand(); + FillDrawCommand(cmd, rc); + cmd->color = rc.color_for_first_vertex; + + const GPUVertexPosition vp{FifoPop()}; + cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); + cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); + + if (rc.texture_enable) + { + const u32 texcoord_and_palette = FifoPop(); + cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); + cmd->texcoord = Truncate16(texcoord_and_palette); + } + else + { + cmd->palette.bits = 0; + cmd->texcoord = 0; + } + + switch (rc.rectangle_size) + { + case GPUDrawRectangleSize::R1x1: + cmd->width = 1; + cmd->height = 1; + break; + case GPUDrawRectangleSize::R8x8: + cmd->width = 8; + cmd->height = 8; + break; + case GPUDrawRectangleSize::R16x16: + cmd->width = 16; + cmd->height = 16; + break; + default: + { + const u32 width_and_height = FifoPop(); + cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); + cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); + } + break; + } + + const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + if (clamped_rect.rempty()) [[unlikely]] + { + DEBUG_LOG("Culling off-screen rectangle {}", rect); + EndCommand(); + return true; + } + + AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); + + GPUBackend::PushCommand(cmd); EndCommand(); return true; } @@ -411,12 +709,55 @@ bool GPU::HandleRenderLineCommand() TRACE_LOG("Render {} {} line ({} total words)", rc.transparency_enable ? "semi-transparent" : "opaque", rc.shading_enable ? 
"shaded" : "monochrome", total_words); - m_counters.num_vertices += 2; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand(2); + FillDrawCommand(cmd, rc); + cmd->palette.bits = 0; + + if (rc.shading_enable) + { + cmd->vertices[0].color = rc.color_for_first_vertex; + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + else + { + cmd->vertices[0].color = rc.color_for_first_vertex; + cmd->vertices[1].color = rc.color_for_first_vertex; + + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + + const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); + const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].x); + const GSVector4i rect = GSVector4i::xyxy(v0.min_s32(v1), v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y); + EndCommand(); + return true; + } + + AddDrawLineTicks(clamped_rect, rc.shading_enable); + GPUBackend::PushCommand(cmd); EndCommand(); return true; } @@ -453,6 +794,64 @@ bool
GPU::HandleRenderPolyLineCommand() return true; } +void GPU::FinishPolyline() +{ + PrepareForDraw(); + + const u32 num_vertices = GetPolyLineVertexCount(); + DebugAssert(num_vertices >= 2); + + GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand((num_vertices - 1) * 2); + FillDrawCommand(cmd, m_render_command); + + u32 buffer_pos = 0; + const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; + const GSVector2i draw_offset = GSVector2i::load(&m_drawing_offset.x); + GSVector2i start_pos = GSVector2i(start_vp.x, start_vp.y).add32(draw_offset); + u32 start_color = m_render_command.color_for_first_vertex; + + const bool shaded = m_render_command.shading_enable; + u32 out_vertex_count = 0; + for (u32 i = 1; i < num_vertices; i++) + { + const u32 end_color = + shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; + const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; + const GSVector2i end_pos = GSVector2i(vp.x, vp.y).add32(draw_offset); + + const GSVector4i rect = + GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); + } + else + { + AddDrawLineTicks(clamped_rect, m_render_command.shading_enable); + + GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count]; + out_vertex_count += 2; + + GSVector2i::store(&out_vertex[0].x, start_pos); + out_vertex[0].color = start_color; + GSVector2i::store(&out_vertex[1].x, end_pos); + out_vertex[1].color = end_color; + } + + start_pos = end_pos; + start_color = end_color; + } + + if (out_vertex_count > 0) + { + DebugAssert(out_vertex_count <= cmd->num_vertices); + cmd->num_vertices = 
Truncate16(out_vertex_count); + GPUBackend::PushCommand(cmd); + } +} + bool GPU::HandleFillRectangleCommand() { CHECK_COMMAND_SIZE(3); @@ -460,8 +859,6 @@ bool GPU::HandleFillRectangleCommand() if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - FlushRender(); - const u32 color = FifoPop() & 0x00FFFFFF; const u32 dst_x = FifoPeek() & 0x3F0; const u32 dst_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; @@ -471,9 +868,18 @@ bool GPU::HandleFillRectangleCommand() DEBUG_LOG("Fill VRAM rectangle offset=({},{}), size=({},{})", dst_x, dst_y, width, height); if (width > 0 && height > 0) - FillVRAM(dst_x, dst_y, width, height, color); + { + GPUBackendFillVRAMCommand* cmd = GPUBackend::NewFillVRAMCommand(); + cmd->x = static_cast(dst_x); + cmd->y = static_cast(dst_y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + cmd->color = color; + cmd->interlaced_rendering = IsInterlacedRenderingEnabled(); + cmd->active_line_lsb = m_crtc_state.active_line_lsb; + GPUBackend::PushCommand(cmd); + } - m_counters.num_writes++; AddCommandTicks(46 + ((width / 8) + 9) * height); EndCommand(); return true; @@ -523,8 +929,6 @@ void GPU::FinishVRAMWrite() if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - FlushRender(); - if (m_blit_remaining_words == 0) { if (g_settings.debugging.dump_cpu_to_vram_copies) @@ -557,18 +961,18 @@ void GPU::FinishVRAMWrite() const u8* blit_ptr = reinterpret_cast(m_blit_buffer.data()); if (transferred_full_rows > 0) { - UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, transferred_full_rows, blit_ptr, - m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); + UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, static_cast(transferred_full_rows), + blit_ptr, m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); blit_ptr += (ZeroExtend32(m_vram_transfer.width) * transferred_full_rows) * sizeof(u16); } if 
(transferred_width_last_row > 0) { - UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y + transferred_full_rows, transferred_width_last_row, 1, blit_ptr, - m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); + UpdateVRAM(m_vram_transfer.x, static_cast(m_vram_transfer.y + transferred_full_rows), + static_cast(transferred_width_last_row), 1, blit_ptr, m_GPUSTAT.set_mask_while_drawing, + m_GPUSTAT.check_mask_before_draw); } } - m_counters.num_writes++; m_blit_buffer.clear(); m_vram_transfer = {}; m_blitter_state = BlitterState::Idle; @@ -588,9 +992,6 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() m_vram_transfer.width, m_vram_transfer.height); DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0); - // all rendering should be done first... - FlushRender(); - // ensure VRAM shadow is up to date ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); @@ -602,7 +1003,6 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() } // switch to pixel-by-pixel read state - m_counters.num_reads++; m_blitter_state = BlitterState::ReadingVRAM; m_command_total_words = 0; @@ -633,10 +1033,16 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand() width == 0 || height == 0 || (src_x == dst_x && src_y == dst_y && !m_GPUSTAT.set_mask_while_drawing); if (!skip_copy) { - m_counters.num_copies++; - - FlushRender(); - CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + GPUBackendCopyVRAMCommand* cmd = GPUBackend::NewCopyVRAMCommand(); + cmd->src_x = static_cast(src_x); + cmd->src_y = static_cast(src_y); + cmd->dst_x = static_cast(dst_x); + cmd->dst_y = static_cast(dst_y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + cmd->check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + cmd->set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + GPUBackend::PushCommand(cmd); } AddCommandTicks(width * height * 2); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index c923790ea..75ad8255b 
100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -4,13 +4,15 @@ #include "gpu_hw.h" #include "cpu_core.h" #include "cpu_pgxp.h" +#include "gpu.h" #include "gpu_hw_shadergen.h" -#include "gpu_sw_backend.h" #include "gpu_sw_rasterizer.h" #include "host.h" +#include "imgui_overlays.h" #include "settings.h" #include "system_private.h" +#include "util/imgui_fullscreen.h" #include "util/imgui_manager.h" #include "util/postprocessing.h" #include "util/state_wrapper.h" @@ -26,6 +28,7 @@ #include "IconsEmoji.h" #include "IconsFontAwesome5.h" +#include "fmt/format.h" #include "imgui.h" #include @@ -87,7 +90,7 @@ ALWAYS_INLINE static u32 GetMaxResolutionScale() ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale) { - u32 scale = std::min(resolution_scale, g_settings.gpu_downsample_scale); + u32 scale = std::min(resolution_scale, g_gpu_settings.gpu_downsample_scale); while ((resolution_scale % scale) != 0) scale--; return scale; @@ -96,19 +99,21 @@ ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale) ALWAYS_INLINE static bool ShouldClampUVs(GPUTextureFilter texture_filter) { // We only need UV limits if PGXP is enabled, or texture filtering is enabled. - return g_settings.gpu_pgxp_enable || texture_filter != GPUTextureFilter::Nearest; + return g_gpu_settings.gpu_pgxp_enable || texture_filter != GPUTextureFilter::Nearest; } ALWAYS_INLINE static bool ShouldAllowSpriteMode(u8 resolution_scale, GPUTextureFilter texture_filter, GPUTextureFilter sprite_texture_filter) { // Use sprite shaders/mode when texcoord rounding is forced, or if the filters are different. 
- return (sprite_texture_filter != texture_filter || (resolution_scale > 1 && g_settings.gpu_force_round_texcoords)); + return (sprite_texture_filter != texture_filter || + (resolution_scale > 1 && g_gpu_settings.gpu_force_round_texcoords)); } ALWAYS_INLINE static bool ShouldDisableColorPerspective() { - return g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_texture_correction && !g_settings.gpu_pgxp_color_correction; + return g_gpu_settings.gpu_pgxp_enable && g_gpu_settings.gpu_pgxp_texture_correction && + !g_gpu_settings.gpu_pgxp_color_correction; } /// Returns true if the specified texture filtering mode requires dual-source blending. @@ -164,16 +169,25 @@ public: return Timer::ConvertValueToMilliseconds(Timer::GetCurrentValue() - m_start_time); } - void Increment(u32 progress = 1) + bool Increment(u32 progress, Error* error) { m_progress += progress; + if (System::IsStartupCancelled()) + { + Error::SetStringView(error, TRANSLATE_SV("System", "Startup was cancelled.")); + return false; + } + const u64 tv = Timer::GetCurrentValue(); if ((tv - m_start_time) >= m_min_time && (tv - m_last_update_time) >= m_update_interval) { - Host::DisplayLoadingScreen(m_title.c_str(), 0, static_cast(m_total), static_cast(m_progress)); + ImGuiFullscreen::RenderLoadingScreen(ImGuiManager::LOGO_IMAGE_NAME, m_title, 0, static_cast(m_total), + static_cast(m_progress)); m_last_update_time = tv; } + + return true; } private: @@ -187,7 +201,7 @@ private: }; } // namespace -GPU_HW::GPU_HW() : GPU() +GPU_HW::GPU_HW() : GPUBackend() { #if defined(_DEBUG) || defined(_DEVEL) s_draw_number = 0; @@ -197,12 +211,6 @@ GPU_HW::GPU_HW() : GPU() GPU_HW::~GPU_HW() { GPUTextureCache::Shutdown(); - - if (m_sw_renderer) - { - m_sw_renderer->Shutdown(); - m_sw_renderer.reset(); - } } ALWAYS_INLINE void GPU_HW::BatchVertex::Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, @@ -235,34 +243,24 @@ ALWAYS_INLINE void GPU_HW::BatchVertex::SetUVLimits(u32 min_u, u32 max_u, u32 mi uv_limits 
= PackUVLimits(min_u, max_u, min_v, max_v); } -const Threading::Thread* GPU_HW::GetSWThread() const +bool GPU_HW::Initialize(bool upload_vram, Error* error) { - return m_sw_renderer ? m_sw_renderer->GetThread() : nullptr; -} - -bool GPU_HW::IsHardwareRenderer() const -{ - return true; -} - -bool GPU_HW::Initialize(Error* error) -{ - if (!GPU::Initialize(error)) + if (!GPUBackend::Initialize(upload_vram, error)) return false; const GPUDevice::Features features = g_gpu_device->GetFeatures(); m_resolution_scale = Truncate8(CalculateResolutionScale()); - m_multisamples = Truncate8(std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); - m_texture_filtering = g_settings.gpu_texture_filter; - m_sprite_texture_filtering = g_settings.gpu_sprite_texture_filter; - m_line_detect_mode = (m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; + m_multisamples = Truncate8(std::min(g_gpu_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); + m_texture_filtering = g_gpu_settings.gpu_texture_filter; + m_sprite_texture_filtering = g_gpu_settings.gpu_sprite_texture_filter; + m_line_detect_mode = (m_resolution_scale > 1) ? 
g_gpu_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; m_downsample_mode = GetDownsampleMode(m_resolution_scale); - m_wireframe_mode = g_settings.gpu_wireframe_mode; + m_wireframe_mode = g_gpu_settings.gpu_wireframe_mode; m_supports_dual_source_blend = features.dual_source_blend; m_supports_framebuffer_fetch = features.framebuffer_fetch; - m_true_color = g_settings.gpu_true_color; - m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); + m_true_color = g_gpu_settings.gpu_true_color; + m_pgxp_depth_buffer = g_gpu_settings.UsingPGXPDepthBuffer(); m_clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering); @@ -271,8 +269,6 @@ bool GPU_HW::Initialize(Error* error) CheckSettings(); - UpdateSoftwareRenderer(false); - PrintSettingsToLog(); if (!CompileCommonShaders(error) || !CompilePipelines(error) || !CreateBuffers(error)) @@ -280,160 +276,158 @@ bool GPU_HW::Initialize(Error* error) if (m_use_texture_cache) { - if (!GPUTextureCache::Initialize()) + if (!GPUTextureCache::Initialize(this)) { ERROR_LOG("Failed to initialize texture cache, disabling."); m_use_texture_cache = false; } } + else + { + // Still potentially have VRAM texture replacements. + GPUTextureCache::ReloadTextureReplacements(false); + } UpdateDownsamplingLevels(); RestoreDeviceContext(); + + // If we're not initializing VRAM, need to upload it here. Implies RestoreDeviceContext(). + if (upload_vram) + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); + + DrawingAreaChanged(); return true; } -void GPU_HW::Reset(bool clear_vram) +u32 GPU_HW::GetResolutionScale() const +{ + return m_resolution_scale; +} + +void GPU_HW::ClearVRAM() { // Texture cache needs to be invalidated before we load, otherwise we dump black. 
if (m_use_texture_cache) GPUTextureCache::Invalidate(); + // Don't need to finish the current draw. if (m_batch_vertex_ptr) UnmapGPUBuffer(0, 0); - GPU::Reset(clear_vram); + m_texpage_dirty = false; + m_compute_uv_range = m_clamp_uvs; - if (m_sw_renderer) - m_sw_renderer->Reset(); + if (ShouldDrawWithSoftwareRenderer()) + { + std::memset(g_vram, 0, sizeof(g_vram)); + std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); + } m_batch = {}; m_current_depth = 1; - SetClampedDrawingArea(); - - if (clear_vram) - ClearFramebuffer(); + ClearFramebuffer(); } -bool GPU_HW::DoState(StateWrapper& sw, bool update_display) +void GPU_HW::LoadState(const GPUBackendLoadStateCommand* cmd) { - FlushRender(); + DebugAssert((m_batch_vertex_ptr != nullptr) == (m_batch_index_ptr != nullptr)); + if (m_batch_vertex_ptr) + UnmapGPUBuffer(0, 0); - // Need to download local VRAM copy before calling the base class, because it serializes this. - if (m_sw_renderer) - { - m_sw_renderer->Sync(true); - } - else if (sw.IsWriting()) - { - // If SW renderer readbacks aren't enabled, the CLUT won't be populated, which means it'll be invalid if the user - // loads this state with software instead of hardware renderers. So force-update the CLUT. - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - if (IsCLUTValid()) - GPU::ReadCLUT(g_gpu_clut, GPUTexturePaletteReg{Truncate16(m_current_clut_reg_bits)}, m_current_clut_is_8bit); - } + std::memcpy(g_vram, cmd->vram_data, sizeof(g_vram)); + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); - if (!GPU::DoState(sw, false)) - return false; + if (ShouldDrawWithSoftwareRenderer()) + std::memcpy(g_gpu_clut, cmd->clut_data, sizeof(g_gpu_clut)); - if (sw.IsReading()) - { - // Wipe out state. - m_batch = {}; - m_current_depth = 1; - SetClampedDrawingArea(); - - // Need to update the VRAM copy on the GPU with the state data. - // Would invalidate the TC, but base DoState() calls Reset(). 
- UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); - - // invalidate the whole VRAM read texture when loading state - DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); - ClearVRAMDirtyRectangle(); - SetFullVRAMDirtyRectangle(); - UpdateVRAMReadTexture(true, false); - ClearVRAMDirtyRectangle(); - ResetBatchVertexDepth(); - - // refresh display, has to be done here because of the upload above - if (update_display) - UpdateDisplay(); - } - - return GPUTextureCache::DoState(sw, !m_use_texture_cache); -} - -bool GPU_HW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) -{ - // sw-for-readbacks just makes a mess here - if (m_sw_renderer) - m_sw_renderer->Sync(true); - if (m_sw_renderer || m_use_texture_cache) - sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - - // This could be faster too. if (m_use_texture_cache) - GPUTextureCache::DoState(sw, m_use_texture_cache); + { + StateWrapper sw(std::span(cmd->texture_cache_state, cmd->texture_cache_state_size), + StateWrapper::Mode::Read, cmd->texture_cache_state_version); + if (!GPUTextureCache::DoState(sw, false)) [[unlikely]] + Panic("Failed to process texture cache state."); + } - // Base class never fails. - GPU::DoMemoryState(sw, mss, false); + m_batch = {}; + m_current_depth = 1; + ClearVRAMDirtyRectangle(); + SetFullVRAMDirtyRectangle(); + UpdateVRAMReadTexture(true, false); + ClearVRAMDirtyRectangle(); + ResetBatchVertexDepth(); +} + +bool GPU_HW::AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) +{ + mss.vram_texture = g_gpu_device->FetchTexture( + m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(), + m_vram_texture->IsMultisampled() ? 
GPUTexture::Type::RenderTarget : GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, nullptr, 0, error); + if (!mss.vram_texture) [[unlikely]] + { + Error::AddPrefix(error, "Failed to allocate VRAM texture for memory save state: "); + return false; + } + + GL_OBJECT_NAME(mss.vram_texture, "Memory save state VRAM copy"); + + static constexpr u32 MAX_TC_SIZE = 1024 * 1024; + + u32 buffer_size = 0; + if (ShouldDrawWithSoftwareRenderer() || m_use_texture_cache) + buffer_size += sizeof(g_vram); + if (ShouldDrawWithSoftwareRenderer()) + buffer_size += sizeof(g_gpu_clut); + if (m_use_texture_cache) + buffer_size += MAX_TC_SIZE; + + if (buffer_size > 0) + mss.gpu_state_data.resize(buffer_size); + + return true; +} + +void GPU_HW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) +{ + Assert(mss.vram_texture && mss.vram_texture->GetWidth() == m_vram_texture->GetWidth() && + mss.vram_texture->GetHeight() == m_vram_texture->GetHeight() && + mss.vram_texture->GetSamples() == m_vram_texture->GetSamples()); if (sw.IsReading()) { if (m_batch_vertex_ptr) UnmapGPUBuffer(0, 0); - DebugAssert(mss.vram_texture->GetWidth() == m_vram_texture->GetWidth() && - mss.vram_texture->GetHeight() == m_vram_texture->GetHeight() && - mss.vram_texture->GetSamples() == m_vram_texture->GetSamples()); g_gpu_device->CopyTextureRegion(m_vram_texture.get(), 0, 0, 0, 0, mss.vram_texture.get(), 0, 0, 0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); - // Wipe out state. 
- DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); m_batch = {}; - SetClampedDrawingArea(); ClearVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle(); UpdateVRAMReadTexture(true, false); ClearVRAMDirtyRectangle(); ResetBatchVertexDepth(); - - if (update_display) - UpdateDisplay(); } else { FlushRender(); // saving state - if (!mss.vram_texture || mss.vram_texture->GetWidth() != m_vram_texture->GetWidth() || - mss.vram_texture->GetHeight() != m_vram_texture->GetHeight() || - mss.vram_texture->GetSamples() != m_vram_texture->GetSamples()) [[unlikely]] - { - g_gpu_device->RecycleTexture(std::move(mss.vram_texture)); - mss.vram_texture.reset(); - } - if (!mss.vram_texture) - { - // We copy to/from the save state texture, but we can't have multisampled non-RTs. - Error error; - mss.vram_texture = g_gpu_device->FetchTexture( - m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(), - m_vram_texture->IsMultisampled() ? GPUTexture::Type::RenderTarget : GPUTexture::Type::Texture, - GPUTexture::Format::RGBA8, GPUTexture::Flags::None); - if (!mss.vram_texture) [[unlikely]] - { - ERROR_LOG("Failed to allocate VRAM texture for memory save state: {}", error.GetDescription()); - return false; - } - } - g_gpu_device->CopyTextureRegion(mss.vram_texture.get(), 0, 0, 0, 0, m_vram_texture.get(), 0, 0, 0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); } - return true; + // Save VRAM/CLUT. 
+ if (ShouldDrawWithSoftwareRenderer() || m_use_texture_cache) + sw.DoBytes(g_vram, sizeof(g_vram)); + if (ShouldDrawWithSoftwareRenderer()) + sw.DoBytes(g_gpu_clut, sizeof(g_gpu_clut)); + if (m_use_texture_cache) + { + if (!GPUTextureCache::DoState(sw, false)) [[unlikely]] + Panic("Failed to process texture cache state."); + } } void GPU_HW::RestoreDeviceContext() @@ -447,51 +441,52 @@ void GPU_HW::RestoreDeviceContext() void GPU_HW::UpdateSettings(const Settings& old_settings) { - const bool prev_force_progressive_scan = m_force_progressive_scan; - - GPU::UpdateSettings(old_settings); + GPUBackend::UpdateSettings(old_settings); const GPUDevice::Features features = g_gpu_device->GetFeatures(); const u8 resolution_scale = Truncate8(CalculateResolutionScale()); - const u8 multisamples = Truncate8(std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); + const u8 multisamples = Truncate8(std::min(g_gpu_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); const bool clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); - const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || - g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || - m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || - (!old_settings.gpu_texture_cache && g_settings.gpu_texture_cache)); + const bool framebuffer_changed = + (m_resolution_scale != resolution_scale || m_multisamples != multisamples || + g_gpu_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || + m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer() || + (!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache)); const bool shaders_changed = ((m_resolution_scale > 1) != (resolution_scale > 1) || m_multisamples != multisamples || - m_true_color != g_settings.gpu_true_color || prev_force_progressive_scan != m_force_progressive_scan || 
- (multisamples > 1 && g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) || - (resolution_scale > 1 && g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || - (resolution_scale > 1 && g_settings.gpu_texture_filter == GPUTextureFilter::Nearest && - g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords) || - g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || - m_texture_filtering != g_settings.gpu_texture_filter || - m_sprite_texture_filtering != g_settings.gpu_sprite_texture_filter || m_clamp_uvs != clamp_uvs || - (features.geometry_shaders && g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode) || - m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || - (features.noperspective_interpolation && g_settings.gpu_pgxp_enable && - g_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction) || - m_allow_sprite_mode != - ShouldAllowSpriteMode(m_resolution_scale, g_settings.gpu_texture_filter, g_settings.gpu_sprite_texture_filter) || - (!old_settings.gpu_texture_cache && g_settings.gpu_texture_cache)); + m_true_color != g_gpu_settings.gpu_true_color || + (old_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive) != + (g_gpu_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive) || + (multisamples > 1 && g_gpu_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) || + (resolution_scale > 1 && g_gpu_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || + (resolution_scale > 1 && g_gpu_settings.gpu_texture_filter == GPUTextureFilter::Nearest && + g_gpu_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords) || + g_gpu_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || + m_texture_filtering != g_gpu_settings.gpu_texture_filter || + m_sprite_texture_filtering != g_gpu_settings.gpu_sprite_texture_filter || 
m_clamp_uvs != clamp_uvs || + (features.geometry_shaders && g_gpu_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode) || + m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer() || + (features.noperspective_interpolation && g_gpu_settings.gpu_pgxp_enable && + g_gpu_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction) || + m_allow_sprite_mode != ShouldAllowSpriteMode(m_resolution_scale, g_gpu_settings.gpu_texture_filter, + g_gpu_settings.gpu_sprite_texture_filter) || + (!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache)); const bool resolution_dependent_shaders_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples); const bool downsampling_shaders_changed = ((m_resolution_scale > 1) != (resolution_scale > 1) || - (resolution_scale > 1 && (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || + (resolution_scale > 1 && (g_gpu_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || (m_downsample_mode == GPUDownsampleMode::Box && (resolution_scale != m_resolution_scale || - g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale))))); + g_gpu_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale))))); if (m_resolution_scale != resolution_scale) { Host::AddIconOSDMessage("ResolutionScaleChanged", ICON_FA_PAINT_BRUSH, fmt::format(TRANSLATE_FS("GPU_HW", "Internal resolution set to {0}x ({1}x{2})."), - resolution_scale, m_crtc_state.display_width * resolution_scale, - resolution_scale * m_crtc_state.display_height), + resolution_scale, m_display_width * resolution_scale, + resolution_scale * m_display_height), Host::OSD_INFO_DURATION); } @@ -523,31 +518,29 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_resolution_scale = resolution_scale; m_multisamples = multisamples; - m_texture_filtering = g_settings.gpu_texture_filter; - m_sprite_texture_filtering = g_settings.gpu_sprite_texture_filter; - m_line_detect_mode = 
(m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; + m_texture_filtering = g_gpu_settings.gpu_texture_filter; + m_sprite_texture_filtering = g_gpu_settings.gpu_sprite_texture_filter; + m_line_detect_mode = (m_resolution_scale > 1) ? g_gpu_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; m_downsample_mode = GetDownsampleMode(resolution_scale); - m_wireframe_mode = g_settings.gpu_wireframe_mode; - m_true_color = g_settings.gpu_true_color; + m_wireframe_mode = g_gpu_settings.gpu_wireframe_mode; + m_true_color = g_gpu_settings.gpu_true_color; m_clamp_uvs = clamp_uvs; m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(resolution_scale, m_texture_filtering, m_sprite_texture_filtering); - m_use_texture_cache = g_settings.gpu_texture_cache; - m_texture_dumping = m_use_texture_cache && g_settings.texture_replacements.dump_textures; + m_use_texture_cache = g_gpu_settings.gpu_texture_cache; + m_texture_dumping = m_use_texture_cache && g_gpu_settings.texture_replacements.dump_textures; m_batch.sprite_mode = (m_allow_sprite_mode && m_batch.sprite_mode); - const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); + const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer()); if (depth_buffer_changed) { - m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); + m_pgxp_depth_buffer = g_gpu_settings.UsingPGXPDepthBuffer(); m_batch.use_depth_buffer = false; m_depth_was_copied = false; } CheckSettings(); - UpdateSoftwareRenderer(true); - PrintSettingsToLog(); if (shaders_changed) @@ -586,10 +579,9 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) UpdateDownsamplingLevels(); RestoreDeviceContext(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false); + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); if (m_write_mask_as_depth) 
UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); } else if (m_vram_depth_texture && depth_buffer_changed) { @@ -601,7 +593,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) if (m_use_texture_cache && !old_settings.gpu_texture_cache) { - if (!GPUTextureCache::Initialize()) + if (!GPUTextureCache::Initialize(this)) { ERROR_LOG("Failed to initialize texture cache, disabling."); m_use_texture_cache = false; @@ -614,23 +606,33 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) GPUTextureCache::UpdateSettings(m_use_texture_cache, old_settings); - if (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || - (g_settings.gpu_downsample_mode == GPUDownsampleMode::Box && - g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)) + if (g_gpu_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || + (g_gpu_settings.gpu_downsample_mode == GPUDownsampleMode::Box && + g_gpu_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)) { UpdateDownsamplingLevels(); } + + // Need to reload CLUT if we're enabling SW rendering. 
+ if (g_gpu_settings.gpu_use_software_renderer_for_readbacks && !old_settings.gpu_use_software_renderer_for_readbacks) + { + if (m_draw_mode.mode_reg.texture_mode <= GPUTextureMode::Palette8Bit) + { + GPU_SW_Rasterizer::UpdateCLUT(m_draw_mode.palette_reg, + m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Palette8Bit); + } + } } void GPU_HW::CheckSettings() { const GPUDevice::Features features = g_gpu_device->GetFeatures(); - if (m_multisamples != g_settings.gpu_multisamples) + if (m_multisamples != g_gpu_settings.gpu_multisamples) { Host::AddIconOSDMessage("MSAAUnsupported", ICON_EMOJI_WARNING, fmt::format(TRANSLATE_FS("GPU_HW", "{}x MSAA is not supported, using {}x instead."), - g_settings.gpu_multisamples, m_multisamples), + g_gpu_settings.gpu_multisamples, m_multisamples), Host::OSD_CRITICAL_ERROR_DURATION); } else @@ -638,7 +640,7 @@ void GPU_HW::CheckSettings() Host::RemoveKeyedOSDMessage("MSAAUnsupported"); } - if (g_settings.gpu_per_sample_shading && !features.per_sample_shading) + if (g_gpu_settings.gpu_per_sample_shading && !features.per_sample_shading) { Host::AddIconOSDMessage("SSAAUnsupported", ICON_EMOJI_WARNING, TRANSLATE_STR("GPU_HW", "SSAA is not supported, using MSAA instead."), @@ -704,13 +706,13 @@ void GPU_HW::CheckSettings() { const u32 resolution_scale = CalculateResolutionScale(); const u32 box_downscale = GetBoxDownsampleScale(resolution_scale); - if (box_downscale != g_settings.gpu_downsample_scale || box_downscale == resolution_scale) + if (box_downscale != g_gpu_settings.gpu_downsample_scale || box_downscale == resolution_scale) { Host::AddIconOSDMessage( "BoxDownsampleUnsupported", ICON_FA_PAINT_BRUSH, fmt::format(TRANSLATE_FS( "GPU_HW", "Resolution scale {0}x is not divisible by downsample scale {1}x, using {2}x instead."), - resolution_scale, g_settings.gpu_downsample_scale, box_downscale), + resolution_scale, g_gpu_settings.gpu_downsample_scale, box_downscale), Host::OSD_WARNING_DURATION); } else @@ -718,7 +720,7 @@ void 
GPU_HW::CheckSettings() Host::RemoveKeyedOSDMessage("BoxDownsampleUnsupported"); } - if (box_downscale == g_settings.gpu_resolution_scale) + if (box_downscale == g_gpu_settings.gpu_resolution_scale) m_downsample_mode = GPUDownsampleMode::Disabled; } } @@ -726,15 +728,15 @@ void GPU_HW::CheckSettings() u32 GPU_HW::CalculateResolutionScale() const { u32 scale; - if (g_settings.gpu_resolution_scale != 0) + if (g_gpu_settings.gpu_resolution_scale != 0) { - scale = g_settings.gpu_resolution_scale; + scale = g_gpu_settings.gpu_resolution_scale; } else { // Auto scaling. - if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0 || m_crtc_state.display_vram_width == 0 || - m_crtc_state.display_vram_height == 0 || m_GPUSTAT.display_disable || !g_gpu_device->HasMainSwapChain()) + if (m_display_width == 0 || m_display_height == 0 || m_display_vram_width == 0 || m_display_vram_height == 0 || + !m_display_texture || !g_gpu_device->HasMainSwapChain()) { // When the system is starting and all borders crop is enabled, the registers are zero, and // display_height therefore is also zero. Keep the existing resolution until it updates. 
@@ -751,19 +753,19 @@ u32 GPU_HW::CalculateResolutionScale() const const s32 draw_width = draw_rect.width(); const s32 draw_height = draw_rect.height(); scale = static_cast( - std::ceil(std::max(static_cast(draw_width) / static_cast(m_crtc_state.display_vram_width), - static_cast(draw_height) / static_cast(m_crtc_state.display_vram_height)))); + std::ceil(std::max(static_cast(draw_width) / static_cast(m_display_vram_width), + static_cast(draw_height) / static_cast(m_display_vram_height)))); VERBOSE_LOG("Draw Size = {}x{}, VRAM Size = {}x{}, Preferred Scale = {}", draw_width, draw_height, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, scale); + m_display_vram_width, m_display_vram_height, scale); } } - if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale)) + if (g_gpu_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale)) { const u32 new_scale = Common::PreviousPow2(scale); WARNING_LOG("Resolution scale {}x not supported for adaptive downsampling, using {}x", scale, new_scale); - if (g_settings.gpu_resolution_scale != 0) + if (g_gpu_settings.gpu_resolution_scale != 0) { Host::AddIconOSDMessage( "ResolutionNotPow2", ICON_FA_PAINT_BRUSH, @@ -779,11 +781,6 @@ u32 GPU_HW::CalculateResolutionScale() const return std::clamp(scale, 1, GetMaxResolutionScale()); } -u32 GPU_HW::GetResolutionScale() const -{ - return m_resolution_scale; -} - void GPU_HW::UpdateResolutionScale() { if (CalculateResolutionScale() != m_resolution_scale) @@ -792,7 +789,13 @@ void GPU_HW::UpdateResolutionScale() GPUDownsampleMode GPU_HW::GetDownsampleMode(u32 resolution_scale) const { - return (resolution_scale == 1) ? GPUDownsampleMode::Disabled : g_settings.gpu_downsample_mode; + return (resolution_scale == 1) ? GPUDownsampleMode::Disabled : g_gpu_settings.gpu_downsample_mode; +} + +bool GPU_HW::ShouldDrawWithSoftwareRenderer() const +{ + // TODO: FIXME: Move into class. 
+ return g_gpu_settings.gpu_use_software_renderer_for_readbacks; } bool GPU_HW::IsUsingMultisampling() const @@ -800,15 +803,15 @@ bool GPU_HW::IsUsingMultisampling() const return m_multisamples > 1; } -bool GPU_HW::IsUsingDownsampling() const +bool GPU_HW::IsUsingDownsampling(const GPUBackendUpdateDisplayCommand* cmd) const { - return (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24); + return (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit); } void GPU_HW::SetFullVRAMDirtyRectangle() { m_vram_dirty_draw_rect = VRAM_SIZE_RECT; - m_draw_mode.SetTexturePageChanged(); + m_draw_mode.bits = INVALID_DRAW_MODE_BITS; } void GPU_HW::ClearVRAMDirtyRectangle() @@ -853,12 +856,12 @@ void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) { // the vram area can include the texture page, but the game can leave it as-is. in this case, set it as dirty so the // shadow texture is updated - if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && + if (m_draw_mode.bits != INVALID_DRAW_MODE_BITS && m_batch.texture_mode != BatchTextureMode::Disabled && (GetTextureRect(m_draw_mode.mode_reg.texture_page, m_draw_mode.mode_reg.texture_mode).rintersects(update_rect) || (m_draw_mode.mode_reg.IsUsingPalette() && GetPaletteRect(m_draw_mode.palette_reg, m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) { - m_draw_mode.SetTexturePageChanged(); + m_draw_mode.bits = INVALID_DRAW_MODE_BITS; } } @@ -867,12 +870,13 @@ void GPU_HW::PrintSettingsToLog() INFO_LOG("Resolution Scale: {} ({}x{}), maximum {}", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, VRAM_HEIGHT * m_resolution_scale, GetMaxResolutionScale()); INFO_LOG("Multisampling: {}x{}", m_multisamples, - (g_settings.gpu_per_sample_shading && g_gpu_device->GetFeatures().per_sample_shading) ? + (g_gpu_settings.gpu_per_sample_shading && g_gpu_device->GetFeatures().per_sample_shading) ? 
" (per sample shading)" : ""); - INFO_LOG("Dithering: {}", m_true_color ? "Disabled" : "Enabled", (!m_true_color && g_settings.gpu_scaled_dithering)); + INFO_LOG("Dithering: {}", m_true_color ? "Disabled" : "Enabled", + (!m_true_color && g_gpu_settings.gpu_scaled_dithering)); INFO_LOG("Force round texture coordinates: {}", - (m_resolution_scale > 1 && g_settings.gpu_force_round_texcoords) ? "Enabled" : "Disabled"); + (m_resolution_scale > 1 && g_gpu_settings.gpu_force_round_texcoords) ? "Enabled" : "Disabled"); INFO_LOG("Texture Filtering: {}/{}", Settings::GetTextureFilterDisplayName(m_texture_filtering), Settings::GetTextureFilterDisplayName(m_sprite_texture_filtering)); INFO_LOG("Dual-source blending: {}", m_supports_dual_source_blend ? "Supported" : "Not supported"); @@ -881,7 +885,7 @@ void GPU_HW::PrintSettingsToLog() INFO_LOG("Downsampling: {}", Settings::GetDownsampleModeDisplayName(m_downsample_mode)); INFO_LOG("Wireframe rendering: {}", Settings::GetGPUWireframeModeDisplayName(m_wireframe_mode)); INFO_LOG("Line detection: {}", Settings::GetLineDetectModeDisplayName(m_line_detect_mode)); - INFO_LOG("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO"); + INFO_LOG("Using software renderer for readbacks: {}", ShouldDrawWithSoftwareRenderer() ? "YES" : "NO"); INFO_LOG("Separate sprite shaders: {}", m_allow_sprite_mode ? 
"YES" : "NO"); } @@ -994,6 +998,7 @@ void GPU_HW::ClearFramebuffer() if (m_use_texture_cache) GPUTextureCache::Invalidate(); m_last_depth_z = 1.0f; + m_current_depth = 1; } void GPU_HW::SetVRAMRenderTarget() @@ -1063,13 +1068,15 @@ bool GPU_HW::CompilePipelines(Error* error) const GPUDevice::Features features = g_gpu_device->GetFeatures(); const bool upscaled = (m_resolution_scale > 1); const bool msaa = (m_multisamples > 1); - const bool per_sample_shading = (msaa && g_settings.gpu_per_sample_shading && features.per_sample_shading); + const bool per_sample_shading = (msaa && g_gpu_settings.gpu_per_sample_shading && features.per_sample_shading); const bool force_round_texcoords = - (upscaled && m_texture_filtering == GPUTextureFilter::Nearest && g_settings.gpu_force_round_texcoords); - const bool true_color = g_settings.gpu_true_color; - const bool scaled_dithering = (!m_true_color && upscaled && g_settings.gpu_scaled_dithering); + (upscaled && m_texture_filtering == GPUTextureFilter::Nearest && g_gpu_settings.gpu_force_round_texcoords); + const bool true_color = g_gpu_settings.gpu_true_color; + const bool scaled_dithering = (!m_true_color && upscaled && g_gpu_settings.gpu_scaled_dithering); const bool disable_color_perspective = (features.noperspective_interpolation && ShouldDisableColorPerspective()); const bool needs_page_texture = m_use_texture_cache; + const bool force_progressive_scan = + (g_gpu_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive); // Determine when to use shader blending. // FBFetch is free, we need it for filtering without DSB, or when accurate blending is forced. @@ -1078,10 +1085,10 @@ bool GPU_HW::CompilePipelines(Error* error) // Abuse the depth buffer for the mask bit when it's free (FBFetch), or PGXP depth buffering is enabled. 
m_allow_shader_blend = features.framebuffer_fetch || ((features.feedback_loops || features.raster_order_views) && - (m_pgxp_depth_buffer || g_settings.IsUsingAccurateBlending() || + (m_pgxp_depth_buffer || g_gpu_settings.IsUsingAccurateBlending() || (!m_supports_dual_source_blend && (IsBlendedTextureFiltering(m_texture_filtering) || IsBlendedTextureFiltering(m_sprite_texture_filtering))))); - m_prefer_shader_blend = (m_allow_shader_blend && g_settings.IsUsingAccurateBlending()); + m_prefer_shader_blend = (m_allow_shader_blend && g_gpu_settings.IsUsingAccurateBlending()); m_use_rov_for_shader_blend = (m_allow_shader_blend && !features.framebuffer_fetch && features.raster_order_views && (m_prefer_shader_blend || !features.feedback_loops)); m_write_mask_as_depth = (!m_pgxp_depth_buffer && !features.framebuffer_fetch && !m_prefer_shader_blend); @@ -1117,11 +1124,11 @@ bool GPU_HW::CompilePipelines(Error* error) const u32 num_active_texture_modes = (max_active_texture_modes - BoolToUInt32(!needs_page_texture)); const u32 total_vertex_shaders = ((m_allow_sprite_mode ? 7 : 4) - BoolToUInt32(!needs_page_texture)); const u32 total_fragment_shaders = ((1 + BoolToUInt32(needs_rov_depth)) * 5 * 5 * num_active_texture_modes * 2 * - (1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!m_force_progressive_scan))); + (1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!force_progressive_scan))); const u32 total_items = total_vertex_shaders + total_fragment_shaders + ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * num_active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * - (1 + BoolToUInt32(!m_force_progressive_scan))) + // batch pipelines + (1 + BoolToUInt32(!force_progressive_scan))) + // batch pipelines ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 
1 : 0) + // wireframe (2 * 2) + // vram fill (1 + BoolToUInt32(m_write_mask_as_depth)) + // vram copy @@ -1181,7 +1188,8 @@ bool GPU_HW::CompilePipelines(Error* error) return false; } - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } } } @@ -1210,8 +1218,13 @@ bool GPU_HW::CompilePipelines(Error* error) // If using ROV depth, we only draw with shader blending. (needs_rov_depth && render_mode != static_cast(BatchRenderMode::ShaderBlend))) { - progress.Increment(num_active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * - (1 + BoolToUInt32(!m_force_progressive_scan))); + if (!progress.Increment(num_active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * + (1 + BoolToUInt32(!force_progressive_scan)), + error)) [[unlikely]] + { + return false; + } + continue; } @@ -1225,7 +1238,12 @@ bool GPU_HW::CompilePipelines(Error* error) if (check_mask && render_mode != static_cast(BatchRenderMode::ShaderBlend)) { // mask bit testing is only valid with shader blending. - progress.Increment((1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!m_force_progressive_scan))); + if (!progress.Increment((1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!force_progressive_scan)), + error)) [[unlikely]] + { + return false; + } + continue; } @@ -1238,7 +1256,7 @@ bool GPU_HW::CompilePipelines(Error* error) for (u8 interlacing = 0; interlacing < 2; interlacing++) { // Never going to draw with line skipping in force progressive. - if (interlacing && m_force_progressive_scan) + if (interlacing && force_progressive_scan) continue; const bool sprite = (static_cast(texture_mode) >= BatchTextureMode::SpriteStart); @@ -1262,7 +1280,8 @@ bool GPU_HW::CompilePipelines(Error* error) return false; } - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } } } @@ -1320,8 +1339,13 @@ bool GPU_HW::CompilePipelines(Error* error) // If using ROV depth, we only draw with shader blending. 
(needs_rov_depth && render_mode != static_cast(BatchRenderMode::ShaderBlend))) { - progress.Increment(num_active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * - (1 + BoolToUInt32(!m_force_progressive_scan))); + if (!progress.Increment(num_active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * + (1 + BoolToUInt32(!force_progressive_scan)), + error)) [[unlikely]] + { + return false; + } + continue; } @@ -1339,7 +1363,7 @@ bool GPU_HW::CompilePipelines(Error* error) for (u8 interlacing = 0; interlacing < 2; interlacing++) { // Never going to draw with line skipping in force progressive. - if (interlacing && m_force_progressive_scan) + if (interlacing && force_progressive_scan) continue; for (u8 check_mask = 0; check_mask < 2; check_mask++) @@ -1456,7 +1480,8 @@ bool GPU_HW::CompilePipelines(Error* error) return false; } - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } } } @@ -1500,7 +1525,8 @@ bool GPU_HW::CompilePipelines(Error* error) plconfig.geometry_shader = nullptr; plconfig.fragment_shader = nullptr; - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } batch_shader_guard.Run(); @@ -1534,7 +1560,8 @@ bool GPU_HW::CompilePipelines(Error* error) if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig, error))) return false; - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } } @@ -1561,7 +1588,8 @@ bool GPU_HW::CompilePipelines(Error* error) GL_OBJECT_NAME_FMT(m_vram_copy_pipelines[depth_test], "VRAM Write Pipeline, depth={}", depth_test); - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } } @@ -1592,7 +1620,8 @@ bool GPU_HW::CompilePipelines(Error* error) GL_OBJECT_NAME_FMT(m_vram_write_pipelines[depth_test], "VRAM Write Pipeline, depth={}", depth_test); - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } } @@ -1610,7 
+1639,8 @@ bool GPU_HW::CompilePipelines(Error* error) if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) return false; - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } // VRAM update depth @@ -1631,7 +1661,8 @@ bool GPU_HW::CompilePipelines(Error* error) GL_OBJECT_NAME(m_vram_update_depth_pipeline, "VRAM Update Depth Pipeline"); - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; } plconfig.SetTargetFormats(VRAM_RT_FORMAT); @@ -1657,7 +1688,8 @@ bool GPU_HW::CompilePipelines(Error* error) if (!CompileResolutionDependentPipelines(error) || !CompileDownsamplePipelines(error)) return false; - progress.Increment(); + if (!progress.Increment(1, error)) [[unlikely]] + return false; #undef UPDATE_PROGRESS @@ -2064,7 +2096,8 @@ ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode } } -ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) +ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(const GPUBackendDrawCommand* cmd, + BatchVertex* vertices) { // Taken from beetle-psx gpu_polygon.cpp // For X/Y flipped 2D sprites, PSX games rely on a very specific rasterization behavior. If U or V is decreasing in X @@ -2170,7 +2203,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVert // 2D polygons should have zero change in V on the X axis, and vice versa. 
if (m_allow_sprite_mode) - SetBatchSpriteMode(zero_dudy && zero_dvdx); + SetBatchSpriteMode(cmd, zero_dudy && zero_dvdx); } bool GPU_HW::IsPossibleSpritePolygon(const BatchVertex* vertices) const @@ -2350,7 +2383,7 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::ExpandLineTriangles(BatchVertex* vertices) return true; } -void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) +void GPU_HW::ComputePolygonUVLimits(const GPUBackendDrawCommand* cmd, BatchVertex* vertices, u32 num_vertices) { DebugAssert(num_vertices == 3 || num_vertices == 4); @@ -2378,10 +2411,10 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) vertices[i].SetUVLimits(min_u, max_u, min_v, max_v); if (ShouldCheckForTexPageOverlap()) - CheckForTexPageOverlap(GSVector4i(min).upl32(GSVector4i(max)).u16to32()); + CheckForTexPageOverlap(cmd, GSVector4i(min).upl32(GSVector4i(max)).u16to32()); } -void GPU_HW::SetBatchDepthBuffer(bool enabled) +void GPU_HW::SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled) { if (m_batch.use_depth_buffer == enabled) return; @@ -2389,13 +2422,13 @@ void GPU_HW::SetBatchDepthBuffer(bool enabled) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } m_batch.use_depth_buffer = enabled; } -void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices) +void GPU_HW::CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices) { DebugAssert(num_vertices == 3 || num_vertices == 4); float average_z; @@ -2404,17 +2437,17 @@ void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices) else average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w + vertices[3].w) / 4.0f, 1.0f); - if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold) + if ((average_z - m_last_depth_z) >= g_gpu_settings.gpu_pgxp_depth_clear_threshold) { FlushRender(); 
CopyAndClearDepthBuffer(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } m_last_depth_z = average_z; } -void GPU_HW::SetBatchSpriteMode(bool enabled) +void GPU_HW::SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled) { if (m_batch.sprite_mode == enabled) return; @@ -2422,7 +2455,7 @@ void GPU_HW::SetBatchSpriteMode(bool enabled) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } GL_INS_FMT("Sprite mode is now {}", enabled ? "ON" : "OFF"); @@ -2430,6 +2463,43 @@ void GPU_HW::SetBatchSpriteMode(bool enabled) m_batch.sprite_mode = enabled; } +void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); + + const u32 num_vertices = cmd->num_vertices; + DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); + + const float depth = GetCurrentNormalizedVertexDepth(); + + for (u32 i = 0; i < num_vertices; i += 2) + { + const GSVector2i start_pos = GSVector2i::load(&cmd->vertices[i].x); + const u32 start_color = cmd->vertices[i].color; + const GSVector2i end_pos = GSVector2i::load(&cmd->vertices[i + 1].x); + const u32 end_color = cmd->vertices[i + 1].color; + + const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos); + const GSVector4i rect = + GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty()); + + AddDrawnRectangle(clamped_rect); + DrawLine(GSVector4(bounds), start_color, end_color, depth); + } + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawLineFunction(cmd->shading_enable, 
cmd->transparency_enable); + + for (u32 i = 0; i < num_vertices; i += 2) + DrawFunction(cmd, &cmd->vertices[i], &cmd->vertices[i + 1]); + } +} + void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) { DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); @@ -2528,454 +2598,242 @@ void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) m_batch_index_space -= 6; } -void GPU_HW::LoadVertices() +void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) { - if (m_GPUSTAT.check_mask_before_draw) - m_current_depth++; + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); + SetBatchSpriteMode(cmd, m_allow_sprite_mode); + DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE); - const GPURenderCommand rc{m_render_command.bits}; - const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg.bits) << 16); + const s32 pos_x = cmd->x; + const s32 pos_y = cmd->y; + const u32 texpage = m_draw_mode.bits; + const u32 color = (cmd->texture_enable && cmd->raw_texture_enable) ? UINT32_C(0x00808080) : cmd->color; const float depth = GetCurrentNormalizedVertexDepth(); + const u32 orig_tex_left = ZeroExtend32(Truncate8(cmd->texcoord)); + const u32 orig_tex_top = ZeroExtend32(cmd->texcoord) >> 8; + const u32 rectangle_width = cmd->width; + const u32 rectangle_height = cmd->height; - switch (rc.primitive) + const GSVector4i rect = + GSVector4i(pos_x, pos_y, pos_x + static_cast(rectangle_width), pos_y + static_cast(rectangle_height)); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + DebugAssert(!clamped_rect.rempty()); + + // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. 
+ u32 tex_top = orig_tex_top; + for (u32 y_offset = 0; y_offset < rectangle_height;) { - case GPUPrimitive::Polygon: + const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); + const float quad_start_y = static_cast(pos_y + static_cast(y_offset)); + const float quad_end_y = quad_start_y + static_cast(quad_height); + const u32 tex_bottom = tex_top + quad_height; + + u32 tex_left = orig_tex_left; + for (u32 x_offset = 0; x_offset < rectangle_width;) { - const bool textured = rc.texture_enable; - const bool raw_texture = textured && rc.raw_texture_enable; - const bool shaded = rc.shading_enable; - const bool pgxp = g_settings.gpu_pgxp_enable; + const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); + const float quad_start_x = static_cast(pos_x + static_cast(x_offset)); + const float quad_end_x = quad_start_x + static_cast(quad_width); + const u32 tex_right = tex_left + quad_width; + const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - const u32 first_color = rc.color_for_first_vertex; - u32 num_vertices = rc.quad_polygon ? 4 : 3; - std::array vertices; - std::array native_vertex_positions; - std::array native_texcoords; - bool valid_w = g_settings.gpu_pgxp_texture_correction; - for (u32 i = 0; i < num_vertices; i++) + if (cmd->texture_enable && ShouldCheckForTexPageOverlap()) { - const u32 vert_color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - const u32 color = raw_texture ? UINT32_C(0x00808080) : vert_color; - const u64 maddr_and_pos = m_fifo.Pop(); - const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; - const u16 texcoord = textured ? 
Truncate16(FifoPop()) : 0; - const s32 native_x = native_vertex_positions[i].x = m_drawing_offset.x + vp.x; - const s32 native_y = native_vertex_positions[i].y = m_drawing_offset.y + vp.y; - native_texcoords[i] = texcoord; - vertices[i].Set(static_cast(native_x), static_cast(native_y), depth, 1.0f, color, texpage, - texcoord, 0xFFFF0000u); - - if (pgxp) - { - valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, - m_drawing_offset.x, m_drawing_offset.y, &vertices[i].x, &vertices[i].y, - &vertices[i].w); - } - } - if (pgxp) - { - if (!valid_w) - { - SetBatchDepthBuffer(false); - if (g_settings.gpu_pgxp_disable_2d) - { - // NOTE: This reads uninitialized data, but it's okay, it doesn't get used. - for (size_t i = 0; i < vertices.size(); i++) - { - BatchVertex& v = vertices[i]; - v.x = static_cast(native_vertex_positions[i].x); - v.y = static_cast(native_vertex_positions[i].y); - v.w = 1.0f; - } - } - else - { - for (BatchVertex& v : vertices) - v.w = 1.0f; - } - } - else if (m_pgxp_depth_buffer) - { - SetBatchDepthBuffer(true); - CheckForDepthClear(vertices.data(), num_vertices); - } + CheckForTexPageOverlap(cmd, GSVector4i(static_cast(tex_left), static_cast(tex_top), + static_cast(tex_right), static_cast(tex_bottom))); } - // Use PGXP to exclude primitives that are definitely 3D. - const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); - if (m_resolution_scale > 1 && !is_3d && rc.quad_polygon) - HandleFlippedQuadTextureCoordinates(vertices.data()); - else if (m_allow_sprite_mode) - SetBatchSpriteMode(pgxp ? 
!is_3d : IsPossibleSpritePolygon(vertices.data())); + const u32 base_vertex = m_batch_vertex_count; + (m_batch_vertex_ptr++) + ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_top), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_top), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_bottom), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_bottom), + uv_limits); + m_batch_vertex_count += 4; + m_batch_vertex_space -= 4; - if (m_sw_renderer) - { - GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices); - FillDrawCommand(cmd, rc); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 0); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 3); + m_batch_index_count += 6; + m_batch_index_space -= 6; - const u32 sw_num_vertices = rc.quad_polygon ? 4 : 3; - for (u32 i = 0; i < sw_num_vertices; i++) - { - GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; - vert->x = native_vertex_positions[i].x; - vert->y = native_vertex_positions[i].y; - vert->texcoord = native_texcoords[i]; - vert->color = vertices[i].color; - } - - m_sw_renderer->PushCommand(cmd); - } - - // Cull polygons which are too large. 
- const GSVector2 v0f = GSVector2::load(&vertices[0].x); - const GSVector2 v1f = GSVector2::load(&vertices[1].x); - const GSVector2 v2f = GSVector2::load(&vertices[2].x); - const GSVector2 min_pos_12 = v1f.min(v2f); - const GSVector2 max_pos_12 = v1f.max(v2f); - const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); - const bool first_tri_culled = (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || - draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || clamped_draw_rect_012.rempty()); - if (first_tri_culled) - { - GL_INS_FMT("Culling off-screen/too-large polygon: {},{} {},{} {},{}", native_vertex_positions[0].x, - native_vertex_positions[0].y, native_vertex_positions[1].x, native_vertex_positions[1].y, - native_vertex_positions[2].x, native_vertex_positions[2].y); - - if (!rc.quad_polygon) - return; - } - else - { - if (textured && m_compute_uv_range) - ComputePolygonUVLimits(vertices.data(), num_vertices); - - AddDrawnRectangle(clamped_draw_rect_012); - AddDrawTriangleTicks(native_vertex_positions[0], native_vertex_positions[1], native_vertex_positions[2], - rc.shading_enable, rc.texture_enable, rc.transparency_enable); - - // Expand lines to triangles (Doom, Soul Blade, etc.) 
- if (!rc.quad_polygon && m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && - ExpandLineTriangles(vertices.data())) - { - return; - } - - const u32 start_index = m_batch_vertex_count; - DebugAssert(m_batch_index_space >= 3); - *(m_batch_index_ptr++) = Truncate16(start_index); - *(m_batch_index_ptr++) = Truncate16(start_index + 1); - *(m_batch_index_ptr++) = Truncate16(start_index + 2); - m_batch_index_count += 3; - m_batch_index_space -= 3; - } - - // quads - if (rc.quad_polygon) - { - const GSVector2 v3f = GSVector2::load(&vertices[3].x); - const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); - - // Cull polygons which are too large. - const bool second_tri_culled = - (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - clamped_draw_rect_123.rempty()); - if (second_tri_culled) - { - GL_INS_FMT("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", - native_vertex_positions[2].x, native_vertex_positions[2].y, native_vertex_positions[1].x, - native_vertex_positions[1].y, native_vertex_positions[0].x, native_vertex_positions[0].y); - - if (first_tri_culled) - return; - } - else - { - if (first_tri_culled && textured && m_compute_uv_range) - ComputePolygonUVLimits(vertices.data(), num_vertices); - - AddDrawnRectangle(clamped_draw_rect_123); - AddDrawTriangleTicks(native_vertex_positions[2], native_vertex_positions[1], native_vertex_positions[3], - rc.shading_enable, rc.texture_enable, rc.transparency_enable); - - const u32 start_index = m_batch_vertex_count; - DebugAssert(m_batch_index_space >= 3); - *(m_batch_index_ptr++) = Truncate16(start_index + 2); - *(m_batch_index_ptr++) = Truncate16(start_index + 1); - *(m_batch_index_ptr++) = Truncate16(start_index + 3); - m_batch_index_count += 3; - 
m_batch_index_space -= 3; - } - } - - if (num_vertices == 4) - { - DebugAssert(m_batch_vertex_space >= 4); - std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4); - m_batch_vertex_ptr += 4; - m_batch_vertex_count += 4; - m_batch_vertex_space -= 4; - } - else - { - DebugAssert(m_batch_vertex_space >= 3); - std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); - m_batch_vertex_ptr += 3; - m_batch_vertex_count += 3; - m_batch_vertex_space -= 3; - } + x_offset += quad_width; + tex_left = 0; } - break; - case GPUPrimitive::Rectangle: + y_offset += quad_height; + tex_top = 0; + } + + AddDrawnRectangle(clamped_rect); + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawRectangleFunction( + cmd->texture_enable, cmd->raw_texture_enable, cmd->transparency_enable); + DrawFunction(cmd); + } +} + +void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) +{ + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); + + // TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below. + const float depth = GetCurrentNormalizedVertexDepth(); + const bool raw_texture = (cmd->texture_enable && cmd->raw_texture_enable); + const u32 num_vertices = cmd->num_vertices; + const u32 texpage = m_draw_mode.bits; + std::array vertices; + for (u32 i = 0; i < num_vertices; i++) + { + const GPUBackendDrawPolygonCommand::Vertex& vert = cmd->vertices[i]; + const GSVector2 vert_pos = GSVector2(GSVector2i::load(&vert.x)); + vertices[i].Set(vert_pos.x, vert_pos.y, depth, 1.0f, raw_texture ? 
UINT32_C(0x00808080) : vert.color, texpage, + vert.texcoord, 0xFFFF0000u); + } + + FinishPolygonDraw(cmd, vertices, num_vertices, false, false); + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + cmd->shading_enable, cmd->texture_enable, cmd->raw_texture_enable, cmd->transparency_enable); + DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); + if (cmd->num_vertices > 3) + DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); + } +} + +void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) +{ + PrepareDraw(cmd); + + // TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below. + const float depth = GetCurrentNormalizedVertexDepth(); + const bool raw_texture = (cmd->texture_enable && cmd->raw_texture_enable); + const u32 num_vertices = cmd->num_vertices; + const u32 texpage = m_draw_mode.bits; + std::array vertices; + for (u32 i = 0; i < num_vertices; i++) + { + const GPUBackendDrawPrecisePolygonCommand::Vertex& vert = cmd->vertices[i]; + vertices[i].Set(vert.x, vert.y, depth, vert.w, raw_texture ? UINT32_C(0x00808080) : vert.color, texpage, + vert.texcoord, 0xFFFF0000u); + } + + const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w; + SetBatchDepthBuffer(cmd, use_depth); + if (use_depth) + CheckForDepthClear(cmd, vertices.data(), num_vertices); + + // Use PGXP to exclude primitives that are definitely 3D. 
+ const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); + FinishPolygonDraw(cmd, vertices, num_vertices, true, is_3d); + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + cmd->shading_enable, cmd->texture_enable, cmd->raw_texture_enable, cmd->transparency_enable); + GPUBackendDrawPolygonCommand::Vertex sw_vertices[4]; + for (u32 i = 0; i < cmd->num_vertices; i++) { - const u32 color = (rc.texture_enable && rc.raw_texture_enable) ? UINT32_C(0x00808080) : rc.color_for_first_vertex; - const GPUVertexPosition vp{FifoPop()}; - const s32 pos_x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); - const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); - - const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(FifoPop()) : 0); - u32 orig_tex_left = ZeroExtend16(texcoord_x); - u32 orig_tex_top = ZeroExtend16(texcoord_y); - u32 rectangle_width; - u32 rectangle_height; - switch (rc.rectangle_size) - { - case GPUDrawRectangleSize::R1x1: - rectangle_width = 1; - rectangle_height = 1; - break; - case GPUDrawRectangleSize::R8x8: - rectangle_width = 8; - rectangle_height = 8; - break; - case GPUDrawRectangleSize::R16x16: - rectangle_width = 16; - rectangle_height = 16; - break; - default: - { - const u32 width_and_height = FifoPop(); - rectangle_width = (width_and_height & VRAM_WIDTH_MASK); - rectangle_height = ((width_and_height >> 16) & VRAM_HEIGHT_MASK); - } - break; - } - - const GSVector4i rect = - GSVector4i(pos_x, pos_y, pos_x + static_cast(rectangle_width), pos_y + static_cast(rectangle_height)); - const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); - if (clamped_rect.rempty()) [[unlikely]] - { - GL_INS_FMT("Culling off-screen rectangle {}", rect); - return; - } - - // we can split the rectangle up into potentially 8 quads - SetBatchDepthBuffer(false); - 
SetBatchSpriteMode(m_allow_sprite_mode); - DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && - m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE); - - // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. - u32 tex_top = orig_tex_top; - for (u32 y_offset = 0; y_offset < rectangle_height;) - { - const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); - const float quad_start_y = static_cast(pos_y + static_cast(y_offset)); - const float quad_end_y = quad_start_y + static_cast(quad_height); - const u32 tex_bottom = tex_top + quad_height; - - u32 tex_left = orig_tex_left; - for (u32 x_offset = 0; x_offset < rectangle_width;) - { - const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); - const float quad_start_x = static_cast(pos_x + static_cast(x_offset)); - const float quad_end_x = quad_start_x + static_cast(quad_width); - const u32 tex_right = tex_left + quad_width; - const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - - if (rc.texture_enable && ShouldCheckForTexPageOverlap()) - { - CheckForTexPageOverlap(GSVector4i(static_cast(tex_left), static_cast(tex_top), - static_cast(tex_right), static_cast(tex_bottom))); - } - - const u32 base_vertex = m_batch_vertex_count; - (m_batch_vertex_ptr++) - ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_top), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_top), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_bottom), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_bottom), - uv_limits); - m_batch_vertex_count += 4; - 
m_batch_vertex_space -= 4; - - *(m_batch_index_ptr++) = Truncate16(base_vertex + 0); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 3); - m_batch_index_count += 6; - m_batch_index_space -= 6; - - x_offset += quad_width; - tex_left = 0; - } - - y_offset += quad_height; - tex_top = 0; - } - - AddDrawnRectangle(clamped_rect); - AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); - - if (m_sw_renderer) - { - GPUBackendDrawRectangleCommand* cmd = m_sw_renderer->NewDrawRectangleCommand(); - FillDrawCommand(cmd, rc); - cmd->color = color; - cmd->x = pos_x; - cmd->y = pos_y; - cmd->width = static_cast(rectangle_width); - cmd->height = static_cast(rectangle_height); - cmd->texcoord = (static_cast(texcoord_y) << 8) | static_cast(texcoord_x); - m_sw_renderer->PushCommand(cmd); - } + const GPUBackendDrawPrecisePolygonCommand::Vertex& src = cmd->vertices[i]; + sw_vertices[i] = GPUBackendDrawPolygonCommand::Vertex{ + .x = src.native_x, .y = src.native_y, .color = src.color, .texcoord = src.texcoord}; } - break; - case GPUPrimitive::Line: - { - SetBatchDepthBuffer(false); + DrawFunction(cmd, &sw_vertices[0], &sw_vertices[1], &sw_vertices[2]); + if (cmd->num_vertices > 3) + DrawFunction(cmd, &sw_vertices[2], &sw_vertices[1], &sw_vertices[3]); + } +} - if (!rc.polyline) - { - DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); +ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand* cmd, + std::array& vertices, u32 num_vertices, + bool is_precise, bool is_3d) +{ + // Use PGXP to exclude primitives that are definitely 3D. 
+ if (m_resolution_scale > 1 && !is_3d && cmd->quad_polygon) + HandleFlippedQuadTextureCoordinates(cmd, vertices.data()); + else if (m_allow_sprite_mode) + SetBatchSpriteMode(cmd, is_precise ? !is_3d : IsPossibleSpritePolygon(vertices.data())); - u32 start_color, end_color; - GPUVertexPosition start_pos, end_pos; - if (rc.shading_enable) - { - start_color = rc.color_for_first_vertex; - start_pos.bits = FifoPop(); - end_color = FifoPop() & UINT32_C(0x00FFFFFF); - end_pos.bits = FifoPop(); - } - else - { - start_color = end_color = rc.color_for_first_vertex; - start_pos.bits = FifoPop(); - end_pos.bits = FifoPop(); - } + const GSVector2 v0f = GSVector2::load(&vertices[0].x); + const GSVector2 v1f = GSVector2::load(&vertices[1].x); + const GSVector2 v2f = GSVector2::load(&vertices[2].x); + const GSVector2 min_pos_12 = v1f.min(v2f); + const GSVector2 max_pos_12 = v1f.max(v2f); + const GSVector4i draw_rect_012 = + GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); + DebugAssert(draw_rect_012.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_012.height() <= MAX_PRIMITIVE_HEIGHT && + !clamped_draw_rect_012.rempty()); - const GSVector2i vstart_pos = GSVector2i(start_pos.x + m_drawing_offset.x, start_pos.y + m_drawing_offset.y); - const GSVector2i vend_pos = GSVector2i(end_pos.x + m_drawing_offset.x, end_pos.y + m_drawing_offset.y); - const GSVector4i bounds = GSVector4i::xyxy(vstart_pos, vend_pos); - const GSVector4i rect = GSVector4i::xyxy(vstart_pos.min_s32(vend_pos), vstart_pos.max_s32(vend_pos)) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + if (cmd->texture_enable && m_compute_uv_range) + ComputePolygonUVLimits(cmd, vertices.data(), num_vertices); - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || 
clamped_rect.rempty()) - { - GL_INS_FMT("Culling too-large/off-screen line: {},{} - {},{}", bounds.x, bounds.y, bounds.z, bounds.w); - return; - } + AddDrawnRectangle(clamped_draw_rect_012); - AddDrawnRectangle(clamped_rect); - AddDrawLineTicks(clamped_rect, rc.shading_enable); + // Expand lines to triangles (Doom, Soul Blade, etc.) + if (!cmd->quad_polygon && m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && + ExpandLineTriangles(vertices.data())) + { + return; + } - // TODO: Should we do a PGXP lookup here? Most lines are 2D. - DrawLine(GSVector4(bounds), start_color, end_color, depth); + const u32 start_index = m_batch_vertex_count; + DebugAssert(m_batch_index_space >= 3); + *(m_batch_index_ptr++) = Truncate16(start_index); + *(m_batch_index_ptr++) = Truncate16(start_index + 1); + *(m_batch_index_ptr++) = Truncate16(start_index + 2); + m_batch_index_count += 3; + m_batch_index_space -= 3; - if (m_sw_renderer) - { - GPUBackendDrawLineCommand* cmd = m_sw_renderer->NewDrawLineCommand(2); - FillDrawCommand(cmd, rc); - GSVector4i::storel(&cmd->vertices[0], bounds); - cmd->vertices[0].color = start_color; - GSVector4i::storeh(&cmd->vertices[1], bounds); - cmd->vertices[1].color = end_color; - m_sw_renderer->PushCommand(cmd); - } - } - else - { - // Multiply by two because we don't use line strips. 
- const u32 num_vertices = GetPolyLineVertexCount(); - DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); + // quads, use num_vertices here, because the first half might be culled + if (num_vertices == 4) + { + const GSVector2 v3f = GSVector2::load(&vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); + DebugAssert(draw_rect_123.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_123.height() <= MAX_PRIMITIVE_HEIGHT && + !clamped_draw_rect_123.rempty()); + AddDrawnRectangle(clamped_draw_rect_123); - const bool shaded = rc.shading_enable; + DebugAssert(m_batch_index_space >= 3); + *(m_batch_index_ptr++) = Truncate16(start_index + 2); + *(m_batch_index_ptr++) = Truncate16(start_index + 1); + *(m_batch_index_ptr++) = Truncate16(start_index + 3); + m_batch_index_count += 3; + m_batch_index_space -= 3; - u32 buffer_pos = 0; - const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - GSVector2i start_pos = GSVector2i(start_vp.x + m_drawing_offset.x, start_vp.y + m_drawing_offset.y); - u32 start_color = rc.color_for_first_vertex; - - GPUBackendDrawLineCommand* cmd; - if (m_sw_renderer) - { - cmd = m_sw_renderer->NewDrawLineCommand(num_vertices); - FillDrawCommand(cmd, rc); - GSVector2i::store(&cmd->vertices[0].x, start_pos); - cmd->vertices[0].color = start_color; - } - else - { - cmd = nullptr; - } - - for (u32 i = 1; i < num_vertices; i++) - { - const u32 end_color = shaded ? 
(m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color; - const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; - const GSVector2i end_pos = GSVector2i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y); - const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos); - const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - GL_INS_FMT("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); - } - else - { - AddDrawnRectangle(clamped_rect); - AddDrawLineTicks(clamped_rect, rc.shading_enable); - - // TODO: Should we do a PGXP lookup here? Most lines are 2D. - DrawLine(GSVector4(bounds), start_color, end_color, depth); - } - - start_pos = end_pos; - start_color = end_color; - - if (cmd) - { - GSVector2i::store(&cmd->vertices[i], end_pos); - cmd->vertices[i].color = end_color; - } - } - - if (cmd) - m_sw_renderer->PushCommand(cmd); - } - } - break; - - default: - UnreachableCode(); - break; + DebugAssert(m_batch_vertex_space >= 4); + std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4); + m_batch_vertex_ptr += 4; + m_batch_vertex_count += 4; + m_batch_vertex_space -= 4; + } + else + { + DebugAssert(m_batch_vertex_space >= 3); + std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); + m_batch_vertex_ptr += 3; + m_batch_vertex_count += 3; + m_batch_vertex_space -= 3; } } @@ -2993,7 +2851,7 @@ bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u return true; } -ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) +ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(const GPUBackendDrawCommand* cmd, GSVector4i uv_rect) { DebugAssert((m_texpage_dirty != 0 || 
m_texture_dumping) && m_batch.texture_mode != BatchTextureMode::Disabled); @@ -3034,7 +2892,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } // We need to swap the dirty tracking over to drawn/written. @@ -3076,7 +2934,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } UpdateVRAMReadTexture(update_drawn, update_written); @@ -3129,26 +2987,27 @@ void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices MapGPUBuffer(required_vertices, required_indices); } -void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand() +void GPU_HW::EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd) { u32 required_vertices; u32 required_indices; - switch (m_render_command.primitive) + switch (cmd->type) { - case GPUPrimitive::Polygon: + case GPUBackendCommandType::DrawPolygon: + case GPUBackendCommandType::DrawPrecisePolygon: required_vertices = 4; // assume quad, in case of expansion required_indices = 6; break; - case GPUPrimitive::Rectangle: + case GPUBackendCommandType::DrawRectangle: required_vertices = MAX_VERTICES_FOR_RECTANGLE; // TODO: WRong required_indices = MAX_VERTICES_FOR_RECTANGLE; break; - case GPUPrimitive::Line: + case GPUBackendCommandType::DrawLine: { // assume expansion - const u32 vert_count = m_render_command.polyline ? 
GetPolyLineVertexCount() : 2; - required_vertices = vert_count * 4; - required_indices = vert_count * 6; + const GPUBackendDrawLineCommand* lcmd = static_cast(cmd); + required_vertices = lcmd->num_vertices * 4; + required_indices = lcmd->num_vertices * 6; } break; @@ -3183,96 +3042,19 @@ ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const return 1.0f - (static_cast(m_current_depth) / 65535.0f); } -void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) +void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 active_line_lsb) { - const bool current_enabled = (m_sw_renderer != nullptr); - const bool new_enabled = g_settings.gpu_use_software_renderer_for_readbacks; - const bool use_thread = !g_settings.gpu_texture_cache; - if (current_enabled == new_enabled) - { - if (m_sw_renderer) - m_sw_renderer->SetThreadEnabled(use_thread); - return; - } + FlushRender(); - if (!new_enabled) - { - if (m_sw_renderer) - m_sw_renderer->Shutdown(); - m_sw_renderer.reset(); - return; - } - - std::unique_ptr sw_renderer = std::make_unique(); - if (!sw_renderer->Initialize(use_thread)) - return; - - // We need to fill in the SW renderer's VRAM with the current state for hot toggles. - if (copy_vram_from_hw) - { - FlushRender(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - - // Sync the drawing area and CLUT. 
- GPUBackendSetDrawingAreaCommand* clip_cmd = sw_renderer->NewSetDrawingAreaCommand(); - clip_cmd->new_area = m_drawing_area; - sw_renderer->PushCommand(clip_cmd); - - if (IsCLUTValid()) - { - GPUBackendUpdateCLUTCommand* clut_cmd = sw_renderer->NewUpdateCLUTCommand(); - FillBackendCommandParameters(clut_cmd); - clut_cmd->reg.bits = static_cast(m_current_clut_reg_bits); - clut_cmd->clut_is_8bit = m_current_clut_is_8bit; - sw_renderer->PushCommand(clut_cmd); - } - } - - m_sw_renderer = std::move(sw_renderer); -} - -void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const -{ - cmd->params.bits = 0; - cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; - cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; - cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; - cmd->params.interlaced_rendering = m_GPUSTAT.SkipDrawingToActiveField(); -} - -void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const -{ - FillBackendCommandParameters(cmd); - cmd->rc.bits = rc.bits; - cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; - cmd->palette.bits = m_draw_mode.palette_reg.bits; - cmd->window = m_draw_mode.texture_window; -} - -void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ GL_SCOPE_FMT("FillVRAM({},{} => {},{} ({}x{}) with 0x{:08X}", x, y, x + width, y + height, width, height, color); DeactivateROV(); - const bool handle_with_tc = (m_use_texture_cache && !IsInterlacedRenderingEnabled()); - if (m_sw_renderer && !handle_with_tc) - { - GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - cmd->color = color; - m_sw_renderer->PushCommand(cmd); - } - GL_INS_FMT("Dirty draw area before: {}", m_vram_dirty_draw_rect); const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); // If TC is 
enabled, we have to update local memory. - if (handle_with_tc) + if (m_use_texture_cache && !interlaced_rendering) { AddWrittenRectangle(bounds); GPU_SW_Rasterizer::FillVRAM(x, y, width, height, color, false, 0); @@ -3280,13 +3062,14 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) else { AddUnclampedDrawnRectangle(bounds); + if (ShouldDrawWithSoftwareRenderer()) + GPU_SW_Rasterizer::FillVRAM(x, y, width, height, color, interlaced_rendering, active_line_lsb); } GL_INS_FMT("Dirty draw area after: {}", m_vram_dirty_draw_rect); const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)); - g_gpu_device->SetPipeline( - m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(IsInterlacedRenderingEnabled())].get()); + g_gpu_device->SetPipeline(m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(interlaced_rendering)].get()); const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetViewportAndScissor(scaled_bounds); @@ -3308,7 +3091,7 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) // drop precision unless true colour is enabled uniforms.u_fill_color = GPUDevice::RGBA8ToFloat(m_true_color ? 
color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color))); - uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); + uniforms.u_interlaced_displayed_field = active_line_lsb; g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); @@ -3317,11 +3100,13 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { + FlushRender(); + GL_PUSH_FMT("ReadVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height); - if (m_sw_renderer) + if (ShouldDrawWithSoftwareRenderer()) { - m_sw_renderer->Sync(false); + GL_INS("VRAM is already up to date due to SW draws."); GL_POP(); return; } @@ -3377,6 +3162,8 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) { + FlushRender(); + GL_SCOPE_FMT("UpdateVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height); // TODO: Handle wrapped transfers... 
break them up or something @@ -3384,24 +3171,7 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b DebugAssert(bounds.right <= static_cast(VRAM_WIDTH) && bounds.bottom <= static_cast(VRAM_HEIGHT)); AddWrittenRectangle(bounds); - if (m_sw_renderer && m_sw_renderer->IsUsingThread()) - { - const u32 num_words = width * height; - GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); - FillBackendCommandParameters(cmd); - cmd->params.set_mask_while_drawing = set_mask; - cmd->params.check_mask_before_draw = check_mask; - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - std::memcpy(cmd->data, data, sizeof(u16) * num_words); - m_sw_renderer->PushCommand(cmd); - } - else - { - GPUTextureCache::WriteVRAM(x, y, width, height, data, set_mask, check_mask, bounds); - } + GPUTextureCache::WriteVRAM(x, y, width, height, data, set_mask, check_mask, bounds); if (check_mask) { @@ -3494,8 +3264,10 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da RestoreDeviceContext(); } -void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask, bool check_mask) { + FlushRender(); + GL_SCOPE_FMT("CopyVRAM({}x{} @ {},{} => {},{}", width, height, src_x, src_y, dst_x, dst_y); // masking enabled, oversized, or overlapping @@ -3504,7 +3276,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 const bool intersect_with_draw = m_vram_dirty_draw_rect.rintersects(src_bounds); const bool intersect_with_write = m_vram_dirty_write_rect.rintersects(src_bounds); const bool use_shader = - (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + (set_mask || check_mask || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((src_y % VRAM_HEIGHT) + height) > 
VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT) || (!intersect_with_draw && !intersect_with_write); @@ -3513,24 +3285,15 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 if (m_use_texture_cache && !GPUTextureCache::IsRectDrawn(src_bounds)) { GL_INS("Performed in local memory."); - GPUTextureCache::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, m_GPUSTAT.set_mask_while_drawing, - m_GPUSTAT.check_mask_before_draw, src_bounds, dst_bounds); + GPUTextureCache::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, set_mask, check_mask, src_bounds, dst_bounds); UpdateVRAMOnGPU(dst_bounds.left, dst_bounds.top, dst_bounds.width(), dst_bounds.height(), &g_vram[dst_bounds.top * VRAM_WIDTH + dst_bounds.left], VRAM_WIDTH * sizeof(u16), false, false, dst_bounds); return; } - else if (m_sw_renderer) + else if (ShouldDrawWithSoftwareRenderer()) { - GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->src_x = static_cast(src_x); - cmd->src_y = static_cast(src_y); - cmd->dst_x = static_cast(dst_x); - cmd->dst_y = static_cast(dst_y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - m_sw_renderer->PushCommand(cmd); + GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, set_mask, check_mask); } if (use_shader || IsUsingMultisampling()) @@ -3564,20 +3327,19 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 static_cast(m_vram_texture->GetWidth()), static_cast(m_vram_texture->GetHeight()), static_cast(m_resolution_scale), - m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, + BoolToUInt32(set_mask), GetCurrentNormalizedVertexDepth()}; // VRAM read texture should already be bound. 
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetViewportAndScissor(dst_bounds_scaled); - g_gpu_device->SetPipeline( - m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && m_write_mask_as_depth)].get()); + g_gpu_device->SetPipeline(m_vram_copy_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get()); g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); - if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) + if (check_mask && !m_pgxp_depth_buffer) m_current_depth++; return; @@ -3612,7 +3374,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 AddUnclampedDrawnRectangle(dst_bounds); } - if (m_GPUSTAT.check_mask_before_draw) + if (check_mask) { // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; @@ -3625,19 +3387,29 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 m_vram_read_texture->MakeReadyForSampling(); } -void GPU_HW::DispatchRenderCommand() +void GPU_HW::ClearCache() { - const GPURenderCommand rc{m_render_command.bits}; + FlushRender(); + // Force the check below to fail. + m_draw_mode.bits = INVALID_DRAW_MODE_BITS; +} + +void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd) +{ // TODO: avoid all this for vertex loading, only do when the type of draw changes - BatchTextureMode texture_mode = rc.IsTexturingEnabled() ? m_batch.texture_mode : BatchTextureMode::Disabled; + BatchTextureMode texture_mode = cmd->texture_enable ? 
m_batch.texture_mode : BatchTextureMode::Disabled; GPUTextureCache::SourceKey texture_cache_key = m_batch.texture_cache_key; - if (rc.IsTexturingEnabled()) + if (cmd->texture_enable) { // texture page changed - check that the new page doesn't intersect the drawing area - if (m_draw_mode.IsTexturePageChanged() || texture_mode == BatchTextureMode::Disabled) + if (((m_draw_mode.bits ^ cmd->draw_mode.bits) & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK) != 0 || + (cmd->draw_mode.IsUsingPalette() && m_draw_mode.palette_reg.bits != cmd->palette.bits) || + texture_mode == BatchTextureMode::Disabled) + { - m_draw_mode.ClearTexturePageChangedFlag(); + m_draw_mode.mode_reg.bits = cmd->draw_mode.bits; + m_draw_mode.palette_reg.bits = cmd->palette.bits; // start by assuming we can use the TC bool use_texture_cache = m_use_texture_cache; @@ -3713,39 +3485,41 @@ void GPU_HW::DispatchRenderCommand() } } - DebugAssert((rc.IsTexturingEnabled() && (texture_mode == BatchTextureMode::PageTexture && - texture_cache_key.mode == m_draw_mode.mode_reg.texture_mode) || + DebugAssert((cmd->texture_enable && (texture_mode == BatchTextureMode::PageTexture && + texture_cache_key.mode == m_draw_mode.mode_reg.texture_mode) || texture_mode == static_cast( (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit) ? GPUTextureMode::Direct16Bit : m_draw_mode.mode_reg.texture_mode)) || - (!rc.IsTexturingEnabled() && texture_mode == BatchTextureMode::Disabled)); + (!cmd->texture_enable && texture_mode == BatchTextureMode::Disabled)); DebugAssert(!(m_texpage_dirty & TEXPAGE_DIRTY_PAGE_RECT) || texture_mode == BatchTextureMode::PageTexture || - !rc.IsTexturingEnabled()); + !cmd->texture_enable); // has any state changed which requires a new batch? // Reverse blending breaks with mixed transparent and opaque pixels, so we have to do one draw per polygon. // If we have fbfetch, we don't need to draw it in two passes. Test case: Suikoden 2 shadows. + // TODO: make this suck less.. somehow. 
probably arrange the relevant bits in a comparable pattern const GPUTransparencyMode transparency_mode = - rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; - const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; + cmd->transparency_enable ? cmd->draw_mode.transparency_mode : GPUTransparencyMode::Disabled; + const bool dithering_enable = (!m_true_color && cmd->dither_enable); if (!IsFlushed()) { if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || - dithering_enable != m_batch.dithering || m_batch_ubo_data.u_texture_window_bits != m_draw_mode.texture_window || + dithering_enable != m_batch.dithering || m_batch_ubo_data.u_texture_window_bits != cmd->window || + m_batch_ubo_data.u_set_mask_while_drawing != BoolToUInt32(cmd->set_mask_while_drawing) || (texture_mode == BatchTextureMode::PageTexture && m_batch.texture_cache_key != texture_cache_key)) { FlushRender(); } } - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); if (m_batch_index_count == 0) { // transparency mode change - const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + const bool check_mask_before_draw = cmd->check_mask_before_draw; if (transparency_mode != GPUTransparencyMode::Disabled && !m_rov_active && !m_prefer_shader_blend && !NeedsShaderBlending(transparency_mode, texture_mode, check_mask_before_draw)) { @@ -3759,7 +3533,7 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_data.u_dst_alpha_factor = dst_alpha_factor; } - const bool set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + const bool set_mask_while_drawing = cmd->set_mask_while_drawing; if (m_batch.check_mask_before_draw != check_mask_before_draw || m_batch.set_mask_while_drawing != set_mask_while_drawing) { @@ -3769,10 +3543,10 @@ void 
GPU_HW::DispatchRenderCommand() m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(set_mask_while_drawing); } - m_batch.interlacing = IsInterlacedRenderingEnabled(); + m_batch.interlacing = cmd->interlaced_rendering; if (m_batch.interlacing) { - const u32 displayed_field = GetActiveLineLSB(); + const u32 displayed_field = BoolToUInt32(cmd->active_line_lsb); m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field); m_batch_ubo_data.u_interlaced_displayed_field = displayed_field; } @@ -3783,51 +3557,36 @@ void GPU_HW::DispatchRenderCommand() m_batch.dithering = dithering_enable; m_batch.texture_cache_key = texture_cache_key; - if (m_batch_ubo_data.u_texture_window_bits != m_draw_mode.texture_window) + if (m_batch_ubo_data.u_texture_window_bits != cmd->window) { - m_batch_ubo_data.u_texture_window_bits = m_draw_mode.texture_window; - m_texture_window_active = (m_draw_mode.texture_window != GPUTextureWindow{0xFF, 0xFF, 0x00, 0x00}); - GSVector4i::store(&m_batch_ubo_data.u_texture_window[0], - GSVector4i::load32(&m_draw_mode.texture_window).u8to32()); + m_batch_ubo_data.u_texture_window_bits = cmd->window; + m_texture_window_active = (cmd->window != GPUTextureWindow{{0xFF, 0xFF, 0x00, 0x00}}); + GSVector4i::store(&m_batch_ubo_data.u_texture_window[0], GSVector4i::load32(&cmd->window).u8to32()); m_batch_ubo_dirty = true; } if (m_drawing_area_changed) { m_drawing_area_changed = false; - SetClampedDrawingArea(); SetScissor(); if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) { FlushRender(); CopyAndClearDepthBuffer(); - EnsureVertexBufferSpaceForCurrentCommand(); - } - - if (m_sw_renderer) - { - GPUBackendSetDrawingAreaCommand* cmd = m_sw_renderer->NewSetDrawingAreaCommand(); - cmd->new_area = m_drawing_area; - m_sw_renderer->PushCommand(cmd); + EnsureVertexBufferSpaceForCommand(cmd); } } } - LoadVertices(); + if (cmd->check_mask_before_draw) + m_current_depth++; } void GPU_HW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) { - 
// Not done in HW, but need to forward through to SW if using that for readbacks - if (m_sw_renderer) - { - GPUBackendUpdateCLUTCommand* cmd = m_sw_renderer->NewUpdateCLUTCommand(); - FillBackendCommandParameters(cmd); - cmd->reg.bits = reg.bits; - cmd->clut_is_8bit = clut_is_8bit; - m_sw_renderer->PushCommand(cmd); - } + if (ShouldDrawWithSoftwareRenderer()) + GPU_SW_Rasterizer::UpdateCLUT(reg, clut_is_8bit); } void GPU_HW::FlushRender() @@ -3895,7 +3654,13 @@ void GPU_HW::FlushRender() } } -void GPU_HW::UpdateDisplay() +void GPU_HW::DrawingAreaChanged() +{ + m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area); + m_drawing_area_changed = true; +} + +void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { FlushRender(); DeactivateROV(); @@ -3904,7 +3669,7 @@ void GPU_HW::UpdateDisplay() GPUTextureCache::Compact(); - if (g_settings.debugging.show_vram) + if (g_gpu_settings.debugging.show_vram) { if (IsUsingMultisampling()) { @@ -3920,30 +3685,30 @@ void GPU_HW::UpdateDisplay() return; } - const bool interlaced = IsInterlacedDisplayEnabled(); - const u32 interlaced_field = GetInterlacedDisplayField(); - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; - const u32 scaled_vram_offset_x = m_crtc_state.display_vram_left * resolution_scale; - const u32 scaled_vram_offset_y = (m_crtc_state.display_vram_top * resolution_scale) + - ((interlaced && m_GPUSTAT.vertical_resolution) ? interlaced_field : 0); - const u32 scaled_display_width = m_crtc_state.display_vram_width * resolution_scale; - const u32 scaled_display_height = m_crtc_state.display_vram_height * resolution_scale; + const bool interlaced = cmd->interlaced_display_enabled; + const u32 interlaced_field = BoolToUInt32(cmd->interlaced_display_field); + const u32 resolution_scale = cmd->display_24bit ? 
1 : m_resolution_scale; + const u32 scaled_vram_offset_x = cmd->display_vram_left * resolution_scale; + const u32 scaled_vram_offset_y = (cmd->display_vram_top * resolution_scale) + + ((interlaced && cmd->interlaced_display_interleaved) ? interlaced_field : 0); + const u32 scaled_display_width = cmd->display_vram_width * resolution_scale; + const u32 scaled_display_height = cmd->display_vram_height * resolution_scale; const u32 read_height = interlaced ? (scaled_display_height / 2u) : scaled_display_height; - const u32 line_skip = BoolToUInt32(interlaced && m_GPUSTAT.vertical_resolution); + const u32 line_skip = cmd->interlaced_display_interleaved; bool drew_anything = false; // Don't bother grabbing depth if postfx doesn't need it. - GPUTexture* depth_source = (!m_GPUSTAT.display_area_color_depth_24 && m_pgxp_depth_buffer && - PostProcessing::InternalChain.NeedsDepthBuffer()) ? - (m_depth_was_copied ? m_vram_depth_copy_texture.get() : m_vram_depth_texture.get()) : - nullptr; + GPUTexture* depth_source = + (!cmd->display_24bit && m_pgxp_depth_buffer && PostProcessing::InternalChain.NeedsDepthBuffer()) ? + (m_depth_was_copied ? 
m_vram_depth_copy_texture.get() : m_vram_depth_texture.get()) : + nullptr; - if (IsDisplayDisabled()) + if (cmd->display_disabled) { ClearDisplayTexture(); return; } - else if (!m_GPUSTAT.display_area_color_depth_24 && !IsUsingMultisampling() && + else if (!cmd->display_24bit && !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight() && !PostProcessing::InternalChain.IsActive()) @@ -3999,14 +3764,14 @@ void GPU_HW::UpdateDisplay() else { g_gpu_device->SetRenderTarget(m_vram_extract_texture.get()); - g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)].get()); + g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(cmd->display_24bit)].get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); } - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 skip_x = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; + const u32 reinterpret_start_x = cmd->X * resolution_scale; + const u32 skip_x = (cmd->display_vram_left - cmd->X) * resolution_scale; GL_INS_FMT("VRAM extract, depth = {}, 24bpp = {}, skip_x = {}, line_skip = {}", depth_source ? 
"yes" : "no", - m_GPUSTAT.display_area_color_depth_24.GetValue(), skip_x, line_skip); + cmd->display_24bit, skip_x, line_skip); GL_INS_FMT("Source: {},{} => {},{} ({}x{})", reinterpret_start_x, scaled_vram_offset_y, reinterpret_start_x + scaled_display_width, scaled_vram_offset_y + read_height, scaled_display_width, read_height); @@ -4052,7 +3817,7 @@ void GPU_HW::UpdateDisplay() } } - if (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24) + if (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit) { DebugAssert(m_display_texture); DownsampleFramebuffer(); @@ -4265,64 +4030,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, ds_width, ds_height); } -void GPU_HW::DrawRendererStats() -{ - if (ImGui::CollapsingHeader("Renderer Statistics", ImGuiTreeNodeFlags_DefaultOpen)) - { - static const ImVec4 active_color{1.0f, 1.0f, 1.0f, 1.0f}; - static const ImVec4 inactive_color{0.4f, 0.4f, 0.4f, 1.0f}; - - ImGui::Columns(2); - ImGui::SetColumnWidth(0, 200.0f * ImGuiManager::GetGlobalScale()); - - ImGui::TextUnformatted("Resolution Scale:"); - ImGui::NextColumn(); - ImGui::Text("%u (VRAM %ux%u)", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, - VRAM_HEIGHT * m_resolution_scale); - ImGui::NextColumn(); - - ImGui::TextUnformatted("Effective Display Resolution:"); - ImGui::NextColumn(); - ImGui::Text("%ux%u", m_crtc_state.display_vram_width * m_resolution_scale, - m_crtc_state.display_vram_height * m_resolution_scale); - ImGui::NextColumn(); - - ImGui::TextUnformatted("True Color:"); - ImGui::NextColumn(); - ImGui::TextColored(m_true_color ? active_color : inactive_color, m_true_color ? 
"Enabled" : "Disabled"); - ImGui::NextColumn(); - - const bool scaled_dithering = (m_resolution_scale > 1 && g_settings.gpu_scaled_dithering); - ImGui::TextUnformatted("Scaled Dithering:"); - ImGui::NextColumn(); - ImGui::TextColored(scaled_dithering ? active_color : inactive_color, scaled_dithering ? "Enabled" : "Disabled"); - ImGui::NextColumn(); - - ImGui::TextUnformatted("Texture Filtering:"); - ImGui::NextColumn(); - ImGui::TextColored((m_texture_filtering != GPUTextureFilter::Nearest) ? active_color : inactive_color, "%s", - Settings::GetTextureFilterDisplayName(m_texture_filtering)); - ImGui::NextColumn(); - - ImGui::TextUnformatted("PGXP:"); - ImGui::NextColumn(); - ImGui::TextColored(g_settings.gpu_pgxp_enable ? active_color : inactive_color, "Geom"); - ImGui::SameLine(); - ImGui::TextColored((g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling) ? active_color : inactive_color, - "Cull"); - ImGui::SameLine(); - ImGui::TextColored( - (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_texture_correction) ? active_color : inactive_color, "Tex"); - ImGui::SameLine(); - ImGui::TextColored((g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_vertex_cache) ? active_color : inactive_color, - "Cache"); - ImGui::NextColumn(); - - ImGui::Columns(1); - } -} - -std::unique_ptr GPU::CreateHardwareRenderer() +std::unique_ptr GPUBackend::CreateHardwareBackend() { return std::make_unique(); } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index f40ecb7cd..0b2772363 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -3,7 +3,7 @@ #pragma once -#include "gpu.h" +#include "gpu_backend.h" #include "gpu_hw_texture_cache.h" #include "util/gpu_device.h" @@ -21,7 +21,9 @@ class GPU_SW_Backend; struct GPUBackendCommand; struct GPUBackendDrawCommand; -class GPU_HW final : public GPU +// TODO: Move to cpp +// TODO: Rename to GPUHWBackend, preserved to avoid conflicts. 
+class GPU_HW final : public GPUBackend { public: enum class BatchRenderMode : u8 @@ -63,22 +65,41 @@ public: GPU_HW(); ~GPU_HW() override; - const Threading::Thread* GetSWThread() const override; - bool IsHardwareRenderer() const override; + bool Initialize(bool upload_vram, Error* error) override; - bool Initialize(Error* error) override; - void Reset(bool clear_vram) override; - bool DoState(StateWrapper& sw, bool update_display) override; - bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) override; + u32 GetResolutionScale() const override; void RestoreDeviceContext() override; +protected: void UpdateSettings(const Settings& old_settings) override; - u32 GetResolutionScale() const override; void UpdateResolutionScale() override; - void UpdateDisplay() override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 active_line_lsb) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask, + bool check_mask) override; + void ClearCache() override; + void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; + void OnBufferSwapped() override; + + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; + void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; + void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + + void FlushRender() override; + void DrawingAreaChanged() override; + void ClearVRAM() override; + + void LoadState(const GPUBackendLoadStateCommand* cmd) override; + + bool AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) override; + void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) 
override; + + void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) override; private: enum : u32 @@ -87,6 +108,7 @@ private: MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) * (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u), NUM_TEXTURE_MODES = static_cast(BatchTextureMode::MaxCount), + INVALID_DRAW_MODE_BITS = 0xFFFFFFFFu, }; enum : u8 { @@ -165,8 +187,6 @@ private: bool CompileResolutionDependentPipelines(Error* error); bool CompileDownsamplePipelines(Error* error); - void LoadVertices(); - void PrintSettingsToLog(); void CheckSettings(); @@ -185,8 +205,10 @@ private: u32 CalculateResolutionScale() const; GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; + bool ShouldDrawWithSoftwareRenderer() const; + bool IsUsingMultisampling() const; - bool IsUsingDownsampling() const; + bool IsUsingDownsampling(const GPUBackendUpdateDisplayCommand* cmd) const; void SetFullVRAMDirtyRectangle(); void ClearVRAMDirtyRectangle(); @@ -196,12 +218,15 @@ private: void AddUnclampedDrawnRectangle(const GSVector4i rect); void SetTexPageChangedOnOverlap(const GSVector4i update_rect); - void CheckForTexPageOverlap(GSVector4i uv_rect); + void CheckForTexPageOverlap(const GPUBackendDrawCommand* cmd, GSVector4i uv_rect); bool ShouldCheckForTexPageOverlap() const; bool IsFlushed() const; void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices); - void EnsureVertexBufferSpaceForCurrentCommand(); + void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd); + void PrepareDraw(const GPUBackendDrawCommand* cmd); + void FinishPolygonDraw(const GPUBackendDrawCommand* cmd, std::array& vertices, u32 num_vertices, + bool is_precise, bool is_3d); void ResetBatchVertexDepth(); /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. 
@@ -213,20 +238,6 @@ private: /// Returns true if the draw is going to use shader blending/framebuffer fetch. bool NeedsShaderBlending(GPUTransparencyMode transparency, BatchTextureMode texture, bool check_mask) const; - void FillBackendCommandParameters(GPUBackendCommand* cmd) const; - void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; - void UpdateSoftwareRenderer(bool copy_vram_from_hw); - - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void DispatchRenderCommand() override; - void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; - void FlushRender() override; - void DrawRendererStats() override; - void OnBufferSwapped() override; - void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, bool check_mask, const GSVector4i bounds); bool BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); @@ -235,17 +246,17 @@ private: void DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth); /// Handles quads with flipped texture coordinate directions. - void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); + void HandleFlippedQuadTextureCoordinates(const GPUBackendDrawCommand* cmd, BatchVertex* vertices); bool IsPossibleSpritePolygon(const BatchVertex* vertices) const; bool ExpandLineTriangles(BatchVertex* vertices); /// Computes polygon U/V boundaries, and for overlap with the current texture page. - void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices); + void ComputePolygonUVLimits(const GPUBackendDrawCommand* cmd, BatchVertex* vertices, u32 num_vertices); /// Sets the depth test flag for PGXP depth buffering. 
- void SetBatchDepthBuffer(bool enabled); - void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices); - void SetBatchSpriteMode(bool enabled); + void SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled); + void CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices); + void SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled); void UpdateDownsamplingLevels(); @@ -263,8 +274,6 @@ private: std::unique_ptr m_vram_upload_buffer; std::unique_ptr m_vram_write_texture; - std::unique_ptr m_sw_renderer; - BatchVertex* m_batch_vertex_ptr = nullptr; u16* m_batch_index_ptr = nullptr; u32 m_batch_base_vertex = 0; @@ -306,18 +315,32 @@ private: u8 m_texpage_dirty = 0; bool m_batch_ubo_dirty = true; + bool m_drawing_area_changed = true; BatchConfig m_batch; // Changed state BatchUBOData m_batch_ubo_data = {}; // Bounding box of VRAM area that the GPU has drawn into. + GSVector4i m_clamped_drawing_area = {}; GSVector4i m_vram_dirty_draw_rect = INVALID_RECT; GSVector4i m_vram_dirty_write_rect = INVALID_RECT; // TODO: Don't use in TC mode, should be kept at zero. GSVector4i m_current_uv_rect = INVALID_RECT; GSVector4i m_current_draw_rect = INVALID_RECT; alignas(8) s32 m_current_texture_page_offset[2] = {}; + union + { + struct + { + // NOTE: Only the texture-related bits should be used here, the others are not validated. 
+ GPUDrawModeReg mode_reg; + GPUTexturePaletteReg palette_reg; + }; + + u32 bits = INVALID_DRAW_MODE_BITS; + } m_draw_mode = {}; + std::unique_ptr m_wireframe_pipeline; // [wrapped][interlaced] diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index f70552569..cba1b7985 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -5,11 +5,14 @@ #include "gpu_hw.h" #include "gpu_hw_shadergen.h" #include "gpu_sw_rasterizer.h" +#include "gpu_thread.h" #include "host.h" +#include "imgui_overlays.h" #include "settings.h" #include "system.h" #include "util/gpu_device.h" +#include "util/imgui_fullscreen.h" #include "util/imgui_manager.h" #include "util/state_wrapper.h" @@ -50,6 +53,9 @@ static constexpr const GSVector4i& INVALID_RECT = GPU_HW::INVALID_RECT; static constexpr const GPUTexture::Format REPLACEMENT_TEXTURE_FORMAT = GPUTexture::Format::RGBA8; static constexpr const char LOCAL_CONFIG_FILENAME[] = "config.yaml"; +static constexpr u32 STATE_PALETTE_RECORD_SIZE = + sizeof(GSVector4i) + sizeof(SourceKey) + sizeof(PaletteRecordFlags) + sizeof(HashType) + sizeof(u16) * MAX_CLUT_SIZE; + // Has to be public because it's referenced in Source. struct HashCacheEntry { @@ -518,6 +524,7 @@ struct GPUTextureCacheState GPUTexture::Format hash_cache_texture_format = GPUTexture::Format::Unknown; HashCache hash_cache; + GPU_HW* hw_backend = nullptr; // TODO:FIXME: remove me /// List of candidates for purging when the hash cache gets too large. std::vector> hash_cache_purge_list; @@ -529,7 +536,6 @@ struct GPUTextureCacheState std::unique_ptr replacement_draw_pipeline; // copies alpha as-is std::unique_ptr replacement_semitransparent_draw_pipeline; // inverts alpha (i.e. semitransparent) - std::string game_id; VRAMReplacementMap vram_replacements; // TODO: Combine these into one map? 
@@ -555,26 +561,28 @@ ALIGN_TO_CACHE_LINE GPUTextureCacheState s_state; bool GPUTextureCache::ShouldTrackVRAMWrites() { - if (!g_settings.gpu_texture_cache) + if (!g_gpu_settings.gpu_texture_cache) return false; #ifdef ALWAYS_TRACK_VRAM_WRITES return true; #else return (IsDumpingVRAMWriteTextures() || - (g_settings.texture_replacements.enable_texture_replacements && HasVRAMWriteTextureReplacements())); + (g_gpu_settings.texture_replacements.enable_texture_replacements && HasVRAMWriteTextureReplacements())); #endif } bool GPUTextureCache::IsDumpingVRAMWriteTextures() { - return (g_settings.texture_replacements.dump_textures && !s_state.config.dump_texture_pages); + return (g_gpu_settings.texture_replacements.dump_textures && !s_state.config.dump_texture_pages); } -bool GPUTextureCache::Initialize() +bool GPUTextureCache::Initialize(GPU_HW* backend) { + s_state.hw_backend = backend; + SetHashCacheTextureFormat(); - LoadLocalConfiguration(false, false); + ReloadTextureReplacements(false); UpdateVRAMTrackingState(); if (!CompilePipelines()) return false; @@ -588,7 +596,7 @@ void GPUTextureCache::UpdateSettings(bool use_texture_cache, const Settings& old { UpdateVRAMTrackingState(); - if (g_settings.texture_replacements.enable_texture_replacements != + if (g_gpu_settings.texture_replacements.enable_texture_replacements != old_settings.texture_replacements.enable_texture_replacements) { Invalidate(); @@ -602,9 +610,9 @@ void GPUTextureCache::UpdateSettings(bool use_texture_cache, const Settings& old // Reload textures if configuration changes. 
const bool old_replacement_scale_linear_filter = s_state.config.replacement_scale_linear_filter; if (LoadLocalConfiguration(false, false) || - g_settings.texture_replacements.enable_texture_replacements != + g_gpu_settings.texture_replacements.enable_texture_replacements != old_settings.texture_replacements.enable_texture_replacements || - g_settings.texture_replacements.enable_vram_write_replacements != + g_gpu_settings.texture_replacements.enable_vram_write_replacements != old_settings.texture_replacements.enable_vram_write_replacements) { if (use_texture_cache) @@ -620,6 +628,37 @@ void GPUTextureCache::UpdateSettings(bool use_texture_cache, const Settings& old } } +bool GPUTextureCache::GetStateSize(StateWrapper& sw, u32* size) +{ + if (sw.GetVersion() < 73) + { + *size = 0; + return true; + } + + const size_t start = sw.GetPosition(); + if (!sw.DoMarker("GPUTextureCache")) [[unlikely]] + return false; + + u32 num_vram_writes = 0; + sw.Do(&num_vram_writes); + + for (u32 i = 0; i < num_vram_writes; i++) + { + sw.SkipBytes(sizeof(GSVector4i) * 2 + sizeof(HashType)); + + u32 num_palette_records = 0; + sw.Do(&num_palette_records); + sw.SkipBytes(num_palette_records * STATE_PALETTE_RECORD_SIZE); + } + + if (sw.HasError()) [[unlikely]] + return false; + + *size = static_cast(sw.GetPosition() - start); + return true; +} + bool GPUTextureCache::DoState(StateWrapper& sw, bool skip) { if (sw.GetVersion() < 73) @@ -668,7 +707,7 @@ bool GPUTextureCache::DoState(StateWrapper& sw, bool skip) sw.Do(&num_palette_records); // Skip palette records if we're not dumping now. 
- if (g_settings.texture_replacements.dump_textures) + if (g_gpu_settings.texture_replacements.dump_textures) { vrw->palette_records.reserve(num_palette_records); for (u32 j = 0; j < num_palette_records; j++) @@ -760,6 +799,7 @@ void GPUTextureCache::Shutdown() s_state.hash_cache_purge_list = {}; s_state.temp_vram_write_list = {}; s_state.track_vram_writes = false; + s_state.hw_backend = nullptr; for (auto it = s_state.gpu_replacement_image_cache.begin(); it != s_state.gpu_replacement_image_cache.end();) { @@ -773,7 +813,6 @@ void GPUTextureCache::Shutdown() s_state.vram_write_texture_replacements.clear(); s_state.texture_page_texture_replacements.clear(); s_state.dumped_textures.clear(); - s_state.game_id = {}; } void GPUTextureCache::SetHashCacheTextureFormat() @@ -791,7 +830,7 @@ void GPUTextureCache::SetHashCacheTextureFormat() bool GPUTextureCache::CompilePipelines() { - if (!g_settings.texture_replacements.enable_texture_replacements) + if (!g_gpu_settings.texture_replacements.enable_texture_replacements) return true; GPUPipeline::GraphicsConfig plconfig = {}; @@ -1390,7 +1429,7 @@ const GPUTextureCache::Source* GPUTextureCache::ReturnSource(Source* source, con source->from_hash_cache->last_used_frame = System::GetFrameNumber(); // TODO: Cache var. 
- if (g_settings.texture_replacements.dump_textures) + if (g_gpu_settings.texture_replacements.dump_textures) { source->active_uv_rect = source->active_uv_rect.runion(uv_rect); source->palette_record_flags |= flags; @@ -1548,7 +1587,7 @@ void GPUTextureCache::DestroySource(Source* src, bool remove_from_hash_cache) { GL_INS_FMT("Invalidate source {}", SourceToString(src)); - if (g_settings.texture_replacements.dump_textures && !src->active_uv_rect.eq(INVALID_RECT)) + if (g_gpu_settings.texture_replacements.dump_textures && !src->active_uv_rect.eq(INVALID_RECT)) { if (!s_state.config.dump_texture_pages) { @@ -1950,7 +1989,7 @@ void GPUTextureCache::RemoveVRAMWrite(VRAMWrite* entry) void GPUTextureCache::DumpTexturesFromVRAMWrite(VRAMWrite* entry) { - if (g_settings.texture_replacements.dump_textures && !s_state.config.dump_texture_pages) + if (g_gpu_settings.texture_replacements.dump_textures && !s_state.config.dump_texture_pages) { for (const VRAMWrite::PaletteRecord& prec : entry->palette_records) { @@ -2200,7 +2239,7 @@ GPUTextureCache::HashCacheEntry* GPUTextureCache::LookupHashCache(SourceKey key, DecodeTexture(key.page, key.palette, key.mode, entry.texture.get()); - if (g_settings.texture_replacements.enable_texture_replacements) + if (g_gpu_settings.texture_replacements.enable_texture_replacements) ApplyTextureReplacements(key, tex_hash, pal_hash, &entry); s_state.hash_cache_memory_usage += entry.texture->GetVRAMUsage(); @@ -2603,12 +2642,8 @@ size_t GPUTextureCache::DumpedTextureKeyHash::operator()(const DumpedTextureKey& return hash; } -void GPUTextureCache::SetGameID(std::string game_id) +void GPUTextureCache::GameSerialChanged() { - if (s_state.game_id == game_id) - return; - - s_state.game_id = game_id; ReloadTextureReplacements(false); } @@ -2625,7 +2660,8 @@ GPUTexture* GPUTextureCache::GetVRAMReplacement(u32 width, u32 height, const voi bool GPUTextureCache::ShouldDumpVRAMWrite(u32 width, u32 height) { - return 
(g_settings.texture_replacements.dump_vram_writes && width >= s_state.config.vram_write_dump_width_threshold && + return (g_gpu_settings.texture_replacements.dump_vram_writes && + width >= s_state.config.vram_write_dump_width_threshold && height >= s_state.config.vram_write_dump_height_threshold); } @@ -2716,7 +2752,7 @@ void GPUTextureCache::DumpTexture(TextureReplacementType type, u32 offset_x, u32 }; // skip if dumped already - if (!g_settings.texture_replacements.dump_replaced_textures) + if (!g_gpu_settings.texture_replacements.dump_replaced_textures) { const TextureReplacementMap& map = (type == TextureReplacementType::TextureFromPage) ? s_state.texture_page_texture_replacements : @@ -2942,7 +2978,7 @@ bool GPUTextureCache::HasValidReplacementExtension(const std::string_view path) void GPUTextureCache::FindTextureReplacements(bool load_vram_write_replacements, bool load_texture_replacements) { - if (s_state.game_id.empty()) + if (GPUThread::GetGameSerial().empty()) return; FileSystem::FindResultsArray files; @@ -3015,23 +3051,23 @@ void GPUTextureCache::FindTextureReplacements(bool load_vram_write_replacements, } } - if (g_settings.texture_replacements.enable_texture_replacements) + if (g_gpu_settings.texture_replacements.enable_texture_replacements) { INFO_LOG("Found {} replacement upload textures for '{}'", s_state.vram_write_texture_replacements.size(), - s_state.game_id); + GPUThread::GetGameSerial()); INFO_LOG("Found {} replacement page textures for '{}'", s_state.texture_page_texture_replacements.size(), - s_state.game_id); + GPUThread::GetGameSerial()); } - if (g_settings.texture_replacements.enable_vram_write_replacements) - INFO_LOG("Found {} replacement VRAM for '{}'", s_state.vram_replacements.size(), s_state.game_id); + if (g_gpu_settings.texture_replacements.enable_vram_write_replacements) + INFO_LOG("Found {} replacement VRAM for '{}'", s_state.vram_replacements.size(), GPUThread::GetGameSerial()); } void 
GPUTextureCache::LoadTextureReplacementAliases(const ryml::ConstNodeRef& root, bool load_vram_write_replacement_aliases, bool load_texture_replacement_aliases) { - if (s_state.game_id.empty()) + if (GPUThread::GetGameSerial().empty()) return; const std::string source_dir = GetTextureReplacementDirectory(); @@ -3107,17 +3143,19 @@ void GPUTextureCache::LoadTextureReplacementAliases(const ryml::ConstNodeRef& ro } } - if (g_settings.texture_replacements.enable_texture_replacements) + if (g_gpu_settings.texture_replacements.enable_texture_replacements) { INFO_LOG("Found {} replacement upload textures after applying aliases for '{}'", - s_state.vram_write_texture_replacements.size(), s_state.game_id); + s_state.vram_write_texture_replacements.size(), GPUThread::GetGameSerial()); INFO_LOG("Found {} replacement page textures after applying aliases for '{}'", - s_state.texture_page_texture_replacements.size(), s_state.game_id); + s_state.texture_page_texture_replacements.size(), GPUThread::GetGameSerial()); } - if (g_settings.texture_replacements.enable_vram_write_replacements) + if (g_gpu_settings.texture_replacements.enable_vram_write_replacements) + { INFO_LOG("Found {} replacement VRAM after applying aliases for '{}'", s_state.vram_replacements.size(), - s_state.game_id); + GPUThread::GetGameSerial()); + } } const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetTextureReplacementImage(const std::string& path) @@ -3241,8 +3279,8 @@ void GPUTextureCache::PreloadReplacementTextures() #define UPDATE_PROGRESS() \ if (last_update_time.GetTimeSeconds() >= UPDATE_INTERVAL) \ { \ - Host::DisplayLoadingScreen("Preloading replacement textures...", 0, static_cast(total_textures), \ - static_cast(num_textures_loaded)); \ + ImGuiFullscreen::RenderLoadingScreen(ImGuiManager::LOGO_IMAGE_NAME, "Preloading replacement textures...", 0, \ + static_cast(total_textures), static_cast(num_textures_loaded)); \ last_update_time.Reset(); \ } @@ -3269,10 +3307,10 @@ void 
GPUTextureCache::PreloadReplacementTextures() bool GPUTextureCache::EnsureGameDirectoryExists() { - if (s_state.game_id.empty()) + if (GPUThread::GetGameSerial().empty()) return false; - const std::string game_directory = Path::Combine(EmuFolders::Textures, s_state.game_id); + const std::string game_directory = Path::Combine(EmuFolders::Textures, GPUThread::GetGameSerial()); if (FileSystem::DirectoryExists(game_directory.c_str())) return true; @@ -3309,12 +3347,13 @@ bool GPUTextureCache::EnsureGameDirectoryExists() std::string GPUTextureCache::GetTextureReplacementDirectory() { - std::string dir = Path::Combine( - EmuFolders::Textures, SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "replacements", s_state.game_id)); + std::string dir = + Path::Combine(EmuFolders::Textures, + SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "replacements", GPUThread::GetGameSerial())); if (!FileSystem::DirectoryExists(dir.c_str())) { // Check for the old directory structure without a replacements subdirectory. 
- std::string altdir = Path::Combine(EmuFolders::Textures, s_state.game_id); + std::string altdir = Path::Combine(EmuFolders::Textures, GPUThread::GetGameSerial()); if (FileSystem::DirectoryExists(altdir.c_str())) WARNING_LOG("Using deprecated texture replacement directory {}", altdir); dir = std::move(altdir); @@ -3326,7 +3365,7 @@ std::string GPUTextureCache::GetTextureReplacementDirectory() std::string GPUTextureCache::GetTextureDumpDirectory() { return Path::Combine(EmuFolders::Textures, - SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "dumps", s_state.game_id)); + SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "dumps", GPUThread::GetGameSerial())); } GPUTextureCache::VRAMReplacementName GPUTextureCache::GetVRAMWriteHash(u32 width, u32 height, const void* pixels) @@ -3354,14 +3393,15 @@ bool GPUTextureCache::LoadLocalConfiguration(bool load_vram_write_replacement_al const Settings::TextureReplacementSettings::Configuration old_config = s_state.config; // load settings from ini - s_state.config = g_settings.texture_replacements.config; + s_state.config = g_gpu_settings.texture_replacements.config; - if (s_state.game_id.empty()) + const std::string& game_serial = GPUThread::GetGameSerial(); + if (game_serial.empty()) return (s_state.config != old_config); const std::optional ini_data = FileSystem::ReadFileToString( Path::Combine(EmuFolders::Textures, - SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "{}", s_state.game_id, LOCAL_CONFIG_FILENAME)) + SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "{}", game_serial, LOCAL_CONFIG_FILENAME)) .c_str()); if (!ini_data.has_value() || ini_data->empty()) return (s_state.config != old_config); @@ -3430,15 +3470,15 @@ void GPUTextureCache::ReloadTextureReplacements(bool show_info) s_state.vram_write_texture_replacements.clear(); s_state.texture_page_texture_replacements.clear(); - const bool load_vram_write_replacements = (g_settings.texture_replacements.enable_vram_write_replacements); + const bool 
load_vram_write_replacements = (g_gpu_settings.texture_replacements.enable_vram_write_replacements); const bool load_texture_replacements = - (g_settings.gpu_texture_cache && g_settings.texture_replacements.enable_texture_replacements); + (g_gpu_settings.gpu_texture_cache && g_gpu_settings.texture_replacements.enable_texture_replacements); if (load_vram_write_replacements || load_texture_replacements) FindTextureReplacements(load_vram_write_replacements, load_texture_replacements); LoadLocalConfiguration(load_vram_write_replacements, load_texture_replacements); - if (g_settings.texture_replacements.preload_textures) + if (g_gpu_settings.texture_replacements.preload_textures) PreloadReplacementTextures(); PurgeUnreferencedTexturesFromCache(); @@ -3596,5 +3636,5 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, g_gpu_device->RecycleTexture(std::move(entry->texture)); entry->texture = std::move(replacement_tex); - g_gpu->RestoreDeviceContext(); + s_state.hw_backend->RestoreDeviceContext(); } \ No newline at end of file diff --git a/src/core/gpu_hw_texture_cache.h b/src/core/gpu_hw_texture_cache.h index 07482da7b..9b72cf7b2 100644 --- a/src/core/gpu_hw_texture_cache.h +++ b/src/core/gpu_hw_texture_cache.h @@ -10,6 +10,7 @@ class GPUTexture; class StateWrapper; struct Settings; +class GPU_HW; ////////////////////////////////////////////////////////////////////////// // Texture Cache @@ -102,9 +103,12 @@ struct Source TListNode hash_cache_ref; }; -bool Initialize(); +bool Initialize(GPU_HW* backend); void UpdateSettings(bool use_texture_cache, const Settings& old_settings); + +bool GetStateSize(StateWrapper& sw, u32* size); bool DoState(StateWrapper& sw, bool skip); + void Shutdown(); void Invalidate(); @@ -124,7 +128,7 @@ bool AreSourcePagesDrawn(SourceKey key, const GSVector4i rect); void Compact(); -void SetGameID(std::string game_id); +void GameSerialChanged(); void ReloadTextureReplacements(bool show_info); // VRAM Write 
Replacements diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 8aa99c1dc..b22cf5698 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "gpu_sw.h" -#include "gpu_hw_texture_cache.h" +#include "gpu.h" +#include "gpu_sw_rasterizer.h" #include "settings.h" -#include "system.h" +#include "system_private.h" #include "util/gpu_device.h" #include "util/state_wrapper.h" #include "common/align.h" #include "common/assert.h" -#include "common/gsvector.h" -#include "common/gsvector_formatter.h" +#include "common/intrin.h" #include "common/log.h" #include @@ -21,25 +21,16 @@ LOG_CHANNEL(GPU); GPU_SW::GPU_SW() = default; -GPU_SW::~GPU_SW() +GPU_SW::~GPU_SW() = default; + +u32 GPU_SW::GetResolutionScale() const { - g_gpu_device->RecycleTexture(std::move(m_upload_texture)); - m_backend.Shutdown(); + return 1u; } -const Threading::Thread* GPU_SW::GetSWThread() const +bool GPU_SW::Initialize(bool upload_vram, Error* error) { - return m_backend.GetThread(); -} - -bool GPU_SW::IsHardwareRenderer() const -{ - return false; -} - -bool GPU_SW::Initialize(Error* error) -{ - if (!GPU::Initialize(error) || !m_backend.Initialize(g_settings.gpu_use_thread)) + if (!GPUBackend::Initialize(upload_vram, error)) return false; static constexpr const std::array formats_for_16bit = {GPUTexture::Format::RGB5A1, GPUTexture::Format::A1BGR5, @@ -56,41 +47,133 @@ bool GPU_SW::Initialize(Error* error) // RGBA8 will always be supported, hence we'll find one. 
INFO_LOG("Using {} format for 16-bit display", GPUTexture::GetFormatName(m_16bit_display_format)); Assert(m_16bit_display_format != GPUTexture::Format::Unknown); + + // if we're using "new" vram, clear it out here + if (!upload_vram) + std::memset(g_vram, 0, sizeof(g_vram)); + return true; } -bool GPU_SW::DoState(StateWrapper& sw, bool update_display) +void GPU_SW::ClearVRAM() { - // need to ensure the worker thread is done - m_backend.Sync(true); - - // ignore the host texture for software mode, since we want to save vram here - if (!GPU::DoState(sw, update_display)) - return false; - - // need to still call the TC, to toss any data in the state - return GPUTextureCache::DoState(sw, true); + std::memset(g_vram, 0, sizeof(g_vram)); + std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); } -bool GPU_SW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) +void GPU_SW::UpdateResolutionScale() { - m_backend.Sync(true); - sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - return GPU::DoMemoryState(sw, mss, update_display); } -void GPU_SW::Reset(bool clear_vram) +void GPU_SW::LoadState(const GPUBackendLoadStateCommand* cmd) { - GPU::Reset(clear_vram); - - m_backend.Reset(); + std::memcpy(g_vram, cmd->vram_data, sizeof(g_vram)); + std::memcpy(g_gpu_clut, cmd->clut_data, sizeof(g_gpu_clut)); } -void GPU_SW::UpdateSettings(const Settings& old_settings) +bool GPU_SW::AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) +{ + mss.gpu_state_data.resize(sizeof(g_vram) + sizeof(g_gpu_clut)); + return true; +} + +void GPU_SW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) +{ + sw.DoBytes(g_vram, sizeof(g_vram)); + sw.DoBytes(g_gpu_clut, sizeof(g_gpu_clut)); + DebugAssert(!sw.HasError()); +} + +void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ +} + +void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 active_line_lsb) +{ + GPU_SW_Rasterizer::FillVRAM(x, y, 
width, height, color, interlaced_rendering, active_line_lsb); +} + +void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) +{ + GPU_SW_Rasterizer::WriteVRAM(x, y, width, height, data, set_mask, check_mask); +} + +void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask, bool check_mask) +{ + GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, set_mask, check_mask); +} + +void GPU_SW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) +{ + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + cmd->shading_enable, cmd->texture_enable, cmd->raw_texture_enable, cmd->transparency_enable); + + DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); + if (cmd->num_vertices > 3) + DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); +} + +void GPU_SW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) +{ + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + cmd->shading_enable, cmd->texture_enable, cmd->raw_texture_enable, cmd->transparency_enable); + + // Need to cut out the irrelevant bits. + // TODO: In _theory_ we could use the fixed-point parts here. 
+ GPUBackendDrawPolygonCommand::Vertex vertices[4]; + for (u32 i = 0; i < cmd->num_vertices; i++) + { + const GPUBackendDrawPrecisePolygonCommand::Vertex& src = cmd->vertices[i]; + vertices[i] = GPUBackendDrawPolygonCommand::Vertex{ + .x = src.native_x, .y = src.native_y, .color = src.color, .texcoord = src.texcoord}; + } + + DrawFunction(cmd, &vertices[0], &vertices[1], &vertices[2]); + if (cmd->num_vertices > 3) + DrawFunction(cmd, &vertices[2], &vertices[1], &vertices[3]); +} + +void GPU_SW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) +{ + const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawRectangleFunction(cmd->texture_enable, cmd->raw_texture_enable, cmd->transparency_enable); + + DrawFunction(cmd); +} + +void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawLineFunction(cmd->shading_enable, cmd->transparency_enable); + + for (u16 i = 0; i < cmd->num_vertices; i += 2) + DrawFunction(cmd, &cmd->vertices[i], &cmd->vertices[i + 1]); +} + +void GPU_SW::DrawingAreaChanged() +{ + // GPU_SW_Rasterizer::g_drawing_area set by base class. 
+} + +void GPU_SW::ClearCache() +{ +} + +void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + GPU_SW_Rasterizer::UpdateCLUT(reg, clut_is_8bit); +} + +void GPU_SW::OnBufferSwapped() +{ +} + +void GPU_SW::FlushRender() +{ +} + +void GPU_SW::RestoreDeviceContext() { - GPU::UpdateSettings(old_settings); - if (g_settings.gpu_use_thread != old_settings.gpu_use_thread) - m_backend.SetThreadEnabled(g_settings.gpu_use_thread); } GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format) @@ -271,32 +354,28 @@ bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u3 } } -void GPU_SW::UpdateDisplay() +void GPU_SW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { - // fill display texture - m_backend.Sync(true); - if (!g_settings.debugging.show_vram) { - if (IsDisplayDisabled()) + if (cmd->display_disabled) { ClearDisplayTexture(); return; } - const bool is_24bit = m_GPUSTAT.display_area_color_depth_24; - const bool interlaced = IsInterlacedDisplayEnabled(); - const u32 field = GetInterlacedDisplayField(); - const u32 vram_offset_x = is_24bit ? m_crtc_state.regs.X : m_crtc_state.display_vram_left; - const u32 vram_offset_y = - m_crtc_state.display_vram_top + ((interlaced && m_GPUSTAT.vertical_resolution) ? field : 0); - const u32 skip_x = is_24bit ? (m_crtc_state.display_vram_left - m_crtc_state.regs.X) : 0; - const u32 read_width = m_crtc_state.display_vram_width; - const u32 read_height = interlaced ? (m_crtc_state.display_vram_height / 2) : m_crtc_state.display_vram_height; + const bool is_24bit = cmd->display_24bit; + const bool interlaced = cmd->interlaced_display_enabled; + const u32 field = BoolToUInt32(cmd->interlaced_display_field); + const u32 vram_offset_x = is_24bit ? cmd->X : cmd->display_vram_left; + const u32 vram_offset_y = cmd->display_vram_top + ((interlaced && cmd->interlaced_display_interleaved) ? field : 0); + const u32 skip_x = is_24bit ? 
(cmd->display_vram_left - cmd->X) : 0; + const u32 read_width = cmd->display_vram_width; + const u32 read_height = interlaced ? (cmd->display_vram_height / 2) : cmd->display_vram_height; - if (IsInterlacedDisplayEnabled()) + if (cmd->interlaced_display_enabled) { - const u32 line_skip = m_GPUSTAT.vertical_resolution; + const u32 line_skip = cmd->interlaced_display_interleaved; if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, line_skip, is_24bit)) { SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height); @@ -328,347 +407,7 @@ void GPU_SW::UpdateDisplay() } } -void GPU_SW::FillBackendCommandParameters(GPUBackendCommand* cmd) const -{ - cmd->params.bits = 0; - cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; - cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; - cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; - cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled(); -} - -void GPU_SW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const -{ - FillBackendCommandParameters(cmd); - cmd->rc.bits = rc.bits; - cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; - cmd->draw_mode.dither_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; - cmd->palette.bits = m_draw_mode.palette_reg.bits; - cmd->window = m_draw_mode.texture_window; -} - -void GPU_SW::DispatchRenderCommand() -{ - if (m_drawing_area_changed) - { - GPUBackendSetDrawingAreaCommand* cmd = m_backend.NewSetDrawingAreaCommand(); - cmd->new_area = m_drawing_area; - GSVector4i::store(cmd->new_clamped_area, m_clamped_drawing_area); - m_backend.PushCommand(cmd); - m_drawing_area_changed = false; - } - - const GPURenderCommand rc{m_render_command.bits}; - - switch (rc.primitive) - { - case GPUPrimitive::Polygon: - { - const u32 num_vertices = rc.quad_polygon ? 
4 : 3; - GPUBackendDrawPolygonCommand* cmd = m_backend.NewDrawPolygonCommand(num_vertices); - FillDrawCommand(cmd, rc); - - std::array positions; - const u32 first_color = rc.color_for_first_vertex; - const bool shaded = rc.shading_enable; - const bool textured = rc.texture_enable; - for (u32 i = 0; i < num_vertices; i++) - { - GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; - vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - const u64 maddr_and_pos = m_fifo.Pop(); - const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; - vert->x = m_drawing_offset.x + vp.x; - vert->y = m_drawing_offset.y + vp.y; - vert->texcoord = textured ? Truncate16(FifoPop()) : 0; - positions[i] = GSVector2i::load(&vert->x); - } - - // Cull polygons which are too large. - const GSVector2i min_pos_12 = positions[1].min_s32(positions[2]); - const GSVector2i max_pos_12 = positions[1].max_s32(positions[2]); - const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min_s32(positions[0])) - .upl64(GSVector4i(max_pos_12.max_s32(positions[0]))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const bool first_tri_culled = - (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || - !m_clamped_drawing_area.rintersects(draw_rect_012)); - if (first_tri_culled) - { - DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, - cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); - - if (!rc.quad_polygon) - return; - } - else - { - AddDrawTriangleTicks(positions[0], positions[1], positions[2], rc.shading_enable, rc.texture_enable, - rc.transparency_enable); - } - - // quads - if (rc.quad_polygon) - { - const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(positions[3])) - .upl64(GSVector4i(max_pos_12.max_s32(positions[3]))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - - // Cull polygons which are too large. 
- const bool second_tri_culled = - (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - !m_clamped_drawing_area.rintersects(draw_rect_123)); - if (second_tri_culled) - { - DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, - cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y); - - if (first_tri_culled) - return; - } - else - { - AddDrawTriangleTicks(positions[2], positions[1], positions[3], rc.shading_enable, rc.texture_enable, - rc.transparency_enable); - } - } - - m_backend.PushCommand(cmd); - } - break; - - case GPUPrimitive::Rectangle: - { - GPUBackendDrawRectangleCommand* cmd = m_backend.NewDrawRectangleCommand(); - FillDrawCommand(cmd, rc); - cmd->color = rc.color_for_first_vertex; - - const GPUVertexPosition vp{FifoPop()}; - cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); - cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); - - if (rc.texture_enable) - { - const u32 texcoord_and_palette = FifoPop(); - cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); - cmd->texcoord = Truncate16(texcoord_and_palette); - } - else - { - cmd->palette.bits = 0; - cmd->texcoord = 0; - } - - switch (rc.rectangle_size) - { - case GPUDrawRectangleSize::R1x1: - cmd->width = 1; - cmd->height = 1; - break; - case GPUDrawRectangleSize::R8x8: - cmd->width = 8; - cmd->height = 8; - break; - case GPUDrawRectangleSize::R16x16: - cmd->width = 16; - cmd->height = 16; - break; - default: - { - const u32 width_and_height = FifoPop(); - cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); - cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); - } - break; - } - - const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); - const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); - if (clamped_rect.rempty()) [[unlikely]] - { - 
DEBUG_LOG("Culling off-screen rectangle {}", rect); - return; - } - - AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); - - m_backend.PushCommand(cmd); - } - break; - - case GPUPrimitive::Line: - { - if (!rc.polyline) - { - GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(2); - FillDrawCommand(cmd, rc); - cmd->palette.bits = 0; - - if (rc.shading_enable) - { - cmd->vertices[0].color = rc.color_for_first_vertex; - const GPUVertexPosition start_pos{FifoPop()}; - cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; - cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; - - cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); - const GPUVertexPosition end_pos{FifoPop()}; - cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; - cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; - } - else - { - cmd->vertices[0].color = rc.color_for_first_vertex; - cmd->vertices[1].color = rc.color_for_first_vertex; - - const GPUVertexPosition start_pos{FifoPop()}; - cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; - cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; - - const GPUVertexPosition end_pos{FifoPop()}; - cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; - cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; - } - - const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x); - const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x); - const GSVector4i rect = v0.min_s32(v1).xyxy(v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].y, cmd->vertices[0].y, - cmd->vertices[1].x, cmd->vertices[1].y); - return; - } - - AddDrawLineTicks(clamped_rect, rc.shading_enable); - - m_backend.PushCommand(cmd); - } - else - { - const u32 num_vertices = 
GetPolyLineVertexCount(); - - GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand((num_vertices - 1) * 2); - FillDrawCommand(cmd, m_render_command); - - u32 buffer_pos = 0; - const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - const GSVector2i draw_offset = GSVector2i::load(&m_drawing_offset.x); - GSVector2i start_pos = GSVector2i(start_vp.x, start_vp.y).add32(draw_offset); - u32 start_color = m_render_command.color_for_first_vertex; - - const bool shaded = m_render_command.shading_enable; - u32 out_vertex_count = 0; - for (u32 i = 1; i < num_vertices; i++) - { - const u32 end_color = - shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; - const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; - const GSVector2i end_pos = GSVector2i(vp.x, vp.y).add32(draw_offset); - - const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[i - 1].x, - cmd->vertices[i - 1].y, cmd->vertices[i].x, cmd->vertices[i].y); - } - else - { - AddDrawLineTicks(clamped_rect, rc.shading_enable); - - GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count]; - out_vertex_count += 2; - - GSVector2i::store(&out_vertex[0].x, start_pos); - out_vertex[0].color = start_color; - GSVector2i::store(&out_vertex[1].x, end_pos); - out_vertex[1].color = end_color; - } - - start_pos = end_pos; - start_color = end_color; - } - - if (out_vertex_count > 0) - { - DebugAssert(out_vertex_count <= cmd->num_vertices); - cmd->num_vertices = Truncate16(out_vertex_count); - m_backend.PushCommand(cmd); - } - } - } - break; - - default: - UnreachableCode(); - break; - } -} - -void 
GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ - m_backend.Sync(false); -} - -void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - GPUBackendFillVRAMCommand* cmd = m_backend.NewFillVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - cmd->color = color; - m_backend.PushCommand(cmd); -} - -void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) -{ - const u32 num_words = width * height; - GPUBackendUpdateVRAMCommand* cmd = m_backend.NewUpdateVRAMCommand(num_words); - FillBackendCommandParameters(cmd); - cmd->params.set_mask_while_drawing = set_mask; - cmd->params.check_mask_before_draw = check_mask; - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - std::memcpy(cmd->data, data, sizeof(u16) * num_words); - m_backend.PushCommand(cmd); -} - -void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - GPUBackendCopyVRAMCommand* cmd = m_backend.NewCopyVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->src_x = static_cast(src_x); - cmd->src_y = static_cast(src_y); - cmd->dst_x = static_cast(dst_x); - cmd->dst_y = static_cast(dst_y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - m_backend.PushCommand(cmd); -} - -void GPU_SW::FlushRender() -{ -} - -void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) -{ - GPUBackendUpdateCLUTCommand* cmd = m_backend.NewUpdateCLUTCommand(); - FillBackendCommandParameters(cmd); - cmd->reg.bits = reg.bits; - cmd->clut_is_8bit = clut_is_8bit; - m_backend.PushCommand(cmd); -} - -std::unique_ptr GPU::CreateSoftwareRenderer() +std::unique_ptr GPUBackend::CreateSoftwareBackend() { return std::make_unique(); } diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 
e113fa48e..b26c4616e 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -4,7 +4,7 @@ #pragma once #include "gpu.h" -#include "gpu_sw_backend.h" +#include "gpu_backend.h" #include "util/gpu_device.h" @@ -12,36 +12,51 @@ #include -namespace Threading { -class Thread; -} - -class GPUTexture; - -class GPU_SW final : public GPU +// TODO: Move to cpp +// TODO: Rename to GPUSWBackend, preserved to avoid conflicts. +class GPU_SW final : public GPUBackend { public: GPU_SW(); ~GPU_SW() override; - ALWAYS_INLINE const GPU_SW_Backend& GetBackend() const { return m_backend; } + bool Initialize(bool upload_vram, Error* error) override; - const Threading::Thread* GetSWThread() const override; - bool IsHardwareRenderer() const override; + void RestoreDeviceContext() override; - bool Initialize(Error* error) override; - bool DoState(StateWrapper& sw, bool update_display) override; - bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) override; - void Reset(bool clear_vram) override; - void UpdateSettings(const Settings& old_settings) override; + u32 GetResolutionScale() const override; protected: void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 active_line_lsb) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void FlushRender() override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask, + bool check_mask) override; + + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; + void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + void 
DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override; + void DrawingAreaChanged() override; + void ClearCache() override; void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; + void OnBufferSwapped() override; + + void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) override; + + void ClearVRAM() override; + + void FlushRender() override; + + void UpdateResolutionScale() override; + + void LoadState(const GPUBackendLoadStateCommand* cmd) override; + + bool AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) override; + void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) override; + +private: + static constexpr GPUTexture::Format FORMAT_FOR_24BIT = GPUTexture::Format::RGBA8; // RGBA8 always supported. template bool CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip); @@ -50,21 +65,9 @@ protected: bool CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit); - void UpdateDisplay() override; - - void DispatchRenderCommand() override; - - void FillBackendCommandParameters(GPUBackendCommand* cmd) const; - void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; - -private: - static constexpr GPUTexture::Format FORMAT_FOR_24BIT = GPUTexture::Format::RGBA8; // RGBA8 always supported. 
- GPUTexture* GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format); FixedHeapArray m_upload_buffer; GPUTexture::Format m_16bit_display_format = GPUTexture::Format::Unknown; std::unique_ptr m_upload_texture; - - GPU_SW_Backend m_backend; }; diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp deleted file mode 100644 index 925e20bd5..000000000 --- a/src/core/gpu_sw_backend.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#include "gpu_sw_backend.h" -#include "gpu.h" -#include "gpu_sw_rasterizer.h" -#include "system.h" - -#include "util/gpu_device.h" - -#include - -GPU_SW_Backend::GPU_SW_Backend() = default; - -GPU_SW_Backend::~GPU_SW_Backend() = default; - -bool GPU_SW_Backend::Initialize(bool use_thread) -{ - return GPUBackend::Initialize(use_thread); -} - -void GPU_SW_Backend::Reset() -{ - GPUBackend::Reset(); -} - -void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) -{ - const GPURenderCommand rc{cmd->rc.bits}; - - const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( - rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); - - DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); - if (rc.quad_polygon) - DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); -} - -void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) -{ - const GPURenderCommand rc{cmd->rc.bits}; - - const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = - GPU_SW_Rasterizer::GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); - - DrawFunction(cmd); -} - -void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd) -{ - const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = - GPU_SW_Rasterizer::GetDrawLineFunction(cmd->rc.shading_enable, 
cmd->rc.transparency_enable); - - for (u16 i = 1; i < cmd->num_vertices; i += 2) - DrawFunction(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); -} - -void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) -{ - GPU_SW_Rasterizer::FillVRAM(x, y, width, height, color, params.interlaced_rendering, params.active_line_lsb); -} - -void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, - GPUBackendCommandParameters params) -{ - GPU_SW_Rasterizer::WriteVRAM(x, y, width, height, data, params.set_mask_while_drawing, params.check_mask_before_draw); -} - -void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, - GPUBackendCommandParameters params) -{ - GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params.set_mask_while_drawing, - params.check_mask_before_draw); -} - -void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) -{ - GPU::ReadCLUT(g_gpu_clut, reg, clut_is_8bit); -} - -void GPU_SW_Backend::DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) -{ - GPU_SW_Rasterizer::g_drawing_area = new_drawing_area; -} - -void GPU_SW_Backend::FlushRender() -{ -} diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h deleted file mode 100644 index 7f2c492ca..000000000 --- a/src/core/gpu_sw_backend.h +++ /dev/null @@ -1,32 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#pragma once - -#include "gpu.h" -#include "gpu_backend.h" - -#include - -class GPU_SW_Backend final : public GPUBackend -{ -public: - GPU_SW_Backend(); - ~GPU_SW_Backend() override; - - bool Initialize(bool use_thread) override; - void Reset() override; - -protected: - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 
height, const void* data, GPUBackendCommandParameters params) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, - GPUBackendCommandParameters params) override; - - void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; - void DrawLine(const GPUBackendDrawLineCommand* cmd) override; - void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override; - void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) override; - void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; - void FlushRender() override; -}; diff --git a/src/core/gpu_sw_rasterizer.cpp b/src/core/gpu_sw_rasterizer.cpp index 3f2246e65..e934aeebf 100644 --- a/src/core/gpu_sw_rasterizer.cpp +++ b/src/core/gpu_sw_rasterizer.cpp @@ -38,6 +38,31 @@ CopyVRAMFunction CopyVRAM = nullptr; GPUDrawingArea g_drawing_area = {}; } // namespace GPU_SW_Rasterizer +void GPU_SW_Rasterizer::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + const u16* const src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH]; + const u32 start_x = reg.GetXBase(); + if (!clut_is_8bit) + { + // Wraparound can't happen in 4-bit mode. + std::memcpy(g_gpu_clut, &src_row[start_x], sizeof(u16) * 16); + } + else + { + if ((start_x + 256) > VRAM_WIDTH) [[unlikely]] + { + const u32 end = VRAM_WIDTH - start_x; + const u32 start = 256 - end; + std::memcpy(g_gpu_clut, &src_row[start_x], sizeof(u16) * end); + std::memcpy(g_gpu_clut + end, src_row, sizeof(u16) * start); + } + else + { + std::memcpy(g_gpu_clut, &src_row[start_x], sizeof(u16) * 256); + } + } +} + // Default scalar implementation definitions. 
namespace GPU_SW_Rasterizer::Scalar { namespace { diff --git a/src/core/gpu_sw_rasterizer.h b/src/core/gpu_sw_rasterizer.h index cdc6e9d5e..94b1c81c7 100644 --- a/src/core/gpu_sw_rasterizer.h +++ b/src/core/gpu_sw_rasterizer.h @@ -4,6 +4,7 @@ #pragma once #include "gpu.h" +#include "gpu_thread_commands.h" #include "gpu_types.h" #include "common/intrin.h" @@ -18,13 +19,15 @@ static constexpr u32 DITHER_LUT_SIZE = 512; using DitherLUT = std::array, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>; extern const DitherLUT g_dither_lut; +// TODO: Pack in struct extern GPUDrawingArea g_drawing_area; +extern void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit); + using DrawRectangleFunction = void (*)(const GPUBackendDrawRectangleCommand* cmd); typedef const DrawRectangleFunction DrawRectangleFunctionTable[2][2][2]; -using DrawTriangleFunction = void (*)(const GPUBackendDrawPolygonCommand* cmd, - const GPUBackendDrawPolygonCommand::Vertex* v0, +using DrawTriangleFunction = void (*)(const GPUBackendDrawCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2); typedef const DrawTriangleFunction DrawTriangleFunctionTable[2][2][2][2]; diff --git a/src/core/gpu_sw_rasterizer.inl b/src/core/gpu_sw_rasterizer.inl index d470a93a6..3deb0a9b8 100644 --- a/src/core/gpu_sw_rasterizer.inl +++ b/src/core/gpu_sw_rasterizer.inl @@ -129,7 +129,7 @@ template } else { - const bool dithering_enable = cmd->draw_mode.dither_enable; + const bool dithering_enable = cmd->dither_enable; const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; @@ -143,7 +143,7 @@ template } else { - const bool dithering_enable = cmd->draw_mode.dither_enable; + const bool dithering_enable = cmd->dither_enable; const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; const u32 dither_x = (dithering_enable) ? 
(x & 3u) : 3u; @@ -215,12 +215,12 @@ template } } - const u16 mask_and = cmd->params.GetMaskAND(); + const u16 mask_and = cmd->GetMaskAND(); if ((bg_color & mask_and) != 0) return; DebugAssert(static_cast(x) < VRAM_WIDTH && static_cast(y) < VRAM_HEIGHT); - SetPixel(static_cast(x), static_cast(y), color | cmd->params.GetMaskOR()); + SetPixel(static_cast(x), static_cast(y), color | cmd->GetMaskOR()); } #ifndef USE_VECTOR @@ -237,7 +237,8 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) { const s32 y = origin_y + static_cast(offset_y); if (y < static_cast(g_drawing_area.top) || y > static_cast(g_drawing_area.bottom) || - (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast(y)) & 1u))) + (cmd->interlaced_rendering && + cmd->active_line_lsb == ConvertToBoolUnchecked(Truncate8(static_cast(y)) & 1u))) { continue; } @@ -488,8 +489,8 @@ struct PixelVectors clip_left = GSVectorNi(g_drawing_area.left); clip_right = GSVectorNi(g_drawing_area.right); - mask_and = GSVectorNi(cmd->params.GetMaskAND()); - mask_or = GSVectorNi(cmd->params.GetMaskOR()); + mask_and = GSVectorNi(cmd->GetMaskAND()); + mask_or = GSVectorNi(cmd->GetMaskOR()); if constexpr (texture_enable) { @@ -717,7 +718,8 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) { const s32 y = origin_y + static_cast(offset_y); if (y >= static_cast(g_drawing_area.top) && y <= static_cast(g_drawing_area.bottom) && - (!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (Truncate8(static_cast(y)) & 1u))) + (!cmd->interlaced_rendering || + cmd->active_line_lsb != ConvertToBoolUnchecked(Truncate8(static_cast(y)) & 1u))) { const s32 draw_y = (y & VRAM_HEIGHT_MASK); @@ -817,7 +819,8 @@ static void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawL const s32 x = unfp_xy(curx); const s32 y = unfp_xy(cury); - if ((!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (Truncate8(static_cast(y)) & 1u)) && + if 
((!cmd->interlaced_rendering || + cmd->active_line_lsb != ConvertToBoolUnchecked(Truncate8(static_cast(y)) & 1u)) && x >= static_cast(g_drawing_area.left) && x <= static_cast(g_drawing_area.right) && y >= static_cast(g_drawing_area.top) && y <= static_cast(g_drawing_area.bottom)) { @@ -968,7 +971,7 @@ struct TrianglePart #ifndef USE_VECTOR template -static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, +static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep) { s32 width = x_bound - x_start; @@ -1008,7 +1011,7 @@ static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start } template -ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCommand* cmd, const TrianglePart& tp, +ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand* cmd, const TrianglePart& tp, const UVStepper& uv, const UVSteps& uvstep, const RGBStepper& rgb, const RGBSteps& rgbstep) { @@ -1051,7 +1054,8 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo lrgb.StepY(rgbstep); if (y > static_cast(g_drawing_area.bottom) || - (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (static_cast(current_y) & 1u))) + (cmd->interlaced_rendering && + cmd->active_line_lsb == ConvertToBoolUnchecked(static_cast(current_y) & 1u))) { continue; } @@ -1082,7 +1086,8 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo break; } if (y >= static_cast(g_drawing_area.top) && - (!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (static_cast(current_y) & 1u))) + (!cmd->interlaced_rendering || + cmd->active_line_lsb != ConvertToBoolUnchecked(static_cast(current_y) & 1u))) { DrawSpan( cmd, y & VRAM_HEIGHT_MASK, unfp_xy(left_x), unfp_xy(right_x), luv, uvstep, lrgb, rgbstep); @@ -1145,7 +1150,7 @@ struct 
TriangleVectors : PixelVectors } // namespace template -ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, +ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep, const TriangleVectors& tv) { @@ -1195,7 +1200,7 @@ ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawPolygonCommand* c dv = GSVectorNi::zero(); } - const GSVectorNi dither = cmd->draw_mode.dither_enable ? + const GSVectorNi dither = cmd->dither_enable ? GSVectorNi::broadcast128( &VECTOR_DITHER_MATRIX[static_cast(y) & 3][(static_cast(current_x) & 3) * 2]) : GSVectorNi::zero(); @@ -1250,7 +1255,7 @@ ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawPolygonCommand* c } template -ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCommand* cmd, const TrianglePart& tp, +ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand* cmd, const TrianglePart& tp, const UVStepper& uv, const UVSteps& uvstep, const RGBStepper& rgb, const RGBSteps& rgbstep) { @@ -1295,7 +1300,8 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo lrgb.StepY(rgbstep); if (y > static_cast(g_drawing_area.bottom) || - (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (static_cast(current_y) & 1u))) + (cmd->interlaced_rendering && + cmd->active_line_lsb == ConvertToBoolUnchecked(static_cast(current_y) & 1u))) { continue; } @@ -1328,7 +1334,8 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo break; } if (y >= static_cast(g_drawing_area.top) && - (!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (static_cast(current_y) & 1u))) + (!cmd->interlaced_rendering || + cmd->active_line_lsb != ConvertToBoolUnchecked(static_cast(current_y) & 1u))) { DrawSpan( cmd, y & VRAM_HEIGHT_MASK, 
unfp_xy(left_x), unfp_xy(right_x), luv, uvstep, lrgb, rgbstep, tv); @@ -1349,7 +1356,7 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo #endif // USE_VECTOR template -static void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, +static void DrawTriangle(const GPUBackendDrawCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2) { #ifdef CHECK_VECTOR diff --git a/src/core/gpu_thread.cpp b/src/core/gpu_thread.cpp new file mode 100644 index 000000000..7f538a932 --- /dev/null +++ b/src/core/gpu_thread.cpp @@ -0,0 +1,1363 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#include "gpu_thread.h" +#include "fullscreen_ui.h" +#include "gpu_backend.h" +#include "gpu_hw_texture_cache.h" +#include "gpu_thread_commands.h" +#include "gpu_types.h" +#include "host.h" +#include "imgui_overlays.h" +#include "performance_counters.h" +#include "settings.h" +#include "shader_cache_version.h" +#include "system.h" +#include "system_private.h" + +#include "util/gpu_device.h" +#include "util/imgui_manager.h" +#include "util/input_manager.h" +#include "util/postprocessing.h" +#include "util/state_wrapper.h" + +#include "common/align.h" +#include "common/error.h" +#include "common/log.h" +#include "common/threading.h" +#include "common/timer.h" + +#include "IconsEmoji.h" +#include "IconsFontAwesome5.h" +#include "fmt/format.h" +#include "imgui.h" + +#include + +LOG_CHANNEL(GPUThread); + +// TODO: Smaller settings struct. +// TODO: Remove g_gpu pointer. 
+ +namespace GPUThread { +enum : u32 +{ + COMMAND_QUEUE_SIZE = 16 * 1024 * 1024, + THRESHOLD_TO_WAKE_GPU = 65536, + MAX_SKIPPED_PRESENT_COUNT = 50 +}; + +static constexpr s32 THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING = 0x40000000; // CPU thread needs waking +static constexpr s32 THREAD_WAKE_COUNT_SLEEPING = -1; + +// Use a slightly longer spin time on ARM64 due to power management. +#ifndef _M_ARM64 +static constexpr u32 THREAD_SPIN_TIME_US = 50; +#else +static constexpr u32 THREAD_SPIN_TIME_US = 200; +#endif + +static bool Reconfigure(std::string serial, std::optional renderer, bool upload_vram, + std::optional fullscreen, std::optional start_fullscreen_ui, bool recreate_device, + Error* error); + +// NOTE: Use with care! The handler needs to manually run the destructor. +template +T* AllocateCommand(GPUBackendCommandType type, Args... args); + +static u32 GetPendingCommandSize(); +static void ResetCommandFIFO(); +static bool IsCommandFIFOEmpty(); +static void WakeGPUThread(); +static bool SleepGPUThread(bool allow_sleep); + +static bool CreateDeviceOnThread(RenderAPI api, bool fullscreen, Error* error); +static void DestroyDeviceOnThread(); +static void ResizeDisplayWindowOnThread(u32 width, u32 height, float scale); +static void UpdateDisplayWindowOnThread(bool fullscreen); +static void DisplayWindowResizedOnThread(); +static void HandleGPUDeviceLost(); +static void HandleExclusiveFullscreenLost(); + +static void ReconfigureOnThread(GPUThreadReconfigureCommand* cmd); +static bool CreateGPUBackendOnThread(GPURenderer renderer, bool upload_vram, Error* error); +static void DestroyGPUBackendOnThread(); + +static void UpdateSettingsOnThread(const Settings& old_settings); + +static void UpdateRunIdle(); + +static void SleepUntilPresentTime(Timer::Value present_time); + +namespace { + +struct ALIGN_TO_CACHE_LINE State +{ + // Owned by CPU thread. 
+ ALIGN_TO_CACHE_LINE Timer::Value thread_spin_time = 0; + Threading::ThreadHandle gpu_thread; + Common::unique_aligned_ptr command_fifo_data; + WindowInfo render_window_info; + std::optional requested_renderer; // TODO: Non thread safe accessof this + bool use_gpu_thread = false; + + // Hot variables between both threads. + ALIGN_TO_CACHE_LINE std::atomic command_fifo_write_ptr{0}; + std::atomic thread_wake_count{0}; // <0 = sleeping, >= 0 = has work + Threading::KernelSemaphore thread_wake_semaphore; + Threading::KernelSemaphore thread_is_done_semaphore; + + // Owned by GPU thread. + ALIGN_TO_CACHE_LINE std::unique_ptr gpu_backend; + std::atomic command_fifo_read_ptr{0}; + u32 skipped_present_count = 0; + u8 run_idle_reasons = 0; + bool run_idle_flag = false; + GPUVSyncMode requested_vsync = GPUVSyncMode::Disabled; + bool requested_allow_present_throttle = false; + bool requested_fullscreen_ui = false; + std::string game_serial; +}; + +} // namespace + +static State s_state; + +} // namespace GPUThread + +const Threading::ThreadHandle& GPUThread::Internal::GetThreadHandle() +{ + return s_state.gpu_thread; +} + +void GPUThread::ResetCommandFIFO() +{ + Assert(!s_state.run_idle_flag && s_state.command_fifo_read_ptr.load(std::memory_order_acquire) == + s_state.command_fifo_write_ptr.load(std::memory_order_relaxed)); + s_state.command_fifo_write_ptr.store(0, std::memory_order_release); + s_state.command_fifo_read_ptr.store(0, std::memory_order_release); +} + +void GPUThread::Internal::SetThreadEnabled(bool enabled) +{ + if (s_state.use_gpu_thread == enabled) + return; + + if (s_state.use_gpu_thread) + { + SyncGPUThread(false); + std::atomic_thread_fence(std::memory_order_acquire); + } + + // Was anything active? + if (!g_gpu_device) + { + // Thread should be idle. Just reset the FIFO. 
+ s_state.use_gpu_thread = enabled; + ResetCommandFIFO(); + return; + } + + const bool fullscreen = Host::IsFullscreen(); + const bool requested_fullscreen_ui = s_state.requested_fullscreen_ui; + const std::optional requested_renderer = s_state.requested_renderer; + std::string serial = s_state.game_serial; + + // Force VRAM download, we're recreating. + if (requested_renderer.has_value()) + { + GPUBackendReadVRAMCommand* cmd = GPUBackend::NewReadVRAMCommand(); + cmd->x = 0; + cmd->y = 0; + cmd->width = VRAM_WIDTH; + cmd->height = VRAM_HEIGHT; + PushCommand(cmd); + } + + // Shutdown reconfigure. + Reconfigure(std::string(), std::nullopt, false, false, false, false, nullptr); + + // Thread should be idle at this point. Reset the FIFO. + ResetCommandFIFO(); + + // Update state and reconfigure again. + s_state.use_gpu_thread = enabled; + + Error error; + if (!Reconfigure(std::move(serial), requested_renderer, requested_renderer.has_value(), fullscreen, + requested_fullscreen_ui, true, &error)) + { + ERROR_LOG("Reconfigure failed: {}", error.GetDescription()); + Panic("Failed to reconfigure when changing thread state."); + } +} + +void GPUThread::Internal::ProcessStartup() +{ + s_state.thread_spin_time = Timer::ConvertNanosecondsToValue(THREAD_SPIN_TIME_US * 1000.0); + s_state.command_fifo_data = Common::make_unique_aligned_for_overwrite(HOST_CACHE_LINE_SIZE, COMMAND_QUEUE_SIZE); + s_state.use_gpu_thread = g_settings.gpu_use_thread; + s_state.run_idle_reasons = static_cast(RunIdleReason::NoGPUBackend); +} + +void GPUThread::Internal::RequestShutdown() +{ + INFO_LOG("Shutting down GPU thread..."); + SyncGPUThread(false); + + // Thread must be enabled to shut it down. 
+ SetThreadEnabled(true); + PushCommandAndWakeThread(AllocateCommand(GPUBackendCommandType::Shutdown, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUThread::AllocateCommand(GPUBackendCommandType command, u32 size) +{ + size = GPUThreadCommand::AlignCommandSize(size); + + for (;;) + { + u32 read_ptr = s_state.command_fifo_read_ptr.load(std::memory_order_acquire); + u32 write_ptr = s_state.command_fifo_write_ptr.load(std::memory_order_relaxed); + if (read_ptr > write_ptr) + { + u32 available_size = read_ptr - write_ptr; + while (available_size < (size + sizeof(GPUBackendCommandType))) + { + WakeGPUThread(); + read_ptr = s_state.command_fifo_read_ptr.load(std::memory_order_acquire); + available_size = (read_ptr > write_ptr) ? (read_ptr - write_ptr) : (COMMAND_QUEUE_SIZE - write_ptr); + } + } + else + { + const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr; + if ((size + sizeof(GPUThreadCommand)) > available_size) + { + // allocate a dummy command to wrap the buffer around + GPUThreadCommand* dummy_cmd = reinterpret_cast(&s_state.command_fifo_data[write_ptr]); + dummy_cmd->type = GPUBackendCommandType::Wraparound; + dummy_cmd->size = available_size; + s_state.command_fifo_write_ptr.store(0, std::memory_order_release); + continue; + } + } + + GPUThreadCommand* cmd = reinterpret_cast(&s_state.command_fifo_data[write_ptr]); + cmd->type = command; + cmd->size = size; + return cmd; + } +} + +template +T* GPUThread::AllocateCommand(GPUBackendCommandType command, Args... 
args) +{ + const u32 size = GPUThreadCommand::AlignCommandSize(sizeof(T)); + GPUThreadCommand* cmd = AllocateCommand(command, size); + DebugAssert(cmd->size == size); + + new (cmd) T(std::forward(args)...); + + // constructor may overwrite the fields, need to reset them + cmd->type = command; + cmd->size = size; + + return static_cast(cmd); +} + +u32 GPUThread::GetPendingCommandSize() +{ + const u32 read_ptr = s_state.command_fifo_read_ptr.load(std::memory_order_acquire); + const u32 write_ptr = s_state.command_fifo_write_ptr.load(std::memory_order_relaxed); + return (write_ptr >= read_ptr) ? (write_ptr - read_ptr) : (COMMAND_QUEUE_SIZE - read_ptr + write_ptr); +} + +bool GPUThread::IsCommandFIFOEmpty() +{ + const u32 read_ptr = s_state.command_fifo_read_ptr.load(std::memory_order_acquire); + const u32 write_ptr = s_state.command_fifo_write_ptr.load(std::memory_order_relaxed); + return (read_ptr == write_ptr); +} + +void GPUThread::PushCommand(GPUThreadCommand* cmd) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + return; + } + + const u32 new_write_ptr = s_state.command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size; + DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); + UNREFERENCED_VARIABLE(new_write_ptr); + if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU) // TODO:FIXME: maybe purge this? 
+ WakeGPUThread(); +} + +void GPUThread::PushCommandAndWakeThread(GPUThreadCommand* cmd) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + return; + } + + const u32 new_write_ptr = s_state.command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size; + DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); + UNREFERENCED_VARIABLE(new_write_ptr); + WakeGPUThread(); +} + +void GPUThread::PushCommandAndSync(GPUThreadCommand* cmd, bool spin) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + return; + } + + const u32 new_write_ptr = s_state.command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size; + DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); + UNREFERENCED_VARIABLE(new_write_ptr); + WakeGPUThread(); + SyncGPUThread(spin); +} + +ALWAYS_INLINE s32 GetThreadWakeCount(s32 state) +{ + return (state & ~GPUThread::THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING); +} + +void GPUThread::WakeGPUThread() +{ + // If sleeping, state will be <0, otherwise this will increment the pending work count. + // We add 2 so that there's a positive work count if we were sleeping, otherwise the thread would go to sleep. + if (s_state.thread_wake_count.fetch_add(2, std::memory_order_release) < 0) + s_state.thread_wake_semaphore.Post(); +} + +void GPUThread::SyncGPUThread(bool spin) +{ + if (!s_state.use_gpu_thread) + return; + + if (spin) + { + // Check if the GPU thread is done/sleeping. + if (GetThreadWakeCount(s_state.thread_wake_count.load(std::memory_order_acquire)) < 0) + { + if (IsCommandFIFOEmpty()) + return; + + WakeGPUThread(); + } + + const Timer::Value start_time = Timer::GetCurrentValue(); + Timer::Value current_time = start_time; + do + { + // Check if the GPU thread is done/sleeping. 
+ if (GetThreadWakeCount(s_state.thread_wake_count.load(std::memory_order_acquire)) < 0) + { + if (IsCommandFIFOEmpty()) + return; + + WakeGPUThread(); + continue; + } + + // Hopefully ought to be enough. + MultiPause(); + + current_time = Timer::GetCurrentValue(); + } while ((current_time - start_time) < s_state.thread_spin_time); + } + + // s_thread_wake_count |= THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING if not zero + s32 value; + do + { + // Check if the GPU thread is done/sleeping. + value = s_state.thread_wake_count.load(std::memory_order_acquire); + if (GetThreadWakeCount(value) < 0) + { + if (IsCommandFIFOEmpty()) + return; + + WakeGPUThread(); + continue; + } + } while (!s_state.thread_wake_count.compare_exchange_weak(value, value | THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING, + std::memory_order_acq_rel, std::memory_order_relaxed)); + s_state.thread_is_done_semaphore.Wait(); +} + +bool GPUThread::SleepGPUThread(bool allow_sleep) +{ + DebugAssert(!allow_sleep || s_state.thread_wake_count.load(std::memory_order_relaxed) >= 0); + for (;;) + { + // Acknowledge any work that has been queued, but preserve the waiting flag if there is any, since we're not done + // yet. + s32 old_state, new_state; + do + { + old_state = s_state.thread_wake_count.load(std::memory_order_relaxed); + new_state = (GetThreadWakeCount(old_state) > 0) ? (old_state & THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING) : + (allow_sleep ? THREAD_WAKE_COUNT_SLEEPING : 0); + } while (!s_state.thread_wake_count.compare_exchange_weak(old_state, new_state, std::memory_order_acq_rel, + std::memory_order_relaxed)); + + // Are we not done yet? + if (GetThreadWakeCount(old_state) > 0) + return true; + + // We're done, so wake the CPU thread if it's waiting. + if (old_state & THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING) + s_state.thread_is_done_semaphore.Post(); + + // Sleep until more work is queued. 
+ if (allow_sleep) + s_state.thread_wake_semaphore.Wait(); + else + return false; + } +} + +void GPUThread::Internal::GPUThreadEntryPoint() +{ + s_state.gpu_thread = Threading::ThreadHandle::GetForCallingThread(); + + // Take a local copy of the FIFO, that way it's not ping-ponging between the threads. + u8* const command_fifo_data = s_state.command_fifo_data.get(); + + for (;;) + { + u32 write_ptr = s_state.command_fifo_write_ptr.load(std::memory_order_acquire); + u32 read_ptr = s_state.command_fifo_read_ptr.load(std::memory_order_relaxed); + if (read_ptr == write_ptr) + { + if (SleepGPUThread(!s_state.run_idle_flag)) + { + // sleep => wake, need to reload pointers + continue; + } + else + { + DoRunIdle(); + continue; + } + } + + write_ptr = (write_ptr < read_ptr) ? COMMAND_QUEUE_SIZE : write_ptr; + while (read_ptr < write_ptr) + { + GPUThreadCommand* cmd = reinterpret_cast(&command_fifo_data[read_ptr]); + DebugAssert((read_ptr + cmd->size) <= COMMAND_QUEUE_SIZE); + read_ptr += cmd->size; + + if (cmd->type > GPUBackendCommandType::Shutdown) [[likely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + continue; + } + + switch (cmd->type) + { + case GPUBackendCommandType::Wraparound: + { + DebugAssert(read_ptr == COMMAND_QUEUE_SIZE); + write_ptr = s_state.command_fifo_write_ptr.load(std::memory_order_acquire); + read_ptr = 0; + + // let the CPU thread know as early as possible that we're here + s_state.command_fifo_read_ptr.store(read_ptr, std::memory_order_release); + } + break; + + case GPUBackendCommandType::AsyncCall: + { + GPUThreadAsyncCallCommand* acmd = static_cast(cmd); + acmd->func(); + acmd->~GPUThreadAsyncCallCommand(); + } + break; + + case GPUBackendCommandType::AsyncBackendCall: + { + GPUThreadAsyncBackendCallCommand* acmd = static_cast(cmd); + acmd->func(s_state.gpu_backend.get()); + acmd->~GPUThreadAsyncBackendCallCommand(); + } + break; + + case GPUBackendCommandType::Reconfigure: + { + 
GPUThreadReconfigureCommand* ccmd = static_cast(cmd); + ReconfigureOnThread(ccmd); + ccmd->~GPUThreadReconfigureCommand(); + } + break; + + case GPUBackendCommandType::Shutdown: + { + // Should have consumed everything, and be shutdown. + DebugAssert(read_ptr == write_ptr); + s_state.command_fifo_read_ptr.store(read_ptr, std::memory_order_release); + return; + } + break; + + DefaultCaseIsUnreachable(); + } + } + + s_state.command_fifo_read_ptr.store(read_ptr, std::memory_order_release); + } +} + +void GPUThread::Internal::DoRunIdle() +{ + PresentFrame(false, 0); + if (!g_gpu_device->GetMainSwapChain()->IsVSyncModeBlocking()) + g_gpu_device->GetMainSwapChain()->ThrottlePresentation(); +} + +bool GPUThread::Reconfigure(std::string serial, std::optional renderer, bool upload_vram, + std::optional fullscreen, std::optional start_fullscreen_ui, + bool recreate_device, Error* error) +{ + INFO_LOG("Reconfiguring GPU thread."); + + bool result = false; + GPUThreadReconfigureCommand* cmd = AllocateCommand(GPUBackendCommandType::Reconfigure); + cmd->game_serial = std::move(serial); + cmd->renderer = renderer; + cmd->fullscreen = fullscreen; + cmd->start_fullscreen_ui = start_fullscreen_ui; + cmd->vsync_mode = System::GetEffectiveVSyncMode(); + cmd->allow_present_throttle = System::ShouldAllowPresentThrottle(); + cmd->force_recreate_device = recreate_device; + cmd->upload_vram = upload_vram; + cmd->error_ptr = error; + cmd->out_result = &result; + + if (!s_state.use_gpu_thread) [[unlikely]] + ReconfigureOnThread(cmd); + else + PushCommandAndSync(cmd, false); + + return result; +} + +bool GPUThread::StartFullscreenUI(bool fullscreen, Error* error) +{ + // Don't need to reconfigure if we already have a system. 
+ if (System::IsValid()) + { + RunOnThread([]() { s_state.requested_fullscreen_ui = true; }); + return true; + } + + return Reconfigure(std::string(), std::nullopt, false, fullscreen, true, false, error); +} + +bool GPUThread::IsFullscreenUIRequested() +{ + return s_state.requested_fullscreen_ui; +} + +void GPUThread::StopFullscreenUI() +{ + // Don't need to reconfigure if we already have a system. + if (System::IsValid()) + { + RunOnThread([]() { s_state.requested_fullscreen_ui = true; }); + return; + } + + Reconfigure(std::string(), std::nullopt, false, std::nullopt, false, false, nullptr); +} + +std::optional GPUThread::GetRequestedRenderer() +{ + return s_state.requested_renderer; +} + +bool GPUThread::CreateGPUBackend(std::string serial, GPURenderer renderer, bool upload_vram, bool fullscreen, + bool force_recreate_device, Error* error) +{ + s_state.requested_renderer = renderer; + return Reconfigure(std::move(serial), renderer, upload_vram, fullscreen ? std::optional(true) : std::nullopt, + std::nullopt, force_recreate_device, error); +} + +void GPUThread::DestroyGPUBackend() +{ + Reconfigure(std::string(), std::nullopt, false, std::nullopt, std::nullopt, false, nullptr); + s_state.requested_renderer.reset(); +} + +bool GPUThread::HasGPUBackend() +{ + DebugAssert(IsOnThread()); + return (s_state.gpu_backend != nullptr); +} + +bool GPUThread::IsGPUBackendRequested() +{ + return s_state.requested_renderer.has_value(); +} + +bool GPUThread::CreateDeviceOnThread(RenderAPI api, bool fullscreen, Error* error) +{ + DebugAssert(!g_gpu_device); + + INFO_LOG("Trying to create a {} GPU device...", GPUDevice::RenderAPIToString(api)); + g_gpu_device = GPUDevice::CreateDeviceForAPI(api); + + std::optional fullscreen_mode; + if (fullscreen && g_gpu_device && g_gpu_device->SupportsExclusiveFullscreen()) + { + fullscreen_mode = + GPUDevice::ExclusiveFullscreenMode::Parse(Host::GetTinyStringSettingValue("GPU", "FullscreenMode", "")); + } + std::optional 
exclusive_fullscreen_control; + if (g_gpu_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) + { + exclusive_fullscreen_control = + (g_gpu_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); + } + + u32 disabled_features = 0; + if (g_gpu_settings.gpu_disable_dual_source_blend) + disabled_features |= GPUDevice::FEATURE_MASK_DUAL_SOURCE_BLEND; + if (g_gpu_settings.gpu_disable_framebuffer_fetch) + disabled_features |= GPUDevice::FEATURE_MASK_FRAMEBUFFER_FETCH; + if (g_gpu_settings.gpu_disable_texture_buffers) + disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_BUFFERS; + if (g_gpu_settings.gpu_disable_memory_import) + disabled_features |= GPUDevice::FEATURE_MASK_MEMORY_IMPORT; + if (g_gpu_settings.gpu_disable_raster_order_views) + disabled_features |= GPUDevice::FEATURE_MASK_RASTER_ORDER_VIEWS; + if (g_gpu_settings.gpu_disable_compute_shaders) + disabled_features |= GPUDevice::FEATURE_MASK_COMPUTE_SHADERS; + if (g_gpu_settings.gpu_disable_compressed_textures) + disabled_features |= GPUDevice::FEATURE_MASK_COMPRESSED_TEXTURES; + + // Only dump shaders on debug builds for Android, users will complain about storage... +#if !defined(__ANDROID__) || defined(_DEBUG) + const std::string_view shader_dump_directory(EmuFolders::DataRoot); +#else + const std::string_view shader_dump_directory; +#endif + + Error create_error; + std::optional wi; + if (!g_gpu_device || + !(wi = Host::AcquireRenderWindow(api, fullscreen, fullscreen_mode.has_value(), &create_error)).has_value() || + !g_gpu_device->Create( + g_gpu_settings.gpu_adapter, static_cast(disabled_features), shader_dump_directory, + g_gpu_settings.gpu_disable_shader_cache ? std::string_view() : std::string_view(EmuFolders::Cache), + SHADER_CACHE_VERSION, g_gpu_settings.gpu_use_debug_device, wi.value(), s_state.requested_vsync, + s_state.requested_allow_present_throttle, fullscreen_mode.has_value() ? 
&fullscreen_mode.value() : nullptr, + exclusive_fullscreen_control, &create_error)) + { + ERROR_LOG("Failed to create GPU device: {}", create_error.GetDescription()); + if (g_gpu_device) + g_gpu_device->Destroy(); + g_gpu_device.reset(); + if (wi.has_value()) + Host::ReleaseRenderWindow(); + + Error::SetStringFmt( + error, + TRANSLATE_FS("System", "Failed to create render device:\n\n{0}\n\nThis may be due to your GPU not supporting the " + "chosen renderer ({1}), or because your graphics drivers need to be updated."), + create_error.GetDescription(), GPUDevice::RenderAPIToString(api)); + + return false; + } + + if (!ImGuiManager::Initialize(g_gpu_settings.display_osd_scale / 100.0f, g_gpu_settings.display_osd_margin, + &create_error) || + (s_state.requested_fullscreen_ui && !FullscreenUI::Initialize())) + { + ERROR_LOG("Failed to initialize ImGuiManager: {}", create_error.GetDescription()); + Error::SetStringFmt(error, "Failed to initialize ImGuiManager: {}", create_error.GetDescription()); + FullscreenUI::Shutdown(); + ImGuiManager::Shutdown(); + g_gpu_device->Destroy(); + g_gpu_device.reset(); + if (wi.has_value()) + Host::ReleaseRenderWindow(); + return false; + } + + InputManager::SetDisplayWindowSize(ImGuiManager::GetWindowWidth(), ImGuiManager::GetWindowHeight()); + + if (const GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain()) + s_state.render_window_info = swap_chain->GetWindowInfo(); + else + s_state.render_window_info = WindowInfo(); + + std::atomic_thread_fence(std::memory_order_release); + UpdateRunIdle(); + return true; +} + +void GPUThread::DestroyDeviceOnThread() +{ + if (!g_gpu_device) + return; + + const bool has_window = g_gpu_device->HasMainSwapChain(); + + FullscreenUI::Shutdown(); + ImGuiManager::Shutdown(); + + INFO_LOG("Destroying {} GPU device...", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); + g_gpu_device->Destroy(); + g_gpu_device.reset(); + if (has_window) + Host::ReleaseRenderWindow(); + + UpdateRunIdle(); + 
s_state.render_window_info = WindowInfo(); + std::atomic_thread_fence(std::memory_order_release); +} + +void GPUThread::HandleGPUDeviceLost() +{ + static Timer::Value s_last_gpu_reset_time = 0; + static constexpr float MIN_TIME_BETWEEN_RESETS = 15.0f; + + // If we're constantly crashing on something in particular, we don't want to end up in an + // endless reset loop.. that'd probably end up leaking memory and/or crashing us for other + // reasons. So just abort in such case. + const Timer::Value current_time = Timer::GetCurrentValue(); + if (s_last_gpu_reset_time != 0 && + Timer::ConvertValueToSeconds(current_time - s_last_gpu_reset_time) < MIN_TIME_BETWEEN_RESETS) + { + Panic("Host GPU lost too many times, device is probably completely wedged."); + } + s_last_gpu_reset_time = current_time; + + const bool is_fullscreen = Host::IsFullscreen(); + + // Device lost, something went really bad. + // Let's just toss out everything, and try to hobble on. + DestroyGPUBackendOnThread(); + DestroyDeviceOnThread(); + + Error error; + if (!CreateDeviceOnThread( + Settings::GetRenderAPIForRenderer(s_state.requested_renderer.value_or(g_gpu_settings.gpu_renderer)), + is_fullscreen, &error) || + (s_state.requested_renderer.has_value() && + !CreateGPUBackendOnThread(s_state.requested_renderer.value(), true, &error))) + { + ERROR_LOG("Failed to recreate GPU device after loss: {}", error.GetDescription()); + Panic("Failed to recreate GPU device after loss."); + return; + } + + // First frame after reopening is definitely going to be trash, so skip it. + Host::AddIconOSDWarning( + "HostGPUDeviceLost", ICON_EMOJI_WARNING, + TRANSLATE_STR("System", "Host GPU device encountered an error and has recovered. 
This may cause broken rendering."), + Host::OSD_CRITICAL_ERROR_DURATION); +} + +void GPUThread::HandleExclusiveFullscreenLost() +{ + WARNING_LOG("Lost exclusive fullscreen."); + Host::SetFullscreen(false); +} + +bool GPUThread::CreateGPUBackendOnThread(GPURenderer renderer, bool upload_vram, Error* error) +{ + const bool is_hardware = (renderer != GPURenderer::Software); + + if (is_hardware) + s_state.gpu_backend = GPUBackend::CreateHardwareBackend(); + else + s_state.gpu_backend = GPUBackend::CreateSoftwareBackend(); + + Error local_error; + bool okay = s_state.gpu_backend->Initialize(upload_vram, &local_error); + if (!okay) + { + ERROR_LOG("Failed to create {} renderer: {}", Settings::GetRendererName(renderer), local_error.GetDescription()); + + if (is_hardware && !System::IsStartupCancelled()) + { + Host::AddIconOSDMessage( + "GPUBackendCreationFailed", ICON_FA_PAINT_ROLLER, + fmt::format(TRANSLATE_FS("OSDMessage", "Failed to initialize {} renderer, falling back to software renderer."), + Settings::GetRendererName(s_state.requested_renderer.value())), + Host::OSD_CRITICAL_ERROR_DURATION); + + s_state.requested_renderer = GPURenderer::Software; + s_state.gpu_backend = GPUBackend::CreateSoftwareBackend(); + okay = s_state.gpu_backend->Initialize(upload_vram, &local_error); + } + + if (!okay) + { + if (error) + *error = local_error; + return false; + } + } + + g_gpu_device->SetGPUTimingEnabled(g_gpu_settings.display_show_gpu_usage); + PostProcessing::Initialize(); + ImGuiManager::UpdateDebugWindowConfig(); + SetRunIdleReason(RunIdleReason::NoGPUBackend, false); + std::atomic_thread_fence(std::memory_order_release); + return true; +} + +void GPUThread::ReconfigureOnThread(GPUThreadReconfigureCommand* cmd) +{ + // Store state. 
+ s_state.requested_vsync = cmd->vsync_mode; + s_state.requested_allow_present_throttle = cmd->allow_present_throttle; + s_state.requested_fullscreen_ui = cmd->start_fullscreen_ui.value_or(s_state.requested_fullscreen_ui); + s_state.game_serial = std::move(cmd->game_serial); + + // Are we shutting down everything? + if (!cmd->renderer.has_value() && !s_state.requested_fullscreen_ui) + { + DestroyGPUBackendOnThread(); + DestroyDeviceOnThread(); + return; + } + + // TODO: Make this suck less. + g_gpu_settings = g_settings; + + // Readback old VRAM for hardware renderers. + if (s_state.gpu_backend && cmd->renderer.has_value() && cmd->upload_vram) + { + GPUBackendReadVRAMCommand read_cmd; + read_cmd.type = GPUBackendCommandType::ReadVRAM; + read_cmd.size = sizeof(read_cmd); + read_cmd.x = 0; + read_cmd.y = 0; + read_cmd.width = VRAM_WIDTH; + read_cmd.height = VRAM_HEIGHT; + s_state.gpu_backend->HandleCommand(&read_cmd); + } + + if (s_state.gpu_backend) + DestroyGPUBackendOnThread(); + + // Device recreation? + const RenderAPI current_api = g_gpu_device ? g_gpu_device->GetRenderAPI() : RenderAPI::None; + const RenderAPI expected_api = + (cmd->renderer.has_value() && cmd->renderer.value() == GPURenderer::Software && current_api != RenderAPI::None) ? 
+ current_api : + Settings::GetRenderAPIForRenderer(s_state.requested_renderer.value_or(g_gpu_settings.gpu_renderer)); + if (cmd->force_recreate_device || !GPUDevice::IsSameRenderAPI(current_api, expected_api)) + { + const bool fullscreen = cmd->fullscreen.value_or(Host::IsFullscreen()); + DestroyDeviceOnThread(); + + Error local_error; + if (!CreateDeviceOnThread(expected_api, fullscreen, &local_error)) + { + Host::AddIconOSDMessage( + "DeviceSwitchFailed", ICON_FA_PAINT_ROLLER, + fmt::format(TRANSLATE_FS("OSDMessage", "Failed to create {} GPU device, reverting to {}.\n{}"), + GPUDevice::RenderAPIToString(expected_api), GPUDevice::RenderAPIToString(current_api), + local_error.GetDescription()), + Host::OSD_CRITICAL_ERROR_DURATION); + + Host::ReleaseRenderWindow(); + if (current_api == RenderAPI::None || !CreateDeviceOnThread(current_api, fullscreen, &local_error)) + { + if (cmd->error_ptr) + *cmd->error_ptr = local_error; + + *cmd->out_result = false; + return; + } + } + } + + if (cmd->renderer.has_value()) + { + // Do we want a renderer? + *cmd->out_result = CreateGPUBackendOnThread(cmd->renderer.value(), cmd->upload_vram, cmd->error_ptr); + } + else if (s_state.requested_fullscreen_ui) + { + if (!g_gpu_device && !CreateDeviceOnThread(expected_api, cmd->fullscreen.value_or(false), cmd->error_ptr)) + { + *cmd->out_result = false; + return; + } + + // Don't need timing to run FSUI. + g_gpu_device->SetGPUTimingEnabled(false); + + if (!(*cmd->out_result = FullscreenUI::IsInitialized() || FullscreenUI::Initialize())) + Error::SetStringView(cmd->error_ptr, "Failed to initialize FullscreenUI."); + } + else + { + // Device is no longer needed. 
+ DestroyDeviceOnThread(); + } +} + +void GPUThread::DestroyGPUBackendOnThread() +{ + if (!s_state.gpu_backend) + return; + + VERBOSE_LOG("Shutting down GPU backend..."); + + SetRunIdleReason(RunIdleReason::NoGPUBackend, true); + + ImGuiManager::DestroyAllDebugWindows(); + ImGuiManager::DestroyOverlayTextures(); + PostProcessing::Shutdown(); + s_state.gpu_backend.reset(); +} + +void GPUThread::UpdateSettingsOnThread(const Settings& old_settings) +{ + if (g_gpu_device) + { + if (g_gpu_settings.display_osd_scale != old_settings.display_osd_scale) + ImGuiManager::SetGlobalScale(g_gpu_settings.display_osd_scale / 100.0f); + if (g_gpu_settings.display_osd_margin != old_settings.display_osd_margin) + ImGuiManager::SetScreenMargin(g_gpu_settings.display_osd_margin); + + FullscreenUI::CheckForConfigChanges(old_settings); + } + + if (s_state.gpu_backend) + { + if (g_gpu_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage) + g_gpu_device->SetGPUTimingEnabled(g_gpu_settings.display_show_gpu_usage); + + PostProcessing::UpdateSettings(); + + s_state.gpu_backend->UpdateSettings(old_settings); + if (ImGuiManager::UpdateDebugWindowConfig() || (PostProcessing::DisplayChain.IsActive() && !IsSystemPaused())) + Internal::PresentFrame(false, 0); + + s_state.gpu_backend->RestoreDeviceContext(); + } +} + +void GPUThread::RunOnThread(AsyncCallType func) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + func(); + return; + } + + GPUThreadAsyncCallCommand* cmd = + AllocateCommand(GPUBackendCommandType::AsyncCall, std::move(func)); + PushCommandAndWakeThread(cmd); +} + +void GPUThread::RunOnBackend(AsyncBackendCallType func, bool sync, bool spin_or_wake) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + func(s_state.gpu_backend.get()); + return; + } + + GPUThreadAsyncBackendCallCommand* cmd = + AllocateCommand(GPUBackendCommandType::AsyncBackendCall, std::move(func)); + if (sync) + PushCommandAndSync(cmd, spin_or_wake); + else if (spin_or_wake) + 
PushCommandAndWakeThread(cmd); + else + PushCommand(cmd); +} + +void GPUThread::UpdateSettings(bool gpu_settings_changed, bool device_settings_changed) +{ + if (device_settings_changed) + { + INFO_LOG("Reconfiguring after device settings changed."); + + Error error; + if (!Reconfigure(System::GetGameSerial(), s_state.requested_renderer, s_state.requested_renderer.has_value(), + std::nullopt, std::nullopt, true, &error)) [[unlikely]] + { + Host::ReportErrorAsync("Error", fmt::format("Failed to recreate GPU device: {}", error.GetDescription())); + } + } + else if (gpu_settings_changed) + { + RunOnThread([settings = g_settings]() { + VERBOSE_LOG("Updating GPU settings on thread..."); + + Settings old_settings = std::move(g_gpu_settings); + g_gpu_settings = std::move(settings); + + UpdateSettingsOnThread(old_settings); + }); + } + else + { + RunOnThread([]() { + if (s_state.gpu_backend) + { + PostProcessing::UpdateSettings(); + if (ImGuiManager::UpdateDebugWindowConfig() || (PostProcessing::DisplayChain.IsActive() && !IsSystemPaused())) + Internal::PresentFrame(false, 0); + } + }); + } +} + +bool GPUThread::IsOnThread() +{ + return (!s_state.use_gpu_thread || s_state.gpu_thread.IsCallingThread()); +} + +bool GPUThread::IsUsingThread() +{ + return s_state.use_gpu_thread; +} + +void GPUThread::ResizeDisplayWindow(s32 width, s32 height, float scale) +{ + RunOnThread([width, height, scale]() { ResizeDisplayWindowOnThread(width, height, scale); }); +} + +void GPUThread::ResizeDisplayWindowOnThread(u32 width, u32 height, float scale) +{ + // We should _not_ be getting this without a device, since we should have shut down. 
+ if (!g_gpu_device || !g_gpu_device->HasMainSwapChain()) + return; + + DEV_LOG("Display window resized to {}x{}", width, height); + + Error error; + if (!g_gpu_device->GetMainSwapChain()->ResizeBuffers(width, height, scale, &error)) + { + ERROR_LOG("Failed to resize main swap chain: {}", error.GetDescription()); + UpdateDisplayWindowOnThread(Host::IsFullscreen()); + return; + } + + DisplayWindowResizedOnThread(); +} + +void GPUThread::UpdateDisplayWindow(bool fullscreen) +{ + RunOnThread([fullscreen]() { UpdateDisplayWindowOnThread(fullscreen); }); +} + +void GPUThread::UpdateDisplayWindowOnThread(bool fullscreen) +{ + // In case we get the event late. + if (!g_gpu_device) + return; + + std::optional fullscreen_mode; + if (fullscreen && g_gpu_device->SupportsExclusiveFullscreen()) + { + fullscreen_mode = + GPUDevice::ExclusiveFullscreenMode::Parse(Host::GetTinyStringSettingValue("GPU", "FullscreenMode", "")); + } + std::optional exclusive_fullscreen_control; + if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) + { + exclusive_fullscreen_control = + (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); + } + + g_gpu_device->DestroyMainSwapChain(); + + Error error; + std::optional wi = + Host::AcquireRenderWindow(g_gpu_device->GetRenderAPI(), fullscreen, fullscreen_mode.has_value(), &error); + if (!wi.has_value()) + { + Host::ReportFatalError("Failed to get render window after update", error.GetDescription()); + return; + } + + // if surfaceless, just leave it + if (!wi->IsSurfaceless()) + { + if (!g_gpu_device->RecreateMainSwapChain( + wi.value(), s_state.requested_vsync, s_state.requested_allow_present_throttle, + fullscreen_mode.has_value() ? 
&fullscreen_mode.value() : nullptr, exclusive_fullscreen_control, &error)) + { + Host::ReportFatalError("Failed to change window after update", error.GetDescription()); + return; + } + } + else + { + WARNING_LOG("Switching to surfaceless rendering"); + if (!g_gpu_device->SwitchToSurfacelessRendering(&error)) + ERROR_LOG("Failed to switch to surfaceless, rendering commands may fail: {}", error.GetDescription()); + } + + DisplayWindowResizedOnThread(); +} + +void GPUThread::DisplayWindowResizedOnThread() +{ + const GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain(); + if (swap_chain) + s_state.render_window_info = swap_chain->GetWindowInfo(); + else + s_state.render_window_info = WindowInfo(); + std::atomic_thread_fence(std::memory_order_release); + + // surfaceless is usually temporary, so just ignore it + if (!swap_chain) + return; + + const float f_width = static_cast(swap_chain->GetWidth()); + const float f_height = static_cast(swap_chain->GetHeight()); + ImGuiManager::WindowResized(f_width, f_height); + InputManager::SetDisplayWindowSize(f_width, f_height); + + if (s_state.gpu_backend) + { + Host::RunOnCPUThread(&System::DisplayWindowResized); + + // If we're paused, re-present the current frame at the new window size. + if (IsSystemPaused()) + { + // Hackity hack, on some systems, presenting a single frame isn't enough to actually get it + // displayed. Two seems to be good enough. Maybe something to do with direct scanout. + Internal::PresentFrame(false, 0); + Internal::PresentFrame(false, 0); + } + + if (g_gpu_settings.gpu_resolution_scale == 0) + s_state.gpu_backend->UpdateResolutionScale(); + } +} + +const WindowInfo& GPUThread::GetRenderWindowInfo() +{ + // This is infrequently used, so we can get away with a full barrier. 
+ std::atomic_thread_fence(std::memory_order_acquire); + return s_state.render_window_info; +} + +void GPUThread::SetVSync(GPUVSyncMode mode, bool allow_present_throttle) +{ + RunOnThread([mode, allow_present_throttle]() { + if (s_state.requested_vsync == mode && s_state.requested_allow_present_throttle == allow_present_throttle) + return; + + s_state.requested_vsync = mode; + s_state.requested_allow_present_throttle = allow_present_throttle; + + if (!g_gpu_device->HasMainSwapChain()) + return; + + Error error; + if (!g_gpu_device->GetMainSwapChain()->SetVSyncMode(s_state.requested_vsync, + s_state.requested_allow_present_throttle, &error)) + { + ERROR_LOG("Failed to update vsync mode: {}", error.GetDescription()); + } + }); + + // If we're turning on vsync or turning off present throttle, we want to drain the GPU thread. + // Otherwise if it is currently behind, it'll be permanently stuck behind. + if (mode != GPUVSyncMode::Disabled) + SyncGPUThread(false); +} + +void GPUThread::PresentCurrentFrame() +{ + RunOnThread([]() { + if (s_state.run_idle_flag) + { + // If we're running idle, we're going to re-present anyway. + return; + } + + Internal::PresentFrame(false, 0); + }); +} + +void GPUThread::SleepUntilPresentTime(Timer::Value present_time) +{ + // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. + // Linux also seems to do a much better job of waking up at the requested time. + +#if !defined(__linux__) && !defined(__ANDROID__) + Timer::SleepUntil(present_time, true); +#else + Timer::SleepUntil(present_time, false); +#endif +} + +void GPUThread::Internal::PresentFrame(bool allow_skip_present, u64 present_time) +{ + const bool skip_present = (!g_gpu_device->HasMainSwapChain() || + (allow_skip_present && g_gpu_device->GetMainSwapChain()->ShouldSkipPresentingFrame() && + s_state.skipped_present_count < MAX_SKIPPED_PRESENT_COUNT)); + + if (!skip_present) + { + // acquire for IO.MousePos and system state. 
+ std::atomic_thread_fence(std::memory_order_acquire); + + FullscreenUI::Render(); + + if (s_state.gpu_backend && System::IsValid()) + ImGuiManager::RenderTextOverlays(s_state.gpu_backend.get()); + + ImGuiManager::RenderOSDMessages(); + + if (s_state.gpu_backend && System::GetState() == System::State::Running) + ImGuiManager::RenderSoftwareCursors(); + + ImGuiManager::RenderOverlayWindows(); + ImGuiManager::RenderDebugWindows(); + } + + const GPUDevice::PresentResult pres = + skip_present ? GPUDevice::PresentResult::SkipPresent : + (s_state.gpu_backend ? s_state.gpu_backend->PresentDisplay() : + g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain())); + if (pres == GPUDevice::PresentResult::OK) + { + s_state.skipped_present_count = 0; + + g_gpu_device->RenderImGui(g_gpu_device->GetMainSwapChain()); + + const GPUDevice::Features features = g_gpu_device->GetFeatures(); + const bool scheduled_present = (present_time != 0); + const bool explicit_present = (scheduled_present && (features.explicit_present && !features.timed_present)); + const bool timed_present = (scheduled_present && features.timed_present); + + if (scheduled_present && !explicit_present) + { + // No explicit present support, simulate it with Flush. + g_gpu_device->FlushCommands(); + SleepUntilPresentTime(present_time); + } + + g_gpu_device->EndPresent(g_gpu_device->GetMainSwapChain(), explicit_present, timed_present ? 
present_time : 0); + + if (g_gpu_device->IsGPUTimingEnabled()) + PerformanceCounters::AccumulateGPUTime(); + + if (explicit_present) + { + SleepUntilPresentTime(present_time); + g_gpu_device->SubmitPresent(g_gpu_device->GetMainSwapChain()); + } + } + else + { + s_state.skipped_present_count++; + + if (pres == GPUDevice::PresentResult::DeviceLost) [[unlikely]] + HandleGPUDeviceLost(); + else if (pres == GPUDevice::PresentResult::ExclusiveFullscreenLost) + HandleExclusiveFullscreenLost(); + else if (!skip_present) + g_gpu_device->FlushCommands(); + + // Still need to kick ImGui or it gets cranky. + ImGui::EndFrame(); + } + + ImGuiManager::NewFrame(); + + RestoreContextAfterPresent(); +} + +void GPUThread::Internal::RestoreContextAfterPresent() +{ + if (s_state.gpu_backend) + s_state.gpu_backend->RestoreDeviceContext(); +} + +bool GPUThread::GetRunIdleReason(RunIdleReason reason) +{ + return (s_state.run_idle_reasons & static_cast(reason)) != 0; +} + +void GPUThread::SetRunIdleReason(RunIdleReason reason, bool enabled) +{ + const u8 bit = static_cast(reason); + if (((s_state.run_idle_reasons & bit) != 0) == enabled) + return; + + s_state.run_idle_reasons = enabled ? (s_state.run_idle_reasons | bit) : (s_state.run_idle_reasons & ~bit); + UpdateRunIdle(); +} + +bool GPUThread::IsRunningIdle() +{ + return s_state.run_idle_flag; +} + +bool GPUThread::IsSystemPaused() +{ + return ((s_state.run_idle_reasons & static_cast(RunIdleReason::SystemPaused)) != 0); +} + +void GPUThread::UpdateRunIdle() +{ + DebugAssert(IsOnThread()); + + // We require either invalid-system or paused for run idle. 
+ static constexpr u8 REQUIRE_MASK = static_cast(RunIdleReason::NoGPUBackend) | + static_cast(RunIdleReason::SystemPaused) | + static_cast(RunIdleReason::LoadingScreenActive); + static constexpr u8 ACTIVATE_MASK = + static_cast(RunIdleReason::FullscreenUIActive) | static_cast(RunIdleReason::LoadingScreenActive); + + const bool new_flag = (g_gpu_device && ((s_state.run_idle_reasons & REQUIRE_MASK) != 0) && + ((s_state.run_idle_reasons & ACTIVATE_MASK) != 0)); + if (s_state.run_idle_flag == new_flag) + return; + + s_state.run_idle_flag = new_flag; + DEV_LOG("GPU thread now {} idle", new_flag ? "running" : "NOT running"); + Host::OnGPUThreadRunIdleChanged(new_flag); +} + +const std::string& GPUThread::GetGameSerial() +{ + DebugAssert(IsOnThread()); + return s_state.game_serial; +} + +void GPUThread::SetGameSerial(std::string serial) +{ + DebugAssert(!IsOnThread()); + RunOnThread([serial = std::move(serial)]() mutable { + const bool changed = (s_state.game_serial != serial); + s_state.game_serial = std::move(serial); + if (changed) + { + GPUTextureCache::GameSerialChanged(); + if (SaveStateSelectorUI::IsOpen()) + SaveStateSelectorUI::RefreshList(); + } + }); +} diff --git a/src/core/gpu_thread.h b/src/core/gpu_thread.h new file mode 100644 index 000000000..d8debf55b --- /dev/null +++ b/src/core/gpu_thread.h @@ -0,0 +1,106 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#pragma once + +#include "common/types.h" + +#include +#include + +class Error; +struct WindowInfo; + +namespace Threading { +class ThreadHandle; +} + +enum class RenderAPI : u8; +enum class GPUVSyncMode : u8; + +enum class GPURenderer : u8; +enum class GPUBackendCommandType : u8; + +class GPUBackend; +struct GPUThreadCommand; +struct GPUBackendUpdateDisplayCommand; + +namespace GPUThread { +using AsyncCallType = std::function; +using AsyncBackendCallType = std::function; + +enum class RunIdleReason : u8 +{ + NoGPUBackend = (1 << 0), + 
SystemPaused = (1 << 1), + FullscreenUIActive = (1 << 2), + LoadingScreenActive = (1 << 3), +}; + +/// Starts Big Picture UI. +bool StartFullscreenUI(bool fullscreen, Error* error); +bool IsFullscreenUIRequested(); +void StopFullscreenUI(); + +/// Backend control. +std::optional GetRequestedRenderer(); +bool CreateGPUBackend(std::string serial, GPURenderer renderer, bool upload_vram, bool fullscreen, + bool force_recreate_device, Error* error); +void DestroyGPUBackend(); +bool HasGPUBackend(); +bool IsGPUBackendRequested(); +void SetGameSerial(std::string serial); + +/// Re-presents the current frame. Call when things like window resizes happen to re-display +/// the current frame with the correct proportions. Should only be called from the CPU thread. +void PresentCurrentFrame(); + +/// Handles fullscreen transitions and such. +void UpdateDisplayWindow(bool fullscreen); + +/// Called when the window is resized. +void ResizeDisplayWindow(s32 width, s32 height, float scale); + +/// Access to main window size from CPU thread. +const WindowInfo& GetRenderWindowInfo(); + +void UpdateSettings(bool gpu_settings_changed, bool device_settings_changed); + +bool IsOnThread(); +bool IsUsingThread(); +void RunOnThread(AsyncCallType func); +void RunOnBackend(AsyncBackendCallType func, bool sync, bool spin_or_wake); +void SetVSync(GPUVSyncMode mode, bool allow_present_throttle); + +// Should only be called on the GPU thread. 
+bool GetRunIdleReason(RunIdleReason reason); +void SetRunIdleReason(RunIdleReason reason, bool enabled); +bool IsRunningIdle(); +bool IsSystemPaused(); +const std::string& GetGameSerial(); + +GPUThreadCommand* AllocateCommand(GPUBackendCommandType command, u32 size); +void PushCommand(GPUThreadCommand* cmd); +void PushCommandAndWakeThread(GPUThreadCommand* cmd); +void PushCommandAndSync(GPUThreadCommand* cmd, bool spin); +void SyncGPUThread(bool spin); + +// NOTE: Only called by GPUBackend +namespace Internal { +const Threading::ThreadHandle& GetThreadHandle(); +void ProcessStartup(); +void SetThreadEnabled(bool enabled); +void DoRunIdle(); +void RequestShutdown(); +void GPUThreadEntryPoint(); +void PresentFrame(bool allow_skip_present, u64 present_time); +void RestoreContextAfterPresent(); +} // namespace Internal +} // namespace GPUThread + +namespace Host { + +/// Called when the pause state changes, or fullscreen UI opens. +void OnGPUThreadRunIdleChanged(bool is_active); + +} // namespace Host diff --git a/src/core/gpu_thread_commands.h b/src/core/gpu_thread_commands.h new file mode 100644 index 000000000..ff14ed1c5 --- /dev/null +++ b/src/core/gpu_thread_commands.h @@ -0,0 +1,324 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#pragma once + +#include "gpu_types.h" + +#include "common/align.h" + +#include +#include +#include + +class Error; + +enum class GPUVSyncMode : u8; +class MediaCapture; +class StateWrapper; + +class GPUBackend; + +namespace System { +struct MemorySaveState; +} + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4200) // warning C4200: nonstandard extension used: zero-sized array in struct/union +#endif + +enum class GPUBackendCommandType : u8 +{ + Wraparound, + AsyncCall, + AsyncBackendCall, + Reconfigure, + Shutdown, + ClearVRAM, + ClearDisplay, + UpdateDisplay, + SubmitFrame, + BufferSwapped, + LoadState, + LoadMemoryState, + SaveMemoryState, + ReadVRAM, + 
FillVRAM, + UpdateVRAM, + CopyVRAM, + SetDrawingArea, + UpdateCLUT, + ClearCache, + DrawPolygon, + DrawPrecisePolygon, + DrawRectangle, + DrawLine, +}; + +struct GPUThreadCommand +{ + u32 size; + GPUBackendCommandType type; + + static constexpr u32 AlignCommandSize(u32 size) + { + // Ensure size is a multiple of 8 (minimum data size) so we don't end up with an unaligned command. + // NOTE: If we ever end up putting vectors in the command packets, this should be raised. + constexpr u32 COMMAND_QUEUE_ALLOCATION_ALIGNMENT = 8; + return Common::AlignUpPow2(size, COMMAND_QUEUE_ALLOCATION_ALIGNMENT); + } +}; + +struct GPUThreadReconfigureCommand : public GPUThreadCommand +{ + Error* error_ptr; + bool* out_result; + std::string game_serial; + std::optional renderer; + std::optional fullscreen; + std::optional start_fullscreen_ui; + GPUVSyncMode vsync_mode; + bool allow_present_throttle; + bool force_recreate_device; + bool upload_vram; +}; + +struct GPUThreadAsyncCallCommand : public GPUThreadCommand +{ + GPUThreadAsyncCallCommand(std::function func_) : func(std::move(func_)) {} + + std::function func; +}; + +struct GPUThreadAsyncBackendCallCommand : public GPUThreadCommand +{ + GPUThreadAsyncBackendCallCommand(std::function func_) : func(std::move(func_)) {} + + std::function func; +}; + +struct GPUBackendLoadStateCommand : public GPUThreadCommand +{ + u16 vram_data[VRAM_WIDTH * VRAM_HEIGHT]; + u16 clut_data[GPU_CLUT_SIZE]; + u32 texture_cache_state_version; + u32 texture_cache_state_size; + u8 texture_cache_state[0]; // texture_cache_state_size +}; + +struct GPUBackendDoMemoryStateCommand : public GPUThreadCommand +{ + System::MemorySaveState* memory_save_state; +}; + +struct GPUBackendFramePresentationParameters +{ + u32 frame_number; + u32 internal_frame_number; + + u64 present_time; + MediaCapture* media_capture; + + union + { + u8 bits; + + BitField allow_present_skip; + BitField present_frame; + BitField update_performance_counters; + }; +}; + +struct 
GPUBackendUpdateDisplayCommand : public GPUThreadCommand +{ + u16 display_width; + u16 display_height; + u16 display_origin_left; + u16 display_origin_top; + u16 display_vram_left; + u16 display_vram_top; + u16 display_vram_width; + u16 display_vram_height; + float display_pixel_aspect_ratio; + + u16 X; // TODO: Can we get rid of this? + + bool interlaced_display_enabled : 1; + bool interlaced_display_field : 1; + bool interlaced_display_interleaved : 1; + bool display_24bit : 1; + bool display_disabled : 1; + bool submit_frame : 1; + bool : 2; + + GPUBackendFramePresentationParameters frame; +}; + +// Only used for runahead. +struct GPUBackendSubmitFrameCommand : public GPUThreadCommand +{ + GPUBackendFramePresentationParameters frame; +}; + +struct GPUBackendReadVRAMCommand : public GPUThreadCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; +}; + +struct GPUBackendFillVRAMCommand : public GPUThreadCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; + u32 color; + bool interlaced_rendering; + u8 active_line_lsb; +}; + +struct GPUBackendUpdateVRAMCommand : public GPUThreadCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; + bool set_mask_while_drawing; + bool check_mask_before_draw; + u16 data[0]; +}; + +struct GPUBackendCopyVRAMCommand : public GPUThreadCommand +{ + u16 src_x; + u16 src_y; + u16 dst_x; + u16 dst_y; + u16 width; + u16 height; + bool set_mask_while_drawing; + bool check_mask_before_draw; +}; + +struct GPUBackendSetDrawingAreaCommand : public GPUThreadCommand +{ + GPUDrawingArea new_area; +}; + +struct GPUBackendUpdateCLUTCommand : public GPUThreadCommand +{ + GPUTexturePaletteReg reg; + bool clut_is_8bit; +}; + +struct GPUBackendDrawCommand : public GPUThreadCommand +{ + bool interlaced_rendering : 1; + + /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. 
+ bool active_line_lsb : 1; + + bool set_mask_while_drawing : 1; + bool check_mask_before_draw : 1; + + bool texture_enable : 1; + bool raw_texture_enable : 1; + bool transparency_enable : 1; + bool shading_enable : 1; + bool quad_polygon : 1; + bool dither_enable : 1; + + bool valid_w : 1; // only used for precise polygons + + // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or } + ALWAYS_INLINE u16 GetMaskAND() const { return check_mask_before_draw ? 0x8000 : 0x0000; } + ALWAYS_INLINE u16 GetMaskOR() const { return set_mask_while_drawing ? 0x8000 : 0x0000; } + + u16 num_vertices; + GPUDrawModeReg draw_mode; + GPUTexturePaletteReg palette; + GPUTextureWindow window; +}; + +struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand +{ + struct Vertex + { + s32 x, y; + union + { + struct + { + u8 r, g, b, a; + }; + u32 color; + }; + union + { + struct + { + u8 u, v; + }; + u16 texcoord; + }; + }; + + Vertex vertices[0]; +}; + +struct GPUBackendDrawPrecisePolygonCommand : public GPUBackendDrawCommand +{ + GPUBackendDrawCommand params; + + struct Vertex + { + float x, y, w; + s32 native_x, native_y; + u32 color; + u16 texcoord; + }; + + Vertex vertices[0]; +}; + +struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand +{ + u16 width, height; + u16 texcoord; + s32 x, y; + u32 color; +}; + +struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand +{ + u16 num_vertices; + + struct Vertex + { + s32 x, y; + union + { + struct + { + u8 r, g, b, a; + }; + u32 color; + }; + + ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_) + { + x = x_; + y = y_; + color = color_; + } + }; + + Vertex vertices[0]; +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index 4782272b0..2f82de723 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -12,7 +12,6 @@ #include "common/gsvector.h" #include -#include enum : u32 { @@ -405,12 +404,17 @@ union 
GPUTexturePaletteReg ALWAYS_INLINE constexpr u32 GetYBase() const { return static_cast(y); } }; -struct GPUTextureWindow +union GPUTextureWindow { - u8 and_x; - u8 and_y; - u8 or_x; - u8 or_y; + struct + { + u8 and_x; + u8 and_y; + u8 or_x; + u8 or_y; + }; + + u32 bits; ALWAYS_INLINE bool operator==(const GPUTextureWindow& rhs) const { @@ -541,182 +545,3 @@ static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{- {+2, -2, +3, -1}, // row 1 {-3, +1, -4, +0}, // row 2 {+3, -1, +2, -2}}; // row 3 - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4200) // warning C4200: nonstandard extension used: zero-sized array in struct/union -#endif - -enum class GPUBackendCommandType : u8 -{ - Wraparound, - Sync, - FillVRAM, - UpdateVRAM, - CopyVRAM, - SetDrawingArea, - UpdateCLUT, - DrawPolygon, - DrawRectangle, - DrawLine, -}; - -union GPUBackendCommandParameters -{ - u8 bits; - - BitField interlaced_rendering; - - /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. - BitField active_line_lsb; - - BitField set_mask_while_drawing; - BitField check_mask_before_draw; - - // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or } - u16 GetMaskAND() const - { - // return check_mask_before_draw ? 0x8000 : 0x0000; - return Truncate16((bits << 12) & 0x8000); - } - u16 GetMaskOR() const - { - // return set_mask_while_drawing ? 
0x8000 : 0x0000; - return Truncate16((bits << 13) & 0x8000); - } -}; - -struct GPUBackendCommand -{ - u32 size; - GPUBackendCommandType type; - GPUBackendCommandParameters params; -}; - -struct GPUBackendSyncCommand : public GPUBackendCommand -{ - bool allow_sleep; -}; - -struct GPUBackendFillVRAMCommand : public GPUBackendCommand -{ - u16 x; - u16 y; - u16 width; - u16 height; - u32 color; -}; - -struct GPUBackendUpdateVRAMCommand : public GPUBackendCommand -{ - u16 x; - u16 y; - u16 width; - u16 height; - u16 data[0]; -}; - -struct GPUBackendCopyVRAMCommand : public GPUBackendCommand -{ - u16 src_x; - u16 src_y; - u16 dst_x; - u16 dst_y; - u16 width; - u16 height; -}; - -struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand -{ - GPUDrawingArea new_area; - s32 new_clamped_area[4]; -}; - -struct GPUBackendUpdateCLUTCommand : public GPUBackendCommand -{ - GPUTexturePaletteReg reg; - bool clut_is_8bit; -}; - -struct GPUBackendDrawCommand : public GPUBackendCommand -{ - GPUDrawModeReg draw_mode; - GPURenderCommand rc; - GPUTexturePaletteReg palette; - GPUTextureWindow window; -}; - -struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand -{ - u16 num_vertices; - - struct Vertex - { - s32 x, y; - union - { - struct - { - u8 r, g, b, a; - }; - u32 color; - }; - union - { - struct - { - u8 u, v; - }; - u16 texcoord; - }; - - ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u16 texcoord_) - { - x = x_; - y = y_; - color = color_; - texcoord = texcoord_; - } - }; - - Vertex vertices[0]; -}; - -struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand -{ - s32 x, y; - u16 width, height; - u16 texcoord; - u32 color; -}; - -struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand -{ - u16 num_vertices; - - struct Vertex - { - s32 x, y; - union - { - struct - { - u8 r, g, b, a; - }; - u32 color; - }; - - ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_) - { - x = x_; - y = y_; - color = color_; - } - }; - - Vertex vertices[0]; -}; - 
-#ifdef _MSC_VER -#pragma warning(pop) -#endif diff --git a/src/core/host.cpp b/src/core/host.cpp index cc5da139f..0c530fde4 100644 --- a/src/core/host.cpp +++ b/src/core/host.cpp @@ -2,19 +2,13 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "host.h" -#include "fullscreen_ui.h" #include "gpu.h" -#include "imgui_overlays.h" -#include "shader_cache_version.h" #include "system.h" #include "system_private.h" #include "scmversion/scmversion.h" #include "util/compress_helpers.h" -#include "util/gpu_device.h" -#include "util/imgui_manager.h" -#include "util/input_manager.h" #include "common/assert.h" #include "common/error.h" @@ -342,181 +336,3 @@ std::string Host::GetHTTPUserAgent() { return fmt::format("DuckStation for {} ({}) {}", TARGET_OS_STR, CPU_ARCH_STR, g_scm_tag_str); } - -bool Host::CreateGPUDevice(RenderAPI api, bool fullscreen, Error* error) -{ - DebugAssert(!g_gpu_device); - - INFO_LOG("Trying to create a {} GPU device...", GPUDevice::RenderAPIToString(api)); - g_gpu_device = GPUDevice::CreateDeviceForAPI(api); - - std::optional fullscreen_mode; - if (fullscreen && g_gpu_device && g_gpu_device->SupportsExclusiveFullscreen()) - { - fullscreen_mode = - GPUDevice::ExclusiveFullscreenMode::Parse(Host::GetTinyStringSettingValue("GPU", "FullscreenMode", "")); - } - std::optional exclusive_fullscreen_control; - if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) - { - exclusive_fullscreen_control = - (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); - } - - u32 disabled_features = 0; - if (g_settings.gpu_disable_dual_source_blend) - disabled_features |= GPUDevice::FEATURE_MASK_DUAL_SOURCE_BLEND; - if (g_settings.gpu_disable_framebuffer_fetch) - disabled_features |= GPUDevice::FEATURE_MASK_FRAMEBUFFER_FETCH; - if (g_settings.gpu_disable_texture_buffers) - disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_BUFFERS; - if (g_settings.gpu_disable_memory_import) - 
disabled_features |= GPUDevice::FEATURE_MASK_MEMORY_IMPORT; - if (g_settings.gpu_disable_raster_order_views) - disabled_features |= GPUDevice::FEATURE_MASK_RASTER_ORDER_VIEWS; - if (g_settings.gpu_disable_compute_shaders) - disabled_features |= GPUDevice::FEATURE_MASK_COMPUTE_SHADERS; - if (g_settings.gpu_disable_compressed_textures) - disabled_features |= GPUDevice::FEATURE_MASK_COMPRESSED_TEXTURES; - - // Don't dump shaders on debug builds for Android, users will complain about storage... -#if !defined(__ANDROID__) || defined(_DEBUG) - const std::string_view shader_dump_directory(EmuFolders::DataRoot); -#else - const std::string_view shader_dump_directory; -#endif - - Error create_error; - std::optional wi; - if (!g_gpu_device || - !(wi = Host::AcquireRenderWindow(api, fullscreen, fullscreen_mode.has_value(), &create_error)).has_value() || - !g_gpu_device->Create( - g_settings.gpu_adapter, static_cast(disabled_features), shader_dump_directory, - g_settings.gpu_disable_shader_cache ? std::string_view() : std::string_view(EmuFolders::Cache), - SHADER_CACHE_VERSION, g_settings.gpu_use_debug_device, wi.value(), System::GetEffectiveVSyncMode(), - System::ShouldAllowPresentThrottle(), fullscreen_mode.has_value() ? 
&fullscreen_mode.value() : nullptr, - exclusive_fullscreen_control, &create_error)) - { - ERROR_LOG("Failed to create GPU device: {}", create_error.GetDescription()); - if (g_gpu_device) - g_gpu_device->Destroy(); - g_gpu_device.reset(); - if (wi.has_value()) - Host::ReleaseRenderWindow(); - - Error::SetStringFmt( - error, - TRANSLATE_FS("System", "Failed to create render device:\n\n{0}\n\nThis may be due to your GPU not supporting the " - "chosen renderer ({1}), or because your graphics drivers need to be updated."), - create_error.GetDescription(), GPUDevice::RenderAPIToString(api)); - return false; - } - - if (!ImGuiManager::Initialize(g_settings.display_osd_scale / 100.0f, g_settings.display_osd_margin, &create_error)) - { - ERROR_LOG("Failed to initialize ImGuiManager: {}", create_error.GetDescription()); - Error::SetStringFmt(error, "Failed to initialize ImGuiManager: {}", create_error.GetDescription()); - g_gpu_device->Destroy(); - g_gpu_device.reset(); - Host::ReleaseRenderWindow(); - return false; - } - - InputManager::SetDisplayWindowSize(ImGuiManager::GetWindowWidth(), ImGuiManager::GetWindowHeight()); - return true; -} - -void Host::UpdateDisplayWindow(bool fullscreen) -{ - if (!g_gpu_device) - return; - - const GPUVSyncMode vsync_mode = System::GetEffectiveVSyncMode(); - const bool allow_present_throttle = System::ShouldAllowPresentThrottle(); - std::optional fullscreen_mode; - if (fullscreen && g_gpu_device->SupportsExclusiveFullscreen()) - { - fullscreen_mode = - GPUDevice::ExclusiveFullscreenMode::Parse(Host::GetTinyStringSettingValue("GPU", "FullscreenMode", "")); - } - std::optional exclusive_fullscreen_control; - if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) - { - exclusive_fullscreen_control = - (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); - } - - g_gpu_device->DestroyMainSwapChain(); - - Error error; - std::optional wi = - 
Host::AcquireRenderWindow(g_gpu_device->GetRenderAPI(), fullscreen, fullscreen_mode.has_value(), &error); - if (!wi.has_value()) - { - Host::ReportFatalError("Failed to get render window after update", error.GetDescription()); - return; - } - - // if surfaceless, just leave it - if (wi->IsSurfaceless()) - { - DEV_LOG("Switching to surfaceless device"); - if (!g_gpu_device->SwitchToSurfacelessRendering(&error)) - ERROR_LOG("Failed to switch to surfaceless, rendering commands may fail: {}", error.GetDescription()); - - return; - } - - if (!g_gpu_device->RecreateMainSwapChain(wi.value(), vsync_mode, allow_present_throttle, - fullscreen_mode.has_value() ? &fullscreen_mode.value() : nullptr, - exclusive_fullscreen_control, &error)) - { - Host::ReportFatalError("Failed to change window after update", error.GetDescription()); - return; - } - - const float f_width = static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()); - const float f_height = static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); - ImGuiManager::WindowResized(f_width, f_height); - InputManager::SetDisplayWindowSize(f_width, f_height); - System::DisplayWindowResized(); -} - -void Host::ResizeDisplayWindow(s32 width, s32 height, float scale) -{ - if (!g_gpu_device || !g_gpu_device->HasMainSwapChain()) - return; - - DEV_LOG("Display window resized to {}x{}", width, height); - - Error error; - if (!g_gpu_device->GetMainSwapChain()->ResizeBuffers(width, height, scale, &error)) - { - ERROR_LOG("Failed to resize main swap chain: {}", error.GetDescription()); - UpdateDisplayWindow(Host::IsFullscreen()); - return; - } - - const float f_width = static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()); - const float f_height = static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); - ImGuiManager::WindowResized(f_width, f_height); - InputManager::SetDisplayWindowSize(f_width, f_height); - System::DisplayWindowResized(); -} - -void Host::ReleaseGPUDevice() -{ - if (!g_gpu_device) - return; - - 
ImGuiManager::DestroyAllDebugWindows(); - ImGuiManager::DestroyOverlayTextures(); - FullscreenUI::Shutdown(); - ImGuiManager::Shutdown(); - - INFO_LOG("Destroying {} GPU device...", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); - g_gpu_device->Destroy(); - g_gpu_device.reset(); - - Host::ReleaseRenderWindow(); -} diff --git a/src/core/host.h b/src/core/host.h index 41896a522..aa2856b50 100644 --- a/src/core/host.h +++ b/src/core/host.h @@ -75,10 +75,6 @@ std::span> GetAvailableLanguageList(); /// Refreshes the UI when the language is changed. bool ChangeLanguage(const char* new_language); -/// Displays a loading screen with the logo, rendered with ImGui. Use when executing possibly-time-consuming tasks -/// such as compiling shaders when starting up. -void DisplayLoadingScreen(const char* message, int progress_min = -1, int progress_max = -1, int progress_value = -1); - /// Safely executes a function on the VM thread. void RunOnCPUThread(std::function function, bool block = false); @@ -96,21 +92,6 @@ bool IsFullscreen(); /// Alters fullscreen state of hosting application. void SetFullscreen(bool enabled); -/// Attempts to create the rendering device backend. -bool CreateGPUDevice(RenderAPI api, bool fullscreen, Error* error); - -/// Handles fullscreen transitions and such. -void UpdateDisplayWindow(bool fullscreen); - -/// Called when the window is resized. -void ResizeDisplayWindow(s32 width, s32 height, float scale); - -/// Destroys any active rendering device. -void ReleaseGPUDevice(); - -/// Called at the end of the frame, before presentation. -void FrameDone(); - namespace Internal { /// Returns true if the host should use portable mode. 
diff --git a/src/core/host_interface_progress_callback.cpp b/src/core/host_interface_progress_callback.cpp deleted file mode 100644 index c9fbc58e6..000000000 --- a/src/core/host_interface_progress_callback.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#include "host_interface_progress_callback.h" -#include "host.h" - -#include "common/log.h" - -LOG_CHANNEL(Host); - -HostInterfaceProgressCallback::HostInterfaceProgressCallback() : ProgressCallback() -{ -} - -void HostInterfaceProgressCallback::PushState() -{ - ProgressCallback::PushState(); -} - -void HostInterfaceProgressCallback::PopState() -{ - ProgressCallback::PopState(); - Redraw(true); -} - -void HostInterfaceProgressCallback::SetCancellable(bool cancellable) -{ - ProgressCallback::SetCancellable(cancellable); - Redraw(true); -} - -void HostInterfaceProgressCallback::SetTitle(const std::string_view title) -{ - // todo? -} - -void HostInterfaceProgressCallback::SetStatusText(const std::string_view text) -{ - ProgressCallback::SetStatusText(text); - Redraw(true); -} - -void HostInterfaceProgressCallback::SetProgressRange(u32 range) -{ - u32 last_range = m_progress_range; - - ProgressCallback::SetProgressRange(range); - - if (m_progress_range != last_range) - Redraw(false); -} - -void HostInterfaceProgressCallback::SetProgressValue(u32 value) -{ - u32 lastValue = m_progress_value; - - ProgressCallback::SetProgressValue(value); - - if (m_progress_value != lastValue) - Redraw(false); -} - -void HostInterfaceProgressCallback::Redraw(bool force) -{ - if (m_last_progress_percent < 0 && m_open_time.GetTimeSeconds() < m_open_delay) - return; - - const int percent = - static_cast((static_cast(m_progress_value) / static_cast(m_progress_range)) * 100.0f); - if (percent == m_last_progress_percent && !force) - return; - - m_last_progress_percent = percent; - Host::DisplayLoadingScreen(m_status_text.c_str(), 0, 
static_cast(m_progress_range), - static_cast(m_progress_value)); -} - -void HostInterfaceProgressCallback::ModalError(const std::string_view message) -{ - ERROR_LOG(message); - Host::ReportErrorAsync("Error", message); -} - -bool HostInterfaceProgressCallback::ModalConfirmation(const std::string_view message) -{ - INFO_LOG(message); - return Host::ConfirmMessage("Confirm", message); -} diff --git a/src/core/host_interface_progress_callback.h b/src/core/host_interface_progress_callback.h deleted file mode 100644 index b46ac13d3..000000000 --- a/src/core/host_interface_progress_callback.h +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#pragma once - -#include "common/progress_callback.h" -#include "common/timer.h" - -class HostInterfaceProgressCallback final : public ProgressCallback -{ -public: - HostInterfaceProgressCallback(); - - ALWAYS_INLINE void SetOpenDelay(float delay) { m_open_delay = delay; } - - void PushState() override; - void PopState() override; - - void SetCancellable(bool cancellable) override; - void SetTitle(const std::string_view title) override; - void SetStatusText(const std::string_view text) override; - void SetProgressRange(u32 range) override; - void SetProgressValue(u32 value) override; - - void ModalError(const std::string_view message) override; - bool ModalConfirmation(const std::string_view message) override; - -private: - void Redraw(bool force); - - Timer m_open_time; - float m_open_delay = 1.0f; - int m_last_progress_percent = -1; -}; diff --git a/src/core/hotkeys.cpp b/src/core/hotkeys.cpp index f2ae0568c..309893d27 100644 --- a/src/core/hotkeys.cpp +++ b/src/core/hotkeys.cpp @@ -8,6 +8,7 @@ #include "fullscreen_ui.h" #include "gpu.h" #include "gpu_hw_texture_cache.h" +#include "gpu_thread.h" #include "host.h" #include "imgui_overlays.h" #include "settings.h" @@ -59,8 +60,7 @@ static void HotkeyModifyResolutionScale(s32 increment) if 
(System::IsValid()) { System::ClearMemorySaveStates(true); - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); + GPUThread::UpdateSettings(true, false); } } @@ -377,10 +377,9 @@ DEFINE_HOTKEY("TogglePGXP", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOO { System::ClearMemorySaveStates(true); - Settings old_settings = g_settings; g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); + GPUThread::UpdateSettings(true, false); + Host::AddKeyedOSDMessage("TogglePGXP", g_settings.gpu_pgxp_enable ? TRANSLATE_STR("OSDMessage", "PGXP is now enabled.") : @@ -427,13 +426,18 @@ DEFINE_HOTKEY("ToggleInternalPostProcessing", TRANSLATE_NOOP("Hotkeys", "Graphic DEFINE_HOTKEY("ReloadPostProcessingShaders", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOOP("Hotkeys", "Reload Post Processing Shaders"), [](s32 pressed) { if (!pressed && System::IsValid()) - PostProcessing::ReloadShaders(); + { + GPUThread::RunOnThread([]() { + if (GPUThread::HasGPUBackend()) + PostProcessing::ReloadShaders(); + }); + } }) DEFINE_HOTKEY("ReloadTextureReplacements", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOOP("Hotkeys", "Reload Texture Replacements"), [](s32 pressed) { if (!pressed && System::IsValid()) - GPUTextureCache::ReloadTextureReplacements(true); + GPUThread::RunOnThread([]() { GPUTextureCache::ReloadTextureReplacements(true); }); }) DEFINE_HOTKEY("ToggleWidescreen", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOOP("Hotkeys", "Toggle Widescreen"), @@ -451,11 +455,9 @@ DEFINE_HOTKEY("TogglePGXPDepth", TRANSLATE_NOOP("Hotkeys", "Graphics"), System::ClearMemorySaveStates(true); - const Settings old_settings = g_settings; g_settings.gpu_pgxp_depth_buffer = !g_settings.gpu_pgxp_depth_buffer; + GPUThread::UpdateSettings(true, false); - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); Host::AddKeyedOSDMessage("TogglePGXPDepth", g_settings.gpu_pgxp_depth_buffer ? 
TRANSLATE_STR("OSDMessage", "PGXP Depth Buffer is now enabled.") : @@ -473,11 +475,9 @@ DEFINE_HOTKEY("TogglePGXPCPU", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_ System::ClearMemorySaveStates(true); - const Settings old_settings = g_settings; + // GPU thread is unchanged g_settings.gpu_pgxp_cpu = !g_settings.gpu_pgxp_cpu; - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); Host::AddKeyedOSDMessage("TogglePGXPCPU", g_settings.gpu_pgxp_cpu ? TRANSLATE_STR("OSDMessage", "PGXP CPU mode is now enabled.") : @@ -587,29 +587,31 @@ DEFINE_HOTKEY("AudioVolumeDown", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_N DEFINE_HOTKEY("LoadSelectedSaveState", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Load From Selected Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread(SaveStateSelectorUI::LoadCurrentSlot); + GPUThread::RunOnThread(SaveStateSelectorUI::LoadCurrentSlot); }) DEFINE_HOTKEY("SaveSelectedSaveState", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Save To Selected Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread(SaveStateSelectorUI::SaveCurrentSlot); + GPUThread::RunOnThread(SaveStateSelectorUI::SaveCurrentSlot); }) DEFINE_HOTKEY("SelectPreviousSaveStateSlot", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Select Previous Save Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread([]() { SaveStateSelectorUI::SelectPreviousSlot(true); }); + GPUThread::RunOnThread([]() { SaveStateSelectorUI::SelectPreviousSlot(true); }); }) DEFINE_HOTKEY("SelectNextSaveStateSlot", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Select Next Save Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread([]() { SaveStateSelectorUI::SelectNextSlot(true); }); + GPUThread::RunOnThread([]() { SaveStateSelectorUI::SelectNextSlot(true); }); }) DEFINE_HOTKEY("SaveStateAndSelectNextSlot", TRANSLATE_NOOP("Hotkeys", "Save States"), 
TRANSLATE_NOOP("Hotkeys", "Save State and Select Next Slot"), [](s32 pressed) { if (!pressed && System::IsValid()) { - SaveStateSelectorUI::SaveCurrentSlot(); - SaveStateSelectorUI::SelectNextSlot(false); + GPUThread::RunOnThread([]() { + SaveStateSelectorUI::SaveCurrentSlot(); + SaveStateSelectorUI::SelectNextSlot(false); + }); } }) diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index 33f5479ac..1fa9b5a79 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -9,6 +9,8 @@ #include "dma.h" #include "fullscreen_ui.h" #include "gpu.h" +#include "gpu_backend.h" +#include "gpu_thread.h" #include "host.h" #include "mdec.h" #include "performance_counters.h" @@ -70,10 +72,10 @@ struct DebugWindowInfo } // namespace static void FormatProcessorStat(SmallStringBase& text, double usage, double time); -static void DrawPerformanceOverlay(float& position_y, float scale, float margin, float spacing); +static void DrawPerformanceOverlay(const GPUBackend* gpu, float& position_y, float scale, float margin, float spacing); static void DrawMediaCaptureOverlay(float& position_y, float scale, float margin, float spacing); static void DrawFrameTimeOverlay(float& position_y, float scale, float margin, float spacing); -static void DrawEnhancementsOverlay(); +static void DrawEnhancementsOverlay(const GPUBackend* gpu); static void DrawInputsOverlay(); #ifndef __ANDROID__ @@ -119,93 +121,22 @@ static std::tuple GetMinMax(std::span values) return std::tie(min, max); } -void Host::DisplayLoadingScreen(const char* message, int progress_min /*= -1*/, int progress_max /*= -1*/, - int progress_value /*= -1*/) +bool ImGuiManager::AreAnyDebugWindowsEnabled(const SettingsInterface& si) { - if (!g_gpu_device || !g_gpu_device->HasMainSwapChain()) +#ifndef __ANDROID__ + const bool block_all = Achievements::IsHardcoreModeActive(); + if (block_all) + return false; + + for (size_t i = 0; i < NUM_DEBUG_WINDOWS; i++) { - INFO_LOG("{}: {}/{}", message, 
progress_value, progress_max); - return; + const DebugWindowInfo& info = s_debug_window_info[i]; + if (si.GetBoolValue(DEBUG_WINDOW_CONFIG_SECTION, info.name, false)) + return true; } +#endif - const auto& io = ImGui::GetIO(); - const float scale = ImGuiManager::GetGlobalScale(); - const float width = (400.0f * scale); - const bool has_progress = (progress_min < progress_max); - - // eat the last imgui frame, it might've been partially rendered by the caller. - ImGui::EndFrame(); - ImGui::NewFrame(); - - const float logo_width = 260.0f * scale; - const float logo_height = 260.0f * scale; - - ImGui::SetNextWindowSize(ImVec2(logo_width, logo_height), ImGuiCond_Always); - ImGui::SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, (io.DisplaySize.y * 0.5f) - (50.0f * scale)), - ImGuiCond_Always, ImVec2(0.5f, 0.5f)); - if (ImGui::Begin("LoadingScreenLogo", nullptr, - ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_NoMove | - ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoNav | - ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoFocusOnAppearing | - ImGuiWindowFlags_NoBackground)) - { - GPUTexture* tex = ImGuiFullscreen::GetCachedTexture("images/duck.png"); - if (tex) - ImGui::Image(tex, ImVec2(logo_width, logo_height)); - } - ImGui::End(); - - const float padding_and_rounding = 18.0f * scale; - ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, padding_and_rounding); - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(padding_and_rounding, padding_and_rounding)); - ImGui::SetNextWindowSize(ImVec2(width, (has_progress ? 
90.0f : 55.0f) * scale), ImGuiCond_Always); - ImGui::SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, (io.DisplaySize.y * 0.5f) + (100.0f * scale)), - ImGuiCond_Always, ImVec2(0.5f, 0.0f)); - if (ImGui::Begin("LoadingScreen", nullptr, - ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_NoMove | - ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoNav | - ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoFocusOnAppearing)) - { - if (has_progress) - { - ImGui::TextUnformatted(message); - - TinyString buf; - buf.format("{}/{}", progress_value, progress_max); - - const ImVec2 prog_size = ImGui::CalcTextSize(buf.c_str(), buf.end_ptr()); - ImGui::SameLine(); - ImGui::SetCursorPosX(width - padding_and_rounding - prog_size.x); - ImGui::TextUnformatted(buf.c_str(), buf.end_ptr()); - ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 5.0f); - - ImGui::ProgressBar(static_cast(progress_value) / static_cast(progress_max - progress_min), - ImVec2(-1.0f, 0.0f), ""); - INFO_LOG("{}: {}", message, buf); - } - else - { - const ImVec2 text_size(ImGui::CalcTextSize(message)); - ImGui::SetCursorPosX((width - text_size.x) / 2.0f); - ImGui::TextUnformatted(message); - INFO_LOG(message); - } - } - ImGui::End(); - ImGui::PopStyleVar(2); - - ImGui::EndFrame(); - - // TODO: Glass effect or something. 
- - GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain(); - if (g_gpu_device->BeginPresent(swap_chain) == GPUDevice::PresentResult::OK) - { - g_gpu_device->RenderImGui(swap_chain); - g_gpu_device->EndPresent(swap_chain, false); - } - - ImGui::NewFrame(); + return false; } bool ImGuiManager::UpdateDebugWindowConfig() @@ -284,26 +215,28 @@ void ImGuiManager::DestroyAllDebugWindows() #endif } -void ImGuiManager::RenderTextOverlays() +void ImGuiManager::RenderTextOverlays(const GPUBackend* gpu) { - const System::State state = System::GetState(); - if (state != System::State::Shutdown) - { - const float scale = ImGuiManager::GetGlobalScale(); - const float f_margin = ImGuiManager::GetScreenMargin() * scale; - const float margin = ImCeil(ImGuiManager::GetScreenMargin() * scale); - const float spacing = ImCeil(5.0f * scale); - float position_y = ImFloor(f_margin); - DrawPerformanceOverlay(position_y, scale, margin, spacing); - DrawFrameTimeOverlay(position_y, scale, margin, spacing); - DrawMediaCaptureOverlay(position_y, scale, margin, spacing); + // Don't draw anything with loading screen open, it'll be nonsensical. 
+ if (ImGuiFullscreen::IsLoadingScreenOpen()) + return; - if (g_settings.display_show_enhancements && state != System::State::Paused) - DrawEnhancementsOverlay(); + const bool paused = GPUThread::IsSystemPaused(); - if (g_settings.display_show_inputs && state != System::State::Paused) - DrawInputsOverlay(); - } + const float scale = ImGuiManager::GetGlobalScale(); + const float f_margin = ImGuiManager::GetScreenMargin() * scale; + const float margin = ImCeil(ImGuiManager::GetScreenMargin() * scale); + const float spacing = ImCeil(5.0f * scale); + float position_y = ImFloor(f_margin); + DrawPerformanceOverlay(gpu, position_y, scale, margin, spacing); + DrawFrameTimeOverlay(position_y, scale, margin, spacing); + DrawMediaCaptureOverlay(position_y, scale, margin, spacing); + + if (g_gpu_settings.display_show_enhancements && !paused) + DrawEnhancementsOverlay(gpu); + + if (g_gpu_settings.display_show_inputs && !paused) + DrawInputsOverlay(); } void ImGuiManager::FormatProcessorStat(SmallStringBase& text, double usage, double time) @@ -317,12 +250,13 @@ void ImGuiManager::FormatProcessorStat(SmallStringBase& text, double usage, doub text.append_format("{:.1f}% ({:.2f}ms)", usage, time); } -void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float margin, float spacing) +void ImGuiManager::DrawPerformanceOverlay(const GPUBackend* gpu, float& position_y, float scale, float margin, + float spacing) { - if (!(g_settings.display_show_fps || g_settings.display_show_speed || g_settings.display_show_gpu_stats || - g_settings.display_show_resolution || g_settings.display_show_cpu_usage || - (g_settings.display_show_status_indicators && - (System::IsPaused() || System::IsFastForwardEnabled() || System::IsTurboEnabled())))) + if (!(g_gpu_settings.display_show_fps || g_gpu_settings.display_show_speed || g_gpu_settings.display_show_gpu_stats || + g_gpu_settings.display_show_resolution || g_gpu_settings.display_show_cpu_usage || + 
(g_gpu_settings.display_show_status_indicators && + (GPUThread::IsSystemPaused() || System::IsFastForwardEnabled() || System::IsTurboEnabled())))) { return; } @@ -352,9 +286,9 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float if (state == System::State::Running) { const float speed = PerformanceCounters::GetEmulationSpeed(); - if (g_settings.display_show_fps) + if (g_gpu_settings.display_show_fps) text.append_format("G: {:.2f} | V: {:.2f}", PerformanceCounters::GetFPS(), PerformanceCounters::GetVPS()); - if (g_settings.display_show_speed) + if (g_gpu_settings.display_show_speed) { text.append_format("{}{}%", text.empty() ? "" : " | ", static_cast(std::round(speed))); @@ -377,19 +311,19 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float DRAW_LINE(fixed_font, text, color); } - if (g_settings.display_show_gpu_stats) + if (g_gpu_settings.display_show_gpu_stats) { - g_gpu->GetStatsString(text); + gpu->GetStatsString(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); - g_gpu->GetMemoryStatsString(text); + gpu->GetMemoryStatsString(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_resolution) + if (g_gpu_settings.display_show_resolution) { - const u32 resolution_scale = g_gpu->GetResolutionScale(); - const auto [display_width, display_height] = g_gpu->GetFullDisplayResolution(); // wrong + const u32 resolution_scale = gpu->GetResolutionScale(); + const auto [display_width, display_height] = g_gpu->GetFullDisplayResolution(); // NOTE: Racey read. 
const bool interlaced = g_gpu->IsInterlacedDisplayEnabled(); const bool pal = g_gpu->IsInPALMode(); text.format("{}x{} {} {} [{}x]", display_width * resolution_scale, display_height * resolution_scale, @@ -397,13 +331,13 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_latency_stats) + if (g_gpu_settings.display_show_latency_stats) { System::FormatLatencyStats(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_cpu_usage) + if (g_gpu_settings.display_show_cpu_usage) { text.format("{:.2f}ms | {:.2f}ms | {:.2f}ms", PerformanceCounters::GetMinimumFrameTime(), PerformanceCounters::GetAverageFrameTime(), PerformanceCounters::GetMaximumFrameTime()); @@ -454,11 +388,11 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float PerformanceCounters::GetCPUThreadAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); - if (g_gpu->GetSWThread()) + if (g_gpu_settings.gpu_use_thread) { - text.assign("SW: "); - FormatProcessorStat(text, PerformanceCounters::GetSWThreadUsage(), - PerformanceCounters::GetSWThreadAverageTime()); + text.assign("RNDR: "); + FormatProcessorStat(text, PerformanceCounters::GetGPUThreadUsage(), + PerformanceCounters::GetGPUThreadAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } @@ -472,14 +406,14 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float #endif } - if (g_settings.display_show_gpu_usage && g_gpu_device->IsGPUTimingEnabled()) + if (g_gpu_settings.display_show_gpu_usage && g_gpu_device->IsGPUTimingEnabled()) { text.assign("GPU: "); FormatProcessorStat(text, PerformanceCounters::GetGPUUsage(), PerformanceCounters::GetGPUAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_status_indicators) + if 
(g_gpu_settings.display_show_status_indicators) { const bool rewinding = System::IsRewinding(); if (rewinding || System::IsFastForwardEnabled() || System::IsTurboEnabled()) @@ -489,7 +423,7 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float } } } - else if (g_settings.display_show_status_indicators && state == System::State::Paused && + else if (g_gpu_settings.display_show_status_indicators && state == System::State::Paused && !FullscreenUI::HasActiveWindow()) { text.assign(ICON_EMOJI_PAUSE); @@ -499,12 +433,12 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float #undef DRAW_LINE } -void ImGuiManager::DrawEnhancementsOverlay() +void ImGuiManager::DrawEnhancementsOverlay(const GPUBackend* gpu) { LargeString text; text.append_format("{} {}-{}", Settings::GetConsoleRegionName(System::GetRegion()), GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), - g_gpu->IsHardwareRenderer() ? "HW" : "SW"); + GPUBackend::IsUsingHardwareBackend() ? 
"HW" : "SW"); if (g_settings.rewind_enable) text.append_format(" RW={}/{}", g_settings.rewind_save_frequency, g_settings.rewind_save_slots); @@ -626,7 +560,7 @@ void ImGuiManager::DrawMediaCaptureOverlay(float& position_y, float scale, float void ImGuiManager::DrawFrameTimeOverlay(float& position_y, float scale, float margin, float spacing) { - if (!g_settings.display_show_frame_times || System::IsPaused()) + if (!g_settings.display_show_frame_times || GPUThread::IsSystemPaused()) return; const float shadow_offset = std::ceil(1.0f * scale); @@ -864,8 +798,6 @@ bool SaveStateSelectorUI::IsOpen() void SaveStateSelectorUI::Open(float open_time /* = DEFAULT_OPEN_TIME */) { - const std::string& serial = System::GetGameSerial(); - s_state.open_time = 0.0f; s_state.close_time = open_time; @@ -876,7 +808,7 @@ void SaveStateSelectorUI::Open(float open_time /* = DEFAULT_OPEN_TIME */) s_state.placeholder_texture = ImGuiFullscreen::LoadTexture("no-save.png"); s_state.is_open = true; - RefreshList(serial); + RefreshList(); RefreshHotkeyLegend(); } @@ -889,7 +821,7 @@ void SaveStateSelectorUI::Close() s_state.next_legend = {}; } -void SaveStateSelectorUI::RefreshList(const std::string& serial) +void SaveStateSelectorUI::RefreshList() { for (ListEntry& entry : s_state.slots) { @@ -898,9 +830,7 @@ void SaveStateSelectorUI::RefreshList(const std::string& serial) } s_state.slots.clear(); - if (System::IsShutdown()) - return; - + const std::string& serial = GPUThread::GetGameSerial(); if (!serial.empty()) { for (s32 i = 1; i <= System::PER_GAME_SAVE_STATE_SLOTS; i++) @@ -956,6 +886,7 @@ void SaveStateSelectorUI::Clear() void SaveStateSelectorUI::ClearList() { + DebugAssert(GPUThread::IsOnThread()); for (ListEntry& li : s_state.slots) { if (li.preview_texture) @@ -1001,7 +932,7 @@ void SaveStateSelectorUI::SelectNextSlot(bool open_selector) s_state.current_slot++; if (s_state.current_slot >= total_slots) { - if (!System::GetGameSerial().empty()) + if 
(!GPUThread::GetGameSerial().empty()) s_state.current_slot_global ^= true; s_state.current_slot -= total_slots; } @@ -1024,7 +955,7 @@ void SaveStateSelectorUI::SelectPreviousSlot(bool open_selector) s_state.current_slot--; if (s_state.current_slot < 0) { - if (!System::GetGameSerial().empty()) + if (!GPUThread::GetGameSerial().empty()) s_state.current_slot_global ^= true; s_state.current_slot += s_state.current_slot_global ? System::GLOBAL_SAVE_STATE_SLOTS : System::PER_GAME_SAVE_STATE_SLOTS; @@ -1245,7 +1176,7 @@ std::string SaveStateSelectorUI::GetCurrentSlotPath() std::string filename; if (!s_state.current_slot_global) { - if (const std::string& serial = System::GetGameSerial(); !serial.empty()) + if (const std::string& serial = GPUThread::GetGameSerial(); !serial.empty()) filename = System::GetGameSaveStateFileName(serial, s_state.current_slot + 1); } else @@ -1258,18 +1189,22 @@ std::string SaveStateSelectorUI::GetCurrentSlotPath() void SaveStateSelectorUI::LoadCurrentSlot() { + DebugAssert(GPUThread::IsOnThread()); + if (std::string path = GetCurrentSlotPath(); !path.empty()) { if (FileSystem::FileExists(path.c_str())) { - Error error; - if (!System::LoadState(path.c_str(), &error, true)) - { - Host::AddKeyedOSDMessage("LoadState", - fmt::format(TRANSLATE_FS("OSDMessage", "Failed to load state from slot {0}:\n{1}"), - GetCurrentSlot(), error.GetDescription()), - Host::OSD_ERROR_DURATION); - } + Host::RunOnCPUThread([path = std::move(path)]() { + Error error; + if (!System::LoadState(path.c_str(), &error, true)) + { + Host::AddKeyedOSDMessage("LoadState", + fmt::format(TRANSLATE_FS("OSDMessage", "Failed to load state from slot {0}:\n{1}"), + GetCurrentSlot(), error.GetDescription()), + Host::OSD_ERROR_DURATION); + } + }); } else { @@ -1289,14 +1224,16 @@ void SaveStateSelectorUI::SaveCurrentSlot() { if (std::string path = GetCurrentSlotPath(); !path.empty()) { - Error error; - if (!System::SaveState(std::move(path), &error, 
g_settings.create_save_state_backups, false)) - { - Host::AddIconOSDMessage("SaveState", ICON_EMOJI_WARNING, - fmt::format(TRANSLATE_FS("OSDMessage", "Failed to save state to slot {0}:\n{1}"), - GetCurrentSlot(), error.GetDescription()), - Host::OSD_ERROR_DURATION); - } + Host::RunOnCPUThread([path = std::move(path)]() { + Error error; + if (!System::SaveState(std::move(path), &error, g_settings.create_save_state_backups, false)) + { + Host::AddIconOSDMessage("SaveState", ICON_EMOJI_WARNING, + fmt::format(TRANSLATE_FS("OSDMessage", "Failed to save state to slot {0}:\n{1}"), + GetCurrentSlot(), error.GetDescription()), + Host::OSD_ERROR_DURATION); + } + }); } Close(); @@ -1323,7 +1260,7 @@ void SaveStateSelectorUI::ShowSlotOSDMessage() void ImGuiManager::RenderOverlayWindows() { const System::State state = System::GetState(); - if (state != System::State::Shutdown) + if (state == System::State::Paused || state == System::State::Running) { if (SaveStateSelectorUI::s_state.is_open) SaveStateSelectorUI::Draw(); diff --git a/src/core/imgui_overlays.h b/src/core/imgui_overlays.h index 7c9f26540..32d0dce69 100644 --- a/src/core/imgui_overlays.h +++ b/src/core/imgui_overlays.h @@ -7,14 +7,23 @@ #include +class SettingsInterface; + +class GPUBackend; + namespace ImGuiManager { -void RenderTextOverlays(); + +static constexpr const char* LOGO_IMAGE_NAME = "images/duck.png"; + +void RenderTextOverlays(const GPUBackend* gpu); +bool AreAnyDebugWindowsEnabled(const SettingsInterface& si); void RenderDebugWindows(); bool UpdateDebugWindowConfig(); void DestroyAllDebugWindows(); void RenderOverlayWindows(); void DestroyOverlayTextures(); + } // namespace ImGuiManager namespace SaveStateSelectorUI { @@ -23,7 +32,7 @@ static constexpr float DEFAULT_OPEN_TIME = 7.5f; bool IsOpen(); void Open(float open_time = DEFAULT_OPEN_TIME); -void RefreshList(const std::string& serial); +void RefreshList(); void Clear(); void ClearList(); void Close(); diff --git 
a/src/core/performance_counters.cpp b/src/core/performance_counters.cpp index 14a93fc95..aa6f5aad2 100644 --- a/src/core/performance_counters.cpp +++ b/src/core/performance_counters.cpp @@ -3,6 +3,8 @@ #include "performance_counters.h" #include "gpu.h" +#include "gpu_backend.h" +#include "gpu_thread.h" #include "system.h" #include "system_private.h" @@ -45,9 +47,9 @@ struct State float cpu_thread_usage; float cpu_thread_time; - u64 last_sw_time; - float sw_thread_usage; - float sw_thread_time; + u64 last_gpu_thread_time; + float gpu_thread_usage; + float gpu_thread_time; float average_gpu_time; float accumulated_gpu_time; @@ -105,14 +107,14 @@ float PerformanceCounters::GetCPUThreadAverageTime() return s_state.cpu_thread_time; } -float PerformanceCounters::GetSWThreadUsage() +float PerformanceCounters::GetGPUThreadUsage() { - return s_state.sw_thread_usage; + return s_state.gpu_thread_usage; } -float PerformanceCounters::GetSWThreadAverageTime() +float PerformanceCounters::GetGPUThreadAverageTime() { - return s_state.sw_thread_time; + return s_state.gpu_thread_time; } float PerformanceCounters::GetGPUUsage() @@ -150,17 +152,16 @@ void PerformanceCounters::Reset() s_state.last_frame_number = System::GetFrameNumber(); s_state.last_internal_frame_number = System::GetInternalFrameNumber(); s_state.last_cpu_time = System::GetCPUThreadHandle().GetCPUTime(); - if (const Threading::Thread* sw_thread = g_gpu->GetSWThread(); sw_thread) - s_state.last_sw_time = sw_thread->GetCPUTime(); - else - s_state.last_sw_time = 0; + s_state.last_gpu_thread_time = GPUThread::Internal::GetThreadHandle().GetCPUTime(); s_state.average_frame_time_accumulator = 0.0f; s_state.minimum_frame_time_accumulator = 0.0f; s_state.maximum_frame_time_accumulator = 0.0f; + + std::atomic_thread_fence(std::memory_order_release); } -void PerformanceCounters::Update(u32 frame_number, u32 internal_frame_number) +void PerformanceCounters::Update(GPUBackend* gpu, u32 frame_number, u32 internal_frame_number) { 
const Timer::Value now_ticks = Timer::GetCurrentValue(); @@ -177,7 +178,7 @@ void PerformanceCounters::Update(u32 frame_number, u32 internal_frame_number) // update fps counter const Timer::Value ticks_diff = now_ticks - s_state.last_update_time; const float time = static_cast(Timer::ConvertValueToSeconds(ticks_diff)); - if (time < PERFORMANCE_COUNTER_UPDATE_INTERVAL) + if (time < PERFORMANCE_COUNTER_UPDATE_INTERVAL || s_state.last_frame_number == frame_number) return; s_state.last_update_time = now_ticks; @@ -202,18 +203,17 @@ void PerformanceCounters::Update(u32 frame_number, u32 internal_frame_number) s_state.fps = static_cast(internal_frames_run) / time; s_state.speed = (s_state.vps / System::GetVideoFrameRate()) * 100.0f; - const Threading::Thread* sw_thread = g_gpu->GetSWThread(); const u64 cpu_time = System::GetCPUThreadHandle().GetCPUTime(); - const u64 sw_time = sw_thread ? sw_thread->GetCPUTime() : 0; + const u64 gpu_thread_time = GPUThread::Internal::GetThreadHandle().GetCPUTime(); const u64 cpu_delta = cpu_time - s_state.last_cpu_time; - const u64 sw_delta = sw_time - s_state.last_sw_time; + const u64 gpu_thread_delta = gpu_thread_time - s_state.last_gpu_thread_time; s_state.last_cpu_time = cpu_time; - s_state.last_sw_time = sw_time; + s_state.last_gpu_thread_time = gpu_thread_time; s_state.cpu_thread_usage = static_cast(static_cast(cpu_delta) * pct_divider); s_state.cpu_thread_time = static_cast(static_cast(cpu_delta) * time_divider); - s_state.sw_thread_usage = static_cast(static_cast(sw_delta) * pct_divider); - s_state.sw_thread_time = static_cast(static_cast(sw_delta) * time_divider); + s_state.gpu_thread_usage = static_cast(static_cast(gpu_thread_delta) * pct_divider); + s_state.gpu_thread_time = static_cast(static_cast(gpu_thread_delta) * time_divider); if (MediaCapture* cap = System::GetMediaCapture()) cap->UpdateCaptureThreadUsage(pct_divider, time_divider); @@ -228,13 +228,13 @@ void PerformanceCounters::Update(u32 frame_number, u32 
internal_frame_number) s_state.presents_since_last_update = 0; if (g_settings.display_show_gpu_stats) - g_gpu->UpdateStatistics(frames_run); + gpu->UpdateStatistics(frames_run); - VERBOSE_LOG("FPS: {:.2f} VPS: {:.2f} CPU: {:.2f} GPU: {:.2f} Avg: {:.2f}ms Min: {:.2f}ms Max: {:.2f}ms", s_state.fps, - s_state.vps, s_state.cpu_thread_usage, s_state.gpu_usage, s_state.average_frame_time, - s_state.minimum_frame_time, s_state.maximum_frame_time); + VERBOSE_LOG("FPS: {:.2f} VPS: {:.2f} CPU: {:.2f} RNDR: {:.2f} GPU: {:.2f} Avg: {:.2f}ms Min: {:.2f}ms Max: {:.2f}ms", + s_state.fps, s_state.vps, s_state.cpu_thread_usage, s_state.gpu_thread_usage, s_state.gpu_usage, + s_state.average_frame_time, s_state.minimum_frame_time, s_state.maximum_frame_time); - Host::OnPerformanceCountersUpdated(); + Host::OnPerformanceCountersUpdated(gpu); } void PerformanceCounters::AccumulateGPUTime() diff --git a/src/core/performance_counters.h b/src/core/performance_counters.h index db3827b5b..fbdfc86cc 100644 --- a/src/core/performance_counters.h +++ b/src/core/performance_counters.h @@ -5,6 +5,8 @@ #include "common/types.h" +class GPUBackend; + namespace PerformanceCounters { static constexpr u32 NUM_FRAME_TIME_SAMPLES = 150; @@ -18,8 +20,8 @@ float GetMinimumFrameTime(); float GetMaximumFrameTime(); float GetCPUThreadUsage(); float GetCPUThreadAverageTime(); -float GetSWThreadUsage(); -float GetSWThreadAverageTime(); +float GetGPUThreadUsage(); +float GetGPUThreadAverageTime(); float GetGPUUsage(); float GetGPUAverageTime(); const FrameTimeHistory& GetFrameTimeHistory(); @@ -27,7 +29,7 @@ u32 GetFrameTimeHistoryPos(); void Clear(); void Reset(); -void Update(u32 frame_number, u32 internal_frame_number); +void Update(GPUBackend* gpu, u32 frame_number, u32 internal_frame_number); void AccumulateGPUTime(); } // namespace Host diff --git a/src/core/settings.cpp b/src/core/settings.cpp index b4ce76d1f..b803dd5b9 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -5,6 +5,7 @@ 
#include "achievements.h" #include "controller.h" #include "host.h" +#include "imgui_overlays.h" #include "system.h" #include "util/gpu_device.h" @@ -28,7 +29,8 @@ LOG_CHANNEL(Settings); -Settings g_settings; +ALIGN_TO_CACHE_LINE Settings g_settings; +ALIGN_TO_CACHE_LINE Settings g_gpu_settings; const char* SettingInfo::StringDefaultValue() const { @@ -206,6 +208,7 @@ void Settings::Load(const SettingsInterface& si, const SettingsInterface& contro gpu_disable_compressed_textures = si.GetBoolValue("GPU", "DisableCompressedTextures", false); gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true); + gpu_max_queued_frames = static_cast(si.GetUIntValue("GPU", "MaxQueuedFrames", DEFAULT_GPU_MAX_QUEUED_FRAMES)); gpu_use_software_renderer_for_readbacks = si.GetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", true); @@ -554,6 +557,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const } si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); + si.SetUIntValue("GPU", "MaxQueuedFrames", gpu_max_queued_frames); si.SetBoolValue("GPU", "UseThread", gpu_use_thread); si.SetBoolValue("GPU", "UseSoftwareRendererForReadbacks", gpu_use_software_renderer_for_readbacks); si.SetBoolValue("GPU", "TrueColor", gpu_true_color); @@ -950,7 +954,7 @@ std::string Settings::TextureReplacementSettings::Configuration::ExportToYAML(bo comment_str, replacement_scale_linear_filter); // ReplacementScaleLinearFilter } -void Settings::FixIncompatibleSettings(bool display_osd_messages) +void Settings::FixIncompatibleSettings(const SettingsInterface& si, bool display_osd_messages) { if (g_settings.disable_all_enhancements) { @@ -1022,6 +1026,13 @@ void Settings::FixIncompatibleSettings(bool display_osd_messages) (g_settings.gpu_renderer != GPURenderer::Software && 
g_settings.gpu_texture_cache); g_settings.texture_replacements.enable_vram_write_replacements &= (g_settings.gpu_renderer != GPURenderer::Software); + // GPU thread should be disabled if any debug windows are active, since they will be racing to read CPU thread state. + if (g_settings.gpu_use_thread && g_settings.gpu_max_queued_frames > 0 && ImGuiManager::AreAnyDebugWindowsEnabled(si)) + { + WARNING_LOG("Setting maximum queued frames to 0 because one or more debug windows are enabled."); + g_settings.gpu_max_queued_frames = 0; + } + #ifndef ENABLE_MMAP_FASTMEM if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) { @@ -1081,6 +1092,21 @@ void Settings::FixIncompatibleSettings(bool display_osd_messages) } } +bool Settings::AreGPUDeviceSettingsChanged(const Settings& old_settings) const +{ + return (gpu_use_debug_device != old_settings.gpu_use_debug_device || + gpu_disable_shader_cache != old_settings.gpu_disable_shader_cache || + gpu_disable_dual_source_blend != old_settings.gpu_disable_dual_source_blend || + gpu_disable_framebuffer_fetch != old_settings.gpu_disable_framebuffer_fetch || + gpu_disable_texture_buffers != old_settings.gpu_disable_texture_buffers || + gpu_disable_texture_copy_to_self != old_settings.gpu_disable_texture_copy_to_self || + gpu_disable_memory_import != old_settings.gpu_disable_memory_import || + gpu_disable_raster_order_views != old_settings.gpu_disable_raster_order_views || + gpu_disable_compute_shaders != old_settings.gpu_disable_compute_shaders || + gpu_disable_compressed_textures != old_settings.gpu_disable_compressed_textures || + display_exclusive_fullscreen_control != old_settings.display_exclusive_fullscreen_control); +} + void Settings::SetDefaultLogConfig(SettingsInterface& si) { si.SetStringValue("Logging", "LogLevel", GetLogLevelName(DEFAULT_LOG_LEVEL)); diff --git a/src/core/settings.h b/src/core/settings.h index a0af31968..8b02e3bce 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -98,6 +98,7 @@ struct Settings 
std::string gpu_adapter; u8 gpu_resolution_scale = 1; u8 gpu_multisamples = 1; + u8 gpu_max_queued_frames = DEFAULT_GPU_MAX_QUEUED_FRAMES; bool gpu_use_thread : 1 = true; bool gpu_use_software_renderer_for_readbacks : 1 = false; bool gpu_use_debug_device : 1 = false; @@ -378,7 +379,9 @@ struct Settings void Save(SettingsInterface& si, bool ignore_base) const; static void Clear(SettingsInterface& si); - void FixIncompatibleSettings(bool display_osd_messages); + void FixIncompatibleSettings(const SettingsInterface& si, bool display_osd_messages); + + bool AreGPUDeviceSettingsChanged(const Settings& old_settings) const; /// Initializes configuration. static void SetDefaultLogConfig(SettingsInterface& si); @@ -565,13 +568,19 @@ struct Settings static constexpr bool DEFAULT_SAVE_STATE_BACKUPS = true; static constexpr bool DEFAULT_FAST_BOOT_VALUE = false; static constexpr u16 DEFAULT_GDB_SERVER_PORT = 2345; + + // TODO: Maybe lower? But that means fast CPU threads would always stall, could be a problem for power management. + static constexpr u8 DEFAULT_GPU_MAX_QUEUED_FRAMES = 2; #else static constexpr bool DEFAULT_SAVE_STATE_BACKUPS = false; static constexpr bool DEFAULT_FAST_BOOT_VALUE = true; + static constexpr u8 DEFAULT_GPU_MAX_QUEUED_FRAMES = 3; #endif }; -extern Settings g_settings; +// TODO: Use smaller copy for GPU thread copy. +ALIGN_TO_CACHE_LINE extern Settings g_settings; // CPU thread copy. +ALIGN_TO_CACHE_LINE extern Settings g_gpu_settings; // GPU thread copy. 
namespace EmuFolders { extern std::string AppRoot; diff --git a/src/core/system.cpp b/src/core/system.cpp index 74d6886d5..3e9dc1404 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -16,11 +16,12 @@ #include "game_database.h" #include "game_list.h" #include "gpu.h" +#include "gpu_backend.h" #include "gpu_dump.h" #include "gpu_hw_texture_cache.h" +#include "gpu_thread.h" #include "gte.h" #include "host.h" -#include "host_interface_progress_callback.h" #include "imgui_overlays.h" #include "interrupt_controller.h" #include "mdec.h" @@ -62,6 +63,7 @@ #include "common/memmap.h" #include "common/path.h" #include "common/string_util.h" +#include "common/timer.h" #include "IconsEmoji.h" #include "IconsFontAwesome5.h" @@ -163,13 +165,12 @@ static bool SetBootMode(BootMode new_boot_mode, DiscRegion disc_region, Error* e static void InternalReset(); static void ClearRunningGame(); static void DestroySystem(); -static void JoinTaskThreads(); -static bool CreateGPU(GPURenderer renderer, bool is_switching, bool fullscreen, Error* error); -static bool RecreateGPU(GPURenderer renderer, bool force_recreate_device = false, bool update_display = true); -static void HandleHostGPUDeviceLost(); -static void HandleExclusiveFullscreenLost(); +static void RecreateGPU(GPURenderer new_renderer); static std::string GetScreenshotPath(const char* extension); +static bool StartMediaCapture(std::string path, bool capture_video, bool capture_audio, u32 video_width, + u32 video_height); +static void StopMediaCapture(std::unique_ptr cap); /// Returns true if boot is being fast forwarded. static bool IsFastForwardingBoot(); @@ -179,7 +180,7 @@ static void UpdateThrottlePeriod(); static void ResetThrottler(); /// Throttles the system, i.e. sleeps until it's time to execute the next frame. 
-static void Throttle(Timer::Value current_time); +static void Throttle(Timer::Value current_time, Timer::Value sleep_until); static void AccumulatePreFrameSleepTime(Timer::Value current_time); static void UpdateDisplayVSync(); @@ -194,6 +195,9 @@ static void UpdatePerGameMemoryCards(); static std::unique_ptr GetMemoryCardForSlot(u32 slot, MemoryCardType type); static void UpdateMultitaps(); +/// Returns the maximum size of a save state, considering the current configuration. +static size_t GetMaxSaveStateSize(); + static std::string GetMediaPathFromSaveState(const char* path); static bool SaveUndoLoadState(); static void UpdateMemorySaveStateSettings(); @@ -209,16 +213,14 @@ static bool SaveStateBufferToFile(const SaveStateBuffer& buffer, std::FILE* fp, static u32 CompressAndWriteStateData(std::FILE* fp, std::span src, SaveStateCompressionMode method, u32* header_type, Error* error); static bool DoState(StateWrapper& sw, bool update_display); -static bool DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_display); +static void DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_display); static bool IsExecutionInterrupted(); static void CheckForAndExitExecution(); static void SetRewinding(bool enabled); -static bool SaveRewindState(); static void DoRewind(); -static void SaveRunaheadState(); static bool DoRunahead(); static bool OpenGPUDump(std::string path, Error* error); @@ -306,7 +308,6 @@ struct ALIGN_TO_CACHE_LINE StateVars GameHash running_game_hash; bool running_game_custom_title = false; - bool keep_gpu_device_on_shutdown = false; std::atomic_bool startup_cancelled{false}; std::unique_ptr game_settings_interface; @@ -515,6 +516,8 @@ bool System::CPUThreadInitialize(Error* error) LogStartupInformation(); + GPUThread::Internal::ProcessStartup(); + if (g_settings.achievements_enabled) Achievements::Initialize(); @@ -567,16 +570,6 @@ System::State System::GetState() return s_state.state; } -void System::SetState(State new_state) 
-{ - if (s_state.state == new_state) - return; - - Assert(s_state.state == State::Paused || s_state.state == State::Running); - Assert(new_state == State::Paused || new_state == State::Running); - s_state.state = new_state; -} - bool System::IsRunning() { return s_state.state == State::Running; @@ -1172,120 +1165,19 @@ DiscRegion System::GetRegionForPsf(const char* path) return psf.GetRegion(); } -bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_device, bool update_display /* = true*/) +void System::RecreateGPU(GPURenderer renderer) { - ClearMemorySaveStates(true); - g_gpu->RestoreDeviceContext(); - - // save current state - DynamicHeapArray state_data(GetMaxSaveStateSize()); - { - StateWrapper sw(state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); - if (!g_gpu->DoState(sw, update_display) || !TimingEvents::DoState(sw)) - { - ERROR_LOG("Failed to save old GPU state when switching renderers"); - state_data.deallocate(); - } - } - - // create new renderer - g_gpu.reset(); - if (force_recreate_device) - { - PostProcessing::Shutdown(); - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - } + FreeMemoryStateTextures(); + StopMediaCapture(); Error error; - if (!CreateGPU(renderer, true, Host::IsFullscreen(), &error)) + if (!GPUThread::CreateGPUBackend(s_state.running_game_serial, renderer, true, false, false, &error)) { - if (!IsStartupCancelled()) - Host::ReportErrorAsync("Error", error.GetDescription()); - - DestroySystem(); - return false; + ERROR_LOG("Failed to switch to {} renderer: {}", Settings::GetRendererName(renderer), error.GetDescription()); + Panic("Failed to switch renderer."); } - if (!state_data.empty()) - { - StateWrapper sw(state_data.span(), StateWrapper::Mode::Read, SAVE_STATE_VERSION); - g_gpu->RestoreDeviceContext(); - g_gpu->DoState(sw, update_display); - TimingEvents::DoState(sw); - } - - if (force_recreate_device) - { - ImGuiManager::UpdateDebugWindowConfig(); - InvalidateDisplay(); - } - - // fix up vsync 
etc - UpdateSpeedLimiterState(); - return true; -} - -void System::HandleHostGPUDeviceLost() -{ - static Timer::Value s_last_gpu_reset_time = 0; - static constexpr float MIN_TIME_BETWEEN_RESETS = 15.0f; - - // If we're constantly crashing on something in particular, we don't want to end up in an - // endless reset loop.. that'd probably end up leaking memory and/or crashing us for other - // reasons. So just abort in such case. - const Timer::Value current_time = Timer::GetCurrentValue(); - if (s_last_gpu_reset_time != 0 && - Timer::ConvertValueToSeconds(current_time - s_last_gpu_reset_time) < MIN_TIME_BETWEEN_RESETS) - { - Panic("Host GPU lost too many times, device is probably completely wedged."); - } - s_last_gpu_reset_time = current_time; - - if (g_gpu) - { - // Little bit janky, but because the device is lost, the VRAM readback is going to give us garbage. - // So back up what we have, it's probably missing bits, but whatever... - DynamicHeapArray vram_backup(VRAM_SIZE); - std::memcpy(vram_backup.data(), g_vram, VRAM_SIZE); - - // Device lost, something went really bad. - // Let's just toss out everything, and try to hobble on. - if (!RecreateGPU(g_gpu->IsHardwareRenderer() ? g_settings.gpu_renderer : GPURenderer::Software, true, false)) - { - Panic("Failed to recreate GPU device after loss."); - return; - } - - // Restore backed-up VRAM. - std::memcpy(g_vram, vram_backup.data(), VRAM_SIZE); - } - else - { - // Only big picture mode was running. - const bool fsui_running = FullscreenUI::IsInitialized(); - const bool fullscreen = Host::IsFullscreen(); - const RenderAPI api = g_gpu_device->GetRenderAPI(); - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - if (!Host::CreateGPUDevice(api, fullscreen, nullptr) || (fsui_running && !FullscreenUI::Initialize())) - { - Panic("Failed to recreate GPU device after loss."); - return; - } - } - - // First frame after reopening is definitely going to be trash, so skip it. 
- Host::AddIconOSDWarning( - "HostGPUDeviceLost", ICON_EMOJI_WARNING, - TRANSLATE_STR("System", "Host GPU device encountered an error and has recovered. This may cause broken rendering."), - Host::OSD_CRITICAL_ERROR_DURATION); -} - -void System::HandleExclusiveFullscreenLost() -{ - WARNING_LOG("Lost exclusive fullscreen."); - Host::SetFullscreen(false); + ClearMemorySaveStates(true); } void System::LoadSettings(bool display_osd_messages) @@ -1315,7 +1207,7 @@ void System::LoadSettings(bool display_osd_messages) // patch overrides take precedence over compat settings Cheats::ApplySettingOverrides(); - g_settings.FixIncompatibleSettings(display_osd_messages); + g_settings.FixIncompatibleSettings(si, display_osd_messages); } void System::ReloadInputSources() @@ -1669,16 +1561,12 @@ void System::PauseSystem(bool paused) if (paused == IsPaused() || !IsValid()) return; - SetState(paused ? State::Paused : State::Running); + s_state.state = (paused ? State::Paused : State::Running); SPU::GetOutputStream()->SetPaused(paused); + GPUThread::RunOnThread([paused]() { GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::SystemPaused, paused); }); if (paused) { - // Make sure the GPU is flushed, otherwise the VB might still be mapped. 
- g_gpu->FlushRender(); - - FullscreenUI::OnSystemPaused(); - InputManager::PauseVibration(); InputManager::UpdateHostMouseMode(); @@ -1692,9 +1580,8 @@ void System::PauseSystem(bool paused) #endif Host::OnSystemPaused(); - Host::OnIdleStateChanged(); UpdateDisplayVSync(); - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); } else { @@ -1712,8 +1599,6 @@ void System::PauseSystem(bool paused) #endif Host::OnSystemResumed(); - Host::OnIdleStateChanged(); - UpdateDisplayVSync(); PerformanceCounters::Reset(); ResetThrottler(); @@ -1750,8 +1635,8 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) Assert(s_state.state == State::Shutdown); s_state.state = State::Starting; s_state.startup_cancelled.store(false, std::memory_order_relaxed); - s_state.keep_gpu_device_on_shutdown = static_cast(g_gpu_device); s_state.region = g_settings.region; + std::atomic_thread_fence(std::memory_order_release); Host::OnSystemStarting(); // Load CD image up and detect region. @@ -1920,12 +1805,9 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) UpdateMultitaps(); InternalReset(); - // Texture replacement preloading. - // TODO: Move this and everything else below OnSystemStarted(). - GPUTextureCache::SetGameID(s_state.running_game_serial); - // Good to go. 
s_state.state = State::Running; + std::atomic_thread_fence(std::memory_order_release); SPU::GetOutputStream()->SetPaused(false); // try to load the state, if it fails, bail out @@ -1950,7 +1832,6 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) #endif Host::OnSystemStarted(); - Host::OnIdleStateChanged(); if (parameters.load_image_to_ram || g_settings.cdrom_load_image_to_ram) CDROM::PrecacheMedia(); @@ -1962,7 +1843,6 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) PauseSystem(true); UpdateSpeedLimiterState(); - ImGuiManager::UpdateDebugWindowConfig(); PerformanceCounters::Reset(); ResetThrottler(); return true; @@ -2002,8 +1882,17 @@ bool System::Initialize(std::unique_ptr disc, DiscRegion disc_region, b !CDROM::InsertMedia(std::move(disc), disc_region, s_state.running_game_serial, s_state.running_game_title, error)) return false; - if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer, false, fullscreen, error)) + // TODO: Drop pointer + g_gpu = std::make_unique(); + g_gpu->Initialize(); + + // This can fail due to the application being closed during startup. + if (!GPUThread::CreateGPUBackend(s_state.running_game_serial, + force_software_renderer ? 
GPURenderer::Software : g_settings.gpu_renderer, false, + fullscreen, false, error)) + { return false; + } if (g_settings.gpu_pgxp_enable) CPU::PGXP::Initialize(); @@ -2022,7 +1911,6 @@ bool System::Initialize(std::unique_ptr disc, DiscRegion disc_region, b MDEC::Initialize(); SIO::Initialize(); PCDrv::Initialize(); - PostProcessing::Initialize(); s_state.cpu_thread_handle = Threading::ThreadHandle::GetForCallingThread(); @@ -2046,8 +1934,6 @@ void System::DestroySystem() if (s_state.media_capture) StopMediaCapture(); - ImGuiManager::DestroyAllDebugWindows(); - s_state.gpu_dump_player.reset(); s_state.undo_load_state.reset(); @@ -2056,21 +1942,20 @@ void System::DestroySystem() GDBServer::Shutdown(); #endif - // TODO-GPU-THREAD: Needs to be called on GPU thread. - Host::ClearOSDMessages(true); + GPUThread::RunOnThread([]() { + GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::SystemPaused, false); + Host::ClearOSDMessages(true); + }); PostProcessing::Shutdown(); - SaveStateSelectorUI::Clear(); - FullscreenUI::OnSystemDestroyed(); - InputManager::PauseVibration(); InputManager::UpdateHostMouseMode(); if (g_settings.inhibit_screensaver) PlatformMisc::ResumeScreensaver(); - ClearMemorySaveStates(true); + FreeMemoryStateStorage(); Cheats::UnloadAll(); PCDrv::Shutdown(); @@ -2088,19 +1973,8 @@ void System::DestroySystem() CPU::Shutdown(); Bus::Shutdown(); TimingEvents::Shutdown(); - GPUTextureCache::Shutdown(); ClearRunningGame(); - - // Restore present-all-frames behavior. 
- if (s_state.keep_gpu_device_on_shutdown && g_gpu_device) - { - g_gpu_device->SetGPUTimingEnabled(false); - UpdateDisplayVSync(); - } - else - { - Host::ReleaseGPUDevice(); - } + GPUThread::DestroyGPUBackend(); s_state.taints = 0; s_state.bios_hash = {}; @@ -2109,9 +1983,12 @@ void System::DestroySystem() s_state.boot_mode = BootMode::None; s_state.state = State::Shutdown; + std::atomic_thread_fence(std::memory_order_release); + + // NOTE: Must come after DestroyGPUBackend(), otherwise landing page will display. + FullscreenUI::OnSystemDestroyed(); Host::OnSystemDestroyed(); - Host::OnIdleStateChanged(); } void System::ClearRunningGame() @@ -2141,8 +2018,6 @@ void System::Execute() { s_state.system_executing = true; - // TODO: Purge reset/restore - g_gpu->RestoreDeviceContext(); TimingEvents::CommitLeftoverTicks(); if (s_state.gpu_dump_player) [[unlikely]] @@ -2171,9 +2046,6 @@ void System::Execute() void System::FrameDone() { - // Vertex buffer is shared, need to flush what we have. - g_gpu->FlushRender(); - // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. // TODO: when running ahead, we can skip this (and the flush above) if (!IsReplayingGPUDump()) [[likely]] @@ -2195,8 +2067,6 @@ void System::FrameDone() s_state.socket_multiplexer->PollEventsWithTimeout(0); #endif - Host::FrameDone(); - if (s_state.frame_step_request) { s_state.frame_step_request = false; @@ -2208,7 +2078,7 @@ void System::FrameDone() { if (s_state.rewind_save_counter == 0) { - SaveRewindState(); + SaveMemoryState(AllocateMemoryState()); s_state.rewind_save_counter = s_state.rewind_save_frequency; } else @@ -2226,7 +2096,6 @@ void System::FrameDone() // counter-acts that. Host::PumpMessagesOnCPUThread(); InputManager::PollSources(); - g_gpu->RestoreDeviceContext(); CheckForAndExitExecution(); } @@ -2236,30 +2105,10 @@ void System::FrameDone() return; } - SaveRunaheadState(); - } + // Late submission of frame. 
This is needed because the input poll can determine whether we need to rewind. + g_gpu->QueuePresentCurrentFrame(); - // Kick off media capture early, might take a while. - if (s_state.media_capture && s_state.media_capture->IsCapturingVideo()) [[unlikely]] - { - if (s_state.media_capture->GetVideoFPS() != s_state.video_frame_rate) [[unlikely]] - { - const std::string next_capture_path = s_state.media_capture->GetNextCapturePath(); - INFO_LOG("Video frame rate changed, switching to new capture file {}", Path::GetFileName(next_capture_path)); - - const bool was_capturing_audio = s_state.media_capture->IsCapturingAudio(); - StopMediaCapture(); - if (StartMediaCapture(std::move(next_capture_path), true, was_capturing_audio) && - !g_gpu->SendDisplayToMediaCapture(s_state.media_capture.get())) [[unlikely]] - { - StopMediaCapture(); - } - } - else - { - if (!g_gpu->SendDisplayToMediaCapture(s_state.media_capture.get())) [[unlikely]] - StopMediaCapture(); - } + SaveMemoryState(AllocateMemoryState()); } Timer::Value current_time = Timer::GetCurrentValue(); @@ -2270,55 +2119,6 @@ void System::FrameDone() if (s_state.pre_frame_sleep) AccumulatePreFrameSleepTime(current_time); - // explicit present (frame pacing) - const bool is_unique_frame = (s_state.last_presented_internal_frame_number != s_state.internal_frame_number); - s_state.last_presented_internal_frame_number = s_state.internal_frame_number; - - const bool skip_this_frame = - (((s_state.skip_presenting_duplicate_frames && !is_unique_frame && - s_state.skipped_frame_count < MAX_SKIPPED_DUPLICATE_FRAME_COUNT) || - (!s_state.optimal_frame_pacing && current_time > s_state.next_frame_time && - s_state.skipped_frame_count < MAX_SKIPPED_TIMEOUT_FRAME_COUNT) || - (g_gpu_device->HasMainSwapChain() && g_gpu_device->GetMainSwapChain()->ShouldSkipPresentingFrame())) && - !s_state.syncing_to_host_with_vsync && !IsExecutionInterrupted()); - if (!skip_this_frame) - { - s_state.skipped_frame_count = 0; - - const bool 
scheduled_present = - (s_state.optimal_frame_pacing && s_state.throttler_enabled && !IsExecutionInterrupted()); - const GPUDevice::Features features = g_gpu_device->GetFeatures(); - if (scheduled_present && features.timed_present) - { - PresentDisplay(false, s_state.next_frame_time); - Throttle(current_time); - } - else if (scheduled_present && features.explicit_present) - { - const bool do_present = PresentDisplay(true, 0); - Throttle(current_time); - if (do_present) - g_gpu_device->SubmitPresent(g_gpu_device->GetMainSwapChain()); - } - else - { - if (scheduled_present) - Throttle(current_time); - - PresentDisplay(false, 0); - - if (!scheduled_present && s_state.throttler_enabled && !IsExecutionInterrupted()) - Throttle(current_time); - } - } - else - { - DEBUG_LOG("Skipping displaying frame"); - s_state.skipped_frame_count++; - if (s_state.throttler_enabled) - Throttle(current_time); - } - // pre-frame sleep (input lag reduction) current_time = Timer::GetCurrentValue(); if (s_state.pre_frame_sleep) @@ -2327,10 +2127,15 @@ void System::FrameDone() if (pre_frame_sleep_until > current_time && Timer::ConvertValueToMilliseconds(pre_frame_sleep_until - current_time) >= 1) { - Timer::SleepUntil(pre_frame_sleep_until, true); + Throttle(current_time, pre_frame_sleep_until); current_time = Timer::GetCurrentValue(); } } + else + { + if (s_state.throttler_enabled) + Throttle(current_time, s_state.next_frame_time); + } s_state.frame_start_time = current_time; @@ -2341,13 +2146,65 @@ void System::FrameDone() InputManager::PollSources(); CheckForAndExitExecution(); } +} - g_gpu->RestoreDeviceContext(); +bool System::GetFramePresentationParameters(GPUBackendFramePresentationParameters* frame) +{ + const Timer::Value current_time = Timer::GetCurrentValue(); - // Update perf counters *after* throttling, we want to measure from start-of-frame - // to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different - // amounts of computation happening in each 
frame). - PerformanceCounters::Update(s_state.frame_number, s_state.internal_frame_number); + frame->frame_number = s_state.frame_number; + frame->internal_frame_number = s_state.internal_frame_number; + + // explicit present (frame pacing) + const bool is_unique_frame = (s_state.last_presented_internal_frame_number != s_state.internal_frame_number); + s_state.last_presented_internal_frame_number = s_state.internal_frame_number; + + const bool is_duplicate_frame = (s_state.skip_presenting_duplicate_frames && !is_unique_frame && + s_state.skipped_frame_count < MAX_SKIPPED_DUPLICATE_FRAME_COUNT); + const bool skip_this_frame = + ((is_duplicate_frame || (!s_state.optimal_frame_pacing && current_time > s_state.next_frame_time && + s_state.skipped_frame_count < MAX_SKIPPED_TIMEOUT_FRAME_COUNT)) && + !s_state.syncing_to_host_with_vsync && !IsExecutionInterrupted()); + const bool should_allow_present_skip = !s_state.syncing_to_host_with_vsync && !s_state.optimal_frame_pacing; + frame->update_performance_counters = !is_duplicate_frame; + frame->present_frame = !skip_this_frame; + frame->allow_present_skip = should_allow_present_skip; + frame->present_time = (s_state.optimal_frame_pacing && s_state.throttler_enabled && !IsExecutionInterrupted()) ? + s_state.next_frame_time : + 0; + + // Video capture setup. 
+ frame->media_capture = nullptr; + if (MediaCapture* cap = s_state.media_capture.get(); cap && cap->IsCapturingVideo()) + { + frame->media_capture = cap; + + if (cap->GetVideoFPS() != s_state.video_frame_rate) [[unlikely]] + { + const std::string next_capture_path = cap->GetNextCapturePath(); + const u32 video_width = cap->GetVideoWidth(); + const u32 video_height = cap->GetVideoHeight(); + INFO_LOG("Video frame rate changed, switching to new capture file {}", Path::GetFileName(next_capture_path)); + + const bool was_capturing_audio = cap->IsCapturingAudio(); + StopMediaCapture(); + StartMediaCapture(std::move(next_capture_path), true, was_capturing_audio, video_width, video_height); + frame->media_capture = s_state.media_capture.get(); + } + } + + if (!skip_this_frame) + { + s_state.skipped_frame_count = 0; + } + else + { + DEBUG_LOG("Skipping displaying frame"); + s_state.skipped_frame_count++; + } + + // Still need to submit frame if we're capturing, even if it's a dupe. + return (!is_duplicate_frame || frame->media_capture); } float System::GetVideoFrameRate() @@ -2387,12 +2244,12 @@ void System::ResetThrottler() s_state.pre_frame_sleep_time = 0; } -void System::Throttle(Timer::Value current_time) +void System::Throttle(Timer::Value current_time, Timer::Value sleep_until) { // If we're running too slow, advance the next frame time based on the time we lost. Effectively skips // running those frames at the intended time, because otherwise if we pause in the debugger, we'll run // hundreds of frames when we resume. 
- if (current_time > s_state.next_frame_time) + if (current_time > sleep_until) { const Timer::Value diff = static_cast(current_time) - static_cast(s_state.next_frame_time); s_state.next_frame_time += (diff / s_state.frame_period) * s_state.frame_period + s_state.frame_period; @@ -2407,11 +2264,10 @@ void System::Throttle(Timer::Value current_time) Timer::Value poll_start_time = current_time; for (;;) { - const u32 sleep_ms = - static_cast(Timer::ConvertValueToMilliseconds(s_state.next_frame_time - poll_start_time)); + const u32 sleep_ms = static_cast(Timer::ConvertValueToMilliseconds(sleep_until - poll_start_time)); s_state.socket_multiplexer->PollEventsWithTimeout(sleep_ms); poll_start_time = Timer::GetCurrentValue(); - if (poll_start_time >= s_state.next_frame_time || (!g_settings.display_optimal_frame_pacing && sleep_ms == 0)) + if (poll_start_time >= sleep_until || (!g_settings.display_optimal_frame_pacing && sleep_ms == 0)) break; } } @@ -2420,14 +2276,14 @@ void System::Throttle(Timer::Value current_time) // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. // Linux also seems to do a much better job of waking up at the requested time. #if !defined(__linux__) - Timer::SleepUntil(s_state.next_frame_time, g_settings.display_optimal_frame_pacing); + Timer::SleepUntil(sleep_until, g_settings.display_optimal_frame_pacing); #else - Timer::SleepUntil(s_state.next_frame_time, false); + Timer::SleepUntil(sleep_until, false); #endif } #else // No spinwait on Android, see above. 
- Timer::SleepUntil(s_state.next_frame_time, false); + Timer::SleepUntil(sleep_until, false); #endif #if 0 @@ -2471,65 +2327,6 @@ void System::IncrementInternalFrameNumber() s_state.internal_frame_number++; } -bool System::CreateGPU(GPURenderer renderer, bool is_switching, bool fullscreen, Error* error) -{ - const RenderAPI api = Settings::GetRenderAPIForRenderer(renderer); - - if (!g_gpu_device || - (renderer != GPURenderer::Software && !GPUDevice::IsSameRenderAPI(g_gpu_device->GetRenderAPI(), api))) - { - if (g_gpu_device) - { - WARNING_LOG("Recreating GPU device, expecting {} got {}", GPUDevice::RenderAPIToString(api), - GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); - PostProcessing::Shutdown(); - } - - Host::ReleaseGPUDevice(); - if (!Host::CreateGPUDevice(api, fullscreen, error)) - { - Host::ReleaseRenderWindow(); - return false; - } - - if (is_switching) - PostProcessing::Initialize(); - } - - if (renderer == GPURenderer::Software) - g_gpu = GPU::CreateSoftwareRenderer(); - else - g_gpu = GPU::CreateHardwareRenderer(); - - if (!g_gpu->Initialize(error)) - { - ERROR_LOG("Failed to initialize {} renderer, falling back to software renderer", - Settings::GetRendererName(renderer)); - Host::AddOSDMessage( - fmt::format(TRANSLATE_FS("System", "Failed to initialize {} renderer, falling back to software renderer."), - Settings::GetRendererName(renderer)), - Host::OSD_CRITICAL_ERROR_DURATION); - g_gpu.reset(); - g_gpu = GPU::CreateSoftwareRenderer(); - if (!g_gpu->Initialize(error)) - { - ERROR_LOG("Failed to create fallback software renderer."); - if (!s_state.keep_gpu_device_on_shutdown) - { - PostProcessing::Shutdown(); - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - } - return false; - } - } - - if (g_settings.display_show_gpu_usage) - g_gpu_device->SetGPUTimingEnabled(true); - - return true; -} - bool System::DoState(StateWrapper& sw, bool update_display) { if (!sw.DoMarker("System")) @@ -2588,7 +2385,6 @@ bool 
System::DoState(StateWrapper& sw, bool update_display) if (!sw.DoMarker("InterruptController") || !InterruptController::DoState(sw)) return false; - g_gpu->RestoreDeviceContext(); if (!sw.DoMarker("GPU") || !g_gpu->DoState(sw, update_display)) return false; @@ -2695,26 +2491,124 @@ System::MemorySaveState& System::PopMemoryState() return s_state.memory_save_states[s_state.memory_save_state_front]; } -void System::ClearMemorySaveStates(bool deallocate_resources) +bool System::AllocateMemoryStates(size_t state_count) { - if (deallocate_resources) + DEV_LOG("Allocating {} memory save state slots", state_count); + + if (state_count != s_state.memory_save_states.size()) { - for (MemorySaveState& mss : s_state.memory_save_states) + FreeMemoryStateStorage(); + s_state.memory_save_states.resize(state_count); + } + + // Allocate CPU buffers. + // TODO: Maybe look at host memory limits here... + const size_t size = GetMaxSaveStateSize(); + for (MemorySaveState& mss : s_state.memory_save_states) + { + mss.state_size = 0; + if (mss.state_data.size() != size) + mss.state_data.resize(size); + } + + // Allocate GPU buffers. 
+ Error error; + if (!GPUBackend::AllocateMemorySaveStates(s_state.memory_save_states, &error)) + { + ERROR_LOG("Failed to allocate {} memory save states: {}", s_state.memory_save_states.size(), + error.GetDescription()); + ERROR_LOG("Disabling runahead/rewind."); + FreeMemoryStateStorage(); + s_state.runahead_frames = 0; + s_state.memory_save_state_front = 0; + s_state.memory_save_state_count = 0; + s_state.rewind_load_frequency = -1; + s_state.rewind_load_counter = -1; + s_state.rewind_save_frequency = -1; + s_state.rewind_save_counter = -1; + return false; + } + + return true; +} + +void System::ClearMemorySaveStates(bool reallocate_resources) +{ + s_state.memory_save_state_front = 0; + s_state.memory_save_state_count = 0; + + if (reallocate_resources && !s_state.memory_save_states.empty()) + AllocateMemoryStates(s_state.memory_save_states.size()); +} + +void System::FreeMemoryStateTextures() +{ + // TODO: use non-copyable function, that way we don't need to store raw pointers + std::vector textures; + bool gpu_thread_synced = false; + + for (MemorySaveState& mss : s_state.memory_save_states) + { + if ((mss.vram_texture || !mss.gpu_state_data.empty()) && !gpu_thread_synced) { - g_gpu_device->RecycleTexture(std::move(mss.vram_texture)); - mss.state_data.deallocate(); - mss.state_size = 0; + gpu_thread_synced = true; + GPUThread::SyncGPUThread(true); + } + + if (mss.vram_texture) + { + if (textures.empty()) + textures.reserve(s_state.memory_save_states.size()); + + textures.push_back(mss.vram_texture.release()); } } - s_state.memory_save_state_front = 0; - s_state.memory_save_state_count = 0; + if (!textures.empty()) + { + GPUThread::RunOnThread([textures = std::move(textures)]() mutable { + for (GPUTexture* texture : textures) + g_gpu_device->RecycleTexture(std::unique_ptr(texture)); + }); + } } void System::FreeMemoryStateStorage() { + // TODO: use non-copyable function, that way we don't need to store raw pointers + std::vector textures; + bool 
gpu_thread_synced = false; + for (MemorySaveState& mss : s_state.memory_save_states) - g_gpu_device->RecycleTexture(std::move(mss.vram_texture)); + { + if ((mss.vram_texture || !mss.gpu_state_data.empty()) && !gpu_thread_synced) + { + gpu_thread_synced = true; + GPUThread::SyncGPUThread(true); + } + + if (mss.vram_texture) + { + if (textures.empty()) + textures.reserve(s_state.memory_save_states.size()); + + textures.push_back(mss.vram_texture.release()); + } + + mss.gpu_state_data.deallocate(); + mss.gpu_state_size = 0; + mss.state_data.deallocate(); + mss.state_size = 0; + } + + if (!textures.empty()) + { + GPUThread::RunOnThread([textures = std::move(textures)]() mutable { + for (GPUTexture* texture : textures) + g_gpu_device->RecycleTexture(std::unique_ptr(texture)); + }); + } + s_state.memory_save_states = std::vector(); s_state.memory_save_state_front = 0; s_state.memory_save_state_count = 0; @@ -2722,28 +2616,43 @@ void System::FreeMemoryStateStorage() void System::LoadMemoryState(MemorySaveState& mss, bool update_display) { - StateWrapper sw(mss.state_data.cspan(0, mss.state_size), StateWrapper::Mode::Read, SAVE_STATE_VERSION); - [[maybe_unused]] const bool res = DoMemoryState(sw, mss, update_display); - DebugAssert(res); +#ifdef PROFILE_MEMORY_SAVE_STATES + Timer load_timer; +#endif + StateWrapper sw(mss.state_data.cspan(0, mss.state_size), StateWrapper::Mode::Read, SAVE_STATE_VERSION); + DoMemoryState(sw, mss, update_display); + DebugAssert(!sw.HasError()); + +#ifdef PROFILE_MEMORY_SAVE_STATES + DEV_LOG("Loaded frame {} from memory state slot {} took {:.4f} ms", s_state.frame_number, + &mss - s_state.memory_save_states.data(), load_timer.GetTimeMilliseconds()); +#else DEBUG_LOG("Loaded frame {} from memory state slot {}", s_state.frame_number, &mss - s_state.memory_save_states.data()); +#endif } -bool System::SaveMemoryState(MemorySaveState& mss) +void System::SaveMemoryState(MemorySaveState& mss) { - DEBUG_LOG("Saving frame {} to memory state slot {}", 
s_state.frame_number, &mss - s_state.memory_save_states.data()); - - if (mss.state_data.empty()) - mss.state_data.resize(GetMaxSaveStateSize()); +#ifdef PROFILE_MEMORY_SAVE_STATES + Timer save_timer; +#endif StateWrapper sw(mss.state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); - const bool res = DoMemoryState(sw, mss, false); + DoMemoryState(sw, mss, false); + DebugAssert(!sw.HasError()); mss.state_size = sw.GetPosition(); - return res; + +#ifdef PROFILE_MEMORY_SAVE_STATES + DEV_LOG("Saving frame {} to memory state slot {} took {} bytes and {:.4f} ms", s_state.frame_number, + &mss - s_state.memory_save_states.data(), mss.state_size, save_timer.GetTimeMilliseconds()); +#else + DEBUG_LOG("Saving frame {} to memory state slot {}", s_state.frame_number, &mss - s_state.memory_save_states.data()); +#endif } -bool System::DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_display) +void System::DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_display) { #if defined(_DEBUG) || defined(_DEVEL) #define SAVE_COMPONENT(name, expr) \ @@ -2772,10 +2681,7 @@ bool System::DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_d SAVE_COMPONENT("DMA", DMA::DoState(sw)); SAVE_COMPONENT("InterruptController", InterruptController::DoState(sw)); - // GPU can fail due to running out of VRAM. 
- g_gpu->RestoreDeviceContext(); - if (!g_gpu->DoMemoryState(sw, mss, update_display)) [[unlikely]] - return false; + g_gpu->DoMemoryState(sw, mss, update_display); SAVE_COMPONENT("CDROM", CDROM::DoState(sw)); SAVE_COMPONENT("Pad", Pad::DoState(sw, true)); @@ -2787,8 +2693,6 @@ bool System::DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_d SAVE_COMPONENT("Achievements", Achievements::DoState(sw)); #undef SAVE_COMPONENT - - return true; } bool System::LoadBIOS(Error* error) @@ -3029,7 +2933,7 @@ bool System::LoadStateFromBuffer(const SaveStateBuffer& buffer, Error* error, bo ResetThrottler(); if (update_display) - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); return true; } @@ -3304,19 +3208,7 @@ bool System::SaveStateToBuffer(SaveStateBuffer* buffer, Error* error, u32 screen // save screenshot if (screenshot_size > 0) { - // assume this size is the width - GSVector4i screenshot_display_rect, screenshot_draw_rect; - g_gpu->CalculateDrawRect(screenshot_size, screenshot_size, true, true, &screenshot_display_rect, - &screenshot_draw_rect); - - const u32 screenshot_width = static_cast(screenshot_display_rect.width()); - const u32 screenshot_height = static_cast(screenshot_display_rect.height()); - screenshot_draw_rect = screenshot_draw_rect.sub32(screenshot_display_rect.xyxy()); - screenshot_display_rect = screenshot_display_rect.sub32(screenshot_display_rect.xyxy()); - VERBOSE_LOG("Saving {}x{} screenshot for state", screenshot_width, screenshot_height); - - if (g_gpu->RenderScreenshotToBuffer(screenshot_width, screenshot_height, screenshot_display_rect, - screenshot_draw_rect, false, &buffer->screenshot)) + if (GPUBackend::RenderScreenshotToBuffer(screenshot_size, screenshot_size, false, &buffer->screenshot)) { if (g_gpu_device->UsesLowerLeftOrigin()) buffer->screenshot.FlipY(); @@ -3340,8 +3232,8 @@ bool System::SaveStateToBuffer(SaveStateBuffer* buffer, Error* error, u32 screen } else { - WARNING_LOG("Failed to save {}x{} screenshot for 
save state due to render/conversion failure", screenshot_width, - screenshot_height); + WARNING_LOG("Failed to save {}x{} screenshot for save state due to render/conversion failure", screenshot_size, + screenshot_size); } } @@ -3349,7 +3241,6 @@ bool System::SaveStateToBuffer(SaveStateBuffer* buffer, Error* error, u32 screen if (buffer->state_data.empty()) buffer->state_data.resize(GetMaxSaveStateSize()); - g_gpu->RestoreDeviceContext(); StateWrapper sw(buffer->state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); if (!DoState(sw, false)) { @@ -3563,8 +3454,8 @@ void System::FormatLatencyStats(SmallStringBase& str) Timer::ConvertValueToMilliseconds(s_state.frame_period - s_state.pre_frame_sleep_time) - Timer::ConvertValueToMilliseconds(static_cast(s_state.runahead_frames) * s_state.frame_period)); - str.format("AF: {:.0f}ms | PF: {:.0f}ms | IL: {:.0f}ms | AL: {}ms", active_frame_time, pre_frame_time, input_latency, - audio_latency); + str.format("AL: {}ms | AF: {:.0f}ms | PF: {:.0f}ms | IL: {:.0f}ms | QF: {}", audio_latency, active_frame_time, + pre_frame_time, input_latency, GPUBackend::GetQueuedFrameCount()); } void System::UpdateSpeedLimiterState() @@ -3585,10 +3476,9 @@ void System::UpdateSpeedLimiterState() s_state.syncing_to_host = false; s_state.syncing_to_host_with_vsync = false; - if (g_settings.sync_to_host_refresh_rate && g_gpu_device->HasMainSwapChain()) + if (g_settings.sync_to_host_refresh_rate) { - const float host_refresh_rate = g_gpu_device->GetMainSwapChain()->GetWindowInfo().surface_refresh_rate; - if (host_refresh_rate > 0.0f) + if (const float host_refresh_rate = GPUThread::GetRenderWindowInfo().surface_refresh_rate; host_refresh_rate > 0.0f) { const float ratio = host_refresh_rate / s_state.video_frame_rate; s_state.can_sync_to_host = (ratio >= 0.95f && ratio <= 1.05f); @@ -3640,32 +3530,15 @@ void System::UpdateSpeedLimiterState() void System::UpdateDisplayVSync() { - static constexpr std::array(GPUVSyncMode::Count)> vsync_modes 
= {{ - "Disabled", - "FIFO", - "Mailbox", - }}; - // Avoid flipping vsync on and off by manually throttling when vsync is on. const GPUVSyncMode vsync_mode = GetEffectiveVSyncMode(); const bool allow_present_throttle = ShouldAllowPresentThrottle(); - if (!g_gpu_device->HasMainSwapChain() || - (g_gpu_device->GetMainSwapChain()->GetVSyncMode() == vsync_mode && - g_gpu_device->GetMainSwapChain()->IsPresentThrottleAllowed() == allow_present_throttle)) - { - return; - } - VERBOSE_LOG("VSync: {}{}{}", vsync_modes[static_cast(vsync_mode)], + VERBOSE_LOG("VSync: {}{}{}", GPUDevice::VSyncModeToString(vsync_mode), s_state.syncing_to_host_with_vsync ? " (for throttling)" : "", allow_present_throttle ? " (present throttle allowed)" : ""); - Error error; - if (!g_gpu_device->GetMainSwapChain()->SetVSyncMode(vsync_mode, allow_present_throttle, &error)) - { - ERROR_LOG("Failed to update vsync mode to {}: {}", vsync_modes[static_cast(vsync_mode)], - error.GetDescription()); - } + GPUThread::SetVSync(vsync_mode, allow_present_throttle); } GPUVSyncMode System::GetEffectiveVSyncMode() @@ -4116,7 +3989,6 @@ bool System::DumpVRAM(const char* filename) if (!IsValid()) return false; - g_gpu->RestoreDeviceContext(); return g_gpu->DumpVRAMToFile(filename); } @@ -4284,7 +4156,7 @@ void System::UpdateRunningGame(const std::string& path, CDImage* image, bool boo } if (!booting) - GPUTextureCache::SetGameID(s_state.running_game_serial); + GPUThread::SetGameSerial(s_state.running_game_serial); if (!IsReplayingGPUDump()) { @@ -4304,11 +4176,6 @@ void System::UpdateRunningGame(const std::string& path, CDImage* image, bool boo if (s_state.running_game_serial != prev_serial) UpdateSessionTime(prev_serial); - if (SaveStateSelectorUI::IsOpen()) - SaveStateSelectorUI::RefreshList(s_state.running_game_serial); - else - SaveStateSelectorUI::ClearList(); - UpdateRichPresence(booting); Host::OnGameChanged(s_state.running_game_path, s_state.running_game_serial, s_state.running_game_title); @@ -4452,42 
+4319,6 @@ bool System::ShouldStartPaused() void System::CheckForSettingsChanges(const Settings& old_settings) { - if (IsValid() && - (g_settings.gpu_renderer != old_settings.gpu_renderer || - g_settings.gpu_use_debug_device != old_settings.gpu_use_debug_device || - g_settings.gpu_disable_shader_cache != old_settings.gpu_disable_shader_cache || - g_settings.gpu_disable_dual_source_blend != old_settings.gpu_disable_dual_source_blend || - g_settings.gpu_disable_framebuffer_fetch != old_settings.gpu_disable_framebuffer_fetch || - g_settings.gpu_disable_texture_buffers != old_settings.gpu_disable_texture_buffers || - g_settings.gpu_disable_texture_copy_to_self != old_settings.gpu_disable_texture_copy_to_self || - g_settings.gpu_disable_memory_import != old_settings.gpu_disable_memory_import || - g_settings.gpu_disable_raster_order_views != old_settings.gpu_disable_raster_order_views || - g_settings.gpu_disable_compute_shaders != old_settings.gpu_disable_compute_shaders || - g_settings.gpu_disable_compressed_textures != old_settings.gpu_disable_compressed_textures || - g_settings.display_exclusive_fullscreen_control != old_settings.display_exclusive_fullscreen_control)) - { - // if debug device/threaded presentation change, we need to recreate the whole display - const bool recreate_device = - (g_settings.gpu_use_debug_device != old_settings.gpu_use_debug_device || - g_settings.gpu_disable_shader_cache != old_settings.gpu_disable_shader_cache || - g_settings.gpu_disable_dual_source_blend != old_settings.gpu_disable_dual_source_blend || - g_settings.gpu_disable_framebuffer_fetch != old_settings.gpu_disable_framebuffer_fetch || - g_settings.gpu_disable_texture_buffers != old_settings.gpu_disable_texture_buffers || - g_settings.gpu_disable_texture_copy_to_self != old_settings.gpu_disable_texture_copy_to_self || - g_settings.gpu_disable_memory_import != old_settings.gpu_disable_memory_import || - g_settings.gpu_disable_raster_order_views != 
old_settings.gpu_disable_raster_order_views || - g_settings.gpu_disable_compute_shaders != old_settings.gpu_disable_compute_shaders || - g_settings.gpu_disable_compressed_textures != old_settings.gpu_disable_compressed_textures || - g_settings.display_exclusive_fullscreen_control != old_settings.display_exclusive_fullscreen_control); - - Host::AddIconOSDMessage("RendererSwitch", ICON_FA_PAINT_ROLLER, - fmt::format(TRANSLATE_FS("OSDMessage", "Switching to {}{} GPU renderer."), - Settings::GetRendererName(g_settings.gpu_renderer), - g_settings.gpu_use_debug_device ? " (debug)" : ""), - Host::OSD_INFO_DURATION); - RecreateGPU(g_settings.gpu_renderer, recreate_device); - } - if (IsValid()) { ClearMemorySaveStates(false); @@ -4566,52 +4397,98 @@ void System::CheckForSettingsChanges(const Settings& old_settings) SPU::GetOutputStream()->SetOutputVolume(GetAudioOutputVolume()); - if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale || - g_settings.gpu_multisamples != old_settings.gpu_multisamples || - g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading || - g_settings.gpu_use_thread != old_settings.gpu_use_thread || - g_settings.gpu_use_software_renderer_for_readbacks != old_settings.gpu_use_software_renderer_for_readbacks || - g_settings.gpu_fifo_size != old_settings.gpu_fifo_size || - g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead || - g_settings.gpu_true_color != old_settings.gpu_true_color || - g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering || - g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords || - g_settings.gpu_accurate_blending != old_settings.gpu_accurate_blending || - g_settings.gpu_texture_filter != old_settings.gpu_texture_filter || - g_settings.gpu_sprite_texture_filter != old_settings.gpu_sprite_texture_filter || - g_settings.gpu_line_detect_mode != old_settings.gpu_line_detect_mode || - g_settings.gpu_force_video_timing != 
old_settings.gpu_force_video_timing || - g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || - g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale || - g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode || - g_settings.gpu_texture_cache != old_settings.gpu_texture_cache || - g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || - g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing || - g_settings.display_crop_mode != old_settings.display_crop_mode || - g_settings.display_aspect_ratio != old_settings.display_aspect_ratio || - g_settings.display_scaling != old_settings.display_scaling || - g_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage || - g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable || - g_settings.gpu_pgxp_texture_correction != old_settings.gpu_pgxp_texture_correction || - g_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction || - g_settings.gpu_pgxp_depth_buffer != old_settings.gpu_pgxp_depth_buffer || - g_settings.display_active_start_offset != old_settings.display_active_start_offset || - g_settings.display_active_end_offset != old_settings.display_active_end_offset || - g_settings.display_line_start_offset != old_settings.display_line_start_offset || - g_settings.display_line_end_offset != old_settings.display_line_end_offset || - g_settings.rewind_enable != old_settings.rewind_enable || - g_settings.runahead_frames != old_settings.runahead_frames || - g_settings.texture_replacements.enable_texture_replacements != - old_settings.texture_replacements.enable_texture_replacements || - g_settings.texture_replacements.enable_vram_write_replacements != - old_settings.texture_replacements.enable_vram_write_replacements || - g_settings.texture_replacements.dump_textures != old_settings.texture_replacements.dump_textures || - g_settings.texture_replacements.config != 
old_settings.texture_replacements.config) + if (g_settings.gpu_renderer != old_settings.gpu_renderer) { + // RecreateGPU() also pushes new settings to the thread. + Host::AddIconOSDMessage("RendererSwitch", ICON_FA_PAINT_ROLLER, + fmt::format(TRANSLATE_FS("OSDMessage", "Switching to {}{} GPU renderer."), + Settings::GetRendererName(g_settings.gpu_renderer), + g_settings.gpu_use_debug_device ? " (debug)" : ""), + Host::OSD_INFO_DURATION); + RecreateGPU(g_settings.gpu_renderer); + } + else if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale || + g_settings.gpu_multisamples != old_settings.gpu_multisamples || + g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading || + g_settings.gpu_max_queued_frames != old_settings.gpu_max_queued_frames || + g_settings.gpu_use_software_renderer_for_readbacks != + old_settings.gpu_use_software_renderer_for_readbacks || + g_settings.gpu_fifo_size != old_settings.gpu_fifo_size || + g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead || + g_settings.gpu_true_color != old_settings.gpu_true_color || + g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering || + g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords || + g_settings.gpu_accurate_blending != old_settings.gpu_accurate_blending || + g_settings.gpu_texture_filter != old_settings.gpu_texture_filter || + g_settings.gpu_sprite_texture_filter != old_settings.gpu_sprite_texture_filter || + g_settings.gpu_line_detect_mode != old_settings.gpu_line_detect_mode || + g_settings.gpu_force_video_timing != old_settings.gpu_force_video_timing || + g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || + g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale || + g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode || + g_settings.gpu_texture_cache != old_settings.gpu_texture_cache || + g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode 
|| + g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing || + g_settings.display_crop_mode != old_settings.display_crop_mode || + g_settings.display_aspect_ratio != old_settings.display_aspect_ratio || + g_settings.display_scaling != old_settings.display_scaling || + g_settings.display_alignment != old_settings.display_alignment || + g_settings.display_rotation != old_settings.display_rotation || + g_settings.display_stretch_vertically != old_settings.display_stretch_vertically || + g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || + g_settings.display_osd_scale != old_settings.display_osd_scale || + g_settings.display_osd_margin != old_settings.display_osd_margin || + g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable || + g_settings.gpu_pgxp_texture_correction != old_settings.gpu_pgxp_texture_correction || + g_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction || + g_settings.gpu_pgxp_depth_buffer != old_settings.gpu_pgxp_depth_buffer || + g_settings.display_active_start_offset != old_settings.display_active_start_offset || + g_settings.display_active_end_offset != old_settings.display_active_end_offset || + g_settings.display_line_start_offset != old_settings.display_line_start_offset || + g_settings.display_line_end_offset != old_settings.display_line_end_offset || + g_settings.debugging.show_vram != old_settings.debugging.show_vram || + g_settings.rewind_enable != old_settings.rewind_enable || + g_settings.runahead_frames != old_settings.runahead_frames || + g_settings.texture_replacements.enable_texture_replacements != + old_settings.texture_replacements.enable_texture_replacements || + g_settings.texture_replacements.enable_vram_write_replacements != + old_settings.texture_replacements.enable_vram_write_replacements || + g_settings.texture_replacements.dump_textures != old_settings.texture_replacements.dump_textures || + 
g_settings.texture_replacements.config != old_settings.texture_replacements.config) + { + GPUThread::UpdateSettings(true, false); + + // NOTE: Must come after the GPU thread settings update, otherwise it allocs the wrong size textures. ClearMemorySaveStates(true); - g_gpu->UpdateSettings(old_settings); + if (IsPaused()) - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); + } + else if (const bool device_settings_changed = g_settings.AreGPUDeviceSettingsChanged(old_settings); + device_settings_changed || g_settings.display_show_fps != old_settings.display_show_fps || + g_settings.display_show_speed != old_settings.display_show_speed || + g_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats || + g_settings.display_show_resolution != old_settings.display_show_resolution || + g_settings.display_show_latency_stats != old_settings.display_show_latency_stats || + g_settings.display_show_cpu_usage != old_settings.display_show_cpu_usage || + g_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage || + g_settings.display_show_latency_stats != old_settings.display_show_latency_stats || + g_settings.display_show_frame_times != old_settings.display_show_frame_times || + g_settings.display_show_status_indicators != old_settings.display_show_status_indicators || + g_settings.display_show_inputs != old_settings.display_show_inputs || + g_settings.display_show_enhancements != old_settings.display_show_enhancements || + g_settings.display_auto_resize_window != old_settings.display_auto_resize_window || + g_settings.display_screenshot_mode != old_settings.display_screenshot_mode || + g_settings.display_screenshot_format != old_settings.display_screenshot_format || + g_settings.display_screenshot_quality != old_settings.display_screenshot_quality) + { + // don't need to re-present when paused + GPUThread::UpdateSettings(true, device_settings_changed); + } + else + { + // still need to update debug windows + GPUThread::UpdateSettings(false, 
false); } if (g_settings.gpu_widescreen_hack != old_settings.gpu_widescreen_hack || @@ -4639,9 +4516,6 @@ void System::CheckForSettingsChanges(const Settings& old_settings) InterruptExecution(); } - if (g_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) - g_gpu->ResetStatistics(); - if (g_settings.cdrom_readahead_sectors != old_settings.cdrom_readahead_sectors) CDROM::SetReadaheadSectors(g_settings.cdrom_readahead_sectors); @@ -4700,9 +4574,6 @@ void System::CheckForSettingsChanges(const Settings& old_settings) PIO::UpdateSettings(old_settings); } - if (g_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage) - g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); - if (g_settings.inhibit_screensaver != old_settings.inhibit_screensaver) { if (g_settings.inhibit_screensaver) @@ -4711,11 +4582,6 @@ void System::CheckForSettingsChanges(const Settings& old_settings) PlatformMisc::ResumeScreensaver(); } - PostProcessing::UpdateSettings(); - - if (ImGuiManager::UpdateDebugWindowConfig()) - InvalidateDisplay(); - #ifdef ENABLE_GDB_SERVER if (g_settings.debugging.enable_gdb_server != old_settings.debugging.enable_gdb_server || g_settings.debugging.gdb_server_port != old_settings.debugging.gdb_server_port) @@ -4728,8 +4594,12 @@ void System::CheckForSettingsChanges(const Settings& old_settings) } else { - if (g_gpu_device) + if (GPUThread::IsFullscreenUIRequested()) { + // handle device setting updates as well + if (g_settings.AreGPUDeviceSettingsChanged(old_settings)) + GPUThread::UpdateSettings(false, true); + if (g_settings.display_vsync != old_settings.display_vsync || g_settings.display_disable_mailbox_presentation != old_settings.display_disable_mailbox_presentation) { @@ -4738,18 +4608,8 @@ void System::CheckForSettingsChanges(const Settings& old_settings) } } - if (g_gpu_device) - { - if (g_settings.display_osd_scale != old_settings.display_osd_scale) - ImGuiManager::SetGlobalScale(g_settings.display_osd_scale 
/ 100.0f); - if (g_settings.display_osd_margin != old_settings.display_osd_margin) - ImGuiManager::SetScreenMargin(g_settings.display_osd_margin); - } - Achievements::UpdateSettings(old_settings); - FullscreenUI::CheckForConfigChanges(old_settings); - #ifdef ENABLE_DISCORD_PRESENCE if (g_settings.enable_discord_presence != old_settings.enable_discord_presence) { @@ -4769,6 +4629,16 @@ void System::CheckForSettingsChanges(const Settings& old_settings) Panic("Failed to reallocate memory map. The log may contain more information."); } } + + if (g_settings.gpu_use_thread != old_settings.gpu_use_thread) [[unlikely]] + { + GPUThread::Internal::SetThreadEnabled(g_settings.gpu_use_thread); + } + else if (g_settings.gpu_use_thread && g_settings.gpu_max_queued_frames != old_settings.gpu_max_queued_frames) + [[unlikely]] + { + GPUThread::SyncGPUThread(false); + } } void System::SetTaintsFromSettings() @@ -5051,53 +4921,21 @@ void System::UpdateMemorySaveStateSettings() // allocate storage for memory save states if (num_slots > 0) - { - DEV_LOG("Allocating {} memory save state slots", num_slots); - s_state.memory_save_states.resize(num_slots); - } + AllocateMemoryStates(num_slots); // reenter execution loop, don't want to try to save a state now if runahead was turned off InterruptExecution(); } -bool System::SaveRewindState() -{ -#ifdef PROFILE_MEMORY_SAVE_STATES - Timer save_timer; -#endif - - MemorySaveState& mss = AllocateMemoryState(); - if (!SaveMemoryState(mss)) - { - PopMemoryState(); - return false; - } - -#ifdef PROFILE_MEMORY_SAVE_STATES - DEV_LOG("Saved rewind state ({} bytes, took {:.4f} ms)", mss.state_size, save_timer.GetTimeMilliseconds()); -#endif - - return true; -} - bool System::LoadOneRewindState() { if (s_state.memory_save_state_count == 0) return false; -#ifdef PROFILE_MEMORY_SAVE_STATES - Timer load_timer; -#endif - - MemorySaveState& mss = PopMemoryState(); - LoadMemoryState(mss, true); + LoadMemoryState(PopMemoryState(), true); // back in time, need 
to reset perf counters - PerformanceCounters::Reset(); - -#ifdef PROFILE_MEMORY_SAVE_STATES - DEV_LOG("Rewind load took {:.4f} ms", load_timer.GetTimeMilliseconds()); -#endif + GPUThread::RunOnThread(&PerformanceCounters::Reset); return true; } @@ -5144,19 +4982,17 @@ void System::DoRewind() s_state.rewind_load_counter--; } - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); + Host::PumpMessagesOnCPUThread(); IdlePollUpdate(); - Throttle(Timer::GetCurrentValue()); + Throttle(Timer::GetCurrentValue(), s_state.next_frame_time); } -void System::SaveRunaheadState() +bool System::IsRunaheadActive() { - // try to reuse the frontmost slot - MemorySaveState& mss = AllocateMemoryState(); - if (!SaveMemoryState(mss)) - PopMemoryState(); + return (s_state.runahead_frames > 0); } bool System::DoRunahead() @@ -5207,7 +5043,7 @@ bool System::DoRunahead() if (s_state.runahead_replay_frames > 0) { // keep running ahead - SaveRunaheadState(); + SaveMemoryState(AllocateMemoryState()); return true; } @@ -5255,6 +5091,7 @@ void System::ShutdownSystem(bool save_resume_state) } s_state.state = State::Stopping; + std::atomic_thread_fence(std::memory_order_release); if (!s_state.system_executing) DestroySystem(); } @@ -5371,17 +5208,17 @@ std::string System::GetScreenshotPath(const char* extension) return path; } -bool System::SaveScreenshot(const char* path, DisplayScreenshotMode mode, DisplayScreenshotFormat format, u8 quality, +void System::SaveScreenshot(const char* path, DisplayScreenshotMode mode, DisplayScreenshotFormat format, u8 quality, bool compress_on_thread) { if (!IsValid()) - return false; + return; std::string auto_path; if (!path) path = (auto_path = GetScreenshotPath(Settings::GetDisplayScreenshotFormatExtension(format))).c_str(); - return g_gpu->RenderScreenshotToFile(path, mode, quality, compress_on_thread, true); + GPUBackend::RenderScreenshotToFile(path, mode, quality, compress_on_thread, true); } bool System::StartRecordingGPUDump(const char* path /*= 
nullptr*/, u32 num_frames /*= 0*/) @@ -5437,10 +5274,40 @@ bool System::StartMediaCapture(std::string path) { const bool capture_video = Host::GetBoolSettingValue("MediaCapture", "VideoCapture", true); const bool capture_audio = Host::GetBoolSettingValue("MediaCapture", "AudioCapture", true); - return StartMediaCapture(std::move(path), capture_video, capture_audio); + + // Auto size is more complex. + if (capture_video && Host::GetBoolSettingValue("MediaCapture", "VideoAutoSize", false)) + { + // need to query this on the GPU thread + GPUThread::RunOnBackend( + [path = std::move(path), capture_audio](GPUBackend* backend) mutable { + if (!backend) + return; + + GSVector4i unused_display_rect, unused_draw_rect; + u32 video_width, video_height; + backend->CalculateScreenshotSize(DisplayScreenshotMode::InternalResolution, &video_width, &video_height, + &unused_display_rect, &unused_draw_rect); + + // fire back to the CPU thread to actually start the capture + Host::RunOnCPUThread([path = std::move(path), capture_audio, video_width, video_height]() mutable { + StartMediaCapture(std::move(path), true, capture_audio, video_width, video_height); + }); + }, + false, true); + return true; + } + + u32 video_width = + Host::GetUIntSettingValue("MediaCapture", "VideoWidth", Settings::DEFAULT_MEDIA_CAPTURE_VIDEO_WIDTH); + u32 video_height = + Host::GetUIntSettingValue("MediaCapture", "VideoHeight", Settings::DEFAULT_MEDIA_CAPTURE_VIDEO_HEIGHT); + + return StartMediaCapture(std::move(path), capture_video, capture_audio, video_width, video_height); } -bool System::StartMediaCapture(std::string path, bool capture_video, bool capture_audio) +bool System::StartMediaCapture(std::string path, bool capture_video, bool capture_audio, u32 video_width, + u32 video_height) { if (!IsValid()) return false; @@ -5448,25 +5315,11 @@ bool System::StartMediaCapture(std::string path, bool capture_video, bool captur if (s_state.media_capture) StopMediaCapture(); - // Need to work out the size. 
- u32 capture_width = - Host::GetUIntSettingValue("MediaCapture", "VideoWidth", Settings::DEFAULT_MEDIA_CAPTURE_VIDEO_WIDTH); - u32 capture_height = - Host::GetUIntSettingValue("MediaCapture", "VideoHeight", Settings::DEFAULT_MEDIA_CAPTURE_VIDEO_HEIGHT); + const WindowInfo& main_window_info = GPUThread::GetRenderWindowInfo(); const GPUTexture::Format capture_format = - g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; + main_window_info.IsSurfaceless() ? GPUTexture::Format::RGBA8 : main_window_info.surface_format; if (capture_video) - { - // TODO: This will be a mess with GPU thread. - if (Host::GetBoolSettingValue("MediaCapture", "VideoAutoSize", false)) - { - GSVector4i unused_display_rect, unused_draw_rect; - g_gpu->CalculateScreenshotSize(DisplayScreenshotMode::InternalResolution, &capture_width, &capture_height, - &unused_display_rect, &unused_draw_rect); - } - - MediaCapture::AdjustVideoSize(&capture_width, &capture_height); - } + MediaCapture::AdjustVideoSize(&video_width, &video_height); // TODO: Render anamorphic capture instead? constexpr float aspect = 1.0f; @@ -5489,8 +5342,8 @@ bool System::StartMediaCapture(std::string path, bool capture_video, bool captur s_state.media_capture = MediaCapture::Create(backend, &error); if (!s_state.media_capture || !s_state.media_capture->BeginCapture( - s_state.video_frame_rate, aspect, capture_width, capture_height, capture_format, SPU::SAMPLE_RATE, - std::move(path), capture_video, Host::GetSmallStringSettingValue("MediaCapture", "VideoCodec"), + s_state.video_frame_rate, aspect, video_width, video_height, capture_format, SPU::SAMPLE_RATE, std::move(path), + capture_video, Host::GetSmallStringSettingValue("MediaCapture", "VideoCodec"), Host::GetUIntSettingValue("MediaCapture", "VideoBitrate", Settings::DEFAULT_MEDIA_CAPTURE_VIDEO_BITRATE), Host::GetBoolSettingValue("MediaCapture", "VideoCodecUseArgs", false) ? 
Host::GetStringSettingValue("MediaCapture", "AudioCodecArgs") : @@ -5511,7 +5364,7 @@ bool System::StartMediaCapture(std::string path, bool capture_video, bool captur return false; } - Host::AddIconOSDMessage("MediaCapture", ICON_FA_CAMERA, + Host::AddIconOSDMessage(fmt::format("MediaCapture_{}", s_state.media_capture->GetPath()), ICON_FA_CAMERA, fmt::format(TRANSLATE_FS("System", "Starting {0} to '{1}'."), GetCaptureTypeForMessage(s_state.media_capture->IsCapturingVideo(), s_state.media_capture->IsCapturingAudio()), @@ -5527,30 +5380,45 @@ void System::StopMediaCapture() if (!s_state.media_capture) return; - const bool was_capturing_audio = s_state.media_capture->IsCapturingAudio(); - const bool was_capturing_video = s_state.media_capture->IsCapturingVideo(); + if (s_state.media_capture->IsCapturingVideo()) + { + // If we're capturing video, we need to finish the capture on the GPU thread. + // This is because it owns texture objects, and OpenGL is not thread-safe. + GPUThread::RunOnThread( + [cap = s_state.media_capture.release()]() mutable { StopMediaCapture(std::unique_ptr(cap)); }); + } + else + { + // Otherwise, we can do it on the CPU thread. 
+ StopMediaCapture(std::move(s_state.media_capture)); + } + + Host::OnMediaCaptureStopped(); +} + +void System::StopMediaCapture(std::unique_ptr cap) +{ + const bool was_capturing_audio = cap->IsCapturingAudio(); + const bool was_capturing_video = cap->IsCapturingVideo(); Error error; - if (s_state.media_capture->EndCapture(&error)) + std::string osd_key = fmt::format("MediaCapture_{}", cap->GetPath()); + if (cap->EndCapture(&error)) { - Host::AddIconOSDMessage("MediaCapture", ICON_FA_CAMERA, + Host::AddIconOSDMessage(std::move(osd_key), ICON_FA_CAMERA, fmt::format(TRANSLATE_FS("System", "Stopped {0} to '{1}'."), GetCaptureTypeForMessage(was_capturing_video, was_capturing_audio), - Path::GetFileName(s_state.media_capture->GetPath())), + Path::GetFileName(cap->GetPath())), Host::OSD_INFO_DURATION); } else { - Host::AddIconOSDWarning("MediaCapture", ICON_FA_EXCLAMATION_TRIANGLE, + Host::AddIconOSDWarning(std::move(osd_key), ICON_FA_EXCLAMATION_TRIANGLE, fmt::format(TRANSLATE_FS("System", "Stopped {0}: {1}."), - GetCaptureTypeForMessage(s_state.media_capture->IsCapturingVideo(), - s_state.media_capture->IsCapturingAudio()), + GetCaptureTypeForMessage(was_capturing_video, was_capturing_audio), error.GetDescription()), Host::OSD_INFO_DURATION); } - s_state.media_capture.reset(); - - Host::OnMediaCaptureStopped(); } std::string System::GetGameSaveStateFileName(std::string_view serial, s32 slot) @@ -5823,18 +5691,15 @@ void System::ToggleSoftwareRendering() if (IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software) return; - const GPURenderer new_renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer; + const GPURenderer new_renderer = + GPUBackend::IsUsingHardwareBackend() ? 
GPURenderer::Software : g_settings.gpu_renderer; Host::AddIconOSDMessage("SoftwareRendering", ICON_FA_PAINT_ROLLER, fmt::format(TRANSLATE_FS("OSDMessage", "Switching to {} renderer..."), Settings::GetRendererDisplayName(new_renderer)), Host::OSD_QUICK_DURATION); + RecreateGPU(new_renderer); - - // TODO: GPU-THREAD: Drop this - PerformanceCounters::Reset(); - - g_gpu->UpdateResolutionScale(); } void System::RequestDisplaySize(float scale /*= 0.0f*/) @@ -5843,11 +5708,20 @@ void System::RequestDisplaySize(float scale /*= 0.0f*/) return; if (scale == 0.0f) - scale = g_gpu->IsHardwareRenderer() ? static_cast(g_settings.gpu_resolution_scale) : 1.0f; + scale = GPUBackend::IsUsingHardwareBackend() ? static_cast(g_settings.gpu_resolution_scale) : 1.0f; - float requested_width = static_cast(g_gpu->GetCRTCDisplayWidth()) * scale; - float requested_height = static_cast(g_gpu->GetCRTCDisplayHeight()) * scale; - g_gpu->ApplyPixelAspectRatioToSize(&requested_width, &requested_height); + float requested_width, requested_height; + if (g_settings.debugging.show_vram) + { + requested_width = static_cast(VRAM_WIDTH) * scale; + requested_height = static_cast(VRAM_HEIGHT) * scale; + } + else + { + requested_width = static_cast(g_gpu->GetCRTCDisplayWidth()) * scale; + requested_height = static_cast(g_gpu->GetCRTCDisplayHeight()) * scale; + g_gpu->ApplyPixelAspectRatioToSize(g_gpu->ComputePixelAspectRatio(), &requested_width, &requested_height); + } if (g_settings.display_rotation == DisplayRotation::Rotate90 || g_settings.display_rotation == DisplayRotation::Rotate270) @@ -5865,18 +5739,6 @@ void System::DisplayWindowResized() return; UpdateGTEAspectRatio(); - - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateResolutionScale(); - - // If we're paused, re-present the current frame at the new window size. - if (IsPaused()) - { - // Hackity hack, on some systems, presenting a single frame isn't enough to actually get it - // displayed. Two seems to be good enough. 
Maybe something to do with direct scanout. - InvalidateDisplay(); - InvalidateDisplay(); - } } void System::UpdateGTEAspectRatio() @@ -5900,14 +5762,14 @@ void System::UpdateGTEAspectRatio() } else if (gte_ar == DisplayAspectRatio::MatchWindow) { - if (const GPUSwapChain* main_swap_chain = g_gpu_device->GetMainSwapChain()) + if (const WindowInfo& main_window_info = GPUThread::GetRenderWindowInfo(); !main_window_info.IsSurfaceless()) { // Pre-apply the native aspect ratio correction to the window size. // MatchWindow does not correct the display aspect ratio, so we need to apply it here. const float correction = g_gpu->ComputeAspectRatioCorrection(); custom_num = - static_cast(std::max(std::round(static_cast(main_swap_chain->GetWidth()) / correction), 1.0f)); - custom_denom = std::max(main_swap_chain->GetHeight(), 1u); + static_cast(std::max(std::round(static_cast(main_window_info.surface_width) / correction), 1.0f)); + custom_denom = std::max(main_window_info.surface_height, 1u); gte_ar = DisplayAspectRatio::Custom; } else @@ -5920,62 +5782,6 @@ void System::UpdateGTEAspectRatio() GTE::SetAspectRatio(gte_ar, custom_num, custom_denom); } -bool System::PresentDisplay(bool explicit_present, u64 present_time) -{ - // acquire for IO.MousePos. - std::atomic_thread_fence(std::memory_order_acquire); - - FullscreenUI::Render(); - ImGuiManager::RenderTextOverlays(); - ImGuiManager::RenderOSDMessages(); - - if (s_state.state == State::Running) - ImGuiManager::RenderSoftwareCursors(); - - // Debug windows are always rendered, otherwise mouse input breaks on skip. - ImGuiManager::RenderOverlayWindows(); - - if (IsValid()) - ImGuiManager::RenderDebugWindows(); - - const GPUDevice::PresentResult pres = - g_gpu_device->HasMainSwapChain() ? - (g_gpu ? 
g_gpu->PresentDisplay() : g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain())) : - GPUDevice::PresentResult::SkipPresent; - if (pres == GPUDevice::PresentResult::OK) - { - g_gpu_device->RenderImGui(g_gpu_device->GetMainSwapChain()); - g_gpu_device->EndPresent(g_gpu_device->GetMainSwapChain(), explicit_present, present_time); - - if (g_gpu_device->IsGPUTimingEnabled()) - PerformanceCounters::AccumulateGPUTime(); - } - else - { - if (pres == GPUDevice::PresentResult::DeviceLost) [[unlikely]] - HandleHostGPUDeviceLost(); - else if (pres == GPUDevice::PresentResult::ExclusiveFullscreenLost) - HandleExclusiveFullscreenLost(); - else - g_gpu_device->FlushCommands(); - - // Still need to kick ImGui or it gets cranky. - ImGui::EndFrame(); - } - - ImGuiManager::NewFrame(); - - return (pres == GPUDevice::PresentResult::OK); -} - -void System::InvalidateDisplay() -{ - PresentDisplay(false, 0); - - if (g_gpu) - g_gpu->RestoreDeviceContext(); -} - bool System::OpenGPUDump(std::string path, Error* error) { std::unique_ptr new_dump = GPUDump::Player::Open(std::move(path), error); diff --git a/src/core/system.h b/src/core/system.h index e23468bdf..2a45403c0 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -165,7 +165,6 @@ std::unique_ptr GetGameSettingsInterface(const GameDatabas std::string GetInputProfilePath(std::string_view name); State GetState(); -void SetState(State new_state); bool IsRunning(); bool IsPaused(); bool IsShutdown(); @@ -384,7 +383,7 @@ s32 GetAudioOutputVolume(); void UpdateVolume(); /// Saves a screenshot to the specified file. If no file name is provided, one will be generated automatically. 
-bool SaveScreenshot(const char* path = nullptr, DisplayScreenshotMode mode = g_settings.display_screenshot_mode, +void SaveScreenshot(const char* path = nullptr, DisplayScreenshotMode mode = g_settings.display_screenshot_mode, DisplayScreenshotFormat format = g_settings.display_screenshot_format, u8 quality = g_settings.display_screenshot_quality, bool compress_on_thread = true); @@ -400,7 +399,6 @@ MediaCapture* GetMediaCapture(); /// Media capture (video and/or audio). If no path is provided, one will be generated automatically. bool StartMediaCapture(std::string path = {}); -bool StartMediaCapture(std::string path, bool capture_video, bool capture_audio); void StopMediaCapture(); /// Toggle Widescreen Hack and Aspect Ratio @@ -413,15 +411,11 @@ void ToggleSoftwareRendering(); /// If the scale is set to 0, the internal resolution will be used, otherwise it is treated as a multiplier to 1x. void RequestDisplaySize(float scale = 0.0f); -/// Renders the display. -bool PresentDisplay(bool explicit_present, u64 present_time); -void InvalidateDisplay(); - ////////////////////////////////////////////////////////////////////////// // Memory Save States (Rewind and Runahead) ////////////////////////////////////////////////////////////////////////// void CalculateRewindMemoryUsage(u32 num_saves, u32 resolution_scale, u64* ram_usage, u64* vram_usage); -void ClearMemorySaveStates(bool deallocate_resources); +void ClearMemorySaveStates(bool reallocate_resources); void SetRunaheadReplayFlag(); /// Shared socket multiplexer, used by PINE/GDB/etc. diff --git a/src/core/system_private.h b/src/core/system_private.h index de3ccab3e..046bec245 100644 --- a/src/core/system_private.h +++ b/src/core/system_private.h @@ -7,26 +7,32 @@ #include +class GPUBackend; +struct GPUBackendFramePresentationParameters; + namespace System { /// Memory save states - only for internal use. 
struct MemorySaveState { - std::unique_ptr vram_texture; DynamicHeapArray state_data; size_t state_size; + + std::unique_ptr vram_texture; + DynamicHeapArray gpu_state_data; + size_t gpu_state_size; }; MemorySaveState& AllocateMemoryState(); MemorySaveState& GetFirstMemoryState(); MemorySaveState& PopMemoryState(); +bool AllocateMemoryStates(size_t state_count); +void FreeMemoryStateTextures(); void FreeMemoryStateStorage(); void LoadMemoryState(MemorySaveState& mss, bool update_display); -bool SaveMemoryState(MemorySaveState& mss); - -/// Returns the maximum size of a save state, considering the current configuration. -size_t GetMaxSaveStateSize(); +void SaveMemoryState(MemorySaveState& mss); +bool IsRunaheadActive(); void IncrementFrameNumber(); void IncrementInternalFrameNumber(); void FrameDone(); @@ -35,6 +41,10 @@ void FrameDone(); GPUVSyncMode GetEffectiveVSyncMode(); bool ShouldAllowPresentThrottle(); +/// Retrieves timing information for frame presentation on the GPU thread. +/// Returns false if this frame should not be presented or the command buffer flushed. +bool GetFramePresentationParameters(GPUBackendFramePresentationParameters* frame); + /// Call when host display size changes. void DisplayWindowResized(); @@ -65,6 +75,7 @@ void IdlePollUpdate(); /// Task threads, asynchronous work which will block system shutdown. void QueueTaskOnThread(std::function task); void RemoveSelfFromTaskThreads(); +void JoinTaskThreads(); } // namespace System @@ -91,11 +102,8 @@ void OnSystemPaused(); /// Called when the VM is resumed after being paused. void OnSystemResumed(); -/// Called when the pause state changes, or fullscreen UI opens. -void OnIdleStateChanged(); - /// Called when performance metrics are updated, approximately once a second. -void OnPerformanceCountersUpdated(); +void OnPerformanceCountersUpdated(const GPUBackend* gpu_backend); /// Provided by the host; called when the running executable changes. 
void OnGameChanged(const std::string& disc_path, const std::string& game_serial, const std::string& game_name); diff --git a/src/duckstation-qt/debuggerwindow.cpp b/src/duckstation-qt/debuggerwindow.cpp index 81ef03716..7ecac152c 100644 --- a/src/duckstation-qt/debuggerwindow.cpp +++ b/src/duckstation-qt/debuggerwindow.cpp @@ -241,14 +241,14 @@ void DebuggerWindow::onBreakpointListItemChanged(QTreeWidgetItem* item, int colu void DebuggerWindow::onStepIntoActionTriggered() { - Assert(System::IsPaused()); + Assert(QtHost::IsSystemPaused()); saveCurrentState(); g_emu_thread->singleStepCPU(); } void DebuggerWindow::onStepOverActionTriggered() { - Assert(System::IsPaused()); + Assert(QtHost::IsSystemPaused()); if (!CPU::AddStepOverBreakpoint()) { onStepIntoActionTriggered(); @@ -262,7 +262,7 @@ void DebuggerWindow::onStepOverActionTriggered() void DebuggerWindow::onStepOutActionTriggered() { - Assert(System::IsPaused()); + Assert(QtHost::IsSystemPaused()); if (!CPU::AddStepOutBreakpoint()) { QMessageBox::critical(this, tr("Debugger"), tr("Failed to add step-out breakpoint, are you in a valid function?")); diff --git a/src/duckstation-qt/graphicssettingswidget.cpp b/src/duckstation-qt/graphicssettingswidget.cpp index bd9535f91..1fbbaa399 100644 --- a/src/duckstation-qt/graphicssettingswidget.cpp +++ b/src/duckstation-qt/graphicssettingswidget.cpp @@ -293,6 +293,9 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* // Debugging Tab SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gpuThread, "GPU", "UseThread", true); + SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.maxQueuedFrames, "GPU", "MaxQueuedFrames", + Settings::DEFAULT_GPU_MAX_QUEUED_FRAMES); + connect(m_ui.gpuThread, &QCheckBox::checkStateChanged, this, &GraphicsSettingsWidget::onGPUThreadChanged); SettingWidgetBinder::BindWidgetToEnumSetting( sif, m_ui.gpuDumpCompressionMode, "GPU", "DumpCompressionMode", &Settings::ParseGPUDumpCompressionMode, @@ -325,6 +328,7 @@ 
GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* onMediaCaptureVideoEnabledChanged(); onEnableTextureCacheChanged(); onEnableAnyTextureReplacementsChanged(); + onGPUThreadChanged(); onShowDebugSettingsChanged(QtHost::ShouldShowDebugOptions()); // Rendering Tab @@ -610,8 +614,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* tr("Draws a wireframe outline of the triangles rendered by the console's GPU, either as a " "replacement or an overlay.")); dialog->registerWidgetHelp(m_ui.gpuThread, tr("Threaded Rendering"), tr("Checked"), - tr("Uses a second thread for drawing graphics. Currently only available for the software " - "renderer, but can provide a significant speed improvement, and is safe to use.")); + tr("Uses a second thread for drawing graphics. Provides a significant speed improvement " + "particularly with the software renderer, and is safe to use.")); dialog->registerWidgetHelp( m_ui.useDebugDevice, tr("Use Debug Device"), tr("Unchecked"), @@ -819,8 +823,6 @@ void GraphicsSettingsWidget::updateRendererDependentOptions() m_ui.blitSwapChain->setEnabled(render_api == RenderAPI::D3D11); #endif - m_ui.gpuThread->setEnabled(!is_hardware); - m_ui.exclusiveFullscreenLabel->setEnabled(render_api == RenderAPI::D3D11 || render_api == RenderAPI::D3D12 || render_api == RenderAPI::Vulkan); m_ui.exclusiveFullscreenControl->setEnabled(render_api == RenderAPI::Vulkan); @@ -1181,6 +1183,13 @@ void GraphicsSettingsWidget::onEnableAnyTextureReplacementsChanged() m_ui.preloadTextureReplacements->setEnabled(any_replacements_enabled); } +void GraphicsSettingsWidget::onGPUThreadChanged() +{ + const bool enabled = m_dialog->getEffectiveBoolValue("GPU", "UseThread", true); + m_ui.maxQueuedFrames->setEnabled(enabled); + m_ui.maxQueuedFramesLabel->setEnabled(enabled); +} + void GraphicsSettingsWidget::onTextureReplacementOptionsClicked() { QDialog dlg(QtUtils::GetRootWidget(this)); diff --git 
a/src/duckstation-qt/graphicssettingswidget.h b/src/duckstation-qt/graphicssettingswidget.h index a807a3f7f..d1ae58060 100644 --- a/src/duckstation-qt/graphicssettingswidget.h +++ b/src/duckstation-qt/graphicssettingswidget.h @@ -44,6 +44,8 @@ private Q_SLOTS: void onEnableAnyTextureReplacementsChanged(); void onTextureReplacementOptionsClicked(); + void onGPUThreadChanged(); + private: static constexpr int TAB_INDEX_RENDERING = 0; static constexpr int TAB_INDEX_ADVANCED = 1; diff --git a/src/duckstation-qt/graphicssettingswidget.ui b/src/duckstation-qt/graphicssettingswidget.ui index 43f0ff2d8..120fb0870 100644 --- a/src/duckstation-qt/graphicssettingswidget.ui +++ b/src/duckstation-qt/graphicssettingswidget.ui @@ -1286,11 +1286,29 @@ - - - Threaded Rendering - - + + + + + Threaded Rendering + + + + + + + Max Queued Frames: + + + + + + + 10 + + + + diff --git a/src/duckstation-qt/mainwindow.cpp b/src/duckstation-qt/mainwindow.cpp index efb7fe603..2c4ee64ec 100644 --- a/src/duckstation-qt/mainwindow.cpp +++ b/src/duckstation-qt/mainwindow.cpp @@ -85,6 +85,7 @@ static bool s_use_central_widget = false; // UI thread VM validity. 
static bool s_system_valid = false; static bool s_system_paused = false; +static bool s_fullscreen_ui_started = false; static std::atomic_uint32_t s_system_locked{false}; static QString s_current_game_title; static QString s_current_game_serial; @@ -764,7 +765,7 @@ void MainWindow::recreate() { g_emu_thread->setSurfaceless(false); g_main_window->updateEmulationActions(false, System::IsValid(), Achievements::IsHardcoreModeActive()); - g_main_window->onFullscreenUIStateChange(g_emu_thread->isRunningFullscreenUI()); + g_main_window->onFullscreenUIStartedOrStopped(s_fullscreen_ui_started); } if (controller_settings_window_pos.has_value()) @@ -1258,8 +1259,9 @@ void MainWindow::onStartFullscreenUITriggered() g_emu_thread->startFullscreenUI(); } -void MainWindow::onFullscreenUIStateChange(bool running) +void MainWindow::onFullscreenUIStartedOrStopped(bool running) { + s_fullscreen_ui_started = running; m_ui.actionStartFullscreenUI->setText(running ? tr("Stop Big Picture Mode") : tr("Start Big Picture Mode")); m_ui.actionStartFullscreenUI2->setText(running ? 
tr("Exit Big Picture") : tr("Big Picture")); } @@ -2046,7 +2048,7 @@ void MainWindow::connectSignals() connect(g_emu_thread, &EmuThread::mediaCaptureStarted, this, &MainWindow::onMediaCaptureStarted); connect(g_emu_thread, &EmuThread::mediaCaptureStopped, this, &MainWindow::onMediaCaptureStopped); connect(g_emu_thread, &EmuThread::mouseModeRequested, this, &MainWindow::onMouseModeRequested); - connect(g_emu_thread, &EmuThread::fullscreenUIStateChange, this, &MainWindow::onFullscreenUIStateChange); + connect(g_emu_thread, &EmuThread::fullscreenUIStartedOrStopped, this, &MainWindow::onFullscreenUIStartedOrStopped); connect(g_emu_thread, &EmuThread::achievementsLoginRequested, this, &MainWindow::onAchievementsLoginRequested); connect(g_emu_thread, &EmuThread::achievementsChallengeModeChanged, this, &MainWindow::onAchievementsChallengeModeChanged); @@ -2503,7 +2505,7 @@ bool MainWindow::requestShutdown(bool allow_confirm /* = true */, bool allow_sav // reshow the main window during display updates, because otherwise fullscreen transitions and renderer switches // would briefly show and then hide the main window. So instead, we do it on shutdown, here. Except if we're in // batch mode, when we're going to exit anyway. - if (!isRenderingToMain() && isHidden() && !QtHost::InBatchMode() && !g_emu_thread->isRunningFullscreenUI()) + if (!isRenderingToMain() && isHidden() && !QtHost::InBatchMode() && !s_fullscreen_ui_started) updateWindowState(true); // Now we can actually shut down the VM. 
diff --git a/src/duckstation-qt/mainwindow.h b/src/duckstation-qt/mainwindow.h index 2a7c2656f..5941595b8 100644 --- a/src/duckstation-qt/mainwindow.h +++ b/src/duckstation-qt/mainwindow.h @@ -168,7 +168,7 @@ private Q_SLOTS: void onCheatsActionTriggered(); void onCheatsMenuAboutToShow(); void onStartFullscreenUITriggered(); - void onFullscreenUIStateChange(bool running); + void onFullscreenUIStartedOrStopped(bool running); void onRemoveDiscActionTriggered(); void onScanForNewGamesTriggered(); void onViewToolbarActionToggled(bool checked); diff --git a/src/duckstation-qt/qthost.cpp b/src/duckstation-qt/qthost.cpp index 5366486f7..ffec2e7e9 100644 --- a/src/duckstation-qt/qthost.cpp +++ b/src/duckstation-qt/qthost.cpp @@ -19,7 +19,9 @@ #include "core/game_list.h" #include "core/gdb_server.h" #include "core/gpu.h" +#include "core/gpu_backend.h" #include "core/gpu_hw_texture_cache.h" +#include "core/gpu_thread.h" #include "core/host.h" #include "core/imgui_overlays.h" #include "core/memory_card.h" @@ -223,7 +225,6 @@ bool QtHost::SaveGameSettings(SettingsInterface* sif, bool delete_if_empty) INISettingsInterface* ini = static_cast(sif); Error error; - // if there's no keys, just toss the whole thing out if (delete_if_empty && ini->IsEmpty()) { @@ -576,13 +577,8 @@ void Host::LoadSettings(const SettingsInterface& si, std::unique_lock params) @@ -889,7 +866,7 @@ void EmuThread::onDisplayWindowMouseWheelEvent(const QPoint& delta_angle) void EmuThread::onDisplayWindowResized(int width, int height, float scale) { - Host::ResizeDisplayWindow(width, height, scale); + GPUThread::ResizeDisplayWindow(width, height, scale); } void EmuThread::redrawDisplayWindow() @@ -900,10 +877,10 @@ void EmuThread::redrawDisplayWindow() return; } - if (!g_gpu_device || System::IsShutdown()) + if (System::IsShutdown()) return; - System::InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); } void EmuThread::toggleFullscreen() @@ -931,7 +908,7 @@ void EmuThread::setFullscreen(bool fullscreen, 
bool allow_render_to_main) m_is_fullscreen = fullscreen; m_is_rendering_to_main = allow_render_to_main && shouldRenderToMain(); - Host::UpdateDisplayWindow(fullscreen); + GPUThread::UpdateDisplayWindow(fullscreen); } bool Host::IsFullscreen() @@ -960,7 +937,7 @@ void EmuThread::setSurfaceless(bool surfaceless) return; m_is_surfaceless = surfaceless; - Host::UpdateDisplayWindow(false); + GPUThread::UpdateDisplayWindow(false); } void EmuThread::requestDisplaySize(float scale) @@ -1017,6 +994,7 @@ void Host::OnSystemStarting() void Host::OnSystemStarted() { g_emu_thread->stopBackgroundControllerPollTimer(); + g_emu_thread->wakeThread(); emit g_emu_thread->systemStarted(); } @@ -1034,6 +1012,7 @@ void Host::OnSystemResumed() g_emu_thread->setSurfaceless(false); emit g_emu_thread->systemResumed(); + g_emu_thread->wakeThread(); g_emu_thread->stopBackgroundControllerPollTimer(); } @@ -1045,9 +1024,14 @@ void Host::OnSystemDestroyed() emit g_emu_thread->systemDestroyed(); } -void Host::OnIdleStateChanged() +void Host::OnFullscreenUIStartedOrStopped(bool started) { - g_emu_thread->wakeThread(); + g_emu_thread->setFullscreenUIStarted(started); +} + +void Host::OnGPUThreadRunIdleChanged(bool is_active) +{ + g_emu_thread->setGPUThreadRunIdle(is_active); } void EmuThread::reloadInputSources() @@ -1291,7 +1275,12 @@ void EmuThread::reloadPostProcessingShaders() } if (System::IsValid()) - PostProcessing::ReloadShaders(); + { + GPUThread::RunOnThread([]() { + if (GPUThread::HasGPUBackend()) + PostProcessing::ReloadShaders(); + }); + } } void EmuThread::updatePostProcessingSettings() @@ -1303,7 +1292,12 @@ void EmuThread::updatePostProcessingSettings() } if (System::IsValid()) - PostProcessing::UpdateSettings(); + { + GPUThread::RunOnThread([]() { + if (GPUThread::HasGPUBackend()) + PostProcessing::UpdateSettings(); + }); + } } void EmuThread::clearInputBindStateFromSource(InputBindingKey key) @@ -1326,7 +1320,7 @@ void EmuThread::reloadTextureReplacements() } if 
(System::IsValid()) - GPUTextureCache::ReloadTextureReplacements(true); + GPUThread::RunOnThread([]() { GPUTextureCache::ReloadTextureReplacements(true); }); } void EmuThread::captureGPUFrameDump() @@ -1679,7 +1673,8 @@ void Host::DestroyAuxiliaryRenderWindow(AuxiliaryRenderWindowHandle handle, s32* *height = size.height(); // eat all pending events, to make sure we're not going to write input events back to a dead pointer - g_emu_thread->getEventLoop()->processEvents(QEventLoop::AllEvents); + if (g_emu_thread->isCurrentThread()) + g_emu_thread->getEventLoop()->processEvents(QEventLoop::AllEvents); } void EmuThread::queueAuxiliaryRenderWindowInputEvent(Host::AuxiliaryRenderWindowUserData userdata, @@ -1699,10 +1694,12 @@ void EmuThread::processAuxiliaryRenderWindowInputEvent(void* userdata, quint32 e quint32 param3) { DebugAssert(isCurrentThread()); - ImGuiManager::ProcessAuxiliaryRenderWindowInputEvent(userdata, static_cast(event), - Host::AuxiliaryRenderWindowEventParam{.uint_param = param1}, - Host::AuxiliaryRenderWindowEventParam{.uint_param = param2}, - Host::AuxiliaryRenderWindowEventParam{.uint_param = param3}); + GPUThread::RunOnThread([userdata, event, param1, param2, param3]() { + ImGuiManager::ProcessAuxiliaryRenderWindowInputEvent(userdata, static_cast(event), + Host::AuxiliaryRenderWindowEventParam{.uint_param = param1}, + Host::AuxiliaryRenderWindowEventParam{.uint_param = param2}, + Host::AuxiliaryRenderWindowEventParam{.uint_param = param3}); + }); } void EmuThread::doBackgroundControllerPoll() @@ -1731,7 +1728,7 @@ void EmuThread::startBackgroundControllerPollTimer() return; u32 poll_interval = BACKGROUND_CONTROLLER_POLLING_INTERVAL; - if (FullscreenUI::IsInitialized()) + if (m_gpu_thread_run_idle) poll_interval = FULLSCREEN_UI_CONTROLLER_POLLING_INTERVAL; if (GDBServer::HasAnyClients()) poll_interval = GDB_SERVER_POLLING_INTERVAL; @@ -1747,6 +1744,37 @@ void EmuThread::stopBackgroundControllerPollTimer() 
m_background_controller_polling_timer->stop(); } +void EmuThread::setGPUThreadRunIdle(bool active) +{ + if (!isCurrentThread()) + { + QMetaObject::invokeMethod(this, "setGPUThreadRunIdle", Qt::QueuedConnection, Q_ARG(bool, active)); + return; + } + + m_gpu_thread_run_idle = active; + + // break out of the event loop if we're not executing a system + if (active && !g_settings.gpu_use_thread && !System::IsRunning()) + m_event_loop->quit(); + + // adjust the timer speed to pick up controller input faster + if (!m_background_controller_polling_timer->isActive()) + return; + + g_emu_thread->stopBackgroundControllerPollTimer(); + g_emu_thread->startBackgroundControllerPollTimer(); +} + +void EmuThread::setFullscreenUIStarted(bool started) +{ + if (m_is_fullscreen_ui_started == started) + return; + + m_is_fullscreen_ui_started = started; + emit fullscreenUIStartedOrStopped(started); +} + void EmuThread::start() { AssertMsg(!g_emu_thread, "Emu thread does not exist"); @@ -1776,8 +1804,6 @@ void EmuThread::stopInThread() void EmuThread::run() { - Threading::SetNameOfCurrentThread("CPU Thread"); - m_event_loop = new QEventLoop(); m_started_semaphore.release(); @@ -1796,6 +1822,9 @@ void EmuThread::run() createBackgroundControllerPollTimer(); startBackgroundControllerPollTimer(); + // kick off GPU thread + Threading::Thread gpu_thread(&EmuThread::gpuThreadEntryPoint); + // main loop while (!m_shutdown_flag) { @@ -1803,24 +1832,17 @@ void EmuThread::run() { System::Execute(); } + else if (!GPUThread::IsUsingThread() && GPUThread::IsRunningIdle()) + { + g_emu_thread->getEventLoop()->processEvents(QEventLoop::AllEvents); + + // have to double-check the condition after processing events, because the events could shut us down + if (!GPUThread::IsUsingThread() && GPUThread::IsRunningIdle()) + GPUThread::Internal::DoRunIdle(); + } else { - // we want to keep rendering the UI when paused and fullscreen UI is enabled - if (!FullscreenUI::HasActiveWindow() && !System::IsRunning()) - { 
- // wait until we have a system before running - m_event_loop->exec(); - continue; - } - - m_event_loop->processEvents(QEventLoop::AllEvents); - System::IdlePollUpdate(); - if (g_gpu_device && g_gpu_device->HasMainSwapChain()) - { - System::PresentDisplay(false, 0); - if (!g_gpu_device->GetMainSwapChain()->IsVSyncModeBlocking()) - g_gpu_device->GetMainSwapChain()->ThrottlePresentation(); - } + m_event_loop->exec(); } } @@ -1828,13 +1850,25 @@ void EmuThread::run() System::ShutdownSystem(false); destroyBackgroundControllerPollTimer(); + + // tell GPU thread to exit + GPUThread::Internal::RequestShutdown(); + gpu_thread.Join(); + + // and tidy up everything left System::CPUThreadShutdown(); // move back to UI thread moveToThread(m_ui_thread); } -void Host::FrameDone() +void EmuThread::gpuThreadEntryPoint() +{ + Threading::SetNameOfCurrentThread("GPU Thread"); + GPUThread::Internal::GPUThreadEntryPoint(); +} + +void Host::FrameDoneOnGPUThread(GPUBackend* gpu_backend, u32 frame_number) { } @@ -1949,7 +1983,7 @@ void Host::OnInputDeviceConnected(std::string_view identifier, std::string_view { emit g_emu_thread->onInputDeviceConnected(std::string(identifier), std::string(device_name)); - if (System::IsValid() || g_emu_thread->isRunningFullscreenUI()) + if (System::IsValid() || GPUThread::IsFullscreenUIRequested()) { Host::AddIconOSDMessage(fmt::format("ControllerConnected{}", identifier), ICON_FA_GAMEPAD, fmt::format(TRANSLATE_FS("QtHost", "Controller {} connected."), identifier), @@ -1975,7 +2009,7 @@ void Host::OnInputDeviceDisconnected(InputBindingKey key, std::string_view ident Host::AddIconOSDMessage(fmt::format("ControllerConnected{}", identifier), ICON_FA_GAMEPAD, std::move(message), Host::OSD_WARNING_DURATION); } - else if (System::IsValid() || g_emu_thread->isRunningFullscreenUI()) + else if (System::IsValid() || GPUThread::IsFullscreenUIRequested()) { Host::AddIconOSDMessage(fmt::format("ControllerConnected{}", identifier), ICON_FA_GAMEPAD, 
fmt::format(TRANSLATE_FS("QtHost", "Controller {} disconnected."), identifier), @@ -2037,16 +2071,16 @@ void Host::ReleaseRenderWindow() g_emu_thread->releaseRenderWindow(); } -void EmuThread::updatePerformanceCounters() +void EmuThread::updatePerformanceCounters(const GPUBackend* gpu_backend) { - const RenderAPI render_api = g_gpu_device ? g_gpu_device->GetRenderAPI() : RenderAPI::None; - const bool hardware_renderer = g_gpu && g_gpu->IsHardwareRenderer(); + const RenderAPI render_api = g_gpu_device->GetRenderAPI(); + const bool hardware_renderer = GPUBackend::IsUsingHardwareBackend(); u32 render_width = 0; u32 render_height = 0; - if (g_gpu) + if (gpu_backend) { - const u32 render_scale = g_gpu->GetResolutionScale(); + const u32 render_scale = gpu_backend->GetResolutionScale(); std::tie(render_width, render_height) = g_gpu->GetFullDisplayResolution(); render_width *= render_scale; render_height *= render_scale; @@ -2110,9 +2144,9 @@ void EmuThread::resetPerformanceCounters() Q_ARG(const QString&, blank)); } -void Host::OnPerformanceCountersUpdated() +void Host::OnPerformanceCountersUpdated(const GPUBackend* gpu_backend) { - g_emu_thread->updatePerformanceCounters(); + g_emu_thread->updatePerformanceCounters(gpu_backend); } void Host::OnGameChanged(const std::string& disc_path, const std::string& game_serial, const std::string& game_name) @@ -2209,8 +2243,8 @@ std::optional Host::GetTopLevelWindowInfo() EmuThread::SystemLock EmuThread::pauseAndLockSystem() { - const bool was_fullscreen = System::IsValid() && isFullscreen(); - const bool was_paused = System::IsPaused(); + const bool was_fullscreen = QtHost::IsSystemValid() && isFullscreen(); + const bool was_paused = QtHost::IsSystemPaused(); // We use surfaceless rather than switching out of fullscreen, because // we're paused, so we're not going to be rendering anyway. 
diff --git a/src/duckstation-qt/qthost.h b/src/duckstation-qt/qthost.h index b0e959c9c..b3917649c 100644 --- a/src/duckstation-qt/qthost.h +++ b/src/duckstation-qt/qthost.h @@ -44,6 +44,8 @@ class INISettingsInterface; enum class RenderAPI : u8; class GPUDevice; +class GPUBackend; + class MainWindow; class DisplayWidget; @@ -93,7 +95,6 @@ public: ALWAYS_INLINE bool isFullscreen() const { return m_is_fullscreen; } ALWAYS_INLINE bool isRenderingToMain() const { return m_is_rendering_to_main; } ALWAYS_INLINE bool isSurfaceless() const { return m_is_surfaceless; } - ALWAYS_INLINE bool isRunningFullscreenUI() const { return m_run_fullscreen_ui; } std::optional acquireRenderWindow(RenderAPI render_api, bool fullscreen, bool exclusive_fullscreen, Error* error); @@ -102,6 +103,7 @@ public: void startBackgroundControllerPollTimer(); void stopBackgroundControllerPollTimer(); + void setFullscreenUIStarted(bool started); void wakeThread(); bool shouldRenderToMain() const; @@ -109,7 +111,7 @@ public: void bootOrLoadState(std::string path); - void updatePerformanceCounters(); + void updatePerformanceCounters(const GPUBackend* gpu_backend); void resetPerformanceCounters(); /// Locks the system by pausing it, while a popup dialog is displayed. 
@@ -147,7 +149,7 @@ Q_SIGNALS: void runningGameChanged(const QString& filename, const QString& game_serial, const QString& game_title); void inputProfileLoaded(); void mouseModeRequested(bool relative, bool hide_cursor); - void fullscreenUIStateChange(bool running); + void fullscreenUIStartedOrStopped(bool running); void achievementsLoginRequested(Achievements::LoginRequestReason reason); void achievementsRefreshed(quint32 id, const QString& game_info_string); void achievementsChallengeModeChanged(bool enabled); @@ -210,6 +212,7 @@ public Q_SLOTS: void clearInputBindStateFromSource(InputBindingKey key); void reloadTextureReplacements(); void captureGPUFrameDump(); + void setGPUThreadRunIdle(bool active); private Q_SLOTS: void stopInThread(); @@ -227,23 +230,23 @@ protected: void run() override; private: - using InputButtonHandler = std::function; - using InputAxisHandler = std::function; - void createBackgroundControllerPollTimer(); void destroyBackgroundControllerPollTimer(); void confirmActionIfMemoryCardBusy(const QString& action, bool cancel_resume_on_accept, std::function callback) const; + static void gpuThreadEntryPoint(); + QThread* m_ui_thread; QSemaphore m_started_semaphore; QEventLoop* m_event_loop = nullptr; QTimer* m_background_controller_polling_timer = nullptr; bool m_shutdown_flag = false; - bool m_run_fullscreen_ui = false; bool m_is_rendering_to_main = false; bool m_is_fullscreen = false; + bool m_is_fullscreen_ui_started = false; + bool m_gpu_thread_run_idle = false; bool m_is_surfaceless = false; bool m_save_state_on_shutdown = false; diff --git a/src/duckstation-regtest/regtest_host.cpp b/src/duckstation-regtest/regtest_host.cpp index 9f96a69c8..3d1e4364d 100644 --- a/src/duckstation-regtest/regtest_host.cpp +++ b/src/duckstation-regtest/regtest_host.cpp @@ -5,7 +5,8 @@ #include "core/controller.h" #include "core/fullscreen_ui.h" #include "core/game_list.h" -#include "core/gpu.h" +#include "core/gpu_backend.h" +#include "core/gpu_thread.h" 
#include "core/host.h" #include "core/system.h" #include "core/system_private.h" @@ -46,9 +47,11 @@ static void HookSignals(); static bool SetFolders(); static bool SetNewDataRoot(const std::string& filename); static std::string GetFrameDumpFilename(u32 frame); +static void GPUThreadEntryPoint(); } // namespace RegTestHost static std::unique_ptr s_base_settings_interface; +static Threading::Thread s_gpu_thread; static u32 s_frames_to_run = 60 * 60; static u32 s_frames_remaining = 0; @@ -281,12 +284,17 @@ void Host::OnSystemResumed() // } -void Host::OnIdleStateChanged() +void Host::OnGPUThreadRunIdleChanged(bool is_active) { // } -void Host::OnPerformanceCountersUpdated() +void Host::OnFullscreenUIStartedOrStopped(bool started) +{ + // +} + +void Host::OnPerformanceCountersUpdated(const GPUBackend* gpu_backend) { // } @@ -375,14 +383,10 @@ void Host::DestroyAuxiliaryRenderWindow(AuxiliaryRenderWindowHandle handle, s32* { } -void Host::FrameDone() +void Host::FrameDoneOnGPUThread(GPUBackend* gpu_backend, u32 frame_number) { - const u32 frame = System::GetFrameNumber(); - if (s_frame_dump_interval > 0 && (s_frame_dump_interval == 1 || (frame % s_frame_dump_interval) == 0)) - { - std::string dump_filename(RegTestHost::GetFrameDumpFilename(frame)); - g_gpu->WriteDisplayTextureToFile(std::move(dump_filename)); - } + if (s_frame_dump_interval > 0 && (s_frame_dump_interval == 1 || (frame_number % s_frame_dump_interval) == 0)) + gpu_backend->WriteDisplayTextureToFile(RegTestHost::GetFrameDumpFilename(frame_number)); } void Host::OpenURL(std::string_view url) @@ -508,6 +512,12 @@ void RegTestHost::HookSignals() std::signal(SIGTERM, SignalHandler); } +void RegTestHost::GPUThreadEntryPoint() +{ + Threading::SetNameOfCurrentThread("GPU Thread"); + GPUThread::Internal::GPUThreadEntryPoint(); +} + void RegTestHost::InitializeEarlyConsole() { const bool was_console_enabled = Log::IsConsoleOutputEnabled(); @@ -773,6 +783,7 @@ int main(int argc, char* argv[]) } 
RegTestHost::HookSignals(); + s_gpu_thread.Start(&RegTestHost::GPUThreadEntryPoint); Error error; int result = -1; @@ -813,6 +824,12 @@ int main(int argc, char* argv[]) result = 0; cleanup: + if (s_gpu_thread.Joinable()) + { + GPUThread::Internal::RequestShutdown(); + s_gpu_thread.Join(); + } + System::CPUThreadShutdown(); System::ProcessShutdown(); return result; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 73ecaa862..658b0396b 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -385,6 +385,17 @@ const char* GPUDevice::ShaderLanguageToString(GPUShaderLanguage language) } } +const char* GPUDevice::VSyncModeToString(GPUVSyncMode mode) +{ + static constexpr std::array(GPUVSyncMode::Count)> vsync_modes = {{ + "Disabled", + "FIFO", + "Mailbox", + }}; + + return vsync_modes[static_cast(mode)]; +} + bool GPUDevice::IsSameRenderAPI(RenderAPI lhs, RenderAPI rhs) { return (lhs == rhs || ((lhs == RenderAPI::OpenGL || lhs == RenderAPI::OpenGLES) && diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 14c46d344..5ed91d7c1 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -657,6 +657,9 @@ public: /// Returns a string representing the specified language. static const char* ShaderLanguageToString(GPUShaderLanguage language); + /// Returns a string representing the specified vsync mode. + static const char* VSyncModeToString(GPUVSyncMode mode); + /// Returns a new device for the specified API. 
static std::unique_ptr CreateDeviceForAPI(RenderAPI api); diff --git a/src/util/imgui_fullscreen.cpp b/src/util/imgui_fullscreen.cpp index 2a4cd190a..fc64e8126 100644 --- a/src/util/imgui_fullscreen.cpp +++ b/src/util/imgui_fullscreen.cpp @@ -52,6 +52,8 @@ static void DrawChoiceDialog(); static void DrawInputDialog(); static void DrawMessageDialog(); static void DrawBackgroundProgressDialogs(ImVec2& position, float spacing); +static void DrawLoadingScreen(std::string_view image, std::string_view message, s32 progress_min, s32 progress_max, + s32 progress_value, bool is_persistent); static void DrawNotifications(ImVec2& position, float spacing); static void DrawToast(); static bool MenuButtonFrame(const char* str_id, bool enabled, float height, bool* visible, bool* hovered, ImRect* bb, @@ -171,6 +173,13 @@ struct ALIGN_TO_CACHE_LINE UIState std::vector background_progress_dialogs; std::mutex background_progress_lock; + + std::string loading_screen_image; + std::string loading_screen_message; + s32 loading_screen_min = 0; + s32 loading_screen_max = 0; + s32 loading_screen_value = 0; + bool loading_screen_open = false; }; } // namespace @@ -2916,6 +2925,145 @@ void ImGuiFullscreen::DrawBackgroundProgressDialogs(ImVec2& position, float spac ImGui::PopStyleColor(2); } +void ImGuiFullscreen::RenderLoadingScreen(std::string_view image, std::string_view message, s32 progress_min /*= -1*/, + s32 progress_max /*= -1*/, s32 progress_value /*= -1*/) +{ + if (progress_min < progress_max) + INFO_LOG("{}: {}/{}", message, progress_value, progress_max); + + if (!g_gpu_device || !g_gpu_device->HasMainSwapChain()) + return; + + // eat the last imgui frame, it might've been partially rendered by the caller. 
+ ImGui::EndFrame(); + ImGui::NewFrame(); + + DrawLoadingScreen(image, message, progress_min, progress_max, progress_value, false); + + ImGui::EndFrame(); + + GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain(); + if (g_gpu_device->BeginPresent(swap_chain) == GPUDevice::PresentResult::OK) + { + g_gpu_device->RenderImGui(swap_chain); + g_gpu_device->EndPresent(swap_chain, false); + } + + ImGui::NewFrame(); +} + +void ImGuiFullscreen::OpenOrUpdateLoadingScreen(std::string_view image, std::string_view message, + s32 progress_min /*= -1*/, s32 progress_max /*= -1*/, + s32 progress_value /*= -1*/) +{ + if (s_state.loading_screen_image != image) + s_state.loading_screen_image = image; + if (s_state.loading_screen_message != message) + s_state.loading_screen_message = message; + s_state.loading_screen_min = progress_min; + s_state.loading_screen_max = progress_max; + s_state.loading_screen_value = progress_value; + s_state.loading_screen_open = true; +} + +bool ImGuiFullscreen::IsLoadingScreenOpen() +{ + return s_state.loading_screen_open; +} + +void ImGuiFullscreen::RenderLoadingScreen() +{ + if (!s_state.loading_screen_open) + return; + + DrawLoadingScreen(s_state.loading_screen_image, s_state.loading_screen_message, s_state.loading_screen_min, + s_state.loading_screen_max, s_state.loading_screen_value, true); +} + +void ImGuiFullscreen::CloseLoadingScreen() +{ + s_state.loading_screen_image = {}; + s_state.loading_screen_message = {}; + s_state.loading_screen_min = 0; + s_state.loading_screen_max = 0; + s_state.loading_screen_value = 0; + s_state.loading_screen_open = false; +} + +void ImGuiFullscreen::DrawLoadingScreen(std::string_view image, std::string_view message, s32 progress_min, + s32 progress_max, s32 progress_value, bool is_persistent) +{ + const auto& io = ImGui::GetIO(); + const float scale = ImGuiManager::GetGlobalScale(); + const float width = (400.0f * scale); + const bool has_progress = (progress_min < progress_max); + + const float logo_width = 
260.0f * scale; + const float logo_height = 260.0f * scale; + + ImGui::SetNextWindowSize(ImVec2(logo_width, logo_height), ImGuiCond_Always); + ImGui::SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, (io.DisplaySize.y * 0.5f) - (50.0f * scale)), + ImGuiCond_Always, ImVec2(0.5f, 0.5f)); + if (ImGui::Begin("LoadingScreenLogo", nullptr, + ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoFocusOnAppearing | + ImGuiWindowFlags_NoBackground)) + { + GPUTexture* tex = GetCachedTexture(image); + if (tex) + ImGui::Image(tex, ImVec2(logo_width, logo_height)); + } + ImGui::End(); + + const float padding_and_rounding = 18.0f * scale; + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, padding_and_rounding); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(padding_and_rounding, padding_and_rounding)); + ImGui::SetNextWindowSize(ImVec2(width, ((has_progress || is_persistent) ? 
90.0f : 55.0f) * scale), ImGuiCond_Always); + ImGui::SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, (io.DisplaySize.y * 0.5f) + (100.0f * scale)), + ImGuiCond_Always, ImVec2(0.5f, 0.0f)); + if (ImGui::Begin("LoadingScreen", nullptr, + ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoFocusOnAppearing)) + { + if (has_progress || is_persistent) + { + if (!message.empty()) + ImGui::TextUnformatted(message.data(), message.data() + message.size()); + + if (has_progress) + { + TinyString buf; + buf.format("{}/{}", progress_value, progress_max); + + const ImVec2 prog_size = ImGui::CalcTextSize(buf.c_str(), buf.end_ptr()); + ImGui::SameLine(); + ImGui::SetCursorPosX(width - padding_and_rounding - prog_size.x); + ImGui::TextUnformatted(buf.c_str(), buf.end_ptr()); + } + + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 5.0f); + + ImGui::ProgressBar(has_progress ? 
+ (static_cast(progress_value) / static_cast(progress_max - progress_min)) : + static_cast(-ImGui::GetTime()), + ImVec2(-1.0f, 0.0f), ""); + } + else + { + if (!message.empty()) + { + const ImVec2 text_size(ImGui::CalcTextSize(message.data(), message.data() + message.size())); + ImGui::SetCursorPosX((width - text_size.x) / 2.0f); + ImGui::TextUnformatted(message.data(), message.data() + message.size()); + } + } + } + ImGui::End(); + ImGui::PopStyleVar(2); +} + ////////////////////////////////////////////////////////////////////////// // Notifications ////////////////////////////////////////////////////////////////////////// diff --git a/src/util/imgui_fullscreen.h b/src/util/imgui_fullscreen.h index a9bbba568..044cf6543 100644 --- a/src/util/imgui_fullscreen.h +++ b/src/util/imgui_fullscreen.h @@ -327,6 +327,18 @@ void UpdateBackgroundProgressDialog(const char* str_id, std::string message, s32 void CloseBackgroundProgressDialog(const char* str_id); bool IsBackgroundProgressDialogOpen(const char* str_id); +/// Displays a loading screen with the logo, rendered with ImGui. Use when executing possibly-time-consuming tasks +/// such as compiling shaders when starting up. +void RenderLoadingScreen(std::string_view image, std::string_view message, s32 progress_min = -1, s32 progress_max = -1, + s32 progress_value = -1); +void OpenOrUpdateLoadingScreen(std::string_view image, std::string_view message, s32 progress_min = -1, + s32 progress_max = -1, s32 progress_value = -1); +bool IsLoadingScreenOpen(); +void CloseLoadingScreen(); + +/// Renders a previously-configured loading screen. 
+void RenderLoadingScreen(); + void AddNotification(std::string key, float duration, std::string title, std::string text, std::string image_path); void ClearNotifications(); diff --git a/src/util/postprocessing.cpp b/src/util/postprocessing.cpp index 81405ca86..5e93e6994 100644 --- a/src/util/postprocessing.cpp +++ b/src/util/postprocessing.cpp @@ -12,7 +12,7 @@ // TODO: Remove me #include "core/host.h" -#include "core/host_interface_progress_callback.h" +#include "core/fullscreen_ui.h" #include "core/settings.h" #include "IconsFontAwesome5.h" @@ -406,7 +406,7 @@ void PostProcessing::Chain::LoadStages() return; Error error; - HostInterfaceProgressCallback progress; + LoadingScreenProgressCallback progress; progress.SetProgressRange(stage_count); for (u32 i = 0; i < stage_count; i++) @@ -476,7 +476,7 @@ void PostProcessing::Chain::UpdateSettings(std::unique_lock& setting m_stages.resize(stage_count); - HostInterfaceProgressCallback progress; + LoadingScreenProgressCallback progress; progress.SetProgressRange(stage_count); const GPUTexture::Format prev_format = m_target_format; diff --git a/src/util/postprocessing_shader_fx.cpp b/src/util/postprocessing_shader_fx.cpp index 1abe6ad47..159406513 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -7,6 +7,7 @@ #include "shadergen.h" // TODO: Remove me +#include "core/gpu_thread.h" #include "core/host.h" #include "core/settings.h" @@ -72,7 +73,7 @@ static std::tuple, GPUShaderLanguage> Create } // Should have a GPU device and be on the GPU thread. 
- Assert(g_gpu_device); + Assert(GPUThread::IsOnThread() && g_gpu_device); const bool debug_info = g_gpu_device->IsDebugDevice(); const RenderAPI rapi = g_gpu_device->GetRenderAPI(); diff --git a/src/util/state_wrapper.h b/src/util/state_wrapper.h index 1fde2b5b6..a6e61c58d 100644 --- a/src/util/state_wrapper.h +++ b/src/util/state_wrapper.h @@ -34,6 +34,8 @@ public: ALWAYS_INLINE bool IsReading() const { return (m_mode == Mode::Read); } ALWAYS_INLINE bool IsWriting() const { return (m_mode == Mode::Write); } ALWAYS_INLINE u32 GetVersion() const { return m_version; } + ALWAYS_INLINE const u8* GetData() const { return m_data; } + ALWAYS_INLINE size_t GetDataSize() const { return m_size; } ALWAYS_INLINE size_t GetPosition() const { return m_pos; } ALWAYS_INLINE void SetPosition(size_t pos) { m_pos = pos; }