diff --git a/src/common/intrin.h b/src/common/intrin.h index 364a0cc38..b04236719 100644 --- a/src/common/intrin.h +++ b/src/common/intrin.h @@ -14,9 +14,9 @@ #define CPU_ARCH_SIMD 1 #define CPU_ARCH_SSE 1 #include -#include -#include #include +#include +#include #if defined(__AVX2__) #define CPU_ARCH_AVX 1 @@ -96,3 +96,40 @@ ALWAYS_INLINE_RELEASE static void MemsetPtrs(T* ptr, T value, u32 count) for (u32 i = 0; i < remaining_count; i++) *(dest++) = value; } + +ALWAYS_INLINE static void MultiPause() +{ +#if defined(CPU_ARCH_X86) || defined(CPU_ARCH_X64) + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); +#elif defined(CPU_ARCH_ARM64) && defined(_MSC_VER) + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); +#elif defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_ARM32) + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); +#elif defined(CPU_ARCH_RISCV64) + // Probably wrong... 
pause is optional :/ + asm volatile("fence" ::: "memory"); +#else +#pragma warning("Missing implementation") +#endif +} diff --git a/src/common/log_channels.h b/src/common/log_channels.h index aaa8b7842..c05367249 100644 --- a/src/common/log_channels.h +++ b/src/common/log_channels.h @@ -33,6 +33,7 @@ X(GPUShaderCache) \ X(GPUTexture) \ X(GPUTextureCache) \ + X(GPUThread) \ X(GPU_HW) \ X(GPU_SW) \ X(GPU_SW_Rasterizer) \ diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f6cc9dcc8..98dcadf4e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -61,10 +61,10 @@ add_library(core gpu_shadergen.h gpu_sw.cpp gpu_sw.h - gpu_sw_backend.cpp - gpu_sw_backend.h gpu_sw_rasterizer.cpp gpu_sw_rasterizer.h + gpu_thread.cpp + gpu_thread.h gpu_types.h guncon.cpp guncon.h diff --git a/src/core/achievements.cpp b/src/core/achievements.cpp index 16bfd0ed7..c39453cc9 100644 --- a/src/core/achievements.cpp +++ b/src/core/achievements.cpp @@ -9,6 +9,7 @@ #include "bus.h" #include "cpu_core.h" #include "fullscreen_ui.h" +#include "gpu_thread.h" #include "host.h" #include "system.h" @@ -1144,7 +1145,7 @@ void Achievements::ClientLoadGameCallback(int result, const char* error_message, // ensure fullscreen UI is ready for notifications if (display_summary) - FullscreenUI::Initialize(); + GPUThread::RunOnThread(&FullscreenUI::Initialize); char url_buf[URL_BUFFER_SIZE]; if (int err = rc_client_game_get_image_url(info, url_buf, std::size(url_buf)); err == RC_OK) @@ -1199,7 +1200,7 @@ void Achievements::ClearGameHash() void Achievements::DisplayAchievementSummary() { - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title; if (IsHardcoreModeActive()) @@ -1224,8 +1225,13 @@ void Achievements::DisplayAchievementSummary() summary = TRANSLATE_STR("Achievements", "This game has no achievements."); } - ImGuiFullscreen::AddNotification("achievement_summary", 
ACHIEVEMENT_SUMMARY_NOTIFICATION_TIME, std::move(title), - std::move(summary), s_game_icon); + GPUThread::RunOnThread([title = std::move(title), summary = std::move(summary)]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievement_summary", ACHIEVEMENT_SUMMARY_NOTIFICATION_TIME, std::move(title), + std::move(summary), s_game_icon); + }); } // Technically not going through the resource API, but since we're passing this to something else, we can't. @@ -1235,11 +1241,16 @@ void Achievements::DisplayAchievementSummary() void Achievements::DisplayHardcoreDeferredMessage() { - if (g_settings.achievements_hardcore_mode && !s_hardcore_mode && System::IsValid() && FullscreenUI::Initialize()) + if (g_settings.achievements_hardcore_mode && !s_hardcore_mode && System::IsValid()) { - ImGuiFullscreen::ShowToast(std::string(), - TRANSLATE_STR("Achievements", "Hardcore mode will be enabled on system reset."), - Host::OSD_WARNING_DURATION); + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::ShowToast(std::string(), + TRANSLATE_STR("Achievements", "Hardcore mode will be enabled on system reset."), + Host::OSD_WARNING_DURATION); + }); } } @@ -1261,7 +1272,7 @@ void Achievements::HandleUnlockEvent(const rc_client_event_t* event) INFO_LOG("Achievement {} ({}) for game {} unlocked", cheevo->title, cheevo->id, s_game_id); UpdateGameSummary(); - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title; if (cheevo->category == RC_CLIENT_ACHIEVEMENT_CATEGORY_UNOFFICIAL) @@ -1271,9 +1282,15 @@ void Achievements::HandleUnlockEvent(const rc_client_event_t* event) std::string badge_path = GetAchievementBadgePath(cheevo, cheevo->state); - ImGuiFullscreen::AddNotification(fmt::format("achievement_unlock_{}", cheevo->id), - static_cast(g_settings.achievements_notification_duration), - std::move(title), 
cheevo->description, std::move(badge_path)); + GPUThread::RunOnThread([id = cheevo->id, duration = g_settings.achievements_notification_duration, + title = std::move(title), description = std::string(cheevo->description), + badge_path = std::move(badge_path)]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("achievement_unlock_{}", id), static_cast(duration), + std::move(title), std::move(description), std::move(badge_path)); + }); } if (g_settings.achievements_sound_effects) @@ -1285,7 +1302,7 @@ void Achievements::HandleGameCompleteEvent(const rc_client_event_t* event) INFO_LOG("Game {} complete", s_game_id); UpdateGameSummary(); - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title = fmt::format(TRANSLATE_FS("Achievements", "Mastered {}"), s_game_title); std::string message = fmt::format( @@ -1294,8 +1311,13 @@ void Achievements::HandleGameCompleteEvent(const rc_client_event_t* event) s_game_summary.num_unlocked_achievements), TRANSLATE_PLURAL_STR("Achievements", "%n points", "Achievement points", s_game_summary.points_unlocked)); - ImGuiFullscreen::AddNotification("achievement_mastery", GAME_COMPLETE_NOTIFICATION_TIME, std::move(title), - std::move(message), s_game_icon); + GPUThread::RunOnThread([title = std::move(title), message = std::move(message), icon = s_game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievement_mastery", GAME_COMPLETE_NOTIFICATION_TIME, std::move(title), + std::move(message), std::move(icon)); + }); } } @@ -1303,14 +1325,19 @@ void Achievements::HandleLeaderboardStartedEvent(const rc_client_event_t* event) { DEV_LOG("Leaderboard {} ({}) started", event->leaderboard->id, event->leaderboard->title); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) 
{ std::string title = event->leaderboard->title; std::string message = TRANSLATE_STR("Achievements", "Leaderboard attempt started."); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - LEADERBOARD_STARTED_NOTIFICATION_TIME, std::move(title), std::move(message), - s_game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), LEADERBOARD_STARTED_NOTIFICATION_TIME, + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1318,14 +1345,19 @@ void Achievements::HandleLeaderboardFailedEvent(const rc_client_event_t* event) { DEV_LOG("Leaderboard {} ({}) failed", event->leaderboard->id, event->leaderboard->title); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { std::string title = event->leaderboard->title; std::string message = TRANSLATE_STR("Achievements", "Leaderboard attempt failed."); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - LEADERBOARD_FAILED_NOTIFICATION_TIME, std::move(title), std::move(message), - s_game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), LEADERBOARD_FAILED_NOTIFICATION_TIME, + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1333,7 +1365,7 @@ void Achievements::HandleLeaderboardSubmittedEvent(const rc_client_event_t* even { DEV_LOG("Leaderboard {} ({}) submitted", event->leaderboard->id, event->leaderboard->title); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + 
if (g_settings.achievements_leaderboard_notifications) { static const char* value_strings[NUM_RC_CLIENT_LEADERBOARD_FORMATS] = { TRANSLATE_NOOP("Achievements", "Your Time: {}{}"), @@ -1349,9 +1381,14 @@ void Achievements::HandleLeaderboardSubmittedEvent(const rc_client_event_t* even event->leaderboard->tracker_value ? event->leaderboard->tracker_value : "Unknown", g_settings.achievements_spectator_mode ? std::string_view() : TRANSLATE_SV("Achievements", " (Submitting)")); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - static_cast(g_settings.achievements_leaderboard_duration), std::move(title), - std::move(message), s_game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), + static_cast(g_settings.achievements_leaderboard_duration), + std::move(title), std::move(message), std::move(icon)); + }); } if (g_settings.achievements_sound_effects) @@ -1363,7 +1400,7 @@ void Achievements::HandleLeaderboardScoreboardEvent(const rc_client_event_t* eve DEV_LOG("Leaderboard {} scoreboard rank {} of {}", event->leaderboard_scoreboard->leaderboard_id, event->leaderboard_scoreboard->new_rank, event->leaderboard_scoreboard->num_entries); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { static const char* value_strings[NUM_RC_CLIENT_LEADERBOARD_FORMATS] = { TRANSLATE_NOOP("Achievements", "Your Time: {} (Best: {})"), @@ -1380,9 +1417,15 @@ void Achievements::HandleLeaderboardScoreboardEvent(const rc_client_event_t* eve event->leaderboard_scoreboard->submitted_score, event->leaderboard_scoreboard->best_score), event->leaderboard_scoreboard->new_rank, event->leaderboard_scoreboard->num_entries); - 
ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - static_cast(g_settings.achievements_leaderboard_duration), std::move(title), - std::move(message), s_game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), + static_cast(g_settings.achievements_leaderboard_duration), + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1512,26 +1555,30 @@ void Achievements::HandleServerDisconnectedEvent(const rc_client_event_t* event) { WARNING_LOG("Server disconnected."); - if (FullscreenUI::Initialize()) - { + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::ShowToast( TRANSLATE_STR("Achievements", "Achievements Disconnected"), TRANSLATE_STR("Achievements", "An unlock request could not be completed. We will keep retrying to submit this request."), Host::OSD_ERROR_DURATION); - } + }); } void Achievements::HandleServerReconnectedEvent(const rc_client_event_t* event) { WARNING_LOG("Server reconnected."); - if (FullscreenUI::Initialize()) - { + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::ShowToast(TRANSLATE_STR("Achievements", "Achievements Reconnected"), TRANSLATE_STR("Achievements", "All pending unlock requests have completed."), Host::OSD_INFO_DURATION); - } + }); } void Achievements::ResetClient() @@ -1609,12 +1656,17 @@ void Achievements::SetHardcoreMode(bool enabled, bool force_display_message) // new mode s_hardcore_mode = enabled; - if (System::IsValid() && (HasActiveGame() || force_display_message) && FullscreenUI::Initialize()) + if (System::IsValid() && (HasActiveGame() || force_display_message)) { - ImGuiFullscreen::ShowToast(std::string(), - enabled ? 
TRANSLATE_STR("Achievements", "Hardcore mode is now enabled.") : - TRANSLATE_STR("Achievements", "Hardcore mode is now disabled."), - Host::OSD_INFO_DURATION); + GPUThread::RunOnThread([enabled]() { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::ShowToast(std::string(), + enabled ? TRANSLATE_STR("Achievements", "Hardcore mode is now enabled.") : + TRANSLATE_STR("Achievements", "Hardcore mode is now disabled."), + Host::OSD_INFO_DURATION); + }); } rc_client_set_hardcore_enabled(s_client, enabled); @@ -1925,7 +1977,7 @@ void Achievements::ShowLoginNotification() if (!user) return; - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string badge_path = GetLoggedInUserBadgePath(); std::string title = user->display_name; @@ -1934,8 +1986,14 @@ void Achievements::ShowLoginNotification() std::string summary = fmt::format(TRANSLATE_FS("Achievements", "Score: {} ({} softcore)\nUnread messages: {}"), user->score, user->score_softcore, user->num_unread_messages); - ImGuiFullscreen::AddNotification("achievements_login", LOGIN_NOTIFICATION_TIME, std::move(title), - std::move(summary), std::move(badge_path)); + GPUThread::RunOnThread( + [title = std::move(title), summary = std::move(summary), badge_path = std::move(badge_path)]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievements_login", LOGIN_NOTIFICATION_TIME, std::move(title), + std::move(summary), std::move(badge_path)); + }); } } @@ -2035,14 +2093,6 @@ void Achievements::ConfirmHardcoreModeDisableAsync(const char* trigger, std::fun } #endif - if (!FullscreenUI::Initialize()) - { - Host::AddOSDMessage(fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), - Host::OSD_WARNING_DURATION); - callback(false); - return; - } - auto real_callback = [callback = std::move(callback)](bool res) mutable { // don't run the callback in the middle of 
rendering the UI Host::RunOnCPUThread([callback = std::move(callback), res]() { @@ -2052,13 +2102,25 @@ void Achievements::ConfirmHardcoreModeDisableAsync(const char* trigger, std::fun }); }; - ImGuiFullscreen::OpenConfirmMessageDialog( - TRANSLATE_STR("Achievements", "Confirm Hardcore Mode"), - fmt::format(TRANSLATE_FS("Achievements", "{0} cannot be performed while hardcore mode is active. Do you " - "want to disable hardcore mode? {0} will be cancelled if you select No."), - trigger), - std::move(real_callback), fmt::format(ICON_FA_CHECK " {}", TRANSLATE_SV("Achievements", "Yes")), - fmt::format(ICON_FA_TIMES " {}", TRANSLATE_SV("Achievements", "No"))); + GPUThread::RunOnThread([trigger = std::string(trigger), real_callback = std::move(real_callback)]() mutable { + if (!FullscreenUI::Initialize()) + { + Host::AddOSDMessage( + fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), + Host::OSD_WARNING_DURATION); + real_callback(false); + return; + } + + ImGuiFullscreen::OpenConfirmMessageDialog( + TRANSLATE_STR("Achievements", "Confirm Hardcore Mode"), + fmt::format(TRANSLATE_FS("Achievements", + "{0} cannot be performed while hardcore mode is active. Do you " + "want to disable hardcore mode? 
{0} will be cancelled if you select No."), + trigger), + std::move(real_callback), fmt::format(ICON_FA_CHECK " {}", TRANSLATE_SV("Achievements", "Yes")), + fmt::format(ICON_FA_TIMES " {}", TRANSLATE_SV("Achievements", "No"))); + }); #else Host::AddOSDMessage(fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), Host::OSD_WARNING_DURATION); diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index f3798eb86..0beaf2e2a 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -52,8 +52,8 @@ - + @@ -133,8 +133,8 @@ - + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 5a8d278a2..5e5e105f5 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -45,7 +45,6 @@ - @@ -71,6 +70,7 @@ + @@ -119,7 +119,6 @@ - @@ -149,6 +148,7 @@ + diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index 244682c2b..c90d941ad 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -8,6 +8,7 @@ #include "controller.h" #include "game_list.h" #include "gpu.h" +#include "gpu_thread.h" #include "host.h" #include "settings.h" #include "system.h" @@ -219,6 +220,7 @@ struct PostProcessingStageInfo ////////////////////////////////////////////////////////////////////////// // Main ////////////////////////////////////////////////////////////////////////// +static void UpdateRunIdleState(); static void PauseForMenuOpen(bool set_pause_menu_open); static bool AreAnyDialogsOpen(); static void ClosePauseMenu(); @@ -600,12 +602,13 @@ bool FullscreenUI::Initialize() s_about_window_open = false; s_hotkey_list_cache = InputManager::GetHotkeyList(); + if (s_initialized) + Host::RunOnCPUThread([]() { Host::OnFullscreenUIStartedOrStopped(true); }); + if (!System::IsValid()) SwitchToLanding(); - if (!System::IsRunning()) - Host::OnIdleStateChanged(); - + UpdateRunIdleState(); ForceKeyNavEnabled(); return true; } @@ -629,6 +632,7 @@ bool 
FullscreenUI::AreAnyDialogsOpen() void FullscreenUI::CheckForConfigChanges(const Settings& old_settings) { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; @@ -636,54 +640,102 @@ void FullscreenUI::CheckForConfigChanges(const Settings& old_settings) // That means we're going to be reading achievement state. if (old_settings.achievements_enabled && !g_settings.achievements_enabled) { - if (s_current_main_window == MainWindowType::Achievements || s_current_main_window == MainWindowType::Leaderboards) - ReturnToPreviousWindow(); + if (!IsInitialized()) + return; + + GPUThread::RunOnThread([]() { + if (s_current_main_window == MainWindowType::Achievements || + s_current_main_window == MainWindowType::Leaderboards) + { + ReturnToPreviousWindow(); + } + }); } } +void FullscreenUI::UpdateRunIdleState() +{ + const bool new_run_idle = HasActiveWindow(); + if (GPUThread::GetRunIdleOnThread() == new_run_idle) + return; + + GPUThread::SetRunIdleOnThread(new_run_idle); + Host::RunOnCPUThread([new_run_idle]() { Host::OnFullscreenUIActiveChanged(new_run_idle); }); +} + void FullscreenUI::OnSystemStarted() { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; - s_current_main_window = MainWindowType::None; - QueueResetFocus(FocusResetType::ViewChanged); -} + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; -void FullscreenUI::OnSystemPaused() -{ - // noop + s_current_main_window = MainWindowType::None; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + }); } void FullscreenUI::OnSystemResumed() { - // get rid of pause menu if we unpaused another way - if (s_current_main_window == MainWindowType::PauseMenu) - ClosePauseMenu(); + // NOTE: Called on CPU thread. 
+ if (!IsInitialized()) + return; + + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; + + // get rid of pause menu if we unpaused another way + if (s_current_main_window == MainWindowType::PauseMenu) + ClosePauseMenu(); + + UpdateRunIdleState(); + }); } void FullscreenUI::OnSystemDestroyed() { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; - s_pause_menu_was_open = false; - s_was_paused_on_quick_menu_open = false; - s_current_pause_submenu = PauseSubMenu::None; - SwitchToLanding(); + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; + + s_pause_menu_was_open = false; + s_was_paused_on_quick_menu_open = false; + s_current_pause_submenu = PauseSubMenu::None; + SwitchToLanding(); + UpdateRunIdleState(); + }); } void FullscreenUI::OnRunningGameChanged() { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; const std::string& path = System::GetDiscPath(); const std::string& serial = System::GetGameSerial(); + + std::string subtitle; if (!serial.empty()) - s_current_game_subtitle = fmt::format("{0} - {1}", serial, Path::GetFileName(path)); + subtitle = fmt::format("{0} - {1}", serial, Path::GetFileName(path)); else - s_current_game_subtitle = {}; + subtitle = {}; + + GPUThread::RunOnThread([subtitle = std::move(subtitle)]() mutable { + if (!IsInitialized()) + return; + + s_current_game_subtitle = std::move(subtitle); + }); } void FullscreenUI::PauseForMenuOpen(bool set_pause_menu_open) @@ -700,15 +752,18 @@ void FullscreenUI::OpenPauseMenu() if (!System::IsValid()) return; - if (!Initialize() || s_current_main_window != MainWindowType::None) - return; + GPUThread::RunOnThread([]() { + if (!Initialize() || s_current_main_window != MainWindowType::None) + return; - PauseForMenuOpen(true); - s_current_main_window = MainWindowType::PauseMenu; - s_current_pause_submenu = PauseSubMenu::None; - QueueResetFocus(FocusResetType::ViewChanged); - ForceKeyNavEnabled(); - FixStateIfPaused(); + PauseForMenuOpen(true); + 
s_current_main_window = MainWindowType::PauseMenu; + s_current_pause_submenu = PauseSubMenu::None; + QueueResetFocus(FocusResetType::ViewChanged); + ForceKeyNavEnabled(); + UpdateRunIdleState(); + FixStateIfPaused(); + }); } void FullscreenUI::OpenCheatsMenu() @@ -722,6 +777,7 @@ void FullscreenUI::OpenCheatsMenu() s_settings_page = SettingsPage::Cheats; PauseForMenuOpen(true); ForceKeyNavEnabled(); + UpdateRunIdleState(); FixStateIfPaused(); } @@ -732,31 +788,27 @@ void FullscreenUI::FixStateIfPaused() // When we're paused, we won't have trickled the key up event for escape yet. Do it now. ImGui::UpdateInputEvents(false); - - Host::OnIdleStateChanged(); - Host::RunOnCPUThread([]() { - if (System::IsValid()) - { - // Why twice? To clear the "wants keyboard input" flag. - System::InvalidateDisplay(); - System::InvalidateDisplay(); - } - }); } void FullscreenUI::ClosePauseMenu() { - if (!IsInitialized() || !System::IsValid()) + if (!System::IsValid()) return; - if (System::GetState() == System::State::Paused && !s_was_paused_on_quick_menu_open) - Host::RunOnCPUThread([]() { System::PauseSystem(false); }); + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; - s_current_main_window = MainWindowType::None; - s_current_pause_submenu = PauseSubMenu::None; - s_pause_menu_was_open = false; - QueueResetFocus(FocusResetType::ViewChanged); - FixStateIfPaused(); + if (System::GetState() == System::State::Paused && !s_was_paused_on_quick_menu_open) + Host::RunOnCPUThread([]() { System::PauseSystem(false); }); + + s_current_main_window = MainWindowType::None; + s_current_pause_submenu = PauseSubMenu::None; + s_pause_menu_was_open = false; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + FixStateIfPaused(); + }); } void FullscreenUI::OpenPauseSubMenu(PauseSubMenu submenu) @@ -787,8 +839,12 @@ void FullscreenUI::Shutdown() s_current_game_subtitle = {}; DestroyResources(); ImGuiFullscreen::Shutdown(); + if (s_initialized) + 
Host::RunOnCPUThread([]() { Host::OnFullscreenUIStartedOrStopped(false); }); + s_initialized = false; s_tried_to_initialize = false; + UpdateRunIdleState(); } void FullscreenUI::Render() @@ -920,6 +976,7 @@ void FullscreenUI::ReturnToMainWindow() { ClosePauseMenu(); s_current_main_window = System::IsValid() ? MainWindowType::None : MainWindowType::Landing; + UpdateRunIdleState(); FixStateIfPaused(); } @@ -958,6 +1015,11 @@ void FullscreenUI::DoStartPath(std::string path, std::string state, std::optiona if (System::IsValid()) return; + // Switch to nothing, we'll get called back via OnSystemDestroyed() if startup fails. + s_current_main_window = MainWindowType::None; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + SystemBootParameters params; params.filename = std::move(path); params.save_state = std::move(state); @@ -1152,108 +1214,116 @@ void FullscreenUI::DoChangeDiscFromFile() void FullscreenUI::DoChangeDisc() { - ImGuiFullscreen::ChoiceDialogOptions options; + Host::RunOnCPUThread([]() { + ImGuiFullscreen::ChoiceDialogOptions options; - if (System::HasMediaSubImages()) - { - const u32 current_index = System::GetMediaSubImageIndex(); - const u32 count = System::GetMediaSubImageCount(); - options.reserve(count + 1); - options.emplace_back(FSUI_STR("From File..."), false); - - for (u32 i = 0; i < count; i++) - options.emplace_back(System::GetMediaSubImageTitle(i), i == current_index); - - auto callback = [](s32 index, const std::string& title, bool checked) { - if (index == 0) - { - CloseChoiceDialog(); - DoChangeDiscFromFile(); - return; - } - else if (index > 0) - { - System::SwitchMediaSubImage(static_cast(index - 1)); - } - - CloseChoiceDialog(); - ReturnToPreviousWindow(); - }; - - OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), - std::move(callback)); - - return; - } - - if (const GameDatabase::Entry* entry = System::GetGameDatabaseEntry(); entry && 
!entry->disc_set_serials.empty()) - { - const auto lock = GameList::GetLock(); - auto matches = GameList::GetMatchingEntriesForSerial(entry->disc_set_serials); - if (matches.size() > 1) + if (System::HasMediaSubImages()) { - options.reserve(matches.size() + 1); + const u32 current_index = System::GetMediaSubImageIndex(); + const u32 count = System::GetMediaSubImageCount(); + options.reserve(count + 1); options.emplace_back(FSUI_STR("From File..."), false); - std::vector paths; - paths.reserve(matches.size()); + for (u32 i = 0; i < count; i++) + options.emplace_back(System::GetMediaSubImageTitle(i), i == current_index); - const std::string& current_path = System::GetDiscPath(); - for (auto& [title, glentry] : matches) - { - options.emplace_back(std::move(title), current_path == glentry->path); - paths.push_back(glentry->path); - } + GPUThread::RunOnThread([options = std::move(options)]() mutable { + auto callback = [](s32 index, const std::string& title, bool checked) { + if (index == 0) + { + CloseChoiceDialog(); + DoChangeDiscFromFile(); + return; + } + else if (index > 0) + { + System::SwitchMediaSubImage(static_cast(index - 1)); + } - auto callback = [paths = std::move(paths)](s32 index, const std::string& title, bool checked) { - if (index == 0) - { CloseChoiceDialog(); - DoChangeDiscFromFile(); - return; - } - else if (index > 0) - { - System::InsertMedia(paths[index - 1].c_str()); - } + ReturnToPreviousWindow(); + }; - CloseChoiceDialog(); - ReturnToMainWindow(); - }; - - OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), - std::move(callback)); + OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), + std::move(callback)); + }); return; } - } - DoChangeDiscFromFile(); + if (const GameDatabase::Entry* entry = System::GetGameDatabaseEntry(); entry && !entry->disc_set_serials.empty()) + { + const auto lock = GameList::GetLock(); + auto matches = 
GameList::GetMatchingEntriesForSerial(entry->disc_set_serials); + if (matches.size() > 1) + { + options.reserve(matches.size() + 1); + options.emplace_back(FSUI_STR("From File..."), false); + + std::vector paths; + paths.reserve(matches.size()); + + const std::string& current_path = System::GetDiscPath(); + for (auto& [title, glentry] : matches) + { + options.emplace_back(std::move(title), current_path == glentry->path); + paths.push_back(glentry->path); + } + + GPUThread::RunOnThread([options = std::move(options), paths = std::move(paths)]() mutable { + auto callback = [paths = std::move(paths)](s32 index, const std::string& title, bool checked) { + if (index == 0) + { + CloseChoiceDialog(); + DoChangeDiscFromFile(); + return; + } + else if (index > 0) + { + System::InsertMedia(paths[index - 1].c_str()); + } + + CloseChoiceDialog(); + ReturnToMainWindow(); + }; + + OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), + std::move(callback)); + }); + + return; + } + } + + GPUThread::RunOnThread([]() { DoChangeDiscFromFile(); }); + }); } void FullscreenUI::DoToggleAnalogMode() { // hacky way to toggle analog mode - for (u32 i = 0; i < NUM_CONTROLLER_AND_CARD_PORTS; i++) - { - Controller* ctrl = System::GetController(i); - if (!ctrl) - continue; - - const Controller::ControllerInfo* cinfo = Controller::GetControllerInfo(ctrl->GetType()); - if (!cinfo) - continue; - - for (const Controller::ControllerBindingInfo& bi : cinfo->bindings) + Host::RunOnCPUThread([]() { + for (u32 i = 0; i < NUM_CONTROLLER_AND_CARD_PORTS; i++) { - if (std::strcmp(bi.name, "Analog") == 0) + Controller* ctrl = System::GetController(i); + if (!ctrl) + continue; + + const Controller::ControllerInfo* cinfo = Controller::GetControllerInfo(ctrl->GetType()); + if (!cinfo) + continue; + + for (const Controller::ControllerBindingInfo& bi : cinfo->bindings) { - ctrl->SetBindState(bi.bind_index, 1.0f); - ctrl->SetBindState(bi.bind_index, 0.0f); - break; + 
if (std::strcmp(bi.name, "Analog") == 0) + { + ctrl->SetBindState(bi.bind_index, 1.0f); + ctrl->SetBindState(bi.bind_index, 0.0f); + break; + } } } - } + }); } void FullscreenUI::DoRequestExit() @@ -3760,12 +3830,9 @@ void FullscreenUI::DrawControllerSettingsPage() &Settings::GetMultitapModeName, &Settings::GetMultitapModeDisplayName, MultitapMode::Count); // load mtap settings - MultitapMode mtap_mode = g_settings.multitap_mode; - if (IsEditingGameSettings(bsi)) - { - mtap_mode = Settings::ParseMultitapModeName(bsi->GetTinyStringValue("ControllerPorts", "MultitapMode", "").c_str()) - .value_or(g_settings.multitap_mode); - } + const MultitapMode mtap_mode = + Settings::ParseMultitapModeName(bsi->GetTinyStringValue("ControllerPorts", "MultitapMode", "").c_str()) + .value_or(Settings::DEFAULT_MULTITAP_MODE); const std::array mtap_enabled = { {(mtap_mode == MultitapMode::Port1Only || mtap_mode == MultitapMode::BothPorts), (mtap_mode == MultitapMode::Port2Only || mtap_mode == MultitapMode::BothPorts)}}; @@ -7388,31 +7455,36 @@ void FullscreenUI::DrawAboutWindow() void FullscreenUI::OpenAchievementsWindow() { + if (!System::IsValid()) + return; + if (!Achievements::IsActive()) { Host::AddKeyedOSDMessage("achievements_disabled", FSUI_STR("Achievements are not enabled."), Host::OSD_INFO_DURATION); return; } - - if (!System::IsValid() || !Initialize()) - return; - - if (!Achievements::HasAchievements() || !Achievements::PrepareAchievementsWindow()) + else if (!Achievements::HasAchievements()) { ShowToast(std::string(), FSUI_STR("This game has no achievements.")); return; } - if (s_current_main_window != MainWindowType::PauseMenu) - { - PauseForMenuOpen(false); - ForceKeyNavEnabled(); - } + GPUThread::RunOnThread([]() { + if (!Initialize() || !Achievements::PrepareAchievementsWindow()) + return; - s_current_main_window = MainWindowType::Achievements; - QueueResetFocus(FocusResetType::ViewChanged); - FixStateIfPaused(); + if (s_current_main_window != 
MainWindowType::PauseMenu) + { + PauseForMenuOpen(false); + ForceKeyNavEnabled(); + } + + s_current_main_window = MainWindowType::Achievements; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + FixStateIfPaused(); + }); } bool FullscreenUI::IsAchievementsWindowOpen() @@ -7422,31 +7494,36 @@ bool FullscreenUI::IsAchievementsWindowOpen() void FullscreenUI::OpenLeaderboardsWindow() { + if (!System::IsValid()) + return; + if (!Achievements::IsActive()) { Host::AddKeyedOSDMessage("achievements_disabled", FSUI_STR("Leaderboards are not enabled."), Host::OSD_INFO_DURATION); return; } - - if (!System::IsValid() || !Initialize()) - return; - - if (!Achievements::HasLeaderboards() || !Achievements::PrepareLeaderboardsWindow()) + else if (!Achievements::HasLeaderboards()) { ShowToast(std::string(), FSUI_STR("This game has no leaderboards.")); return; } - if (s_current_main_window != MainWindowType::PauseMenu) - { - PauseForMenuOpen(false); - ForceKeyNavEnabled(); - } + GPUThread::RunOnThread([]() { + if (!Initialize() || !Achievements::PrepareLeaderboardsWindow()) + return; - s_current_main_window = MainWindowType::Leaderboards; - QueueResetFocus(FocusResetType::ViewChanged); - FixStateIfPaused(); + if (s_current_main_window != MainWindowType::PauseMenu) + { + PauseForMenuOpen(false); + ForceKeyNavEnabled(); + } + + s_current_main_window = MainWindowType::Leaderboards; + QueueResetFocus(FocusResetType::ViewChanged); + UpdateRunIdleState(); + FixStateIfPaused(); + }); } bool FullscreenUI::IsLeaderboardsWindowOpen() diff --git a/src/core/fullscreen_ui.h b/src/core/fullscreen_ui.h index 9e8405442..f770955b3 100644 --- a/src/core/fullscreen_ui.h +++ b/src/core/fullscreen_ui.h @@ -21,7 +21,6 @@ bool IsInitialized(); bool HasActiveWindow(); void CheckForConfigChanges(const Settings& old_settings); void OnSystemStarted(); -void OnSystemPaused(); void OnSystemResumed(); void OnSystemDestroyed(); void OnRunningGameChanged(); @@ -50,6 +49,12 @@ namespace Host { 
#ifndef __ANDROID__ +/// Called whenever fullscreen UI starts/stops. +void OnFullscreenUIStartedOrStopped(bool started); + +/// Called when the pause state changes, or fullscreen UI opens. +void OnFullscreenUIActiveChanged(bool is_active); + /// Requests shut down and exit of the hosting application. This may not actually exit, /// if the user cancels the shutdown confirmation. void RequestExitApplication(bool allow_confirm); diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 9ec22af4a..56f080c3b 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -3,9 +3,12 @@ #include "gpu.h" #include "dma.h" +#include "gpu_backend.h" #include "gpu_dump.h" +#include "gpu_hw_texture_cache.h" #include "gpu_shadergen.h" #include "gpu_sw_rasterizer.h" +#include "gpu_thread.h" #include "host.h" #include "interrupt_controller.h" #include "performance_counters.h" @@ -72,18 +75,7 @@ static u64 s_active_gpu_cycles = 0; static u32 s_active_gpu_cycles_frames = 0; #endif -static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; - -static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, - u32 texture_data_stride, GPUTexture::Format texture_format, - std::string osd_key); - -GPU::GPU() -{ - GPU_SW_Rasterizer::SelectImplementation(); - ResetStatistics(); -} +GPU::GPU() = default; GPU::~GPU() { @@ -92,11 +84,9 @@ GPU::~GPU() s_frame_done_event.Deactivate(); StopRecordingGPUDump(); - DestroyDeinterlaceTextures(); - g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); } -bool GPU::Initialize(Error* error) +void GPU::Initialize() { if (!System::IsReplayingGPUDump()) s_crtc_tick_event.Activate(); @@ -108,21 +98,14 @@ bool GPU::Initialize(Error* error) m_console_is_pal = System::IsPALRegion(); UpdateCRTCConfig(); - if (!CompileDisplayPipelines(true, true, g_settings.display_24bit_chroma_smoothing, error)) - 
return false; - #ifdef PSX_GPU_STATS s_active_gpu_cycles = 0; s_active_gpu_cycles_frames = 0; #endif - - return true; } void GPU::UpdateSettings(const Settings& old_settings) { - FlushRender(); - m_force_progressive_scan = (g_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive); m_fifo_size = g_settings.gpu_fifo_size; m_max_run_ahead = g_settings.gpu_max_run_ahead; @@ -138,23 +121,6 @@ void GPU::UpdateSettings(const Settings& old_settings) // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); } - - if (g_settings.display_scaling != old_settings.display_scaling || - g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || - g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing) - { - // Toss buffers on mode change. - if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) - DestroyDeinterlaceTextures(); - - if (!CompileDisplayPipelines( - g_settings.display_scaling != old_settings.display_scaling, - g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, - g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing, nullptr)) - { - Panic("Failed to compile display pipeline on settings change."); - } - } } void GPU::CPUClockChanged() @@ -162,20 +128,6 @@ void GPU::CPUClockChanged() UpdateCRTCConfig(); } -u32 GPU::GetResolutionScale() const -{ - return 1u; -} - -void GPU::UpdateResolutionScale() -{ -} - -std::tuple GPU::GetFullDisplayResolution() const -{ - return std::tie(m_crtc_state.display_width, m_crtc_state.display_height); -} - void GPU::Reset(bool clear_vram) { m_GPUSTAT.bits = 0x14802000; @@ -190,12 +142,6 @@ void GPU::Reset(bool clear_vram) m_crtc_state.interlaced_field = 0; m_crtc_state.interlaced_display_field = 0; - if (clear_vram) - { - std::memset(g_vram, 0, sizeof(g_vram)); - std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); - } - // Cancel VRAM 
writes. m_blitter_state = BlitterState::Idle; @@ -204,12 +150,14 @@ void GPU::Reset(bool clear_vram) s_command_tick_event.Deactivate(); SoftReset(); - UpdateDisplay(); + + // Can skip the VRAM clear if it's not a hardware reset. + if (clear_vram) + GPUBackend::PushCommand(GPUBackend::NewClearVRAMCommand()); } void GPU::SoftReset() { - FlushRender(); if (m_blitter_state == BlitterState::WritingVRAM) FinishVRAMWrite(); @@ -255,14 +203,17 @@ void GPU::SoftReset() UpdateGPUIdle(); } -bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) +bool GPU::DoState(StateWrapper& sw, bool update_display) { - FlushRender(); - if (sw.IsReading()) { // perform a reset to discard all pending draws/fb state - Reset(host_texture == nullptr); + Reset(false); + } + else + { + // Need to ensure our copy of VRAM is good. + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); } sw.Do(&m_GPUSTAT.bits); @@ -333,16 +284,20 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ sw.Do(&m_command_total_words); sw.Do(&m_GPUREAD_latch); + u16 load_clut_data[GPU_CLUT_SIZE]; if (sw.GetVersion() < 64) [[unlikely]] { // Clear CLUT cache and let it populate later. InvalidateCLUT(); + std::memset(load_clut_data, 0, sizeof(load_clut_data)); } else { sw.Do(&m_current_clut_reg_bits); sw.Do(&m_current_clut_is_8bit); - sw.DoArray(g_gpu_clut, std::size(g_gpu_clut)); + + // I hate this extra copy... because I'm a moron and put it in the middle of the state data. + sw.DoArray(sw.IsReading() ? load_clut_data : g_gpu_clut, std::size(g_gpu_clut)); } sw.Do(&m_vram_transfer.x); @@ -362,36 +317,52 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ if (sw.IsReading()) { - m_draw_mode.texture_page_changed = true; + if (!sw.DoMarker("GPU-VRAM")) + return false; + + // Need to calculate the TC data size. But skip over VRAM first, we'll grab it later. 
+ const size_t vram_start_pos = sw.GetPosition(); + sw.SkipBytes(VRAM_SIZE); + u32 tc_data_size; + if (!GPUTextureCache::GetStateSize(sw, &tc_data_size)) [[unlikely]] + return false; + + // Now we can actually allocate FIFO storage, and push it to the GPU thread. + GPUBackendLoadStateCommand* cmd = static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::LoadState, sizeof(GPUBackendLoadStateCommand) + tc_data_size)); + cmd->drawing_area = m_drawing_area; + std::memcpy(cmd->clut_data, load_clut_data, sizeof(cmd->clut_data)); + std::memcpy(cmd->vram_data, sw.GetData() + vram_start_pos, VRAM_SIZE); + cmd->texture_cache_state_version = sw.GetVersion(); + cmd->texture_cache_state_size = tc_data_size; + if (tc_data_size > 0) + std::memcpy(cmd->texture_cache_state, sw.GetData() + vram_start_pos + VRAM_SIZE, tc_data_size); + GPUThread::PushCommand(cmd); + m_drawing_area_changed = true; SetClampedDrawingArea(); UpdateDMARequest(); - } + UpdateCRTCConfig(); + if (update_display) + UpdateDisplay(false); - if (!host_texture) + UpdateCommandTickEvent(); + } + else // if not memory state { if (!sw.DoMarker("GPU-VRAM")) return false; - sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - } + // write vram + sw.DoBytes(g_vram, VRAM_SIZE); - if (sw.IsReading()) - { - UpdateCRTCConfig(); - if (update_display) - UpdateDisplay(); - - UpdateCommandTickEvent(); + // write TC data, we have to be super careful here, since we're reading GPU thread state... 
+ GPUTextureCache::SaveState(sw); } return !sw.HasError(); } -void GPU::RestoreDeviceContext() -{ -} - void GPU::UpdateDMARequest() { switch (m_blitter_state) @@ -874,8 +845,11 @@ void GPU::UpdateCRTCDisplayParameters() << height_shift; } - if (cs.display_vram_width != old_vram_width || cs.display_vram_height != old_vram_height) - UpdateResolutionScale(); + if ((cs.display_vram_width != old_vram_width || cs.display_vram_height != old_vram_height) && + g_settings.gpu_resolution_scale == 0) + { + GPUBackend::PushCommand(GPUBackend::NewUpdateResolutionScaleCommand()); + } } TickCount GPU::GetPendingCRTCTicks() const @@ -1061,9 +1035,8 @@ void GPU::CRTCTickEvent(TickCount ticks) // flush any pending draws and "scan out" the image // TODO: move present in here I guess - FlushRender(); - UpdateDisplay(); System::IncrementFrameNumber(); + UpdateDisplay(true); frame_done = true; // switch fields early. this is needed so we draw to the correct one. @@ -1175,16 +1148,21 @@ void GPU::UpdateCommandTickEvent() void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, float* display_y) const { - if (!g_gpu_device->HasMainSwapChain()) [[unlikely]] + const WindowInfo& wi = GPUThread::GetRenderWindowInfo(); + if (wi.IsSurfaceless()) { - *display_x = 0.0f; - *display_y = 0.0f; + *display_x = *display_y = -1.0f; return; } GSVector4i display_rc, draw_rc; - CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(), true, - true, &display_rc, &draw_rc); + CalculateDrawRect(wi.surface_width, wi.surface_height, m_crtc_state.display_width, m_crtc_state.display_height, + m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, m_crtc_state.display_vram_width, + m_crtc_state.display_vram_height, g_settings.display_rotation, ComputeDisplayAspectRatio(), + g_settings.display_stretch_vertically, + (g_settings.display_scaling == DisplayScalingMode::NearestInteger || + 
g_settings.display_scaling == DisplayScalingMode::BilinearInteger), + &display_rc, &draw_rc); // convert coordinates to active display region, then to full display region const float scaled_display_x = @@ -1199,7 +1177,7 @@ void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float win // TODO: apply rotation matrix DEV_LOG("win {:.0f},{:.0f} -> local {:.0f},{:.0f}, disp {:.2f},{:.2f} (size {},{} frac {},{})", window_x, window_y, - window_x - draw_rc.left, window_y - draw_rc.top, *display_x, *display_y, m_crtc_state.display_width, + window_x - display_rc.left, window_y - display_rc.top, *display_x, *display_y, m_crtc_state.display_width, m_crtc_state.display_height, *display_x / static_cast(m_crtc_state.display_width), *display_y / static_cast(m_crtc_state.display_height)); } @@ -1378,7 +1356,7 @@ void GPU::WriteGP1(u32 value) SynchronizeCRTC(); m_crtc_state.regs.display_address_start = new_value; UpdateCRTCDisplayParameters(); - OnBufferSwapped(); + GPUBackend::PushCommand(GPUBackend::NewBufferSwappedCommand()); } } break; @@ -1533,9 +1511,14 @@ void GPU::UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut) { DEBUG_LOG("Reloading CLUT from {},{}, {}", clut.GetXBase(), clut.GetYBase(), needs_8bit ? "8-bit" : "4-bit"); AddCommandTicks(needs_8bit ? 256 : 16); - UpdateCLUT(clut, needs_8bit); m_current_clut_reg_bits = clut.bits; m_current_clut_is_8bit = needs_8bit; + + GPUBackendUpdateCLUTCommand* cmd = GPUBackend::NewUpdateCLUTCommand(); + FillBackendCommandParameters(cmd); + cmd->reg.bits = clut.bits; + cmd->clut_is_8bit = needs_8bit; + GPUBackend::PushCommand(cmd); } } @@ -1550,27 +1533,21 @@ bool GPU::IsCLUTValid() const return (m_current_clut_reg_bits != std::numeric_limits::max()); } -void GPU::ClearDisplay() -{ - ClearDisplayTexture(); - - // Just recycle the textures, it'll get re-fetched. 
- DestroyDeinterlaceTextures(); -} - void GPU::SetClampedDrawingArea() { - if (m_drawing_area.left > m_drawing_area.right || m_drawing_area.top > m_drawing_area.bottom) [[unlikely]] - { - m_clamped_drawing_area = GSVector4i::zero(); - return; - } + m_clamped_drawing_area = GetClampedDrawingArea(m_drawing_area); +} - const u32 right = std::min(m_drawing_area.right + 1, static_cast(VRAM_WIDTH)); - const u32 left = std::min(m_drawing_area.left, std::min(m_drawing_area.right, VRAM_WIDTH - 1)); - const u32 bottom = std::min(m_drawing_area.bottom + 1, static_cast(VRAM_HEIGHT)); - const u32 top = std::min(m_drawing_area.top, std::min(m_drawing_area.bottom, VRAM_HEIGHT - 1)); - m_clamped_drawing_area = GSVector4i(left, top, right, bottom); +GSVector4i GPU::GetClampedDrawingArea(const GPUDrawingArea& drawing_area) +{ + if (drawing_area.left > drawing_area.right || drawing_area.top > drawing_area.bottom) [[unlikely]] + return GSVector4i::zero(); + + const u32 right = std::min(drawing_area.right + 1, static_cast(VRAM_WIDTH)); + const u32 left = std::min(drawing_area.left, std::min(drawing_area.right, VRAM_WIDTH - 1)); + const u32 bottom = std::min(drawing_area.bottom + 1, static_cast(VRAM_HEIGHT)); + const u32 top = std::min(drawing_area.top, std::min(drawing_area.bottom, VRAM_HEIGHT - 1)); + return GSVector4i(left, top, right, bottom); } void GPU::SetDrawMode(u16 value) @@ -1579,16 +1556,8 @@ void GPU::SetDrawMode(u16 value) if (!m_set_texture_disable_mask) new_mode_reg.texture_disable = false; - if (new_mode_reg.bits == m_draw_mode.mode_reg.bits) - return; - - m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK) != - (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK)); m_draw_mode.mode_reg.bits = new_mode_reg.bits; - if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field) - FlushRender(); - // Bits 0..10 are returned in the GPU status register. 
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) | (ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK); @@ -1598,11 +1567,7 @@ void GPU::SetDrawMode(u16 value) void GPU::SetTexturePalette(u16 value) { value &= DrawMode::PALETTE_MASK; - if (m_draw_mode.palette_reg.bits == value) - return; - m_draw_mode.palette_reg.bits = value; - m_draw_mode.texture_page_changed = true; } void GPU::SetTextureWindow(u32 value) @@ -1624,708 +1589,21 @@ void GPU::SetTextureWindow(u32 value) m_draw_mode.texture_window_value = value; } -void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit) +void GPU::CalculateDrawRect(u32 window_width, u32 window_height, u32 crtc_display_width, u32 crtc_display_height, + s32 display_origin_left, s32 display_origin_top, u32 display_vram_width, + u32 display_vram_height, DisplayRotation rotation, float aspect_ratio, + bool stretch_vertically, bool integer_scale, GSVector4i* display_rect, + GSVector4i* draw_rect) { - const u16* src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH]; - const u32 start_x = reg.GetXBase(); - if (!clut_is_8bit) - { - // Wraparound can't happen in 4-bit mode. 
- std::memcpy(dest, &src_row[start_x], sizeof(u16) * 16); - } - else - { - if ((start_x + 256) > VRAM_WIDTH) [[unlikely]] - { - const u32 end = VRAM_WIDTH - start_x; - const u32 start = 256 - end; - std::memcpy(dest, &src_row[start_x], sizeof(u16) * end); - std::memcpy(dest + end, src_row, sizeof(u16) * start); - } - else - { - std::memcpy(dest, &src_row[start_x], sizeof(u16) * 256); - } - } -} - -bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error) -{ - GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, - g_gpu_device->GetFeatures().framebuffer_fetch); - - GPUPipeline::GraphicsConfig plconfig; - plconfig.input_layout.vertex_stride = 0; - plconfig.primitive = GPUPipeline::Primitive::Triangles; - plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); - plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); - plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - plconfig.geometry_shader = nullptr; - plconfig.depth_format = GPUTexture::Format::Unknown; - plconfig.samples = 1; - plconfig.per_sample_shading = false; - plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; - - if (display) - { - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.SetTargetFormats(g_gpu_device->HasMainSwapChain() ? 
g_gpu_device->GetMainSwapChain()->GetFormat() : - GPUTexture::Format::RGBA8); - - std::string vs = shadergen.GenerateDisplayVertexShader(); - std::string fs; - switch (g_settings.display_scaling) - { - case DisplayScalingMode::BilinearSharp: - fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); - break; - - case DisplayScalingMode::BilinearSmooth: - case DisplayScalingMode::BilinearInteger: - fs = shadergen.GenerateDisplayFragmentShader(true, false); - break; - - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - default: - fs = shadergen.GenerateDisplayFragmentShader(false, true); - break; - } - - std::unique_ptr vso = - g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error); - std::unique_ptr fso = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Display Vertex Shader"); - GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", - Settings::GetDisplayScalingName(g_settings.display_scaling)); - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", - Settings::GetDisplayScalingName(g_settings.display_scaling)); - } - - if (deinterlace) - { - plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); - - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), - shadergen.GenerateScreenQuadVertexShader(), error); - if (!vso) - return false; - GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); - - std::unique_ptr fso; - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateInterleavedFieldExtractFragmentShader(), error))) - { - return false; - } - - GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); - - plconfig.layout 
= GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); - - switch (g_settings.display_deinterlacing_mode) - { - case DisplayDeinterlacingMode::Disabled: - case DisplayDeinterlacingMode::Progressive: - break; - - case DisplayDeinterlacingMode::Weave: - { - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateDeinterlaceWeaveFragmentShader(), error))) - { - return false; - } - - GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); - } - break; - - case DisplayDeinterlacingMode::Blend: - { - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateDeinterlaceBlendFragmentShader(), error))) - { - return false; - } - - GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); - } - break; - - case DisplayDeinterlacingMode::Adaptive: - { - fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateFastMADReconstructFragmentShader(), error); - if (!fso) - return false; - - GL_OBJECT_NAME(fso, "FastMAD Reconstruct 
Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); - } - break; - - default: - UnreachableCode(); - } - } - - if (chroma_smoothing) - { - m_chroma_smoothing_pipeline.reset(); - g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); - - if (g_settings.display_24bit_chroma_smoothing) - { - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); - - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), - shadergen.GenerateScreenQuadVertexShader(), error); - std::unique_ptr fso = g_gpu_device->CreateShader( - GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateChromaSmoothingFragmentShader(), error); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); - GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); - - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); - } - } - - return true; -} - -void GPU::ClearDisplayTexture() -{ - m_display_texture = nullptr; - m_display_texture_view_x = 0; - m_display_texture_view_y = 0; - m_display_texture_view_width = 0; - m_display_texture_view_height = 0; -} - -void GPU::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, - s32 view_height) -{ - DebugAssert(texture); - - if (g_settings.display_auto_resize_window && - (view_width != m_display_texture_view_width || view_height != m_display_texture_view_height)) - { - System::RequestDisplaySize(); - } 
- - m_display_texture = texture; - m_display_depth_buffer = depth_buffer; - m_display_texture_view_x = view_x; - m_display_texture_view_y = view_y; - m_display_texture_view_width = view_width; - m_display_texture_view_height = view_height; -} - -GPUDevice::PresentResult GPU::PresentDisplay() -{ - FlushRender(); - - if (!g_gpu_device->HasMainSwapChain()) - return GPUDevice::PresentResult::SkipPresent; - - GSVector4i display_rect; - GSVector4i draw_rect; - CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(), - !g_settings.debugging.show_vram, true, &display_rect, &draw_rect); - return RenderDisplay(nullptr, display_rect, draw_rect, !g_settings.debugging.show_vram); -} - -GPUDevice::PresentResult GPU::RenderDisplay(GPUTexture* target, const GSVector4i display_rect, - const GSVector4i draw_rect, bool postfx) -{ - GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); - - if (m_display_texture) - m_display_texture->MakeReadyForSampling(); - - // Internal post-processing. - GPUTexture* display_texture = m_display_texture; - s32 display_texture_view_x = m_display_texture_view_x; - s32 display_texture_view_y = m_display_texture_view_y; - s32 display_texture_view_width = m_display_texture_view_width; - s32 display_texture_view_height = m_display_texture_view_height; - if (postfx && display_texture && PostProcessing::InternalChain.IsActive() && - PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width, - display_texture_view_height)) - { - DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 && - static_cast(display_texture->GetWidth()) == display_texture_view_width && - static_cast(display_texture->GetHeight()) == display_texture_view_height); - - // Now we can apply the post chain. 
- GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); - if (const GPUDevice::PresentResult pres = PostProcessing::InternalChain.Apply( - display_texture, m_display_depth_buffer, post_output_texture, - GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), display_texture_view_width, - display_texture_view_height, m_crtc_state.display_width, m_crtc_state.display_height); - pres != GPUDevice::PresentResult::OK) - { - return pres; - } - else - { - display_texture_view_x = 0; - display_texture_view_y = 0; - display_texture = post_output_texture; - display_texture->MakeReadyForSampling(); - } - } - - const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetMainSwapChain()->GetFormat(); - const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetMainSwapChain()->GetWidth(); - const u32 target_height = target ? target->GetHeight() : g_gpu_device->GetMainSwapChain()->GetHeight(); - const bool really_postfx = (postfx && PostProcessing::DisplayChain.IsActive() && g_gpu_device->HasMainSwapChain() && - hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && - PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); - const GSVector4i real_draw_rect = - g_gpu_device->UsesLowerLeftOrigin() ? 
GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect; - if (really_postfx) - { - g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR); - g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture()); - } - else - { - if (target) - { - g_gpu_device->SetRenderTarget(target); - } - else - { - const GPUDevice::PresentResult pres = g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain()); - if (pres != GPUDevice::PresentResult::OK) - return pres; - } - } - - if (display_texture) - { - bool texture_filter_linear = false; - - struct Uniforms - { - float src_rect[4]; - float src_size[4]; - float clamp_rect[4]; - float params[4]; - float rotation_matrix[2][2]; - } uniforms; - std::memset(uniforms.params, 0, sizeof(uniforms.params)); - - switch (g_settings.display_scaling) - { - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - break; - - case DisplayScalingMode::BilinearSmooth: - case DisplayScalingMode::BilinearInteger: - texture_filter_linear = true; - break; - - case DisplayScalingMode::BilinearSharp: - { - texture_filter_linear = true; - uniforms.params[0] = std::max( - std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f); - uniforms.params[1] = std::max( - std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f); - uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; - uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; - } - break; - - default: - UnreachableCode(); - break; - } - - g_gpu_device->SetPipeline(m_display_pipeline.get()); - g_gpu_device->SetTextureSampler( - 0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); - - // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. 
This is because - // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. - const float rcp_width = 1.0f / static_cast(display_texture->GetWidth()); - const float rcp_height = 1.0f / static_cast(display_texture->GetHeight()); - uniforms.src_rect[0] = static_cast(display_texture_view_x) * rcp_width; - uniforms.src_rect[1] = static_cast(display_texture_view_y) * rcp_height; - uniforms.src_rect[2] = static_cast(display_texture_view_width) * rcp_width; - uniforms.src_rect[3] = static_cast(display_texture_view_height) * rcp_height; - uniforms.clamp_rect[0] = (static_cast(display_texture_view_x) + 0.5f) * rcp_width; - uniforms.clamp_rect[1] = (static_cast(display_texture_view_y) + 0.5f) * rcp_height; - uniforms.clamp_rect[2] = - (static_cast(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width; - uniforms.clamp_rect[3] = - (static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height; - uniforms.src_size[0] = static_cast(display_texture->GetWidth()); - uniforms.src_size[1] = static_cast(display_texture->GetHeight()); - uniforms.src_size[2] = rcp_width; - uniforms.src_size[3] = rcp_height; - - if (g_settings.display_rotation != DisplayRotation::Normal) - { - static constexpr const std::array(DisplayRotation::Count) - 1> rotation_radians = {{ - static_cast(std::numbers::pi * 1.5f), // Rotate90 - static_cast(std::numbers::pi), // Rotate180 - static_cast(std::numbers::pi / 2.0), // Rotate270 - }}; - - GSMatrix2x2::Rotation(rotation_radians[static_cast(g_settings.display_rotation) - 1]) - .store(uniforms.rotation_matrix); - } - else - { - GSMatrix2x2::Identity().store(uniforms.rotation_matrix); - } - - g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); - - g_gpu_device->SetViewportAndScissor(real_draw_rect); - g_gpu_device->Draw(3, 0); - } - - if (really_postfx) - { - DebugAssert(!g_settings.debugging.show_vram); - - // "original size" in postfx includes padding. 
- const float upscale_x = m_display_texture ? static_cast(m_display_texture_view_width) / - static_cast(m_crtc_state.display_vram_width) : - 1.0f; - const float upscale_y = m_display_texture ? static_cast(m_display_texture_view_height) / - static_cast(m_crtc_state.display_vram_height) : - 1.0f; - const s32 orig_width = static_cast(std::ceil(static_cast(m_crtc_state.display_width) * upscale_x)); - const s32 orig_height = static_cast(std::ceil(static_cast(m_crtc_state.display_height) * upscale_y)); - - return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, - display_rect, orig_width, orig_height, m_crtc_state.display_width, - m_crtc_state.display_height); - } - else - { - return GPUDevice::PresentResult::OK; - } -} - -bool GPU::SendDisplayToMediaCapture(MediaCapture* cap) -{ - GPUTexture* target = cap->GetRenderTexture(); - if (!target) [[unlikely]] - return false; - - const bool apply_aspect_ratio = - (g_settings.display_screenshot_mode != DisplayScreenshotMode::UncorrectedInternalResolution); - const bool postfx = (g_settings.display_screenshot_mode != DisplayScreenshotMode::InternalResolution); - GSVector4i display_rect, draw_rect; - CalculateDrawRect(target->GetWidth(), target->GetHeight(), !g_settings.debugging.show_vram, apply_aspect_ratio, - &display_rect, &draw_rect); - - // Not cleared by RenderDisplay(). 
- g_gpu_device->ClearRenderTarget(target, GPUDevice::DEFAULT_CLEAR_COLOR); - - if (RenderDisplay(target, display_rect, draw_rect, postfx) != GPUDevice::PresentResult::OK) [[unlikely]] - return false; - - return cap->DeliverVideoFrame(target); -} - -void GPU::DestroyDeinterlaceTextures() -{ - for (std::unique_ptr& tex : m_deinterlace_buffers) - g_gpu_device->RecycleTexture(std::move(tex)); - g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); - m_current_deinterlace_buffer = 0; -} - -bool GPU::Deinterlace(u32 field, u32 line_skip) -{ - GPUTexture* src = m_display_texture; - const u32 x = m_display_texture_view_x; - const u32 y = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; - - switch (g_settings.display_deinterlacing_mode) - { - case DisplayDeinterlacingMode::Disabled: - { - if (line_skip == 0) - return true; - - // Still have to extract the field. - if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] - return false; - - SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height); - return true; - } - - case DisplayDeinterlacingMode::Weave: - { - GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); - - const u32 full_height = height * 2; - if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - src->MakeReadyForSampling(); - - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, field, line_skip}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - 
SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); - return true; - } - - case DisplayDeinterlacingMode::Blend: - { - constexpr u32 NUM_BLEND_BUFFERS = 2; - - GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); - - const u32 this_buffer = m_current_deinterlace_buffer; - m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; - GL_INS_FMT("Current buffer: {}", this_buffer); - if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || - !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - // TODO: could be implemented with alpha blending instead.. - - g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height); - return true; - } - - case DisplayDeinterlacingMode::Adaptive: - { - GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, - line_skip); - - const u32 full_height = height * 2; - const u32 this_buffer = m_current_deinterlace_buffer; - m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; - GL_INS_FMT("Current buffer: {}", this_buffer); - if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || - 
!DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {field, full_height}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); - return true; - } - - default: - UnreachableCode(); - } -} - -bool GPU::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip) -{ - if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width || - m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, false)) [[unlikely]] - { - return false; - } - - GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx); - } - - GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get(); - g_gpu_device->InvalidateRenderTarget(dst); - - // If we're not skipping lines, then we can simply copy the texture. 
- if (line_skip == 0 && src->GetFormat() == dst->GetFormat()) - { - GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip); - g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height); - } - else - { - GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height, - line_skip); - - // Otherwise, we need to extract every other line from the texture. - src->MakeReadyForSampling(); - g_gpu_device->SetRenderTarget(dst); - g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); - g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, line_skip}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - GL_POP(); - } - - dst->MakeReadyForSampling(); - return true; -} - -bool GPU::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) -{ - if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || - m_deinterlace_texture->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, preserve)) [[unlikely]] - { - return false; - } - - GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); - } - - return true; -} - -bool GPU::ApplyChromaSmoothing() -{ - const u32 x = m_display_texture_view_x; - const u32 y = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; - if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || - m_chroma_smoothing_texture->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, false)) - { - ClearDisplayTexture(); - return 
false; - } - - GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); - } - - GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); - - m_display_texture->MakeReadyForSampling(); - g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); - g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); - g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_display_texture, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, width - 1, height - 1}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - m_chroma_smoothing_texture->MakeReadyForSampling(); - SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); - return true; -} - -void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, - GSVector4i* display_rect, GSVector4i* draw_rect) const -{ - const bool integer_scale = (g_settings.display_scaling == DisplayScalingMode::NearestInteger || - g_settings.display_scaling == DisplayScalingMode::BilinearInteger); - const bool show_vram = g_settings.debugging.show_vram; - const float display_aspect_ratio = ComputeDisplayAspectRatio(); const float window_ratio = static_cast(window_width) / static_cast(window_height); - const float crtc_display_width = static_cast(show_vram ? VRAM_WIDTH : m_crtc_state.display_width); - const float crtc_display_height = static_cast(show_vram ? VRAM_HEIGHT : m_crtc_state.display_height); - const float x_scale = - apply_aspect_ratio ? - (display_aspect_ratio / (static_cast(crtc_display_width) / static_cast(crtc_display_height))) : - 1.0f; - float display_width = crtc_display_width; - float display_height = crtc_display_height; - float active_left = static_cast(show_vram ? 
0 : m_crtc_state.display_origin_left); - float active_top = static_cast(show_vram ? 0 : m_crtc_state.display_origin_top); - float active_width = static_cast(show_vram ? VRAM_WIDTH : m_crtc_state.display_vram_width); - float active_height = static_cast(show_vram ? VRAM_HEIGHT : m_crtc_state.display_vram_height); - if (!g_settings.display_stretch_vertically) + float display_width = static_cast(crtc_display_width); + float display_height = static_cast(crtc_display_height); + const float x_scale = (aspect_ratio != 0.0f) ? (aspect_ratio / (display_width / display_height)) : 1.0f; + float active_left = static_cast(display_origin_left); + float active_top = static_cast(display_origin_top); + float active_width = static_cast(display_vram_width); + float active_height = static_cast(display_vram_height); + if (!stretch_vertically) { display_width *= x_scale; active_left *= x_scale; @@ -2339,8 +1617,7 @@ void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rota } // swap width/height when rotated, the flipping of padding is taken care of in the shader with the rotation matrix - if (g_settings.display_rotation == DisplayRotation::Rotate90 || - g_settings.display_rotation == DisplayRotation::Rotate270) + if (rotation == DisplayRotation::Rotate90 || rotation == DisplayRotation::Rotate270) { std::swap(display_width, display_height); std::swap(active_width, active_height); @@ -2421,285 +1698,96 @@ void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rota GSVector4(left_padding, top_padding, left_padding + display_width * scale, top_padding + display_height * scale)); } -bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, - u32 texture_data_stride, GPUTexture::Format texture_format, std::string osd_key) +void GPU::ReadVRAM(u16 x, u16 y, u16 width, u16 height) { - bool result; + GPUBackendReadVRAMCommand* cmd 
= GPUBackend::NewReadVRAMCommand(); + cmd->x = x; + cmd->y = y; + cmd->width = width; + cmd->height = height; + GPUBackend::PushCommandAndSync(cmd, true); +} - const char* extension = std::strrchr(filename.c_str(), '.'); - if (extension) +void GPU::UpdateVRAM(u16 x, u16 y, u16 width, u16 height, const void* data, bool set_mask, bool check_mask) +{ + const u32 num_words = width * height; + GPUBackendUpdateVRAMCommand* cmd = GPUBackend::NewUpdateVRAMCommand(num_words); + cmd->params.bits = 0; + cmd->params.set_mask_while_drawing = set_mask; + cmd->params.check_mask_before_draw = check_mask; + cmd->x = x; + cmd->y = y; + cmd->width = width; + cmd->height = height; + std::memcpy(cmd->data, data, num_words * sizeof(u16)); + GPUBackend::PushCommand(cmd); +} + +void GPU::ClearDisplay() +{ + GPUBackend::PushCommand(GPUBackend::NewClearDisplayCommand()); +} + +void GPU::UpdateDisplay(bool is_frame) +{ + GPUBackendUpdateDisplayCommand* cmd = GPUBackend::NewUpdateDisplayCommand(); + cmd->frame_number = System::GetFrameNumber(); + cmd->internal_frame_number = System::GetInternalFrameNumber(); + cmd->display_width = m_crtc_state.display_width; + cmd->display_height = m_crtc_state.display_height; + cmd->display_origin_left = m_crtc_state.display_origin_left; + cmd->display_origin_top = m_crtc_state.display_origin_top; + cmd->display_vram_left = m_crtc_state.display_vram_left; + cmd->display_vram_top = m_crtc_state.display_vram_top; + cmd->display_vram_width = m_crtc_state.display_vram_width; + cmd->display_vram_height = m_crtc_state.display_vram_height; + cmd->X = m_crtc_state.regs.X; + cmd->bits = 0; + cmd->interlaced_display_enabled = IsInterlacedDisplayEnabled(); + cmd->interlaced_display_field = GetInterlacedDisplayField(); + cmd->interlaced_display_interleaved = cmd->interlaced_display_enabled && m_GPUSTAT.vertical_resolution; + cmd->display_24bit = m_GPUSTAT.display_area_color_depth_24; + cmd->display_disabled = IsDisplayDisabled(); + cmd->display_aspect_ratio = 
ComputeDisplayAspectRatio(); + cmd->media_capture = nullptr; + if (is_frame) { - if (GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) + bool present_frame; + bool should_allow_present_skip; + System::GetFramePresentationDetails(&is_frame, &present_frame, &should_allow_present_skip, &cmd->present_time); + cmd->is_frame = is_frame; + cmd->present_frame = present_frame; + cmd->allow_present_skip = should_allow_present_skip; + + // Video capture setup. + if (MediaCapture* cap = System::GetMediaCapture(); cap && cap->IsCapturingVideo()) { - if (clear_alpha) + if (cap->GetVideoFPS() != System::GetVideoFrameRate()) { - for (u32& pixel : texture_data) - pixel |= 0xFF000000u; + const std::string next_capture_path = cap->GetNextCapturePath(); + INFO_LOG("Video frame rate changed, switching to new capture file {}", Path::GetFileName(next_capture_path)); + + const bool was_capturing_audio = cap->IsCapturingAudio(); + System::StopMediaCapture(); + System::StartMediaCapture(std::move(next_capture_path), true, was_capturing_audio); + cap = System::GetMediaCapture(); } - if (flip_y) - GPUTexture::FlipTextureDataRGBA8(width, height, reinterpret_cast(texture_data.data()), - texture_data_stride); - - Assert(texture_data_stride == sizeof(u32) * width); - RGBA8Image image(width, height, std::move(texture_data)); - if (image.SaveToFile(filename.c_str(), fp.get(), quality)) - { - result = true; - } - else - { - ERROR_LOG("Unknown extension in filename '{}' or save error: '{}'", filename, extension); - result = false; - } - } - else - { - result = false; + cmd->media_capture = cap; } } else { - ERROR_LOG("Unable to determine file extension for '{}'", filename); - result = false; + cmd->is_frame = false; + cmd->present_time = 0; + cmd->present_frame = false; + cmd->allow_present_skip = false; } - if (!osd_key.empty()) - { - Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, - fmt::format(result ? 
TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : - TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), - Path::GetFileName(filename), - result ? Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); - } - - return result; -} - -bool GPU::WriteDisplayTextureToFile(std::string filename) -{ - if (!m_display_texture) - return false; - - const u32 read_x = static_cast(m_display_texture_view_x); - const u32 read_y = static_cast(m_display_texture_view_y); - const u32 read_width = static_cast(m_display_texture_view_width); - const u32 read_height = static_cast(m_display_texture_view_height); - - const u32 texture_data_stride = - Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4); - std::vector texture_data((texture_data_stride * read_height) / sizeof(u32)); - - std::unique_ptr dltex; - if (g_gpu_device->GetFeatures().memory_import) - { - dltex = - g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), texture_data.data(), - texture_data.size() * sizeof(u32), texture_data_stride); - } - if (!dltex) - { - if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) - { - ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, - GPUTexture::GetFormatName(m_display_texture->GetFormat())); - return false; - } - } - - dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); - if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride)) - { - RestoreDeviceContext(); - return false; - } - - RestoreDeviceContext(); - - Error error; - auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); - if (!fp) - { - ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); - return false; - } - - constexpr bool clear_alpha = true; - const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); 
- - return CompressAndWriteTextureToFile( - read_width, read_height, std::move(filename), std::move(fp), g_settings.display_screenshot_quality, clear_alpha, - flip_y, std::move(texture_data), texture_data_stride, m_display_texture->GetFormat(), std::string()); -} - -bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx, std::vector* out_pixels, u32* out_stride, - GPUTexture::Format* out_format) -{ - const GPUTexture::Format hdformat = - g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; - - auto render_texture = - g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); - if (!render_texture) - return false; - - g_gpu_device->ClearRenderTarget(render_texture.get(), GPUDevice::DEFAULT_CLEAR_COLOR); - - // TODO: this should use copy shader instead. - RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx); - - const u32 stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(hdformat) * width, sizeof(u32)); - out_pixels->resize((height * stride) / sizeof(u32)); - - std::unique_ptr dltex; - if (g_gpu_device->GetFeatures().memory_import) - { - dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(), - out_pixels->size() * sizeof(u32), stride); - } - if (!dltex) - { - if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat))) - { - ERROR_LOG("Failed to create {}x{} download texture", width, height); - return false; - } - } - - dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); - if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride)) - { - RestoreDeviceContext(); - return false; - } - - *out_stride = stride; - *out_format = hdformat; - RestoreDeviceContext(); - return true; -} - -void GPU::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* 
display_rect, - GSVector4i* draw_rect) const -{ - *width = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetWidth() : 1; - *height = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetHeight() : 1; - CalculateDrawRect(*width, *height, true, !g_settings.debugging.show_vram, display_rect, draw_rect); - - const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_settings.debugging.show_vram); - if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) - { - if (mode == DisplayScreenshotMode::InternalResolution) - { - const u32 draw_width = static_cast(display_rect->width()); - const u32 draw_height = static_cast(display_rect->height()); - - // If internal res, scale the computed draw rectangle to the internal res. - // We re-use the draw rect because it's already been AR corrected. - const float sar = - static_cast(m_display_texture_view_width) / static_cast(m_display_texture_view_height); - const float dar = static_cast(draw_width) / static_cast(draw_height); - if (sar >= dar) - { - // stretch height, preserve width - const float scale = static_cast(m_display_texture_view_width) / static_cast(draw_width); - *width = m_display_texture_view_width; - *height = static_cast(std::round(static_cast(draw_height) * scale)); - } - else - { - // stretch width, preserve height - const float scale = static_cast(m_display_texture_view_height) / static_cast(draw_height); - *width = static_cast(std::round(static_cast(draw_width) * scale)); - *height = m_display_texture_view_height; - } - - // DX11 won't go past 16K texture size. 
- const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); - if (*width > max_texture_size) - { - *height = static_cast(static_cast(*height) / - (static_cast(*width) / static_cast(max_texture_size))); - *width = max_texture_size; - } - if (*height > max_texture_size) - { - *height = max_texture_size; - *width = static_cast(static_cast(*width) / - (static_cast(*height) / static_cast(max_texture_size))); - } - } - else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) - { - *width = m_display_texture_view_width; - *height = m_display_texture_view_height; - } - - // Remove padding, it's not part of the framebuffer. - *draw_rect = GSVector4i(0, 0, static_cast(*width), static_cast(*height)); - *display_rect = *draw_rect; - } -} - -bool GPU::RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, - bool show_osd_message) -{ - u32 width, height; - GSVector4i display_rect, draw_rect; - CalculateScreenshotSize(mode, &width, &height, &display_rect, &draw_rect); - - const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution); - if (width == 0 || height == 0) - return false; - - std::vector pixels; - u32 pixels_stride; - GPUTexture::Format pixels_format; - if (!RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &pixels, &pixels_stride, - &pixels_format)) - { - ERROR_LOG("Failed to render {}x{} screenshot", width, height); - return false; - } - - Error error; - auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error); - if (!fp) - { - ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription()); - return false; - } - - std::string osd_key; - if (show_osd_message) - { - // Use a 60 second timeout to give it plenty of time to actually save. 
- osd_key = fmt::format("ScreenshotSaver_{}", path); - Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH, - fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(path)), - 60.0f); - } - - if (compress_on_thread) - { - System::QueueTaskOnThread([width, height, path = std::move(path), fp = fp.release(), quality, - flip_y = g_gpu_device->UsesLowerLeftOrigin(), pixels = std::move(pixels), pixels_stride, - pixels_format, osd_key = std::move(osd_key)]() mutable { - CompressAndWriteTextureToFile(width, height, std::move(path), FileSystem::ManagedCFilePtr(fp), quality, true, - flip_y, std::move(pixels), pixels_stride, pixels_format, std::move(osd_key)); - System::RemoveSelfFromTaskThreads(); - }); - - return true; - } + if (is_frame) + GPUThread::PushCommandAndFrame(cmd); else - { - return CompressAndWriteTextureToFile(width, height, std::move(path), std::move(fp), quality, true, - g_gpu_device->UsesLowerLeftOrigin(), std::move(pixels), pixels_stride, - pixels_format, std::move(osd_key)); - } + GPUThread::PushCommand(cmd); } bool GPU::DumpVRAMToFile(const char* filename) @@ -2748,8 +1836,6 @@ bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride void GPU::DrawDebugStateWindow(float scale) { - DrawRendererStats(); - if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen)) { static constexpr std::array state_strings = { @@ -2804,76 +1890,6 @@ void GPU::DrawDebugStateWindow(float scale) } } -void GPU::DrawRendererStats() -{ -} - -void GPU::OnBufferSwapped() -{ -} - -void GPU::GetStatsString(SmallStringBase& str) -{ - if (IsHardwareRenderer()) - { - str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", - GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives, - m_stats.host_num_draws, m_stats.host_num_barriers, m_stats.host_num_render_passes, - m_stats.host_num_downloads, m_stats.num_copies, m_stats.num_writes); - } - else - { - str.format("{} SW | {} P | 
{} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), - m_stats.num_primitives, m_stats.num_reads, m_stats.num_copies, m_stats.num_writes); - } -} - -void GPU::GetMemoryStatsString(SmallStringBase& str) -{ - const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); - const u32 stream_kb = static_cast((m_stats.host_buffer_streamed + (1024 - 1)) / 1024); - - str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, m_stats.host_num_copies, - m_stats.host_num_uploads); -} - -void GPU::ResetStatistics() -{ - m_counters = {}; - g_gpu_device->ResetStatistics(); -} - -void GPU::UpdateStatistics(u32 frame_count) -{ - const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); - const u32 round = (frame_count - 1); - -#define UPDATE_COUNTER(x) m_stats.x = (m_counters.x + round) / frame_count -#define UPDATE_GPU_STAT(x) m_stats.host_##x = (stats.x + round) / frame_count - - UPDATE_COUNTER(num_reads); - UPDATE_COUNTER(num_writes); - UPDATE_COUNTER(num_copies); - UPDATE_COUNTER(num_vertices); - UPDATE_COUNTER(num_primitives); - - // UPDATE_COUNTER(num_read_texture_updates); - // UPDATE_COUNTER(num_ubo_updates); - - UPDATE_GPU_STAT(buffer_streamed); - UPDATE_GPU_STAT(num_draws); - UPDATE_GPU_STAT(num_barriers); - UPDATE_GPU_STAT(num_render_passes); - UPDATE_GPU_STAT(num_copies); - UPDATE_GPU_STAT(num_downloads); - UPDATE_GPU_STAT(num_uploads); - -#undef UPDATE_GPU_STAT -#undef UPDATE_COUNTER - - ResetStatistics(); -} - bool GPU::StartRecordingGPUDump(const char* path, u32 num_frames /* = 1 */) { if (m_gpu_dump) @@ -2912,7 +1928,8 @@ bool GPU::StartRecordingGPUDump(const char* path, u32 num_frames /* = 1 */) Host::OSD_QUICK_DURATION); // save screenshot to same location to identify it - RenderScreenshotToFile(Path::ReplaceExtension(path, "png"), DisplayScreenshotMode::ScreenResolution, 85, true, false); + GPUBackend::RenderScreenshotToFile(Path::ReplaceExtension(path, "png"), 
DisplayScreenshotMode::ScreenResolution, 85, + true, false); return true; } @@ -3085,10 +2102,8 @@ void GPU::ProcessGPUDumpPacket(GPUDump::PacketType type, const std::span(system_ticks_per_frame)); - - FlushRender(); - UpdateDisplay(); System::IncrementFrameNumber(); + UpdateDisplay(true); System::FrameDone(); } break; diff --git a/src/core/gpu.h b/src/core/gpu.h index a4f858794..9c15a0236 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -38,13 +38,11 @@ enum class PacketType : u8; class Recorder; class Player; } // namespace GPUDump + +class GPUBackend; struct Settings; -namespace Threading { -class Thread; -} - -class GPU +class GPU final { public: enum class BlitterState : u8 @@ -61,7 +59,6 @@ public: DOT_TIMER_INDEX = 0, HBLANK_TIMER_INDEX = 1, MAX_RESOLUTION_SCALE = 32, - DEINTERLACE_BUFFER_COUNT = 4, DRAWING_AREA_COORD_MASK = 1023, }; @@ -87,25 +84,14 @@ public: // Base class constructor. GPU(); - virtual ~GPU(); + ~GPU(); - virtual const Threading::Thread* GetSWThread() const = 0; - virtual bool IsHardwareRenderer() const = 0; - - virtual bool Initialize(Error* error); - virtual void Reset(bool clear_vram); - virtual bool DoState(StateWrapper& sw, GPUTexture** save_to_texture, bool update_display); - - // Graphics API state reset/restore - call when drawing the UI etc. - // TODO: replace with "invalidate cached state" - virtual void RestoreDeviceContext(); + void Initialize(); + void Reset(bool clear_vram); + bool DoState(StateWrapper& sw, bool update_display); // Render statistics debug window. void DrawDebugStateWindow(float scale); - void GetStatsString(SmallStringBase& str); - void GetMemoryStatsString(SmallStringBase& str); - void ResetStatistics(); - void UpdateStatistics(u32 frame_count); void CPUClockChanged(); @@ -169,24 +155,15 @@ public: void SynchronizeCRTC(); /// Recompile shaders/recreate framebuffers when needed. 
- virtual void UpdateSettings(const Settings& old_settings); + void UpdateSettings(const Settings& old_settings); - /// Returns the current resolution scale. - virtual u32 GetResolutionScale() const; - - /// Updates the resolution scale when it's set to automatic. - virtual void UpdateResolutionScale(); - - /// Returns the full display resolution of the GPU, including padding. - std::tuple GetFullDisplayResolution() const; + /// Computes clamped drawing area. + static GSVector4i GetClampedDrawingArea(const GPUDrawingArea& drawing_area); float ComputeHorizontalFrequency() const; float ComputeVerticalFrequency() const; float ComputeDisplayAspectRatio() const; - static std::unique_ptr CreateHardwareRenderer(Error* error); - static std::unique_ptr CreateSoftwareRenderer(Error* error); - // Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns. void ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, float* display_y) const; @@ -217,39 +194,14 @@ public: // Dumps raw VRAM to a file. bool DumpVRAMToFile(const char* filename); - // Ensures all buffered vertices are drawn. - virtual void FlushRender() = 0; - /// Helper function for computing the draw rectangle in a larger window. - void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, - GSVector4i* display_rect, GSVector4i* draw_rect) const; + static void CalculateDrawRect(u32 window_width, u32 window_height, u32 crtc_display_width, u32 crtc_display_height, + s32 display_origin_left, s32 display_origin_top, u32 display_vram_width, + u32 display_vram_height, DisplayRotation rotation, float aspect_ratio, + bool stretch_vertically, bool integer_scale, GSVector4i* display_rect, + GSVector4i* draw_rect); - /// Helper function for computing screenshot bounds. 
- void CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, - GSVector4i* draw_rect) const; - - /// Helper function to save current display texture to PNG. - bool WriteDisplayTextureToFile(std::string path); - - /// Renders the display, optionally with postprocessing to the specified image. - bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx, std::vector* out_pixels, u32* out_stride, - GPUTexture::Format* out_format); - - /// Helper function to save screenshot to PNG. - bool RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, - bool show_osd_message); - - /// Draws the current display texture, with any post-processing. - GPUDevice::PresentResult PresentDisplay(); - - /// Sends the current frame to media capture. - bool SendDisplayToMediaCapture(MediaCapture* cap); - - /// Reads the CLUT from the specified coordinates, accounting for wrap-around. 
- static void ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit); - -protected: +private: TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const; TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const; @@ -260,16 +212,6 @@ protected: } ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; } - static constexpr std::tuple UnpackTexcoord(u16 texcoord) - { - return std::make_tuple(static_cast(texcoord), static_cast(texcoord >> 8)); - } - - static constexpr std::tuple UnpackColorRGB24(u32 rgb24) - { - return std::make_tuple(static_cast(rgb24), static_cast(rgb24 >> 8), static_cast(rgb24 >> 16)); - } - static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha); @@ -289,10 +231,10 @@ protected: void UpdateGPUIdle(); /// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...). - ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); } + ALWAYS_INLINE u8 GetInterlacedDisplayField() const { return m_crtc_state.interlaced_field; } /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. - ALWAYS_INLINE u32 GetActiveLineLSB() const { return ZeroExtend32(m_crtc_state.active_line_lsb); } + ALWAYS_INLINE u8 GetActiveLineLSB() const { return m_crtc_state.active_line_lsb; } /// Updates drawing area that's suitablef or clamping. 
void SetClampedDrawingArea(); @@ -327,16 +269,15 @@ protected: void InvalidateCLUT(); bool IsCLUTValid() const; - // Rendering in the backend - virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0; - virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) = 0; - virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) = 0; - virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) = 0; - virtual void DispatchRenderCommand() = 0; - virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; - virtual void UpdateDisplay() = 0; - virtual void DrawRendererStats(); - virtual void OnBufferSwapped(); + void ReadVRAM(u16 x, u16 y, u16 width, u16 height); + void UpdateVRAM(u16 x, u16 y, u16 width, u16 height, const void* data, bool set_mask, bool check_mask); + + void UpdateDisplay(bool is_frame); + + void PrepareForDraw(); + void FinishPolyline(); + void FillBackendCommandParameters(GPUBackendCommand* cmd) const; + void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(GSVector2i v1, GSVector2i v2, GSVector2i v3, bool shaded, bool textured, bool semitransparent) @@ -433,14 +374,10 @@ protected: u32 texture_window_value; // decoded values + // TODO: Make this a command GPUTextureWindow texture_window; bool texture_x_flip; bool texture_y_flip; - bool texture_page_changed; - - ALWAYS_INLINE bool IsTexturePageChanged() const { return texture_page_changed; } - ALWAYS_INLINE void SetTexturePageChanged() { texture_page_changed = true; } - ALWAYS_INLINE void ClearTexturePageChangedFlag() { texture_page_changed = false; } } m_draw_mode = {}; GPUDrawingArea m_drawing_area = {}; @@ -574,65 +511,7 @@ protected: TickCount m_max_run_ahead = 128; u32 m_fifo_size = 128; - void ClearDisplayTexture(); - void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_texture, s32 view_x, s32 view_y, s32 
view_width, - s32 view_height); - - GPUDevice::PresentResult RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx); - - bool Deinterlace(u32 field, u32 line_skip); - bool DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip); - bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); - void DestroyDeinterlaceTextures(); - bool ApplyChromaSmoothing(); - - u32 m_current_deinterlace_buffer = 0; - std::unique_ptr m_deinterlace_pipeline; - std::unique_ptr m_deinterlace_extract_pipeline; - std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; - std::unique_ptr m_deinterlace_texture; - - std::unique_ptr m_chroma_smoothing_pipeline; - std::unique_ptr m_chroma_smoothing_texture; - - std::unique_ptr m_display_pipeline; - GPUTexture* m_display_texture = nullptr; - GPUTexture* m_display_depth_buffer = nullptr; - s32 m_display_texture_view_x = 0; - s32 m_display_texture_view_y = 0; - s32 m_display_texture_view_width = 0; - s32 m_display_texture_view_height = 0; - - struct Counters - { - u32 num_reads; - u32 num_writes; - u32 num_copies; - u32 num_vertices; - u32 num_primitives; - - // u32 num_read_texture_updates; - // u32 num_ubo_updates; - }; - - struct Stats : Counters - { - size_t host_buffer_streamed; - u32 host_num_draws; - u32 host_num_barriers; - u32 host_num_render_passes; - u32 host_num_copies; - u32 host_num_downloads; - u32 host_num_uploads; - }; - - Counters m_counters = {}; - Stats m_stats = {}; - private: - bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error); - using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandlerTable = std::array; static GP0CommandHandlerTable GenerateGP0CommandHandlerTable(); diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index a8b17818a..49faaad02 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -2,289 +2,398 @@ // 
SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "gpu_backend.h" +#include "gpu.h" +#include "gpu_shadergen.h" +#include "gpu_sw_rasterizer.h" +#include "gpu_thread.h" +#include "host.h" +#include "performance_counters.h" +#include "settings.h" +#include "system.h" +#include "system_private.h" +#include "util/gpu_device.h" +#include "util/image.h" +#include "util/imgui_manager.h" +#include "util/media_capture.h" +#include "util/postprocessing.h" #include "util/state_wrapper.h" #include "common/align.h" +#include "common/error.h" +#include "common/file_system.h" +#include "common/gsvector_formatter.h" #include "common/log.h" +#include "common/path.h" +#include "common/small_string.h" +#include "common/string_util.h" #include "common/timer.h" +#include "IconsEmoji.h" +#include "IconsFontAwesome5.h" +#include "fmt/format.h" + +#include +#include + LOG_CHANNEL(GPUBackend); -std::unique_ptr g_gpu_backend; - -GPUBackend::GPUBackend() = default; - -GPUBackend::~GPUBackend() = default; - -bool GPUBackend::Initialize(bool use_thread) +namespace { +struct Counters { - if (use_thread) - StartGPUThread(); + u32 num_reads; + u32 num_writes; + u32 num_copies; + u32 num_vertices; + u32 num_primitives; +}; + +struct Stats : Counters +{ + size_t host_buffer_streamed; + u32 host_num_draws; + u32 host_num_barriers; + u32 host_num_render_passes; + u32 host_num_copies; + u32 host_num_downloads; + u32 host_num_uploads; +}; +} // namespace + +static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, + u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, + u32 texture_data_stride, GPUTexture::Format texture_format, + std::string osd_key); + +static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; + +static Counters s_counters = {}; +static Stats s_stats = {}; + +GPUBackend::GPUBackend() +{ + GPU_SW_Rasterizer::SelectImplementation(); + ResetStatistics(); + + 
m_queued_frames.store(0, std::memory_order_release); + m_waiting_for_gpu_thread.store(false, std::memory_order_release); + + m_display_width = 0; + m_display_height = 0; + m_display_origin_left = 0; + m_display_origin_top = 0; + m_display_vram_width = 0; + m_display_vram_height = 0; + m_display_aspect_ratio = 1.0f; +} + +GPUBackend::~GPUBackend() +{ + DestroyDeinterlaceTextures(); + g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); +} + +bool GPUBackend::Initialize(bool clear_vram, Error* error) +{ + if (!CompileDisplayPipelines(true, true, g_gpu_settings.display_24bit_chroma_smoothing, error)) + return false; return true; } -void GPUBackend::Reset() +void GPUBackend::UpdateSettings(const Settings& old_settings) { - Sync(true); - DrawingAreaChanged(GPUDrawingArea{0, 0, 0, 0}, GSVector4i::zero()); -} + FlushRender(); -void GPUBackend::SetThreadEnabled(bool use_thread) -{ - Sync(true); + if (g_gpu_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) + GPUBackend::ResetStatistics(); - if (m_use_gpu_thread != use_thread) + if (g_gpu_settings.display_scaling != old_settings.display_scaling || + g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || + g_gpu_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing) { - if (!use_thread) - StopGPUThread(); - else - StartGPUThread(); + // Toss buffers on mode change. 
+ if (g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) + DestroyDeinterlaceTextures(); + + if (!CompileDisplayPipelines( + g_gpu_settings.display_scaling != old_settings.display_scaling, + g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, + g_gpu_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing, nullptr)) + { + Panic("Failed to compile display pipeline on settings change."); + } } } -void GPUBackend::Shutdown() +void GPUBackend::UpdateResolutionScale() { - StopGPUThread(); +} + +u32 GPUBackend::GetResolutionScale() const +{ + return 1u; +} + +std::tuple GPUBackend::GetFullDisplayResolution() const +{ + return std::tie(m_display_width, m_display_height); +} + +void GPUBackend::RestoreDeviceContext() +{ +} + +GPUThreadCommand* GPUBackend::NewClearVRAMCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearVRAM, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUBackend::NewClearDisplayCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearDisplay, sizeof(GPUThreadCommand))); +} + +GPUBackendUpdateDisplayCommand* GPUBackend::NewUpdateDisplayCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::UpdateDisplay, sizeof(GPUBackendUpdateDisplayCommand))); +} + +GPUThreadCommand* GPUBackend::NewClearCacheCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearCache, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUBackend::NewBufferSwappedCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::BufferSwapped, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUBackend::NewUpdateResolutionScaleCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::UpdateResolutionScale, sizeof(GPUThreadCommand))); +} + +GPUBackendReadVRAMCommand* 
GPUBackend::NewReadVRAMCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ReadVRAM, sizeof(GPUBackendReadVRAMCommand))); } GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::FillVRAM, sizeof(GPUBackendFillVRAMCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::FillVRAM, sizeof(GPUBackendFillVRAMCommand))); } GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words) { const u32 size = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16)); GPUBackendUpdateVRAMCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::UpdateVRAM, size)); + static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::UpdateVRAM, size)); return cmd; } GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::CopyVRAM, sizeof(GPUBackendCopyVRAMCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::CopyVRAM, sizeof(GPUBackendCopyVRAMCommand))); } GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::SetDrawingArea, sizeof(GPUBackendSetDrawingAreaCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::SetDrawingArea, sizeof(GPUBackendSetDrawingAreaCommand))); } GPUBackendUpdateCLUTCommand* GPUBackend::NewUpdateCLUTCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::UpdateCLUT, sizeof(GPUBackendUpdateCLUTCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::UpdateCLUT, sizeof(GPUBackendUpdateCLUTCommand))); } GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex)); GPUBackendDrawPolygonCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::DrawPolygon, size)); - cmd->num_vertices = 
Truncate16(num_vertices); + static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::DrawPolygon, size)); + cmd->num_vertices = Truncate8(num_vertices); + return cmd; +} + +GPUBackendDrawPrecisePolygonCommand* GPUBackend::NewDrawPrecisePolygonCommand(u32 num_vertices) +{ + const u32 size = + sizeof(GPUBackendDrawPrecisePolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + GPUBackendDrawPrecisePolygonCommand* cmd = static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::DrawPrecisePolygon, size)); + cmd->num_vertices = Truncate8(num_vertices); return cmd; } GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::DrawRectangle, sizeof(GPUBackendDrawRectangleCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::DrawRectangle, sizeof(GPUBackendDrawRectangleCommand))); } GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex)); GPUBackendDrawLineCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::DrawLine, size)); + static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::DrawLine, size)); cmd->num_vertices = Truncate16(num_vertices); return cmd; } -void* GPUBackend::AllocateCommand(GPUBackendCommandType command, u32 size) +void GPUBackend::PushCommand(GPUThreadCommand* cmd) { - // Ensure size is a multiple of 4 so we don't end up with an unaligned command. 
- size = Common::AlignUpPow2(size, 4); + GPUThread::PushCommand(cmd); +} - for (;;) +void GPUBackend::PushCommandAndWakeThread(GPUThreadCommand* cmd) +{ + GPUThread::PushCommandAndWakeThread(cmd); +} + +void GPUBackend::PushCommandAndSync(GPUThreadCommand* cmd, bool spin) +{ + GPUThread::PushCommandAndSync(cmd, spin); +} + +bool GPUBackend::IsUsingHardwareBackend() +{ + return (GPUThread::GetRequestedRenderer().value_or(GPURenderer::Software) != GPURenderer::Software); +} + +bool GPUBackend::BeginQueueFrame() +{ + const u32 queued_frames = m_queued_frames.fetch_add(1, std::memory_order_acq_rel) + 1; + if (queued_frames < g_settings.gpu_max_queued_frames) + return false; + + DEV_LOG("<-- {} queued frames, {} max, blocking CPU thread", queued_frames, g_settings.gpu_max_queued_frames); + m_waiting_for_gpu_thread.store(true, std::memory_order_release); + return true; +} + +void GPUBackend::WaitForOneQueuedFrame() +{ + // In between this and the post call, we may have finished the frame. Check. + if (m_queued_frames.load(std::memory_order_acquire) < g_settings.gpu_max_queued_frames) { - u32 read_ptr = m_command_fifo_read_ptr.load(); - u32 write_ptr = m_command_fifo_write_ptr.load(); - if (read_ptr > write_ptr) + // It's possible that the GPU thread has already signaled the semaphore. + // If so, then we still need to drain it, otherwise waits in the future will return prematurely. + bool expected = true; + if (m_waiting_for_gpu_thread.compare_exchange_strong(expected, false, std::memory_order_acq_rel, + std::memory_order_relaxed)) { - u32 available_size = read_ptr - write_ptr; - while (available_size < (size + sizeof(GPUBackendCommandType))) - { - WakeGPUThread(); - read_ptr = m_command_fifo_read_ptr.load(); - available_size = (read_ptr > write_ptr) ? 
(read_ptr - write_ptr) : (COMMAND_QUEUE_SIZE - write_ptr); - } + return; } - else - { - const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr; - if ((size + sizeof(GPUBackendCommand)) > available_size) - { - // allocate a dummy command to wrap the buffer around - GPUBackendCommand* dummy_cmd = reinterpret_cast(&m_command_fifo_data[write_ptr]); - dummy_cmd->type = GPUBackendCommandType::Wraparound; - dummy_cmd->size = available_size; - dummy_cmd->params.bits = 0; - m_command_fifo_write_ptr.store(0); - continue; - } - } - - GPUBackendCommand* cmd = reinterpret_cast(&m_command_fifo_data[write_ptr]); - cmd->type = command; - cmd->size = size; - return cmd; } + + m_gpu_thread_wait.Wait(); + + // Sanity check: queued frames should be in range now. If they're not, the semaphore accounting is broken. + Assert(m_queued_frames.load(std::memory_order_acquire) < g_settings.gpu_max_queued_frames); } -u32 GPUBackend::GetPendingCommandSize() const +bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, u32* out_width, u32* out_height, + std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) { - const u32 read_ptr = m_command_fifo_read_ptr.load(); - const u32 write_ptr = m_command_fifo_write_ptr.load(); - return (write_ptr >= read_ptr) ? 
(write_ptr - read_ptr) : (COMMAND_QUEUE_SIZE - read_ptr + write_ptr); + bool result; + + GPUThreadRenderScreenshotToBufferCommand* cmd = + static_cast(GPUThread::AllocateCommand( + GPUBackendCommandType::RenderScreenshotToBuffer, sizeof(GPUThreadRenderScreenshotToBufferCommand))); + cmd->width = width; + cmd->height = height; + cmd->out_width = out_width; + cmd->out_height = out_height; + cmd->out_pixels = out_pixels; + cmd->out_stride = out_stride; + cmd->out_format = out_format; + cmd->out_result = &result; + cmd->postfx = postfx; + PushCommandAndSync(cmd, false); + + return result; } -void GPUBackend::PushCommand(GPUBackendCommand* cmd) -{ - if (!m_use_gpu_thread) - { - // single-thread mode - if (cmd->type != GPUBackendCommandType::Sync) - HandleCommand(cmd); - } - else - { - const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size; - DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); - UNREFERENCED_VARIABLE(new_write_ptr); - if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU) - WakeGPUThread(); - } -} - -void GPUBackend::WakeGPUThread() -{ - std::unique_lock lock(m_sync_mutex); - if (!m_gpu_thread_sleeping.load()) - return; - - m_wake_gpu_thread_cv.notify_one(); -} - -void GPUBackend::StartGPUThread() -{ - m_gpu_loop_done.store(false); - m_use_gpu_thread = true; - m_gpu_thread.Start([this]() { RunGPULoop(); }); - INFO_LOG("GPU thread started."); -} - -void GPUBackend::StopGPUThread() -{ - if (!m_use_gpu_thread) - return; - - m_gpu_loop_done.store(true); - WakeGPUThread(); - m_gpu_thread.Join(); - m_use_gpu_thread = false; - INFO_LOG("GPU thread stopped."); -} - -void GPUBackend::Sync(bool allow_sleep) -{ - if (!m_use_gpu_thread) - return; - - GPUBackendSyncCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::Sync, sizeof(GPUBackendSyncCommand))); - cmd->allow_sleep = allow_sleep; - PushCommand(cmd); - WakeGPUThread(); - - m_sync_semaphore.Wait(); -} - -void GPUBackend::RunGPULoop() -{ - static constexpr double 
SPIN_TIME_NS = 1 * 1000000; - Common::Timer::Value last_command_time = 0; - - for (;;) - { - u32 write_ptr = m_command_fifo_write_ptr.load(); - u32 read_ptr = m_command_fifo_read_ptr.load(); - if (read_ptr == write_ptr) - { - const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); - if (Common::Timer::ConvertValueToNanoseconds(current_time - last_command_time) < SPIN_TIME_NS) - continue; - - std::unique_lock lock(m_sync_mutex); - m_gpu_thread_sleeping.store(true); - m_wake_gpu_thread_cv.wait(lock, [this]() { return m_gpu_loop_done.load() || GetPendingCommandSize() > 0; }); - m_gpu_thread_sleeping.store(false); - - if (m_gpu_loop_done.load()) - break; - else - continue; - } - - if (write_ptr < read_ptr) - write_ptr = COMMAND_QUEUE_SIZE; - - bool allow_sleep = false; - while (read_ptr < write_ptr) - { - const GPUBackendCommand* cmd = reinterpret_cast(&m_command_fifo_data[read_ptr]); - read_ptr += cmd->size; - - switch (cmd->type) - { - case GPUBackendCommandType::Wraparound: - { - DebugAssert(read_ptr == COMMAND_QUEUE_SIZE); - write_ptr = m_command_fifo_write_ptr.load(); - read_ptr = 0; - } - break; - - case GPUBackendCommandType::Sync: - { - DebugAssert(read_ptr == write_ptr); - m_sync_semaphore.Post(); - allow_sleep = static_cast(cmd)->allow_sleep; - } - break; - - default: - HandleCommand(cmd); - break; - } - } - - last_command_time = allow_sleep ? 
0 : Common::Timer::GetCurrentValue(); - m_command_fifo_read_ptr.store(read_ptr); - } -} - -void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) +void GPUBackend::HandleCommand(const GPUThreadCommand* cmd) { switch (cmd->type) { + case GPUBackendCommandType::ClearVRAM: + { + ClearVRAM(); + } + break; + + case GPUBackendCommandType::LoadState: + { + LoadState(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::ClearDisplay: + { + ClearDisplay(); + } + break; + + case GPUBackendCommandType::UpdateDisplay: + { + HandleUpdateDisplayCommand(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::ClearCache: + { + ClearCache(); + } + break; + + case GPUBackendCommandType::BufferSwapped: + { + OnBufferSwapped(); + } + break; + + case GPUBackendCommandType::UpdateResolutionScale: + { + UpdateResolutionScale(); + } + break; + + case GPUBackendCommandType::RenderScreenshotToBuffer: + { + HandleRenderScreenshotToBuffer(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::RenderScreenshotToFile: + { + HandleRenderScreenshotToFile(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::ReadVRAM: + { + const GPUBackendReadVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_reads++; + ReadVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height)); + } + break; + case GPUBackendCommandType::FillVRAM: { - FlushRender(); const GPUBackendFillVRAMCommand* ccmd = static_cast(cmd); FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->color, ccmd->params); @@ -293,8 +402,8 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) case GPUBackendCommandType::UpdateVRAM: { - FlushRender(); const GPUBackendUpdateVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_writes++; UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->data, ccmd->params); } @@ -302,8 
+411,8 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) case GPUBackendCommandType::CopyVRAM: { - FlushRender(); const GPUBackendCopyVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_copies++; CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x), ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params); } @@ -313,7 +422,8 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) { FlushRender(); const GPUBackendSetDrawingAreaCommand* ccmd = static_cast(cmd); - DrawingAreaChanged(ccmd->new_area, GSVector4i::load(ccmd->new_clamped_area)); + GPU_SW_Rasterizer::g_drawing_area = ccmd->new_area; + DrawingAreaChanged(); } break; @@ -326,23 +436,1155 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) case GPUBackendCommandType::DrawPolygon: { - DrawPolygon(static_cast(cmd)); + const GPUBackendDrawPolygonCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives++; + DrawPolygon(ccmd); + } + break; + + case GPUBackendCommandType::DrawPrecisePolygon: + { + const GPUBackendDrawPolygonCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives++; + DrawPrecisePolygon(static_cast(cmd)); } break; case GPUBackendCommandType::DrawRectangle: { - DrawRectangle(static_cast(cmd)); + const GPUBackendDrawRectangleCommand* ccmd = static_cast(cmd); + s_counters.num_vertices++; + s_counters.num_primitives++; + DrawSprite(ccmd); } break; case GPUBackendCommandType::DrawLine: { - DrawLine(static_cast(cmd)); + const GPUBackendDrawLineCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives += ccmd->num_vertices / 2; + DrawLine(ccmd); } break; + DefaultCaseIsUnreachable(); + } +} + +bool GPUBackend::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error) +{ + const GPUShaderGen 
shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, + g_gpu_device->GetFeatures().framebuffer_fetch); + + GPUPipeline::GraphicsConfig plconfig; + plconfig.input_layout.vertex_stride = 0; + plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.geometry_shader = nullptr; + plconfig.depth_format = GPUTexture::Format::Unknown; + plconfig.samples = 1; + plconfig.per_sample_shading = false; + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; + + if (display) + { + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.SetTargetFormats(g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : + GPUTexture::Format::RGBA8); + + std::string vs = shadergen.GenerateDisplayVertexShader(); + std::string fs; + switch (g_settings.display_scaling) + { + case DisplayScalingMode::BilinearSharp: + fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); + break; + + case DisplayScalingMode::BilinearSmooth: + case DisplayScalingMode::BilinearInteger: + fs = shadergen.GenerateDisplayFragmentShader(true, false); + break; + + case DisplayScalingMode::Nearest: + case DisplayScalingMode::NearestInteger: + default: + fs = shadergen.GenerateDisplayFragmentShader(false, true); + break; + } + + std::unique_ptr vso = + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error); + std::unique_ptr fso = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error); + if (!vso || !fso) + return false; + GL_OBJECT_NAME(vso, "Display Vertex Shader"); + GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", + Settings::GetDisplayScalingName(g_gpu_settings.display_scaling)); + plconfig.vertex_shader = vso.get(); + 
plconfig.fragment_shader = fso.get(); + if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", + Settings::GetDisplayScalingName(g_gpu_settings.display_scaling)); + } + + if (deinterlace) + { + plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); + + std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), + shadergen.GenerateScreenQuadVertexShader(), error); + if (!vso) + return false; + GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); + + std::unique_ptr fso; + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateInterleavedFieldExtractFragmentShader(), error))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + + GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); + + switch (g_gpu_settings.display_deinterlacing_mode) + { + case DisplayDeinterlacingMode::Disabled: + case DisplayDeinterlacingMode::Progressive: + break; + + case DisplayDeinterlacingMode::Weave: + { + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateDeinterlaceWeaveFragmentShader(), error))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); + } + break; + + 
case DisplayDeinterlacingMode::Blend: + { + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateDeinterlaceBlendFragmentShader(), error))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); + } + break; + + case DisplayDeinterlacingMode::Adaptive: + { + fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateFastMADReconstructFragmentShader(), error); + if (!fso) + return false; + + GL_OBJECT_NAME(fso, "FastMAD Reconstruct Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); + } + break; + + default: + UnreachableCode(); + } + } + + if (chroma_smoothing) + { + m_chroma_smoothing_pipeline.reset(); + g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); + + if (g_gpu_settings.display_24bit_chroma_smoothing) + { + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); + + std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), + shadergen.GenerateScreenQuadVertexShader(), error); + std::unique_ptr fso = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateChromaSmoothingFragmentShader(), error); + if (!vso || !fso) + return false; + GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); + 
GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); + + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + return false; + GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); + } + } + + return true; +} + +void GPUBackend::HandleUpdateDisplayCommand(const GPUBackendUpdateDisplayCommand* cmd) +{ + const GPUBackendUpdateDisplayCommand* ccmd = static_cast(cmd); + m_display_width = ccmd->display_width; + m_display_height = ccmd->display_height; + m_display_origin_left = ccmd->display_origin_left; + m_display_origin_top = ccmd->display_origin_top; + m_display_vram_width = ccmd->display_vram_width; + m_display_vram_height = ccmd->display_vram_height; + m_display_aspect_ratio = ccmd->display_aspect_ratio; + + UpdateDisplay(ccmd); + + if (ccmd->media_capture) + SendDisplayToMediaCapture(ccmd->media_capture); + + if (ccmd->is_frame) + Host::FrameDoneOnGPUThread(this, cmd->frame_number); + + if (ccmd->present_frame) + { + GPUThread::Internal::PresentFrame(ccmd->allow_present_skip, ccmd->present_time); + + m_queued_frames.fetch_sub(1, std::memory_order_acq_rel); + + bool expected = true; + if (m_waiting_for_gpu_thread.compare_exchange_strong(expected, false, std::memory_order_acq_rel, + std::memory_order_relaxed)) + { + DEV_LOG("--> Unblocking CPU thread"); + m_gpu_thread_wait.Post(); + } + } + + // Update perf counters *after* throttling, we want to measure from start-of-frame + // to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different + // amounts of computation happening in each frame). + if (ccmd->is_frame) + PerformanceCounters::Update(this, ccmd->frame_number, ccmd->internal_frame_number); +} + +void GPUBackend::ClearDisplay() +{ + ClearDisplayTexture(); + + // Just recycle the textures, it'll get re-fetched. 
+ DestroyDeinterlaceTextures(); +} + +void GPUBackend::ClearDisplayTexture() +{ + m_display_texture = nullptr; + m_display_texture_view_x = 0; + m_display_texture_view_y = 0; + m_display_texture_view_width = 0; + m_display_texture_view_height = 0; +} + +void GPUBackend::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, + s32 view_width, s32 view_height) +{ + DebugAssert(texture); + + if (g_settings.display_auto_resize_window && + (view_width != m_display_texture_view_width || view_height != m_display_texture_view_height)) + { + Host::RunOnCPUThread([]() { System::RequestDisplaySize(); }); + } + + m_display_texture = texture; + m_display_depth_buffer = depth_buffer; + m_display_texture_view_x = view_x; + m_display_texture_view_y = view_y; + m_display_texture_view_width = view_width; + m_display_texture_view_height = view_height; +} + +GPUDevice::PresentResult GPUBackend::PresentDisplay() +{ + FlushRender(); + + if (!g_gpu_device->HasMainSwapChain()) + return GPUDevice::PresentResult::SkipPresent; + + GSVector4i display_rect; + GSVector4i draw_rect; + CalculateDrawRect(g_gpu_device->GetMainSwapChain()->GetWidth(), g_gpu_device->GetMainSwapChain()->GetHeight(), + !g_gpu_settings.debugging.show_vram, true, &display_rect, &draw_rect); + return RenderDisplay(nullptr, display_rect, draw_rect, !g_gpu_settings.debugging.show_vram); +} + +GPUDevice::PresentResult GPUBackend::RenderDisplay(GPUTexture* target, const GSVector4i display_rect, + const GSVector4i draw_rect, bool postfx) +{ + GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); + + if (m_display_texture) + m_display_texture->MakeReadyForSampling(); + + // Internal post-processing. 
+ GPUTexture* display_texture = m_display_texture; + s32 display_texture_view_x = m_display_texture_view_x; + s32 display_texture_view_y = m_display_texture_view_y; + s32 display_texture_view_width = m_display_texture_view_width; + s32 display_texture_view_height = m_display_texture_view_height; + if (postfx && display_texture && PostProcessing::InternalChain.IsActive() && + PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width, + display_texture_view_height)) + { + DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 && + static_cast(display_texture->GetWidth()) == display_texture_view_width && + static_cast(display_texture->GetHeight()) == display_texture_view_height); + + // Now we can apply the post chain. + GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); + if (const GPUDevice::PresentResult pres = PostProcessing::InternalChain.Apply( + display_texture, m_display_depth_buffer, post_output_texture, + GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), display_texture_view_width, + display_texture_view_height, m_display_width, m_display_height); + pres != GPUDevice::PresentResult::OK) + { + return pres; + } + else + { + display_texture_view_x = 0; + display_texture_view_y = 0; + display_texture = post_output_texture; + display_texture->MakeReadyForSampling(); + } + } + + const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetMainSwapChain()->GetFormat(); + const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetMainSwapChain()->GetWidth(); + const u32 target_height = target ? 
target->GetHeight() : g_gpu_device->GetMainSwapChain()->GetHeight(); + const bool really_postfx = (postfx && PostProcessing::DisplayChain.IsActive() && !g_gpu_device->HasMainSwapChain() && + hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && + PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); + const GSVector4i real_draw_rect = + g_gpu_device->UsesLowerLeftOrigin() ? GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect; + if (really_postfx) + { + g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR); + g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture()); + } + else + { + if (target) + { + g_gpu_device->SetRenderTarget(target); + } + else + { + const GPUDevice::PresentResult pres = g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain()); + if (pres != GPUDevice::PresentResult::OK) + return pres; + } + } + + if (display_texture) + { + bool texture_filter_linear = false; + + struct Uniforms + { + float src_rect[4]; + float src_size[4]; + float clamp_rect[4]; + float params[4]; + float rotation_matrix[2][2]; + } uniforms; + std::memset(uniforms.params, 0, sizeof(uniforms.params)); + + switch (g_gpu_settings.display_scaling) + { + case DisplayScalingMode::Nearest: + case DisplayScalingMode::NearestInteger: + break; + + case DisplayScalingMode::BilinearSmooth: + case DisplayScalingMode::BilinearInteger: + texture_filter_linear = true; + break; + + case DisplayScalingMode::BilinearSharp: + { + texture_filter_linear = true; + uniforms.params[0] = std::max( + std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f); + uniforms.params[1] = std::max( + std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f); + uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; + uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; + } + break; + + 
default: + UnreachableCode(); + break; + } + + g_gpu_device->SetPipeline(m_display_pipeline.get()); + g_gpu_device->SetTextureSampler( + 0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); + + // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because + // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. + const float rcp_width = 1.0f / static_cast(display_texture->GetWidth()); + const float rcp_height = 1.0f / static_cast(display_texture->GetHeight()); + uniforms.src_rect[0] = static_cast(display_texture_view_x) * rcp_width; + uniforms.src_rect[1] = static_cast(display_texture_view_y) * rcp_height; + uniforms.src_rect[2] = static_cast(display_texture_view_width) * rcp_width; + uniforms.src_rect[3] = static_cast(display_texture_view_height) * rcp_height; + uniforms.clamp_rect[0] = (static_cast(display_texture_view_x) + 0.5f) * rcp_width; + uniforms.clamp_rect[1] = (static_cast(display_texture_view_y) + 0.5f) * rcp_height; + uniforms.clamp_rect[2] = + (static_cast(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width; + uniforms.clamp_rect[3] = + (static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height; + uniforms.src_size[0] = static_cast(display_texture->GetWidth()); + uniforms.src_size[1] = static_cast(display_texture->GetHeight()); + uniforms.src_size[2] = rcp_width; + uniforms.src_size[3] = rcp_height; + + if (g_gpu_settings.display_rotation != DisplayRotation::Normal) + { + static constexpr const std::array(DisplayRotation::Count) - 1> rotation_radians = {{ + static_cast(std::numbers::pi * 1.5f), // Rotate90 + static_cast(std::numbers::pi), // Rotate180 + static_cast(std::numbers::pi / 2.0), // Rotate270 + }}; + + GSMatrix2x2::Rotation(rotation_radians[static_cast(g_gpu_settings.display_rotation) - 1]) + .store(uniforms.rotation_matrix); + } + else + 
{ + GSMatrix2x2::Identity().store(uniforms.rotation_matrix); + } + + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + + g_gpu_device->SetViewportAndScissor(real_draw_rect); + g_gpu_device->Draw(3, 0); + } + + if (really_postfx) + { + DebugAssert(!g_gpu_settings.debugging.show_vram); + + // "original size" in postfx includes padding. + const float upscale_x = + m_display_texture ? static_cast(m_display_texture_view_width) / static_cast(m_display_vram_width) : + 1.0f; + const float upscale_y = m_display_texture ? static_cast(m_display_texture_view_height) / + static_cast(m_display_vram_height) : + 1.0f; + const s32 orig_width = static_cast(std::ceil(static_cast(m_display_width) * upscale_x)); + const s32 orig_height = static_cast(std::ceil(static_cast(m_display_height) * upscale_y)); + + return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, + display_rect, orig_width, orig_height, m_display_width, m_display_height); + } + else + { + return GPUDevice::PresentResult::OK; + } +} + +void GPUBackend::SendDisplayToMediaCapture(MediaCapture* cap) +{ + GPUTexture* target = cap->GetRenderTexture(); + if (!target) [[unlikely]] + { + WARNING_LOG("Failed to get video capture render texture."); + Host::RunOnCPUThread(&System::StopMediaCapture); + return; + } + + const bool apply_aspect_ratio = + (g_settings.display_screenshot_mode != DisplayScreenshotMode::UncorrectedInternalResolution); + const bool postfx = (g_settings.display_screenshot_mode != DisplayScreenshotMode::InternalResolution); + GSVector4i display_rect, draw_rect; + CalculateDrawRect(target->GetWidth(), target->GetHeight(), !g_settings.debugging.show_vram, apply_aspect_ratio, + &display_rect, &draw_rect); + + // Not cleared by RenderDisplay(). 
+ g_gpu_device->ClearRenderTarget(target, GPUDevice::DEFAULT_CLEAR_COLOR); + + if (RenderDisplay(target, display_rect, draw_rect, postfx) != GPUDevice::PresentResult::OK || + !cap->DeliverVideoFrame(target)) [[unlikely]] + { + WARNING_LOG("Failed to render/deliver video capture frame."); + Host::RunOnCPUThread(&System::StopMediaCapture); + return; + } +} + +void GPUBackend::DestroyDeinterlaceTextures() +{ + for (std::unique_ptr& tex : m_deinterlace_buffers) + g_gpu_device->RecycleTexture(std::move(tex)); + g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); + m_current_deinterlace_buffer = 0; +} + +bool GPUBackend::Deinterlace(u32 field, u32 line_skip) +{ + GPUTexture* src = m_display_texture; + const u32 x = m_display_texture_view_x; + const u32 y = m_display_texture_view_y; + const u32 width = m_display_texture_view_width; + const u32 height = m_display_texture_view_height; + + switch (g_settings.display_deinterlacing_mode) + { + case DisplayDeinterlacingMode::Disabled: + { + if (line_skip == 0) + return true; + + // Still have to extract the field. 
+ if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] + return false; + + SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height); + return true; + } + + case DisplayDeinterlacingMode::Weave: + { + GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); + + const u32 full_height = height * 2; + if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + src->MakeReadyForSampling(); + + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, field, line_skip}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); + return true; + } + + case DisplayDeinterlacingMode::Blend: + { + constexpr u32 NUM_BLEND_BUFFERS = 2; + + GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); + + const u32 this_buffer = m_current_deinterlace_buffer; + m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; + GL_INS_FMT("Current buffer: {}", this_buffer); + if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || + !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + // TODO: could be implemented with alpha blending instead.. 
+ + g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height); + return true; + } + + case DisplayDeinterlacingMode::Adaptive: + { + GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, + line_skip); + + const u32 full_height = height * 2; + const u32 this_buffer = m_current_deinterlace_buffer; + m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; + GL_INS_FMT("Current buffer: {}", this_buffer); + if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || + !DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), + 
g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {field, full_height}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); + return true; + } + default: UnreachableCode(); } } + +bool GPUBackend::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, + u32 line_skip) +{ + if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width || + m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, false)) [[unlikely]] + { + return false; + } + + GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx); + } + + GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get(); + g_gpu_device->InvalidateRenderTarget(dst); + + // If we're not skipping lines, then we can simply copy the texture. + if (line_skip == 0 && src->GetFormat() == dst->GetFormat()) + { + GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip); + g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height); + } + else + { + GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height, + line_skip); + + // Otherwise, we need to extract every other line from the texture. 
+ src->MakeReadyForSampling(); + g_gpu_device->SetRenderTarget(dst); + g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); + g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, line_skip}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + GL_POP(); + } + + dst->MakeReadyForSampling(); + return true; +} + +bool GPUBackend::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) +{ + if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || + m_deinterlace_texture->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, preserve)) [[unlikely]] + { + return false; + } + + GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); + } + + return true; +} + +bool GPUBackend::ApplyChromaSmoothing() +{ + const u32 x = m_display_texture_view_x; + const u32 y = m_display_texture_view_y; + const u32 width = m_display_texture_view_width; + const u32 height = m_display_texture_view_height; + if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || + m_chroma_smoothing_texture->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, false)) + { + ClearDisplayTexture(); + return false; + } + + GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); + } + + GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); + + m_display_texture->MakeReadyForSampling(); + g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); + g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); + g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); + g_gpu_device->SetTextureSampler(0, 
m_display_texture, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, width - 1, height - 1}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + m_chroma_smoothing_texture->MakeReadyForSampling(); + SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); + return true; +} + +void GPUBackend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ +} + +void GPUBackend::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, + GSVector4i* display_rect, GSVector4i* draw_rect) const +{ + const bool integer_scale = (g_gpu_settings.display_scaling == DisplayScalingMode::NearestInteger || + g_gpu_settings.display_scaling == DisplayScalingMode::BilinearInteger); + const bool show_vram = g_gpu_settings.debugging.show_vram; + const u32 display_width = show_vram ? VRAM_WIDTH : m_display_width; + const u32 display_height = show_vram ? VRAM_WIDTH : m_display_height; + const s32 display_origin_left = show_vram ? 0 : m_display_origin_left; + const s32 display_origin_top = show_vram ? 0 : m_display_origin_top; + const u32 display_vram_width = show_vram ? VRAM_WIDTH : m_display_vram_width; + const u32 display_vram_height = show_vram ? VRAM_HEIGHT : m_display_vram_height; + const float display_aspect_ratio = + show_vram ? 
(static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)) : m_display_aspect_ratio; + GPU::CalculateDrawRect(window_width, window_height, display_width, display_height, display_origin_left, + display_origin_top, display_vram_width, display_vram_height, g_gpu_settings.display_rotation, + display_aspect_ratio, g_gpu_settings.display_stretch_vertically, integer_scale, display_rect, + draw_rect); +} + +bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, + u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, + u32 texture_data_stride, GPUTexture::Format texture_format, std::string osd_key) +{ + + bool result; + + const char* extension = std::strrchr(filename.c_str(), '.'); + if (extension) + { + if (GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) + { + if (clear_alpha) + { + for (u32& pixel : texture_data) + pixel |= 0xFF000000u; + } + + if (flip_y) + GPUTexture::FlipTextureDataRGBA8(width, height, reinterpret_cast(texture_data.data()), + texture_data_stride); + + Assert(texture_data_stride == sizeof(u32) * width); + RGBA8Image image(width, height, std::move(texture_data)); + if (image.SaveToFile(filename.c_str(), fp.get(), quality)) + { + result = true; + } + else + { + ERROR_LOG("Unknown extension in filename '{}' or save error: '{}'", filename, extension); + result = false; + } + } + else + { + result = false; + } + } + else + { + ERROR_LOG("Unable to determine file extension for '{}'", filename); + result = false; + } + + if (!osd_key.empty()) + { + Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, + fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : + TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), + Path::GetFileName(filename), + result ? 
Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); + } + + return result; +} + +bool GPUBackend::WriteDisplayTextureToFile(std::string filename) +{ + if (!m_display_texture) + return false; + + const u32 read_x = static_cast(m_display_texture_view_x); + const u32 read_y = static_cast(m_display_texture_view_y); + const u32 read_width = static_cast(m_display_texture_view_width); + const u32 read_height = static_cast(m_display_texture_view_height); + + const u32 texture_data_stride = + Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4); + std::vector texture_data((texture_data_stride * read_height) / sizeof(u32)); + + std::unique_ptr dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = + g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), texture_data.data(), + texture_data.size() * sizeof(u32), texture_data_stride); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) + { + ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, + GPUTexture::GetFormatName(m_display_texture->GetFormat())); + return false; + } + } + + dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); + if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride)) + { + RestoreDeviceContext(); + return false; + } + + RestoreDeviceContext(); + + Error error; + auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); + if (!fp) + { + ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); + return false; + } + + constexpr bool clear_alpha = true; + const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); + + return CompressAndWriteTextureToFile( + read_width, read_height, std::move(filename), std::move(fp), g_settings.display_screenshot_quality, 
clear_alpha, + flip_y, std::move(texture_data), texture_data_stride, m_display_texture->GetFormat(), std::string()); +} + +void GPUBackend::HandleRenderScreenshotToBuffer(const GPUThreadRenderScreenshotToBufferCommand* cmd) +{ + GSVector4i draw_rect, display_rect; + CalculateDrawRect(static_cast(cmd->width), static_cast(cmd->height), true, true, &display_rect, &draw_rect); + + // Crop it. + const u32 width = static_cast(display_rect.width()); + const u32 height = static_cast(display_rect.height()); + draw_rect = draw_rect.sub32(display_rect.xyxy()); + display_rect = display_rect.sub32(display_rect.xyxy()); + *cmd->out_width = width; + *cmd->out_height = height; + *cmd->out_result = RenderScreenshotToBuffer(width, height, display_rect, draw_rect, cmd->postfx, cmd->out_pixels, + cmd->out_stride, cmd->out_format); + + RestoreDeviceContext(); +} + +bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, + const GSVector4i draw_rect, bool postfx, std::vector* out_pixels, + u32* out_stride, GPUTexture::Format* out_format) +{ + const GPUTexture::Format hdformat = + g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; + + auto render_texture = + g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); + if (!render_texture) + return false; + + g_gpu_device->ClearRenderTarget(render_texture.get(), GPUDevice::DEFAULT_CLEAR_COLOR); + + // TODO: this should use copy shader instead. 
+ RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx); + + const u32 stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(hdformat) * width, sizeof(u32)); + out_pixels->resize((height * stride) / sizeof(u32)); + + std::unique_ptr dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(), + out_pixels->size() * sizeof(u32), stride); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat))) + { + ERROR_LOG("Failed to create {}x{} download texture", width, height); + RestoreDeviceContext(); + return false; + } + } + + dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); + if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride)) + { + RestoreDeviceContext(); + return false; + } + + *out_stride = stride; + *out_format = hdformat; + RestoreDeviceContext(); + return true; +} + +void GPUBackend::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, + GSVector4i* draw_rect) const +{ + *width = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetWidth() : 1; + *height = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetHeight() : 1; + CalculateDrawRect(*width, *height, true, !g_gpu_settings.debugging.show_vram, display_rect, draw_rect); + + const bool internal_resolution = + (mode != DisplayScreenshotMode::ScreenResolution || g_gpu_settings.debugging.show_vram); + if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) + { + if (mode == DisplayScreenshotMode::InternalResolution) + { + const u32 draw_width = static_cast(display_rect->width()); + const u32 draw_height = static_cast(display_rect->height()); + + // If internal res, scale the computed draw rectangle to the internal res. 
+ // We re-use the draw rect because it's already been AR corrected. + const float sar = + static_cast(m_display_texture_view_width) / static_cast(m_display_texture_view_height); + const float dar = static_cast(draw_width) / static_cast(draw_height); + if (sar >= dar) + { + // stretch height, preserve width + const float scale = static_cast(m_display_texture_view_width) / static_cast(draw_width); + *width = m_display_texture_view_width; + *height = static_cast(std::round(static_cast(draw_height) * scale)); + } + else + { + // stretch width, preserve height + const float scale = static_cast(m_display_texture_view_height) / static_cast(draw_height); + *width = static_cast(std::round(static_cast(draw_width) * scale)); + *height = m_display_texture_view_height; + } + + // DX11 won't go past 16K texture size. + const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); + if (*width > max_texture_size) + { + *height = static_cast(static_cast(*height) / + (static_cast(*width) / static_cast(max_texture_size))); + *width = max_texture_size; + } + if (*height > max_texture_size) + { + *height = max_texture_size; + *width = static_cast(static_cast(*width) / + (static_cast(*height) / static_cast(max_texture_size))); + } + } + else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) + { + *width = m_display_texture_view_width; + *height = m_display_texture_view_height; + } + + // Remove padding, it's not part of the framebuffer. 
+ *draw_rect = GSVector4i(0, 0, static_cast(*width), static_cast(*height)); + *display_rect = *draw_rect; + } +} + +void GPUBackend::RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality, + bool compress_on_thread, bool show_osd_message) +{ + GPUThreadRenderScreenshotToFileCommand* cmd = static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::RenderScreenshotToFile, + sizeof(GPUThreadRenderScreenshotToFileCommand) + static_cast(path.length()))); + cmd->mode = mode; + cmd->quality = quality; + cmd->compress_on_thread = compress_on_thread; + cmd->show_osd_message = show_osd_message; + cmd->path_length = static_cast(path.length()); + std::memcpy(cmd->path, path.data(), cmd->path_length); + GPUThread::PushCommandAndWakeThread(cmd); +} + +void GPUBackend::HandleRenderScreenshotToFile(const GPUThreadRenderScreenshotToFileCommand* cmd) +{ + const std::string path(cmd->path, cmd->path_length); + + u32 width, height; + GSVector4i display_rect, draw_rect; + CalculateScreenshotSize(cmd->mode, &width, &height, &display_rect, &draw_rect); + + const bool internal_resolution = (cmd->mode != DisplayScreenshotMode::ScreenResolution); + if (width == 0 || height == 0) + return; + + std::vector pixels; + u32 pixels_stride; + GPUTexture::Format pixels_format; + if (!RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &pixels, &pixels_stride, + &pixels_format)) + { + ERROR_LOG("Failed to render {}x{} screenshot", width, height); + return; + } + + Error error; + auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error); + if (!fp) + { + ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription()); + return; + } + + std::string osd_key; + if (cmd->show_osd_message) + { + // Use a 60 second timeout to give it plenty of time to actually save. 
+ osd_key = fmt::format("ScreenshotSaver_{}", path); + Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH, + fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(path)), + 60.0f); + } + + if (cmd->compress_on_thread) + { + System::QueueTaskOnThread([width, height, path = std::move(path), fp = fp.release(), quality = cmd->quality, + flip_y = g_gpu_device->UsesLowerLeftOrigin(), pixels = std::move(pixels), pixels_stride, + pixels_format, osd_key = std::move(osd_key)]() mutable { + CompressAndWriteTextureToFile(width, height, std::move(path), FileSystem::ManagedCFilePtr(fp), quality, true, + flip_y, std::move(pixels), pixels_stride, pixels_format, std::move(osd_key)); + System::RemoveSelfFromTaskThreads(); + }); + } + else + { + CompressAndWriteTextureToFile(width, height, std::move(path), std::move(fp), cmd->quality, true, + g_gpu_device->UsesLowerLeftOrigin(), std::move(pixels), pixels_stride, pixels_format, + std::move(osd_key)); + } +} + +void GPUBackend::GetStatsString(SmallStringBase& str) const +{ + if (IsUsingHardwareBackend()) + { + str.format("{}{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", + GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), g_gpu_settings.gpu_use_thread ? "-MT" : "", + s_stats.num_primitives, s_stats.host_num_draws, s_stats.host_num_barriers, + s_stats.host_num_render_passes, s_stats.host_num_downloads, s_stats.num_copies, s_stats.num_writes); + } + else + { + str.format("{}{} SW | {} P | {} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), + g_gpu_settings.gpu_use_thread ? 
"-MT" : "", s_stats.num_primitives, s_stats.num_reads, s_stats.num_copies, + s_stats.num_writes); + } +} + +void GPUBackend::GetMemoryStatsString(SmallStringBase& str) const +{ + const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); + const u32 stream_kb = static_cast((s_stats.host_buffer_streamed + (1024 - 1)) / 1024); + + str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, s_stats.host_num_copies, + s_stats.host_num_uploads); +} + +void GPUBackend::ResetStatistics() +{ + s_counters = {}; + g_gpu_device->ResetStatistics(); +} + +void GPUBackend::UpdateStatistics(u32 frame_count) +{ + const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); + const u32 round = (frame_count - 1); + +#define UPDATE_COUNTER(x) s_stats.x = (s_counters.x + round) / frame_count +#define UPDATE_GPU_STAT(x) s_stats.host_##x = (stats.x + round) / frame_count + + UPDATE_COUNTER(num_reads); + UPDATE_COUNTER(num_writes); + UPDATE_COUNTER(num_copies); + UPDATE_COUNTER(num_vertices); + UPDATE_COUNTER(num_primitives); + + // UPDATE_COUNTER(num_read_texture_updates); + // UPDATE_COUNTER(num_ubo_updates); + + UPDATE_GPU_STAT(buffer_streamed); + UPDATE_GPU_STAT(num_draws); + UPDATE_GPU_STAT(num_barriers); + UPDATE_GPU_STAT(num_render_passes); + UPDATE_GPU_STAT(num_copies); + UPDATE_GPU_STAT(num_downloads); + UPDATE_GPU_STAT(num_uploads); + +#undef UPDATE_GPU_STAT +#undef UPDATE_COUNTER + + ResetStatistics(); +} diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index ea25a36a1..b8fc8664d 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -5,6 +5,8 @@ #include "gpu_types.h" +#include "util/gpu_device.h" + #include "common/heap_array.h" #include "common/threading.h" @@ -12,84 +14,196 @@ #include #include #include +#include -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4324) // warning C4324: 'GPUBackend': structure was padded due to alignment specifier -#endif +class Error; 
+class SmallStringBase; + +class GPUFramebuffer; +class GPUPipeline; + +struct Settings; +class StateWrapper; + +// DESIGN NOTE: Only static methods should be called on the CPU thread. +// You specifically don't have a global pointer available for this reason. class GPUBackend { +public: + static GPUThreadCommand* NewClearVRAMCommand(); + static GPUThreadCommand* NewClearDisplayCommand(); + static GPUBackendUpdateDisplayCommand* NewUpdateDisplayCommand(); + static GPUThreadCommand* NewClearCacheCommand(); + static GPUThreadCommand* NewBufferSwappedCommand(); + static GPUThreadCommand* NewUpdateResolutionScaleCommand(); + static GPUBackendReadVRAMCommand* NewReadVRAMCommand(); + static GPUBackendFillVRAMCommand* NewFillVRAMCommand(); + static GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); + static GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); + static GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); + static GPUBackendUpdateCLUTCommand* NewUpdateCLUTCommand(); + static GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); + static GPUBackendDrawPrecisePolygonCommand* NewDrawPrecisePolygonCommand(u32 num_vertices); + static GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); + static GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + static void PushCommand(GPUThreadCommand* cmd); + static void PushCommandAndWakeThread(GPUThreadCommand* cmd); + static void PushCommandAndSync(GPUThreadCommand* cmd, bool spin); + + static bool IsUsingHardwareBackend(); + + static std::unique_ptr CreateHardwareBackend(); + static std::unique_ptr CreateSoftwareBackend(); + + static bool RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, u32* out_width, u32* out_height, + std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format); + static void RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality, + bool compress_on_thread, bool show_osd_message); + 
public: GPUBackend(); virtual ~GPUBackend(); - ALWAYS_INLINE const Threading::Thread* GetThread() const { return m_use_gpu_thread ? &m_gpu_thread : nullptr; } + virtual bool IsHardwareRenderer() const = 0; - virtual bool Initialize(bool use_thread); - virtual void Reset(); - virtual void Shutdown(); + virtual bool Initialize(bool upload_vram, Error* error); - void SetThreadEnabled(bool use_thread); + virtual void UpdateSettings(const Settings& old_settings); - GPUBackendFillVRAMCommand* NewFillVRAMCommand(); - GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); - GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); - GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); - GPUBackendUpdateCLUTCommand* NewUpdateCLUTCommand(); - GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); - GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); - GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + /// Returns the current resolution scale. + virtual u32 GetResolutionScale() const = 0; - void PushCommand(GPUBackendCommand* cmd); - void Sync(bool allow_sleep); + /// Updates the resolution scale when it's set to automatic. + virtual void UpdateResolutionScale() = 0; - /// Processes all pending GPU commands. - void RunGPULoop(); + /// Returns the full display resolution of the GPU, including padding. + std::tuple GetFullDisplayResolution() const; + + // Graphics API state reset/restore - call when drawing the UI etc. + // TODO: replace with "invalidate cached state" + virtual void RestoreDeviceContext() = 0; + + /// Main command handler for GPU thread. + void HandleCommand(const GPUThreadCommand* cmd); + + /// Draws the current display texture, with any post-processing. + GPUDevice::PresentResult PresentDisplay(); + + /// Helper function to save current display texture to PNG. Used for regtest. 
+ bool WriteDisplayTextureToFile(std::string filename); + + bool BeginQueueFrame(); + void WaitForOneQueuedFrame(); + + void GetStatsString(SmallStringBase& str) const; + void GetMemoryStatsString(SmallStringBase& str) const; + + void ResetStatistics(); + void UpdateStatistics(u32 frame_count); protected: - void* AllocateCommand(GPUBackendCommandType command, u32 size); - u32 GetPendingCommandSize() const; - void WakeGPUThread(); - void StartGPUThread(); - void StopGPUThread(); + enum : u32 + { + DEINTERLACE_BUFFER_COUNT = 4, + }; + virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0; virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0; virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) = 0; virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, GPUBackendCommandParameters params) = 0; + virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0; - virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0; + virtual void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) = 0; + virtual void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) = 0; virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; - virtual void FlushRender() = 0; - virtual void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) = 0; + + virtual void DrawingAreaChanged() = 0; virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; + virtual void ClearCache() = 0; + virtual void OnBufferSwapped() = 0; + virtual void ClearVRAM() = 0; - void HandleCommand(const GPUBackendCommand* cmd); + virtual void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) = 0; - Threading::KernelSemaphore m_sync_semaphore; - std::atomic_bool m_gpu_thread_sleeping{false}; - std::atomic_bool m_gpu_loop_done{false}; - Threading::Thread 
m_gpu_thread; - bool m_use_gpu_thread = false; + virtual void LoadState(const GPUBackendLoadStateCommand* cmd) = 0; - std::mutex m_sync_mutex; - std::condition_variable m_sync_cpu_thread_cv; - std::condition_variable m_wake_gpu_thread_cv; - bool m_sync_done = false; + /// Ensures all pending draws are flushed to the host GPU. + virtual void FlushRender() = 0; - enum : u32 - { - COMMAND_QUEUE_SIZE = 4 * 1024 * 1024, - THRESHOLD_TO_WAKE_GPU = 256 - }; + /// Helper function for computing the draw rectangle in a larger window. + void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, + GSVector4i* display_rect, GSVector4i* draw_rect) const; - FixedHeapArray m_command_fifo_data; - alignas(HOST_CACHE_LINE_SIZE) std::atomic m_command_fifo_read_ptr{0}; - alignas(HOST_CACHE_LINE_SIZE) std::atomic m_command_fifo_write_ptr{0}; + /// Helper function for computing screenshot bounds. + void CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, + GSVector4i* draw_rect) const; + + /// Renders the display, optionally with postprocessing to the specified image. + void HandleRenderScreenshotToBuffer(const GPUThreadRenderScreenshotToBufferCommand* cmd); + void HandleRenderScreenshotToFile(const GPUThreadRenderScreenshotToFileCommand* cmd); + + /// Renders the display, optionally with postprocessing to the specified image. 
+ bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, + bool postfx, std::vector* out_pixels, u32* out_stride, + GPUTexture::Format* out_format); + + bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing, Error* error); + + void HandleUpdateDisplayCommand(const GPUBackendUpdateDisplayCommand* cmd); + + void ClearDisplay(); + void ClearDisplayTexture(); + void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, + s32 view_height); + + GPUDevice::PresentResult RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, + bool postfx); + + /// Sends the current frame to media capture. + void SendDisplayToMediaCapture(MediaCapture* cap); + + bool Deinterlace(u32 field, u32 line_skip); + bool DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip); + bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); + void DestroyDeinterlaceTextures(); + bool ApplyChromaSmoothing(); + + s32 m_display_width = 0; + s32 m_display_height = 0; + s32 m_display_origin_left = 0; + s32 m_display_origin_top = 0; + s32 m_display_vram_width = 0; + s32 m_display_vram_height = 0; + float m_display_aspect_ratio = 0.0f; + + u32 m_current_deinterlace_buffer = 0; + std::unique_ptr m_deinterlace_pipeline; + std::unique_ptr m_deinterlace_extract_pipeline; + std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; + std::unique_ptr m_deinterlace_texture; + + std::unique_ptr m_chroma_smoothing_pipeline; + std::unique_ptr m_chroma_smoothing_texture; + + std::unique_ptr m_display_pipeline; + GPUTexture* m_display_texture = nullptr; + GPUTexture* m_display_depth_buffer = nullptr; + s32 m_display_texture_view_x = 0; + s32 m_display_texture_view_y = 0; + s32 m_display_texture_view_width = 0; + s32 m_display_texture_view_height = 0; + + std::atomic m_queued_frames; + 
std::atomic_bool m_waiting_for_gpu_thread; + Threading::KernelSemaphore m_gpu_thread_wait; }; -#ifdef _MSC_VER -#pragma warning(pop) -#endif +namespace Host { + +/// Called at the end of the frame, before presentation. +void FrameDoneOnGPUThread(GPUBackend* gpu_backend, u32 frame_number); + +} // namespace Host diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 73c4a9d21..673ce6433 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -1,13 +1,16 @@ // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 +#include "cpu_pgxp.h" #include "gpu.h" +#include "gpu_backend.h" #include "gpu_dump.h" #include "gpu_hw_texture_cache.h" #include "interrupt_controller.h" #include "system.h" #include "common/assert.h" +#include "common/gsvector_formatter.h" #include "common/log.h" #include "common/string_util.h" @@ -93,7 +96,7 @@ void GPU::TryExecuteCommands() // drop terminator m_fifo.RemoveOne(); DEBUG_LOG("Drawing poly-line with {} vertices", GetPolyLineVertexCount()); - DispatchRenderCommand(); + FinishPolyline(); m_blit_buffer.clear(); EndCommand(); continue; @@ -200,8 +203,8 @@ bool GPU::HandleNOPCommand() bool GPU::HandleClearCacheCommand() { DEBUG_LOG("GP0 clear cache"); - m_draw_mode.SetTexturePageChanged(); InvalidateCLUT(); + GPUBackend::PushCommand(GPUBackend::NewClearCacheCommand()); m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); @@ -248,8 +251,6 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand() DEBUG_LOG("Set drawing area top-left: ({}, {})", left, top); if (m_drawing_area.left != left || m_drawing_area.top != top) { - FlushRender(); - m_drawing_area.left = left; m_drawing_area.top = top; m_drawing_area_changed = true; @@ -270,8 +271,6 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand() DEBUG_LOG("Set drawing area bottom-right: ({}, {})", m_drawing_area.right, m_drawing_area.bottom); if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) { - FlushRender(); - 
m_drawing_area.right = right; m_drawing_area.bottom = bottom; m_drawing_area_changed = true; @@ -291,8 +290,6 @@ bool GPU::HandleSetDrawingOffsetCommand() DEBUG_LOG("Set drawing offset ({}, {})", m_drawing_offset.x, m_drawing_offset.y); if (m_drawing_offset.x != x || m_drawing_offset.y != y) { - FlushRender(); - m_drawing_offset.x = x; m_drawing_offset.y = y; } @@ -308,11 +305,7 @@ bool GPU::HandleSetMaskBitCommand() constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); const u32 gpustat_bits = (param & 0x03) << 11; - if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits) - { - FlushRender(); - m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; - } + m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; DEBUG_LOG("Set mask bit {} {}", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); @@ -321,6 +314,36 @@ bool GPU::HandleSetMaskBitCommand() return true; } +void GPU::PrepareForDraw() +{ + if (m_drawing_area_changed) + { + m_drawing_area_changed = false; + GPUBackendSetDrawingAreaCommand* cmd = GPUBackend::NewSetDrawingAreaCommand(); + cmd->new_area = m_drawing_area; + GPUBackend::PushCommand(cmd); + } +} + +void GPU::FillBackendCommandParameters(GPUBackendCommand* cmd) const +{ + cmd->params.bits = 0; + cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; + cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled(); +} + +void GPU::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const +{ + FillBackendCommandParameters(cmd); + cmd->rc.bits = rc.bits; + cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; + cmd->draw_mode.dither_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; + cmd->palette.bits = m_draw_mode.palette_reg.bits; + cmd->window = m_draw_mode.texture_window; +} + bool GPU::HandleRenderPolygonCommand() 
{ const GPURenderCommand rc{FifoPeek(0)}; @@ -346,6 +369,7 @@ bool GPU::HandleRenderPolygonCommand() words_per_vertex, setup_ticks); // set draw state up + // TODO: Get rid of SetTexturePalette() and just fill it as needed if (rc.texture_enable) { const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16); @@ -355,12 +379,218 @@ bool GPU::HandleRenderPolygonCommand() UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg); } - m_counters.num_vertices += num_vertices; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + + if (g_settings.gpu_pgxp_enable) + { + GPUBackendDrawPrecisePolygonCommand* cmd = GPUBackend::NewDrawPrecisePolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + + const u32 first_color = rc.color_for_first_vertex; + const bool shaded = rc.shading_enable; + const bool textured = rc.texture_enable; + bool valid_w = g_settings.gpu_pgxp_texture_correction; + for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPrecisePolygonCommand::Vertex* vert = &cmd->vertices[i]; + vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + vert->native_x = m_drawing_offset.x + vp.x; + vert->native_y = m_drawing_offset.y + vp.y; + vert->texcoord = textured ? Truncate16(FifoPop()) : 0; + + valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->native_x, vert->native_y, + m_drawing_offset.x, m_drawing_offset.y, &vert->x, &vert->y, &vert->w); + } + + cmd->valid_w = valid_w; + if (!valid_w) + { + if (g_settings.gpu_pgxp_disable_2d) + { + // NOTE: This reads uninitialized data, but it's okay, it doesn't get used. 
+ for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPrecisePolygonCommand::Vertex& v = cmd->vertices[i]; + GSVector2::store(&v.x, GSVector2(GSVector2i::load(&v.native_x))); + v.w = 1.0f; + } + } + else + { + for (u32 i = 0; i < num_vertices; i++) + cmd->vertices[i].w = 1.0f; + } + } + + // Cull polygons which are too large. + const GSVector2 v0f = GSVector2::load(&cmd->vertices[0].x); + const GSVector2 v1f = GSVector2::load(&cmd->vertices[1].x); + const GSVector2 v2f = GSVector2::load(&cmd->vertices[2].x); + const GSVector2 min_pos_12 = v1f.min(v2f); + const GSVector2 max_pos_12 = v1f.max(v2f); + const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + const bool first_tri_culled = + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_012.rintersects(m_clamped_drawing_area)); + if (first_tri_culled) + { + // TODO: GPU events... somehow. + DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x, + cmd->vertices[0].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y, + cmd->vertices[2].native_x, cmd->vertices[2].native_y); + + if (!rc.quad_polygon) + { + EndCommand(); + return true; + } + } + else + { + AddDrawTriangleTicks(GSVector2i::load(&cmd->vertices[0].native_x), GSVector2i::load(&cmd->vertices[1].native_x), + GSVector2i::load(&cmd->vertices[2].native_x), rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + } + + // quads + if (rc.quad_polygon) + { + const GSVector2 v3f = GSVector2::load(&cmd->vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + + // Cull polygons which are too large. 
+ const bool second_tri_culled = + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_123.rintersects(m_clamped_drawing_area)); + if (second_tri_culled) + { + DEBUG_LOG("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", + cmd->vertices[2].native_x, cmd->vertices[2].native_y, cmd->vertices[1].native_x, + cmd->vertices[1].native_y, cmd->vertices[0].native_x, cmd->vertices[0].native_y); + + if (first_tri_culled) + { + EndCommand(); + return true; + } + + // Remove second part of quad. + cmd->num_vertices = 3; + } + else + { + AddDrawTriangleTicks(GSVector2i::load(&cmd->vertices[2].native_x), GSVector2i::load(&cmd->vertices[1].native_x), + GSVector2i::load(&cmd->vertices[3].native_x), rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + + // If first part was culled, move the second part to the first. + if (first_tri_culled) + { + std::memcpy(&cmd->vertices[0], &cmd->vertices[2], sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + std::memcpy(&cmd->vertices[2], &cmd->vertices[3], sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + cmd->num_vertices = 3; + } + } + } + + GPUBackend::PushCommand(cmd); + } + else + { + GPUBackendDrawPolygonCommand* cmd = GPUBackend::NewDrawPolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + + const u32 first_color = rc.color_for_first_vertex; + const bool shaded = rc.shading_enable; + const bool textured = rc.texture_enable; + for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; + vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + vert->x = m_drawing_offset.x + vp.x; + vert->y = m_drawing_offset.y + vp.y; + vert->texcoord = textured ? Truncate16(FifoPop()) : 0; + } + + // Cull polygons which are too large. 
+ const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); + const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].x); + const GSVector2i v2 = GSVector2i::load(&cmd->vertices[2].x); + const GSVector2i min_pos_12 = v1.min_s32(v2); + const GSVector2i max_pos_12 = v1.max_s32(v2); + const GSVector4i draw_rect_012 = + GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const bool first_tri_culled = + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_012.rintersects(m_clamped_drawing_area)); + if (first_tri_culled) + { + DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); + + if (!rc.quad_polygon) + { + EndCommand(); + return true; + } + } + else + { + AddDrawTriangleTicks(v0, v1, v2, rc.shading_enable, rc.texture_enable, rc.transparency_enable); + } + + // quads + if (rc.quad_polygon) + { + const GSVector2i v3 = GSVector2i::load(&cmd->vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(v3)) + .upl64(GSVector4i(max_pos_12.max_s32(v3))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + + // Cull polygons which are too large. + const bool second_tri_culled = + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_123.rintersects(m_clamped_drawing_area)); + if (second_tri_culled) + { + DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, + cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y); + + if (first_tri_culled) + { + EndCommand(); + return true; + } + + // Remove second part of quad. 
+ cmd->num_vertices = 3; + } + else + { + AddDrawTriangleTicks(v2, v1, v3, rc.shading_enable, rc.texture_enable, rc.transparency_enable); + + // If first part was culled, move the second part to the first. + if (first_tri_culled) + { + std::memcpy(&cmd->vertices[0], &cmd->vertices[2], sizeof(GPUBackendDrawPolygonCommand::Vertex)); + std::memcpy(&cmd->vertices[2], &cmd->vertices[3], sizeof(GPUBackendDrawPolygonCommand::Vertex)); + cmd->num_vertices = 3; + } + } + } + + GPUBackend::PushCommand(cmd); + } + EndCommand(); return true; } @@ -389,12 +619,65 @@ bool GPU::HandleRenderRectangleCommand() rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", total_words, setup_ticks); - m_counters.num_vertices++; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + GPUBackendDrawRectangleCommand* cmd = GPUBackend::NewDrawRectangleCommand(); + FillDrawCommand(cmd, rc); + cmd->color = rc.color_for_first_vertex; + + const GPUVertexPosition vp{FifoPop()}; + cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); + cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); + + if (rc.texture_enable) + { + const u32 texcoord_and_palette = FifoPop(); + cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); + cmd->texcoord = Truncate16(texcoord_and_palette); + } + else + { + cmd->palette.bits = 0; + cmd->texcoord = 0; + } + + switch (rc.rectangle_size) + { + case GPUDrawRectangleSize::R1x1: + cmd->width = 1; + cmd->height = 1; + break; + case GPUDrawRectangleSize::R8x8: + cmd->width = 8; + cmd->height = 8; + break; + case GPUDrawRectangleSize::R16x16: + cmd->width = 16; + cmd->height = 16; + break; + default: + { + const u32 width_and_height = FifoPop(); + cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); + cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); + } + 
break; + } + + const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + if (clamped_rect.rempty()) [[unlikely]] + { + DEBUG_LOG("Culling off-screen rectangle {}", rect); + EndCommand(); + return true; + } + + AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); + + GPUBackend::PushCommand(cmd); EndCommand(); return true; } @@ -411,12 +694,55 @@ bool GPU::HandleRenderLineCommand() TRACE_LOG("Render {} {} line ({} total words)", rc.transparency_enable ? "semi-transparent" : "opaque", rc.shading_enable ? "shaded" : "monochrome", total_words); - m_counters.num_vertices += 2; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand(2); + FillDrawCommand(cmd, rc); + cmd->palette.bits = 0; + + if (rc.shading_enable) + { + cmd->vertices[0].color = rc.color_for_first_vertex; + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + else + { + cmd->vertices[0].color = rc.color_for_first_vertex; + cmd->vertices[1].color = rc.color_for_first_vertex; + + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + + const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); + const GSVector2i v1 = 
GSVector2i::load(&cmd->vertices[1].x); + const GSVector4i rect = GSVector4i::xyxy(v0.min_s32(v1), v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y); + EndCommand(); + return true; + } + + AddDrawLineTicks(clamped_rect, rc.shading_enable); + GPUBackend::PushCommand(cmd); EndCommand(); return true; } @@ -453,6 +779,64 @@ bool GPU::HandleRenderPolyLineCommand() return true; } +void GPU::FinishPolyline() +{ + PrepareForDraw(); + + const u32 num_vertices = GetPolyLineVertexCount(); + DebugAssert(num_vertices >= 2); + + GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand((num_vertices - 1) * 2); + FillDrawCommand(cmd, m_render_command); + + u32 buffer_pos = 0; + const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; + const GSVector2i draw_offset = GSVector2i::load(&m_drawing_offset.x); + GSVector2i start_pos = GSVector2i(start_vp.x, start_vp.y).add32(draw_offset); + u32 start_color = m_render_command.color_for_first_vertex; + + const bool shaded = m_render_command.shading_enable; + u32 out_vertex_count = 0; + for (u32 i = 1; i < num_vertices; i++) + { + const u32 end_color = + shaded ? 
(m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; + const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; + const GSVector2i end_pos = GSVector2i(vp.x, vp.y).add32(draw_offset); + + const GSVector4i rect = + GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); + } + else + { + AddDrawLineTicks(clamped_rect, m_render_command.shading_enable); + + GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count]; + out_vertex_count += 2; + + GSVector2i::store(&out_vertex[0].x, start_pos); + out_vertex[0].color = start_color; + GSVector2i::store(&out_vertex[1].x, end_pos); + out_vertex[1].color = end_color; + } + + start_pos = end_pos; + start_color = end_color; + } + + if (out_vertex_count > 0) + { + DebugAssert(out_vertex_count <= cmd->num_vertices); + cmd->num_vertices = Truncate16(out_vertex_count); + GPUBackend::PushCommand(cmd); + } +} + bool GPU::HandleFillRectangleCommand() { CHECK_COMMAND_SIZE(3); @@ -460,8 +844,6 @@ bool GPU::HandleFillRectangleCommand() if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - FlushRender(); - const u32 color = FifoPop() & 0x00FFFFFF; const u32 dst_x = FifoPeek() & 0x3F0; const u32 dst_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; @@ -471,9 +853,17 @@ bool GPU::HandleFillRectangleCommand() DEBUG_LOG("Fill VRAM rectangle offset=({},{}), size=({},{})", dst_x, dst_y, width, height); if (width > 0 && height > 0) - FillVRAM(dst_x, dst_y, width, height, color); + { + GPUBackendFillVRAMCommand* cmd = GPUBackend::NewFillVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->x = 
static_cast(dst_x); + cmd->y = static_cast(dst_y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + cmd->color = color; + GPUBackend::PushCommand(cmd); + } - m_counters.num_writes++; AddCommandTicks(46 + ((width / 8) + 9) * height); EndCommand(); return true; @@ -523,8 +913,6 @@ void GPU::FinishVRAMWrite() if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - FlushRender(); - if (m_blit_remaining_words == 0) { if (g_settings.debugging.dump_cpu_to_vram_copies) @@ -557,18 +945,18 @@ void GPU::FinishVRAMWrite() const u8* blit_ptr = reinterpret_cast(m_blit_buffer.data()); if (transferred_full_rows > 0) { - UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, transferred_full_rows, blit_ptr, - m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); + UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, static_cast(transferred_full_rows), + blit_ptr, m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); blit_ptr += (ZeroExtend32(m_vram_transfer.width) * transferred_full_rows) * sizeof(u16); } if (transferred_width_last_row > 0) { - UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y + transferred_full_rows, transferred_width_last_row, 1, blit_ptr, - m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); + UpdateVRAM(m_vram_transfer.x, static_cast(m_vram_transfer.y + transferred_full_rows), + static_cast(transferred_width_last_row), 1, blit_ptr, m_GPUSTAT.set_mask_while_drawing, + m_GPUSTAT.check_mask_before_draw); } } - m_counters.num_writes++; m_blit_buffer.clear(); m_vram_transfer = {}; m_blitter_state = BlitterState::Idle; @@ -588,9 +976,6 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() m_vram_transfer.width, m_vram_transfer.height); DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0); - // all rendering should be done first... 
- FlushRender(); - // ensure VRAM shadow is up to date ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); @@ -602,7 +987,6 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() } // switch to pixel-by-pixel read state - m_counters.num_reads++; m_blitter_state = BlitterState::ReadingVRAM; m_command_total_words = 0; @@ -633,10 +1017,15 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand() width == 0 || height == 0 || (src_x == dst_x && src_y == dst_y && !m_GPUSTAT.set_mask_while_drawing); if (!skip_copy) { - m_counters.num_copies++; - - FlushRender(); - CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + GPUBackendCopyVRAMCommand* cmd = GPUBackend::NewCopyVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->src_x = static_cast(src_x); + cmd->src_y = static_cast(src_y); + cmd->dst_x = static_cast(dst_x); + cmd->dst_y = static_cast(dst_y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + GPUBackend::PushCommand(cmd); } AddCommandTicks(width * height * 2); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 65b019c54..f9da24f4c 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -4,8 +4,8 @@ #include "gpu_hw.h" #include "cpu_core.h" #include "cpu_pgxp.h" +#include "gpu.h" #include "gpu_hw_shadergen.h" -#include "gpu_sw_backend.h" #include "gpu_sw_rasterizer.h" #include "host.h" #include "settings.h" @@ -26,6 +26,7 @@ #include "IconsEmoji.h" #include "IconsFontAwesome5.h" +#include "fmt/format.h" #include "imgui.h" #include @@ -87,7 +88,7 @@ ALWAYS_INLINE static u32 GetMaxResolutionScale() ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale) { - u32 scale = std::min(resolution_scale, g_settings.gpu_downsample_scale); + u32 scale = std::min(resolution_scale, g_gpu_settings.gpu_downsample_scale); while ((resolution_scale % scale) != 0) scale--; return scale; @@ -96,19 +97,21 @@ ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale) 
ALWAYS_INLINE static bool ShouldClampUVs(GPUTextureFilter texture_filter) { // We only need UV limits if PGXP is enabled, or texture filtering is enabled. - return g_settings.gpu_pgxp_enable || texture_filter != GPUTextureFilter::Nearest; + return g_gpu_settings.gpu_pgxp_enable || texture_filter != GPUTextureFilter::Nearest; } ALWAYS_INLINE static bool ShouldAllowSpriteMode(u8 resolution_scale, GPUTextureFilter texture_filter, GPUTextureFilter sprite_texture_filter) { // Use sprite shaders/mode when texcoord rounding is forced, or if the filters are different. - return (sprite_texture_filter != texture_filter || (resolution_scale > 1 && g_settings.gpu_force_round_texcoords)); + return (sprite_texture_filter != texture_filter || + (resolution_scale > 1 && g_gpu_settings.gpu_force_round_texcoords)); } ALWAYS_INLINE static bool ShouldDisableColorPerspective() { - return g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_texture_correction && !g_settings.gpu_pgxp_color_correction; + return g_gpu_settings.gpu_pgxp_enable && g_gpu_settings.gpu_pgxp_texture_correction && + !g_gpu_settings.gpu_pgxp_color_correction; } /// Returns true if the specified texture filtering mode requires dual-source blending. 
@@ -187,7 +190,7 @@ private: }; } // namespace -GPU_HW::GPU_HW() : GPU() +GPU_HW::GPU_HW() : GPUBackend() { #ifdef _DEBUG s_draw_number = 0; @@ -197,12 +200,11 @@ GPU_HW::GPU_HW() : GPU() GPU_HW::~GPU_HW() { GPUTextureCache::Shutdown(); +} - if (m_sw_renderer) - { - m_sw_renderer->Shutdown(); - m_sw_renderer.reset(); - } +bool GPU_HW::IsHardwareRenderer() const +{ + return true; } ALWAYS_INLINE void GPU_HW::BatchVertex::Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, @@ -235,34 +237,24 @@ ALWAYS_INLINE void GPU_HW::BatchVertex::SetUVLimits(u32 min_u, u32 max_u, u32 mi uv_limits = PackUVLimits(min_u, max_u, min_v, max_v); } -const Threading::Thread* GPU_HW::GetSWThread() const +bool GPU_HW::Initialize(bool upload_vram, Error* error) { - return m_sw_renderer ? m_sw_renderer->GetThread() : nullptr; -} - -bool GPU_HW::IsHardwareRenderer() const -{ - return true; -} - -bool GPU_HW::Initialize(Error* error) -{ - if (!GPU::Initialize(error)) + if (!GPUBackend::Initialize(upload_vram, error)) return false; const GPUDevice::Features features = g_gpu_device->GetFeatures(); m_resolution_scale = Truncate8(CalculateResolutionScale()); - m_multisamples = Truncate8(std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); - m_texture_filtering = g_settings.gpu_texture_filter; - m_sprite_texture_filtering = g_settings.gpu_sprite_texture_filter; - m_line_detect_mode = (m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; + m_multisamples = Truncate8(std::min(g_gpu_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); + m_texture_filtering = g_gpu_settings.gpu_texture_filter; + m_sprite_texture_filtering = g_gpu_settings.gpu_sprite_texture_filter; + m_line_detect_mode = (m_resolution_scale > 1) ? 
g_gpu_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; m_downsample_mode = GetDownsampleMode(m_resolution_scale); - m_wireframe_mode = g_settings.gpu_wireframe_mode; + m_wireframe_mode = g_gpu_settings.gpu_wireframe_mode; m_supports_dual_source_blend = features.dual_source_blend; m_supports_framebuffer_fetch = features.framebuffer_fetch; - m_true_color = g_settings.gpu_true_color; - m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); + m_true_color = g_gpu_settings.gpu_true_color; + m_pgxp_depth_buffer = g_gpu_settings.UsingPGXPDepthBuffer(); m_clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering); @@ -271,8 +263,6 @@ bool GPU_HW::Initialize(Error* error) CheckSettings(); - UpdateSoftwareRenderer(false); - PrintSettingsToLog(); if (!CompileCommonShaders(error) || !CompilePipelines(error)) @@ -286,7 +276,7 @@ bool GPU_HW::Initialize(Error* error) if (m_use_texture_cache) { - if (!GPUTextureCache::Initialize()) + if (!GPUTextureCache::Initialize(this)) { ERROR_LOG("Failed to initialize texture cache, disabling."); m_use_texture_cache = false; @@ -296,33 +286,50 @@ bool GPU_HW::Initialize(Error* error) UpdateDownsamplingLevels(); RestoreDeviceContext(); + + // If we're not initializing VRAM, need to upload it here. Implies RestoreDeviceContext(). + if (upload_vram) + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); + + DrawingAreaChanged(); return true; } -void GPU_HW::Reset(bool clear_vram) +u32 GPU_HW::GetResolutionScale() const +{ + return m_resolution_scale; +} + +void GPU_HW::ClearVRAM() { // Texture cache needs to be invalidated before we load, otherwise we dump black. if (m_use_texture_cache) GPUTextureCache::Invalidate(); + // Don't need to finish the current draw. 
if (m_batch_vertex_ptr) UnmapGPUBuffer(0, 0); - GPU::Reset(clear_vram); + m_texpage_dirty = false; + m_compute_uv_range = m_clamp_uvs; - if (m_sw_renderer) - m_sw_renderer->Reset(); + if (ShouldDrawWithSoftwareRenderer()) + { + std::memset(g_vram, 0, sizeof(g_vram)); + std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); + } m_batch = {}; m_current_depth = 1; - SetClampedDrawingArea(); - - if (clear_vram) - ClearFramebuffer(); + ClearFramebuffer(); } -bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) +#if 0 + +bool GPU_HW::DoState(GPUTexture** host_texture, bool is_reading, bool update_display) { +#if 0 + // TODO: FIXME // Need to download local VRAM copy before calling the base class, because it serializes this. if (m_sw_renderer) { @@ -339,11 +346,15 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di if (!GPU::DoState(sw, host_texture, update_display)) return false; +#else + if (!is_reading && !host_texture) + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); +#endif if (host_texture) { GPUTexture* tex = *host_texture; - if (sw.IsReading()) + if (is_reading) { if (tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() || tex->GetSamples() != m_vram_texture->GetSamples()) @@ -377,7 +388,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di tex->GetHeight()); } } - else if (sw.IsReading()) + else if (is_reading) { // Need to update the VRAM copy on the GPU with the state data. // Would invalidate the TC, but base DoState() calls Reset(). 
@@ -385,7 +396,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di } // invalidate the whole VRAM read texture when loading state - if (sw.IsReading()) + if (is_reading) { DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); ClearVRAMDirtyRectangle(); @@ -395,7 +406,35 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di ResetBatchVertexDepth(); } - return GPUTextureCache::DoState(sw, !m_use_texture_cache); + // TODO:FIXME return GPUTextureCache::DoState(sw, !m_use_texture_cache); + return true; +} + +#endif + +void GPU_HW::LoadState(const GPUBackendLoadStateCommand* cmd) +{ + DebugAssert((m_batch_vertex_ptr != nullptr) == (m_batch_index_ptr != nullptr)); + if (m_batch_vertex_ptr) + UnmapGPUBuffer(0, 0); + + std::memcpy(g_vram, cmd->vram_data, sizeof(g_vram)); + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); + + if (ShouldDrawWithSoftwareRenderer()) + std::memcpy(g_gpu_clut, cmd->clut_data, sizeof(g_gpu_clut)); + + if (m_use_texture_cache) + { + GPUTextureCache::LoadState(std::span(cmd->texture_cache_state, cmd->texture_cache_state_size), + cmd->texture_cache_state_version); + } + + ClearVRAMDirtyRectangle(); + SetFullVRAMDirtyRectangle(); + UpdateVRAMReadTexture(true, false); + ClearVRAMDirtyRectangle(); + ResetBatchVertexDepth(); } void GPU_HW::RestoreDeviceContext() @@ -409,50 +448,51 @@ void GPU_HW::RestoreDeviceContext() void GPU_HW::UpdateSettings(const Settings& old_settings) { - const bool prev_force_progressive_scan = m_force_progressive_scan; - - GPU::UpdateSettings(old_settings); + GPUBackend::UpdateSettings(old_settings); const GPUDevice::Features features = g_gpu_device->GetFeatures(); const u8 resolution_scale = Truncate8(CalculateResolutionScale()); - const u8 multisamples = Truncate8(std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); + const u8 multisamples = 
Truncate8(std::min(g_gpu_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); const bool clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); - const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || - g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || - m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || - (!old_settings.gpu_texture_cache && g_settings.gpu_texture_cache)); + const bool framebuffer_changed = + (m_resolution_scale != resolution_scale || m_multisamples != multisamples || + g_gpu_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || + m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer() || + (!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache)); const bool shaders_changed = ((m_resolution_scale > 1) != (resolution_scale > 1) || (m_multisamples > 1) != (multisamples > 1) || - m_true_color != g_settings.gpu_true_color || prev_force_progressive_scan != m_force_progressive_scan || - (multisamples > 1 && g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) || - (resolution_scale > 1 && g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || - (resolution_scale > 1 && g_settings.gpu_texture_filter == GPUTextureFilter::Nearest && - g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords) || - g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || - m_texture_filtering != g_settings.gpu_texture_filter || - m_sprite_texture_filtering != g_settings.gpu_sprite_texture_filter || m_clamp_uvs != clamp_uvs || - (features.geometry_shaders && g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode) || - m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || - (features.noperspective_interpolation && g_settings.gpu_pgxp_enable && - g_settings.gpu_pgxp_color_correction != 
old_settings.gpu_pgxp_color_correction) || - m_allow_sprite_mode != - ShouldAllowSpriteMode(m_resolution_scale, g_settings.gpu_texture_filter, g_settings.gpu_sprite_texture_filter)); + m_true_color != g_gpu_settings.gpu_true_color || + (old_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive) != + (g_gpu_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive) || + (multisamples > 1 && g_gpu_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) || + (resolution_scale > 1 && g_gpu_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || + (resolution_scale > 1 && g_gpu_settings.gpu_texture_filter == GPUTextureFilter::Nearest && + g_gpu_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords) || + g_gpu_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || + m_texture_filtering != g_gpu_settings.gpu_texture_filter || + m_sprite_texture_filtering != g_gpu_settings.gpu_sprite_texture_filter || m_clamp_uvs != clamp_uvs || + (features.geometry_shaders && g_gpu_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode) || + m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer() || + (features.noperspective_interpolation && g_gpu_settings.gpu_pgxp_enable && + g_gpu_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction) || + m_allow_sprite_mode != ShouldAllowSpriteMode(m_resolution_scale, g_gpu_settings.gpu_texture_filter, + g_gpu_settings.gpu_sprite_texture_filter)); const bool resolution_dependent_shaders_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples); const bool downsampling_shaders_changed = ((m_resolution_scale > 1) != (resolution_scale > 1) || - (resolution_scale > 1 && (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || + (resolution_scale > 1 && (g_gpu_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || (m_downsample_mode == 
GPUDownsampleMode::Box && (resolution_scale != m_resolution_scale || - g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale))))); + g_gpu_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale))))); if (m_resolution_scale != resolution_scale) { Host::AddIconOSDMessage("ResolutionScaleChanged", ICON_FA_PAINT_BRUSH, fmt::format(TRANSLATE_FS("GPU_HW", "Internal resolution set to {0}x ({1}x{2})."), - resolution_scale, m_crtc_state.display_width * resolution_scale, - resolution_scale * m_crtc_state.display_height), + resolution_scale, m_display_width * resolution_scale, + resolution_scale * m_display_height), Host::OSD_INFO_DURATION); } @@ -484,31 +524,29 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_resolution_scale = resolution_scale; m_multisamples = multisamples; - m_texture_filtering = g_settings.gpu_texture_filter; - m_sprite_texture_filtering = g_settings.gpu_sprite_texture_filter; - m_line_detect_mode = (m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; + m_texture_filtering = g_gpu_settings.gpu_texture_filter; + m_sprite_texture_filtering = g_gpu_settings.gpu_sprite_texture_filter; + m_line_detect_mode = (m_resolution_scale > 1) ? 
g_gpu_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; m_downsample_mode = GetDownsampleMode(resolution_scale); - m_wireframe_mode = g_settings.gpu_wireframe_mode; - m_true_color = g_settings.gpu_true_color; + m_wireframe_mode = g_gpu_settings.gpu_wireframe_mode; + m_true_color = g_gpu_settings.gpu_true_color; m_clamp_uvs = clamp_uvs; m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(resolution_scale, m_texture_filtering, m_sprite_texture_filtering); - m_use_texture_cache = g_settings.gpu_texture_cache; - m_texture_dumping = m_use_texture_cache && g_settings.texture_replacements.dump_textures; + m_use_texture_cache = g_gpu_settings.gpu_texture_cache; + m_texture_dumping = m_use_texture_cache && g_gpu_settings.texture_replacements.dump_textures; m_batch.sprite_mode = (m_allow_sprite_mode && m_batch.sprite_mode); - const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); + const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer()); if (depth_buffer_changed) { - m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); + m_pgxp_depth_buffer = g_gpu_settings.UsingPGXPDepthBuffer(); m_batch.use_depth_buffer = false; m_depth_was_copied = false; } CheckSettings(); - UpdateSoftwareRenderer(true); - PrintSettingsToLog(); if (shaders_changed) @@ -543,10 +581,9 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) UpdateDownsamplingLevels(); RestoreDeviceContext(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false); + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); if (m_write_mask_as_depth) UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); } else if (m_vram_depth_texture && depth_buffer_changed) { @@ -558,7 +595,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) if (m_use_texture_cache && !old_settings.gpu_texture_cache) { - if (!GPUTextureCache::Initialize()) + if 
(!GPUTextureCache::Initialize(this)) { ERROR_LOG("Failed to initialize texture cache, disabling."); m_use_texture_cache = false; @@ -571,23 +608,33 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) GPUTextureCache::UpdateSettings(m_use_texture_cache, old_settings); - if (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || - (g_settings.gpu_downsample_mode == GPUDownsampleMode::Box && - g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)) + if (g_gpu_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || + (g_gpu_settings.gpu_downsample_mode == GPUDownsampleMode::Box && + g_gpu_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)) { UpdateDownsamplingLevels(); } + + // Need to reload CLUT if we're enabling SW rendering. + if (g_gpu_settings.gpu_use_software_renderer_for_readbacks && !old_settings.gpu_use_software_renderer_for_readbacks) + { + if (m_draw_mode.mode_reg.texture_mode <= GPUTextureMode::Palette8Bit) + { + GPU_SW_Rasterizer::UpdateCLUT(m_draw_mode.palette_reg, + m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Palette8Bit); + } + } } void GPU_HW::CheckSettings() { const GPUDevice::Features features = g_gpu_device->GetFeatures(); - if (m_multisamples != g_settings.gpu_multisamples) + if (m_multisamples != g_gpu_settings.gpu_multisamples) { Host::AddIconOSDMessage("MSAAUnsupported", ICON_EMOJI_WARNING, fmt::format(TRANSLATE_FS("GPU_HW", "{}x MSAA is not supported, using {}x instead."), - g_settings.gpu_multisamples, m_multisamples), + g_gpu_settings.gpu_multisamples, m_multisamples), Host::OSD_CRITICAL_ERROR_DURATION); } else @@ -595,7 +642,7 @@ void GPU_HW::CheckSettings() Host::RemoveKeyedOSDMessage("MSAAUnsupported"); } - if (g_settings.gpu_per_sample_shading && !features.per_sample_shading) + if (g_gpu_settings.gpu_per_sample_shading && !features.per_sample_shading) { Host::AddIconOSDMessage("SSAAUnsupported", ICON_EMOJI_WARNING, TRANSLATE_STR("GPU_HW", "SSAA is not 
supported, using MSAA instead."), @@ -661,13 +708,13 @@ void GPU_HW::CheckSettings() { const u32 resolution_scale = CalculateResolutionScale(); const u32 box_downscale = GetBoxDownsampleScale(resolution_scale); - if (box_downscale != g_settings.gpu_downsample_scale || box_downscale == resolution_scale) + if (box_downscale != g_gpu_settings.gpu_downsample_scale || box_downscale == resolution_scale) { Host::AddIconOSDMessage( "BoxDownsampleUnsupported", ICON_FA_PAINT_BRUSH, fmt::format(TRANSLATE_FS( "GPU_HW", "Resolution scale {0}x is not divisible by downsample scale {1}x, using {2}x instead."), - resolution_scale, g_settings.gpu_downsample_scale, box_downscale), + resolution_scale, g_gpu_settings.gpu_downsample_scale, box_downscale), Host::OSD_WARNING_DURATION); } else @@ -675,7 +722,7 @@ void GPU_HW::CheckSettings() Host::RemoveKeyedOSDMessage("BoxDownsampleUnsupported"); } - if (box_downscale == g_settings.gpu_resolution_scale) + if (box_downscale == g_gpu_settings.gpu_resolution_scale) m_downsample_mode = GPUDownsampleMode::Disabled; } } @@ -683,15 +730,15 @@ void GPU_HW::CheckSettings() u32 GPU_HW::CalculateResolutionScale() const { u32 scale; - if (g_settings.gpu_resolution_scale != 0) + if (g_gpu_settings.gpu_resolution_scale != 0) { - scale = g_settings.gpu_resolution_scale; + scale = g_gpu_settings.gpu_resolution_scale; } else { // Auto scaling. - if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0 || m_crtc_state.display_vram_width == 0 || - m_crtc_state.display_vram_height == 0 || m_GPUSTAT.display_disable || !g_gpu_device->HasMainSwapChain()) + if (m_display_width == 0 || m_display_height == 0 || m_display_vram_width == 0 || m_display_vram_height == 0 || + !m_display_texture || !g_gpu_device->HasMainSwapChain()) { // When the system is starting and all borders crop is enabled, the registers are zero, and // display_height therefore is also zero. Keep the existing resolution until it updates. 
@@ -708,19 +755,19 @@ u32 GPU_HW::CalculateResolutionScale() const const s32 draw_width = draw_rect.width(); const s32 draw_height = draw_rect.height(); scale = static_cast( - std::ceil(std::max(static_cast(draw_width) / static_cast(m_crtc_state.display_vram_width), - static_cast(draw_height) / static_cast(m_crtc_state.display_vram_height)))); + std::ceil(std::max(static_cast(draw_width) / static_cast(m_display_vram_width), + static_cast(draw_height) / static_cast(m_display_vram_height)))); VERBOSE_LOG("Draw Size = {}x{}, VRAM Size = {}x{}, Preferred Scale = {}", draw_width, draw_height, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, scale); + m_display_vram_width, m_display_vram_height, scale); } } - if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale)) + if (g_gpu_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale)) { const u32 new_scale = Common::PreviousPow2(scale); WARNING_LOG("Resolution scale {}x not supported for adaptive downsampling, using {}x", scale, new_scale); - if (g_settings.gpu_resolution_scale != 0) + if (g_gpu_settings.gpu_resolution_scale != 0) { Host::AddIconOSDMessage( "ResolutionNotPow2", ICON_FA_PAINT_BRUSH, @@ -736,11 +783,6 @@ u32 GPU_HW::CalculateResolutionScale() const return std::clamp(scale, 1, GetMaxResolutionScale()); } -u32 GPU_HW::GetResolutionScale() const -{ - return m_resolution_scale; -} - void GPU_HW::UpdateResolutionScale() { if (CalculateResolutionScale() != m_resolution_scale) @@ -749,7 +791,13 @@ void GPU_HW::UpdateResolutionScale() GPUDownsampleMode GPU_HW::GetDownsampleMode(u32 resolution_scale) const { - return (resolution_scale == 1) ? GPUDownsampleMode::Disabled : g_settings.gpu_downsample_mode; + return (resolution_scale == 1) ? GPUDownsampleMode::Disabled : g_gpu_settings.gpu_downsample_mode; +} + +bool GPU_HW::ShouldDrawWithSoftwareRenderer() const +{ + // TODO: FIXME: Move into class. 
+ return g_gpu_settings.gpu_use_software_renderer_for_readbacks; } bool GPU_HW::IsUsingMultisampling() const @@ -757,15 +805,15 @@ bool GPU_HW::IsUsingMultisampling() const return m_multisamples > 1; } -bool GPU_HW::IsUsingDownsampling() const +bool GPU_HW::IsUsingDownsampling(const GPUBackendUpdateDisplayCommand* cmd) const { - return (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24); + return (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit); } void GPU_HW::SetFullVRAMDirtyRectangle() { m_vram_dirty_draw_rect = VRAM_SIZE_RECT; - m_draw_mode.SetTexturePageChanged(); + m_draw_mode.bits = INVALID_DRAW_MODE_BITS; } void GPU_HW::ClearVRAMDirtyRectangle() @@ -810,12 +858,12 @@ void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) { // the vram area can include the texture page, but the game can leave it as-is. in this case, set it as dirty so the // shadow texture is updated - if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && + if (m_draw_mode.bits != INVALID_DRAW_MODE_BITS && m_batch.texture_mode != BatchTextureMode::Disabled && (GetTextureRect(m_draw_mode.mode_reg.texture_page, m_draw_mode.mode_reg.texture_mode).rintersects(update_rect) || (m_draw_mode.mode_reg.IsUsingPalette() && GetPaletteRect(m_draw_mode.palette_reg, m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) { - m_draw_mode.SetTexturePageChanged(); + m_draw_mode.bits = INVALID_DRAW_MODE_BITS; } } @@ -824,12 +872,13 @@ void GPU_HW::PrintSettingsToLog() INFO_LOG("Resolution Scale: {} ({}x{}), maximum {}", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, VRAM_HEIGHT * m_resolution_scale, GetMaxResolutionScale()); INFO_LOG("Multisampling: {}x{}", m_multisamples, - (g_settings.gpu_per_sample_shading && g_gpu_device->GetFeatures().per_sample_shading) ? + (g_gpu_settings.gpu_per_sample_shading && g_gpu_device->GetFeatures().per_sample_shading) ? 
" (per sample shading)" : ""); - INFO_LOG("Dithering: {}", m_true_color ? "Disabled" : "Enabled", (!m_true_color && g_settings.gpu_scaled_dithering)); + INFO_LOG("Dithering: {}", m_true_color ? "Disabled" : "Enabled", + (!m_true_color && g_gpu_settings.gpu_scaled_dithering)); INFO_LOG("Force round texture coordinates: {}", - (m_resolution_scale > 1 && g_settings.gpu_force_round_texcoords) ? "Enabled" : "Disabled"); + (m_resolution_scale > 1 && g_gpu_settings.gpu_force_round_texcoords) ? "Enabled" : "Disabled"); INFO_LOG("Texture Filtering: {}/{}", Settings::GetTextureFilterDisplayName(m_texture_filtering), Settings::GetTextureFilterDisplayName(m_sprite_texture_filtering)); INFO_LOG("Dual-source blending: {}", m_supports_dual_source_blend ? "Supported" : "Not supported"); @@ -838,7 +887,7 @@ void GPU_HW::PrintSettingsToLog() INFO_LOG("Downsampling: {}", Settings::GetDownsampleModeDisplayName(m_downsample_mode)); INFO_LOG("Wireframe rendering: {}", Settings::GetGPUWireframeModeDisplayName(m_wireframe_mode)); INFO_LOG("Line detection: {}", Settings::GetLineDetectModeDisplayName(m_line_detect_mode)); - INFO_LOG("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO"); + INFO_LOG("Using software renderer for readbacks: {}", ShouldDrawWithSoftwareRenderer() ? "YES" : "NO"); INFO_LOG("Separate sprite shaders: {}", m_allow_sprite_mode ? 
"YES" : "NO"); } @@ -947,6 +996,7 @@ void GPU_HW::ClearFramebuffer() if (m_use_texture_cache) GPUTextureCache::Invalidate(); m_last_depth_z = 1.0f; + m_current_depth = 1; } void GPU_HW::SetVRAMRenderTarget() @@ -1016,12 +1066,14 @@ bool GPU_HW::CompilePipelines(Error* error) const GPUDevice::Features features = g_gpu_device->GetFeatures(); const bool upscaled = (m_resolution_scale > 1); const bool msaa = (m_multisamples > 1); - const bool per_sample_shading = (msaa && g_settings.gpu_per_sample_shading && features.per_sample_shading); + const bool per_sample_shading = (msaa && g_gpu_settings.gpu_per_sample_shading && features.per_sample_shading); const bool force_round_texcoords = - (upscaled && m_texture_filtering == GPUTextureFilter::Nearest && g_settings.gpu_force_round_texcoords); - const bool true_color = g_settings.gpu_true_color; - const bool scaled_dithering = (!m_true_color && upscaled && g_settings.gpu_scaled_dithering); + (upscaled && m_texture_filtering == GPUTextureFilter::Nearest && g_gpu_settings.gpu_force_round_texcoords); + const bool true_color = g_gpu_settings.gpu_true_color; + const bool scaled_dithering = (!m_true_color && upscaled && g_gpu_settings.gpu_scaled_dithering); const bool disable_color_perspective = ShouldDisableColorPerspective(); + const bool force_progressive_scan = + (g_gpu_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive); // Determine when to use shader blending. // FBFetch is free, we need it for filtering without DSB, or when accurate blending is forced. @@ -1030,10 +1082,10 @@ bool GPU_HW::CompilePipelines(Error* error) // Abuse the depth buffer for the mask bit when it's free (FBFetch), or PGXP depth buffering is enabled. 
m_allow_shader_blend = features.framebuffer_fetch || ((features.feedback_loops || features.raster_order_views) && - (m_pgxp_depth_buffer || g_settings.IsUsingAccurateBlending() || + (m_pgxp_depth_buffer || g_gpu_settings.IsUsingAccurateBlending() || (!m_supports_dual_source_blend && (IsBlendedTextureFiltering(m_texture_filtering) || IsBlendedTextureFiltering(m_sprite_texture_filtering))))); - m_prefer_shader_blend = (m_allow_shader_blend && g_settings.IsUsingAccurateBlending()); + m_prefer_shader_blend = (m_allow_shader_blend && g_gpu_settings.IsUsingAccurateBlending()); m_use_rov_for_shader_blend = (m_allow_shader_blend && !features.framebuffer_fetch && features.raster_order_views && (m_prefer_shader_blend || !features.feedback_loops)); m_write_mask_as_depth = (!m_pgxp_depth_buffer && !features.framebuffer_fetch && !m_prefer_shader_blend); @@ -1068,11 +1120,11 @@ bool GPU_HW::CompilePipelines(Error* error) (NUM_TEXTURE_MODES - (NUM_TEXTURE_MODES - static_cast(BatchTextureMode::SpriteStart))); const u32 total_vertex_shaders = (m_allow_sprite_mode ? 7 : 4); const u32 total_fragment_shaders = ((1 + BoolToUInt32(needs_rov_depth)) * 5 * 5 * active_texture_modes * 2 * - (1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!m_force_progressive_scan))); + (1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!force_progressive_scan))); const u32 total_items = total_vertex_shaders + total_fragment_shaders + ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * - (1 + BoolToUInt32(!m_force_progressive_scan))) + // batch pipelines + (1 + BoolToUInt32(!force_progressive_scan))) + // batch pipelines ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 
1 : 0) + // wireframe (2 * 2) + // vram fill (1 + BoolToUInt32(m_write_mask_as_depth)) + // vram copy @@ -1160,7 +1212,7 @@ bool GPU_HW::CompilePipelines(Error* error) (needs_rov_depth && render_mode != static_cast(BatchRenderMode::ShaderBlend))) { progress.Increment(active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * - (1 + BoolToUInt32(!m_force_progressive_scan))); + (1 + BoolToUInt32(!force_progressive_scan))); continue; } @@ -1171,7 +1223,7 @@ bool GPU_HW::CompilePipelines(Error* error) if (check_mask && render_mode != static_cast(BatchRenderMode::ShaderBlend)) { // mask bit testing is only valid with shader blending. - progress.Increment((1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!m_force_progressive_scan))); + progress.Increment((1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!force_progressive_scan))); continue; } @@ -1184,7 +1236,7 @@ bool GPU_HW::CompilePipelines(Error* error) for (u8 interlacing = 0; interlacing < 2; interlacing++) { // Never going to draw with line skipping in force progressive. - if (interlacing && m_force_progressive_scan) + if (interlacing && force_progressive_scan) continue; const bool sprite = (static_cast(texture_mode) >= BatchTextureMode::SpriteStart); @@ -1267,7 +1319,7 @@ bool GPU_HW::CompilePipelines(Error* error) (needs_rov_depth && render_mode != static_cast(BatchRenderMode::ShaderBlend))) { progress.Increment(active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * - (1 + BoolToUInt32(!m_force_progressive_scan))); + (1 + BoolToUInt32(!force_progressive_scan))); continue; } @@ -1282,7 +1334,7 @@ bool GPU_HW::CompilePipelines(Error* error) for (u8 interlacing = 0; interlacing < 2; interlacing++) { // Never going to draw with line skipping in force progressive. 
- if (interlacing && m_force_progressive_scan) + if (interlacing && force_progressive_scan) continue; for (u8 check_mask = 0; check_mask < 2; check_mask++) @@ -2007,7 +2059,8 @@ ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode } } -ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) +ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(const GPUBackendDrawCommand* cmd, + BatchVertex* vertices) { // Taken from beetle-psx gpu_polygon.cpp // For X/Y flipped 2D sprites, PSX games rely on a very specific rasterization behavior. If U or V is decreasing in X @@ -2113,7 +2166,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVert // 2D polygons should have zero change in V on the X axis, and vice versa. if (m_allow_sprite_mode) - SetBatchSpriteMode(zero_dudy && zero_dvdx); + SetBatchSpriteMode(cmd, zero_dudy && zero_dvdx); } bool GPU_HW::IsPossibleSpritePolygon(const BatchVertex* vertices) const @@ -2293,7 +2346,7 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::ExpandLineTriangles(BatchVertex* vertices) return true; } -void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) +void GPU_HW::ComputePolygonUVLimits(const GPUBackendDrawCommand* cmd, BatchVertex* vertices, u32 num_vertices) { DebugAssert(num_vertices == 3 || num_vertices == 4); @@ -2321,10 +2374,10 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) vertices[i].SetUVLimits(min_u, max_u, min_v, max_v); if (ShouldCheckForTexPageOverlap()) - CheckForTexPageOverlap(GSVector4i(min).upl32(GSVector4i(max)).u16to32()); + CheckForTexPageOverlap(cmd, GSVector4i(min).upl32(GSVector4i(max)).u16to32()); } -void GPU_HW::SetBatchDepthBuffer(bool enabled) +void GPU_HW::SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled) { if (m_batch.use_depth_buffer == enabled) return; @@ -2332,13 +2385,13 @@ void GPU_HW::SetBatchDepthBuffer(bool enabled) if 
(m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } m_batch.use_depth_buffer = enabled; } -void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices) +void GPU_HW::CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices) { DebugAssert(num_vertices == 3 || num_vertices == 4); float average_z; @@ -2347,17 +2400,17 @@ void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices) else average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w + vertices[3].w) / 4.0f, 1.0f); - if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold) + if ((average_z - m_last_depth_z) >= g_gpu_settings.gpu_pgxp_depth_clear_threshold) { FlushRender(); CopyAndClearDepthBuffer(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } m_last_depth_z = average_z; } -void GPU_HW::SetBatchSpriteMode(bool enabled) +void GPU_HW::SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled) { if (m_batch.sprite_mode == enabled) return; @@ -2365,7 +2418,7 @@ void GPU_HW::SetBatchSpriteMode(bool enabled) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } GL_INS_FMT("Sprite mode is now {}", enabled ? 
"ON" : "OFF"); @@ -2373,6 +2426,43 @@ void GPU_HW::SetBatchSpriteMode(bool enabled) m_batch.sprite_mode = enabled; } +void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); + + const u32 num_vertices = cmd->num_vertices; + DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); + + const float depth = GetCurrentNormalizedVertexDepth(); + + for (u32 i = 0; i < num_vertices; i += 2) + { + const GSVector2i start_pos = GSVector2i::load(&cmd->vertices[i].x); + const u32 start_color = cmd->vertices[i].color; + const GSVector2i end_pos = GSVector2i::load(&cmd->vertices[i + 1].x); + const u32 end_color = cmd->vertices[i + 1].color; + + const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos); + const GSVector4i rect = + GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty()) + + AddDrawnRectangle(clamped_rect); + DrawLine(GSVector4(bounds), start_color, end_color, depth); + } + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable); + + for (u32 i = 0; i < num_vertices; i += 2) + DrawFunction(cmd, &cmd->vertices[i], &cmd->vertices[i + 1]); + } +} + void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) { DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); @@ -2471,454 +2561,241 @@ void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) m_batch_index_space -= 6; } -void GPU_HW::LoadVertices() +void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) { - if (m_GPUSTAT.check_mask_before_draw) - 
m_current_depth++; + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); + SetBatchSpriteMode(cmd, m_allow_sprite_mode); + DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE); - const GPURenderCommand rc{m_render_command.bits}; - const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg.bits) << 16); + const s32 pos_x = cmd->x; + const s32 pos_y = cmd->y; + const u32 texpage = m_draw_mode.bits; + const u32 color = (cmd->rc.texture_enable && cmd->rc.raw_texture_enable) ? UINT32_C(0x00808080) : cmd->color; const float depth = GetCurrentNormalizedVertexDepth(); + const u32 orig_tex_left = ZeroExtend32(Truncate8(cmd->texcoord)); + const u32 orig_tex_top = ZeroExtend32(cmd->texcoord) >> 8; + const u32 rectangle_width = cmd->width; + const u32 rectangle_height = cmd->height; - switch (rc.primitive) + const GSVector4i rect = + GSVector4i(pos_x, pos_y, pos_x + static_cast(rectangle_width), pos_y + static_cast(rectangle_height)); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + DebugAssert(!clamped_rect.rempty()); + + // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. 
+ u32 tex_top = orig_tex_top; + for (u32 y_offset = 0; y_offset < rectangle_height;) { - case GPUPrimitive::Polygon: + const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); + const float quad_start_y = static_cast(pos_y + static_cast(y_offset)); + const float quad_end_y = quad_start_y + static_cast(quad_height); + const u32 tex_bottom = tex_top + quad_height; + + u32 tex_left = orig_tex_left; + for (u32 x_offset = 0; x_offset < rectangle_width;) { - const bool textured = rc.texture_enable; - const bool raw_texture = textured && rc.raw_texture_enable; - const bool shaded = rc.shading_enable; - const bool pgxp = g_settings.gpu_pgxp_enable; + const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); + const float quad_start_x = static_cast(pos_x + static_cast(x_offset)); + const float quad_end_x = quad_start_x + static_cast(quad_width); + const u32 tex_right = tex_left + quad_width; + const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - const u32 first_color = rc.color_for_first_vertex; - u32 num_vertices = rc.quad_polygon ? 4 : 3; - std::array vertices; - std::array native_vertex_positions; - std::array native_texcoords; - bool valid_w = g_settings.gpu_pgxp_texture_correction; - for (u32 i = 0; i < num_vertices; i++) + if (cmd->rc.texture_enable && ShouldCheckForTexPageOverlap()) { - const u32 vert_color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - const u32 color = raw_texture ? UINT32_C(0x00808080) : vert_color; - const u64 maddr_and_pos = m_fifo.Pop(); - const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; - const u16 texcoord = textured ? 
Truncate16(FifoPop()) : 0; - const s32 native_x = native_vertex_positions[i].x = m_drawing_offset.x + vp.x; - const s32 native_y = native_vertex_positions[i].y = m_drawing_offset.y + vp.y; - native_texcoords[i] = texcoord; - vertices[i].Set(static_cast(native_x), static_cast(native_y), depth, 1.0f, color, texpage, - texcoord, 0xFFFF0000u); - - if (pgxp) - { - valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, - m_drawing_offset.x, m_drawing_offset.y, &vertices[i].x, &vertices[i].y, - &vertices[i].w); - } - } - if (pgxp) - { - if (!valid_w) - { - SetBatchDepthBuffer(false); - if (g_settings.gpu_pgxp_disable_2d) - { - // NOTE: This reads uninitialized data, but it's okay, it doesn't get used. - for (size_t i = 0; i < vertices.size(); i++) - { - BatchVertex& v = vertices[i]; - v.x = static_cast(native_vertex_positions[i].x); - v.y = static_cast(native_vertex_positions[i].y); - v.w = 1.0f; - } - } - else - { - for (BatchVertex& v : vertices) - v.w = 1.0f; - } - } - else if (m_pgxp_depth_buffer) - { - SetBatchDepthBuffer(true); - CheckForDepthClear(vertices.data(), num_vertices); - } + CheckForTexPageOverlap(cmd, GSVector4i(static_cast(tex_left), static_cast(tex_top), + static_cast(tex_right), static_cast(tex_bottom))); } - // Use PGXP to exclude primitives that are definitely 3D. 
- const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); - if (m_resolution_scale > 1 && !is_3d && rc.quad_polygon) - HandleFlippedQuadTextureCoordinates(vertices.data()); - else if (m_allow_sprite_mode) - SetBatchSpriteMode((pgxp && !is_3d) || IsPossibleSpritePolygon(vertices.data())); + const u32 base_vertex = m_batch_vertex_count; + (m_batch_vertex_ptr++) + ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_top), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_top), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_bottom), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_bottom), + uv_limits); + m_batch_vertex_count += 4; + m_batch_vertex_space -= 4; - if (m_sw_renderer) - { - GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices); - FillDrawCommand(cmd, rc); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 0); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 3); + m_batch_index_count += 6; + m_batch_index_space -= 6; - const u32 sw_num_vertices = rc.quad_polygon ? 4 : 3; - for (u32 i = 0; i < sw_num_vertices; i++) - { - GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; - vert->x = native_vertex_positions[i].x; - vert->y = native_vertex_positions[i].y; - vert->texcoord = native_texcoords[i]; - vert->color = vertices[i].color; - } - - m_sw_renderer->PushCommand(cmd); - } - - // Cull polygons which are too large. 
- const GSVector2 v0f = GSVector2::load(&vertices[0].x); - const GSVector2 v1f = GSVector2::load(&vertices[1].x); - const GSVector2 v2f = GSVector2::load(&vertices[2].x); - const GSVector2 min_pos_12 = v1f.min(v2f); - const GSVector2 max_pos_12 = v1f.max(v2f); - const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); - const bool first_tri_culled = (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || - draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || clamped_draw_rect_012.rempty()); - if (first_tri_culled) - { - GL_INS_FMT("Culling off-screen/too-large polygon: {},{} {},{} {},{}", native_vertex_positions[0].x, - native_vertex_positions[0].y, native_vertex_positions[1].x, native_vertex_positions[1].y, - native_vertex_positions[2].x, native_vertex_positions[2].y); - - if (!rc.quad_polygon) - return; - } - else - { - if (textured && m_compute_uv_range) - ComputePolygonUVLimits(vertices.data(), num_vertices); - - AddDrawnRectangle(clamped_draw_rect_012); - AddDrawTriangleTicks(native_vertex_positions[0], native_vertex_positions[1], native_vertex_positions[2], - rc.shading_enable, rc.texture_enable, rc.transparency_enable); - - // Expand lines to triangles (Doom, Soul Blade, etc.) 
- if (!rc.quad_polygon && m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && - ExpandLineTriangles(vertices.data())) - { - return; - } - - const u32 start_index = m_batch_vertex_count; - DebugAssert(m_batch_index_space >= 3); - *(m_batch_index_ptr++) = Truncate16(start_index); - *(m_batch_index_ptr++) = Truncate16(start_index + 1); - *(m_batch_index_ptr++) = Truncate16(start_index + 2); - m_batch_index_count += 3; - m_batch_index_space -= 3; - } - - // quads - if (rc.quad_polygon) - { - const GSVector2 v3f = GSVector2::load(&vertices[3].x); - const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); - - // Cull polygons which are too large. - const bool second_tri_culled = - (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - clamped_draw_rect_123.rempty()); - if (second_tri_culled) - { - GL_INS_FMT("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", - native_vertex_positions[2].x, native_vertex_positions[2].y, native_vertex_positions[1].x, - native_vertex_positions[1].y, native_vertex_positions[0].x, native_vertex_positions[0].y); - - if (first_tri_culled) - return; - } - else - { - if (first_tri_culled && textured && m_compute_uv_range) - ComputePolygonUVLimits(vertices.data(), num_vertices); - - AddDrawnRectangle(clamped_draw_rect_123); - AddDrawTriangleTicks(native_vertex_positions[2], native_vertex_positions[1], native_vertex_positions[3], - rc.shading_enable, rc.texture_enable, rc.transparency_enable); - - const u32 start_index = m_batch_vertex_count; - DebugAssert(m_batch_index_space >= 3); - *(m_batch_index_ptr++) = Truncate16(start_index + 2); - *(m_batch_index_ptr++) = Truncate16(start_index + 1); - *(m_batch_index_ptr++) = Truncate16(start_index + 3); - m_batch_index_count += 3; - 
m_batch_index_space -= 3; - } - } - - if (num_vertices == 4) - { - DebugAssert(m_batch_vertex_space >= 4); - std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4); - m_batch_vertex_ptr += 4; - m_batch_vertex_count += 4; - m_batch_vertex_space -= 4; - } - else - { - DebugAssert(m_batch_vertex_space >= 3); - std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); - m_batch_vertex_ptr += 3; - m_batch_vertex_count += 3; - m_batch_vertex_space -= 3; - } + x_offset += quad_width; + tex_left = 0; } - break; - case GPUPrimitive::Rectangle: + y_offset += quad_height; + tex_top = 0; + } + + AddDrawnRectangle(clamped_rect); + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawRectangleFunction( + cmd->rc.texture_enable, cmd->rc.raw_texture_enable, cmd->rc.transparency_enable); + DrawFunction(cmd); + } +} + +void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) +{ + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); + + // TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below. + const float depth = GetCurrentNormalizedVertexDepth(); + const bool raw_texture = (cmd->rc.texture_enable && cmd->rc.raw_texture_enable); + const u32 num_vertices = cmd->num_vertices; + const u32 texpage = m_draw_mode.bits; + std::array vertices; + for (u32 i = 0; i < num_vertices; i++) + { + const GPUBackendDrawPolygonCommand::Vertex& vert = cmd->vertices[i]; + const GSVector2 vert_pos = GSVector2(GSVector2i::load(&vert.x)); + vertices[i].Set(vert_pos.x, vert_pos.y, depth, 1.0f, raw_texture ? 
UINT32_C(0x00808080) : vert.color, texpage, + vert.texcoord, 0xFFFF0000u); + } + + FinishPolygonDraw(cmd, vertices, num_vertices, false); + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + cmd->rc.shading_enable, cmd->rc.texture_enable, cmd->rc.raw_texture_enable, cmd->rc.transparency_enable); + DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); + if (cmd->num_vertices > 3) + DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); + } +} + +void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) +{ + PrepareDraw(cmd); + + // TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below. + const float depth = GetCurrentNormalizedVertexDepth(); + const bool raw_texture = (cmd->rc.texture_enable && cmd->rc.raw_texture_enable); + const u32 num_vertices = cmd->num_vertices; + const u32 texpage = m_draw_mode.bits; + std::array vertices; + for (u32 i = 0; i < num_vertices; i++) + { + const GPUBackendDrawPrecisePolygonCommand::Vertex& vert = cmd->vertices[i]; + vertices[i].Set(vert.x, vert.y, depth, vert.w, raw_texture ? UINT32_C(0x00808080) : vert.color, texpage, + vert.texcoord, 0xFFFF0000u); + } + + const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w; + SetBatchDepthBuffer(cmd, use_depth); + if (use_depth) + CheckForDepthClear(cmd, vertices.data(), num_vertices); + + // Use PGXP to exclude primitives that are definitely 3D. 
+ const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); + FinishPolygonDraw(cmd, vertices, num_vertices, is_3d); + + if (ShouldDrawWithSoftwareRenderer()) + { + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + cmd->rc.shading_enable, cmd->rc.texture_enable, cmd->rc.raw_texture_enable, cmd->rc.transparency_enable); + GPUBackendDrawPolygonCommand::Vertex sw_vertices[4]; + for (u32 i = 0; i < cmd->num_vertices; i++) { - const u32 color = (rc.texture_enable && rc.raw_texture_enable) ? UINT32_C(0x00808080) : rc.color_for_first_vertex; - const GPUVertexPosition vp{FifoPop()}; - const s32 pos_x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); - const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); - - const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(FifoPop()) : 0); - u32 orig_tex_left = ZeroExtend16(texcoord_x); - u32 orig_tex_top = ZeroExtend16(texcoord_y); - u32 rectangle_width; - u32 rectangle_height; - switch (rc.rectangle_size) - { - case GPUDrawRectangleSize::R1x1: - rectangle_width = 1; - rectangle_height = 1; - break; - case GPUDrawRectangleSize::R8x8: - rectangle_width = 8; - rectangle_height = 8; - break; - case GPUDrawRectangleSize::R16x16: - rectangle_width = 16; - rectangle_height = 16; - break; - default: - { - const u32 width_and_height = FifoPop(); - rectangle_width = (width_and_height & VRAM_WIDTH_MASK); - rectangle_height = ((width_and_height >> 16) & VRAM_HEIGHT_MASK); - } - break; - } - - const GSVector4i rect = - GSVector4i(pos_x, pos_y, pos_x + static_cast(rectangle_width), pos_y + static_cast(rectangle_height)); - const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); - if (clamped_rect.rempty()) [[unlikely]] - { - GL_INS_FMT("Culling off-screen rectangle {}", rect); - return; - } - - // we can split the rectangle up into potentially 8 quads - SetBatchDepthBuffer(false); - 
SetBatchSpriteMode(m_allow_sprite_mode); - DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && - m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE); - - // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. - u32 tex_top = orig_tex_top; - for (u32 y_offset = 0; y_offset < rectangle_height;) - { - const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); - const float quad_start_y = static_cast(pos_y + static_cast(y_offset)); - const float quad_end_y = quad_start_y + static_cast(quad_height); - const u32 tex_bottom = tex_top + quad_height; - - u32 tex_left = orig_tex_left; - for (u32 x_offset = 0; x_offset < rectangle_width;) - { - const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); - const float quad_start_x = static_cast(pos_x + static_cast(x_offset)); - const float quad_end_x = quad_start_x + static_cast(quad_width); - const u32 tex_right = tex_left + quad_width; - const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - - if (rc.texture_enable && ShouldCheckForTexPageOverlap()) - { - CheckForTexPageOverlap(GSVector4i(static_cast(tex_left), static_cast(tex_top), - static_cast(tex_right), static_cast(tex_bottom))); - } - - const u32 base_vertex = m_batch_vertex_count; - (m_batch_vertex_ptr++) - ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_top), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_top), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_bottom), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_bottom), - uv_limits); - m_batch_vertex_count += 4; - 
m_batch_vertex_space -= 4; - - *(m_batch_index_ptr++) = Truncate16(base_vertex + 0); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 3); - m_batch_index_count += 6; - m_batch_index_space -= 6; - - x_offset += quad_width; - tex_left = 0; - } - - y_offset += quad_height; - tex_top = 0; - } - - AddDrawnRectangle(clamped_rect); - AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); - - if (m_sw_renderer) - { - GPUBackendDrawRectangleCommand* cmd = m_sw_renderer->NewDrawRectangleCommand(); - FillDrawCommand(cmd, rc); - cmd->color = color; - cmd->x = pos_x; - cmd->y = pos_y; - cmd->width = static_cast(rectangle_width); - cmd->height = static_cast(rectangle_height); - cmd->texcoord = (static_cast(texcoord_y) << 8) | static_cast(texcoord_x); - m_sw_renderer->PushCommand(cmd); - } + const GPUBackendDrawPrecisePolygonCommand::Vertex& src = cmd->vertices[i]; + sw_vertices[i] = GPUBackendDrawPolygonCommand::Vertex{ + .x = src.native_x, .y = src.native_y, .color = src.color, .texcoord = src.texcoord}; } - break; - case GPUPrimitive::Line: - { - SetBatchDepthBuffer(false); + DrawFunction(cmd, &sw_vertices[0], &sw_vertices[1], &sw_vertices[2]); + if (cmd->num_vertices > 3) + DrawFunction(cmd, &sw_vertices[2], &sw_vertices[1], &sw_vertices[3]); + } +} - if (!rc.polyline) - { - DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); +ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand* cmd, + std::array& vertices, u32 num_vertices, bool is_3d) +{ + // Use PGXP to exclude primitives that are definitely 3D. 
+ if (m_resolution_scale > 1 && !is_3d && cmd->rc.quad_polygon) + HandleFlippedQuadTextureCoordinates(cmd, vertices.data()); + else if (m_allow_sprite_mode) + SetBatchSpriteMode(cmd, !is_3d || IsPossibleSpritePolygon(vertices.data())); - u32 start_color, end_color; - GPUVertexPosition start_pos, end_pos; - if (rc.shading_enable) - { - start_color = rc.color_for_first_vertex; - start_pos.bits = FifoPop(); - end_color = FifoPop() & UINT32_C(0x00FFFFFF); - end_pos.bits = FifoPop(); - } - else - { - start_color = end_color = rc.color_for_first_vertex; - start_pos.bits = FifoPop(); - end_pos.bits = FifoPop(); - } + const GSVector2 v0f = GSVector2::load(&vertices[0].x); + const GSVector2 v1f = GSVector2::load(&vertices[1].x); + const GSVector2 v2f = GSVector2::load(&vertices[2].x); + const GSVector2 min_pos_12 = v1f.min(v2f); + const GSVector2 max_pos_12 = v1f.max(v2f); + const GSVector4i draw_rect_012 = + GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); + DebugAssert(draw_rect_012.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_012.height() <= MAX_PRIMITIVE_HEIGHT && + !clamped_draw_rect_012.rempty()); - const GSVector2i vstart_pos = GSVector2i(start_pos.x + m_drawing_offset.x, start_pos.y + m_drawing_offset.y); - const GSVector2i vend_pos = GSVector2i(end_pos.x + m_drawing_offset.x, end_pos.y + m_drawing_offset.y); - const GSVector4i bounds = GSVector4i::xyxy(vstart_pos, vend_pos); - const GSVector4i rect = GSVector4i::xyxy(vstart_pos.min_s32(vend_pos), vstart_pos.max_s32(vend_pos)) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + if (cmd->rc.texture_enable && m_compute_uv_range) + ComputePolygonUVLimits(cmd, vertices.data(), num_vertices); - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - 
{ - GL_INS_FMT("Culling too-large/off-screen line: {},{} - {},{}", bounds.x, bounds.y, bounds.z, bounds.w); - return; - } + AddDrawnRectangle(clamped_draw_rect_012); - AddDrawnRectangle(clamped_rect); - AddDrawLineTicks(clamped_rect, rc.shading_enable); + // Expand lines to triangles (Doom, Soul Blade, etc.) + if (!cmd->rc.quad_polygon && m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && + ExpandLineTriangles(vertices.data())) + { + return; + } - // TODO: Should we do a PGXP lookup here? Most lines are 2D. - DrawLine(GSVector4(bounds), start_color, end_color, depth); + const u32 start_index = m_batch_vertex_count; + DebugAssert(m_batch_index_space >= 3); + *(m_batch_index_ptr++) = Truncate16(start_index); + *(m_batch_index_ptr++) = Truncate16(start_index + 1); + *(m_batch_index_ptr++) = Truncate16(start_index + 2); + m_batch_index_count += 3; + m_batch_index_space -= 3; - if (m_sw_renderer) - { - GPUBackendDrawLineCommand* cmd = m_sw_renderer->NewDrawLineCommand(2); - FillDrawCommand(cmd, rc); - GSVector4i::storel(&cmd->vertices[0], bounds); - cmd->vertices[0].color = start_color; - GSVector4i::storeh(&cmd->vertices[1], bounds); - cmd->vertices[1].color = end_color; - m_sw_renderer->PushCommand(cmd); - } - } - else - { - // Multiply by two because we don't use line strips. 
- const u32 num_vertices = GetPolyLineVertexCount(); - DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); + // quads, use num_vertices here, because the first half might be culled + if (num_vertices == 4) + { + const GSVector2 v3f = GSVector2::load(&vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); + DebugAssert(draw_rect_123.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_123.height() <= MAX_PRIMITIVE_HEIGHT && + !clamped_draw_rect_123.rempty()); + AddDrawnRectangle(clamped_draw_rect_123); - const bool shaded = rc.shading_enable; + DebugAssert(m_batch_index_space >= 3); + *(m_batch_index_ptr++) = Truncate16(start_index + 2); + *(m_batch_index_ptr++) = Truncate16(start_index + 1); + *(m_batch_index_ptr++) = Truncate16(start_index + 3); + m_batch_index_count += 3; + m_batch_index_space -= 3; - u32 buffer_pos = 0; - const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - GSVector2i start_pos = GSVector2i(start_vp.x + m_drawing_offset.x, start_vp.y + m_drawing_offset.y); - u32 start_color = rc.color_for_first_vertex; - - GPUBackendDrawLineCommand* cmd; - if (m_sw_renderer) - { - cmd = m_sw_renderer->NewDrawLineCommand(num_vertices); - FillDrawCommand(cmd, rc); - GSVector2i::store(&cmd->vertices[0].x, start_pos); - cmd->vertices[0].color = start_color; - } - else - { - cmd = nullptr; - } - - for (u32 i = 1; i < num_vertices; i++) - { - const u32 end_color = shaded ? 
(m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color; - const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; - const GSVector2i end_pos = GSVector2i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y); - const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos); - const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - GL_INS_FMT("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); - } - else - { - AddDrawnRectangle(clamped_rect); - AddDrawLineTicks(clamped_rect, rc.shading_enable); - - // TODO: Should we do a PGXP lookup here? Most lines are 2D. - DrawLine(GSVector4(bounds), start_color, end_color, depth); - } - - start_pos = end_pos; - start_color = end_color; - - if (cmd) - { - GSVector2i::store(&cmd->vertices[i], end_pos); - cmd->vertices[i].color = end_color; - } - } - - if (cmd) - m_sw_renderer->PushCommand(cmd); - } - } - break; - - default: - UnreachableCode(); - break; + DebugAssert(m_batch_vertex_space >= 4); + std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4); + m_batch_vertex_ptr += 4; + m_batch_vertex_count += 4; + m_batch_vertex_space -= 4; + } + else + { + DebugAssert(m_batch_vertex_space >= 3); + std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); + m_batch_vertex_ptr += 3; + m_batch_vertex_count += 3; + m_batch_vertex_space -= 3; } } @@ -2963,7 +2840,7 @@ bool GPU_HW::BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacemen return true; } -ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) +ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(const GPUBackendDrawCommand* cmd, GSVector4i uv_rect) { DebugAssert((m_texpage_dirty != 0 || 
m_texture_dumping) && m_batch.texture_mode != BatchTextureMode::Disabled); @@ -3004,7 +2881,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } // We need to swap the dirty tracking over to drawn/written. @@ -3046,7 +2923,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } UpdateVRAMReadTexture(update_drawn, update_written); @@ -3099,26 +2976,27 @@ void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices MapGPUBuffer(required_vertices, required_indices); } -void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand() +void GPU_HW::EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd) { u32 required_vertices; u32 required_indices; - switch (m_render_command.primitive) + switch (cmd->type) { - case GPUPrimitive::Polygon: + case GPUBackendCommandType::DrawPolygon: + case GPUBackendCommandType::DrawPrecisePolygon: required_vertices = 4; // assume quad, in case of expansion required_indices = 6; break; - case GPUPrimitive::Rectangle: + case GPUBackendCommandType::DrawRectangle: required_vertices = MAX_VERTICES_FOR_RECTANGLE; // TODO: WRong required_indices = MAX_VERTICES_FOR_RECTANGLE; break; - case GPUPrimitive::Line: + case GPUBackendCommandType::DrawLine: { // assume expansion - const u32 vert_count = m_render_command.polyline ? 
GetPolyLineVertexCount() : 2; - required_vertices = vert_count * 4; - required_indices = vert_count * 6; + const GPUBackendDrawLineCommand* lcmd = static_cast(cmd); + required_vertices = lcmd->num_vertices * 4; + required_indices = lcmd->num_vertices * 6; } break; @@ -3153,95 +3031,31 @@ ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const return 1.0f - (static_cast(m_current_depth) / 65535.0f); } -void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) +void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) { - const bool current_enabled = (m_sw_renderer != nullptr); - const bool new_enabled = g_settings.gpu_use_software_renderer_for_readbacks; - const bool use_thread = !g_settings.gpu_texture_cache; - if (current_enabled == new_enabled) - { - if (m_sw_renderer) - m_sw_renderer->SetThreadEnabled(use_thread); - return; - } + FlushRender(); - if (!new_enabled) - { - if (m_sw_renderer) - m_sw_renderer->Shutdown(); - m_sw_renderer.reset(); - return; - } - - std::unique_ptr sw_renderer = std::make_unique(); - if (!sw_renderer->Initialize(use_thread)) - return; - - // We need to fill in the SW renderer's VRAM with the current state for hot toggles. - if (copy_vram_from_hw) - { - FlushRender(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - - // Sync the drawing area and CLUT. 
- GPUBackendSetDrawingAreaCommand* clip_cmd = sw_renderer->NewSetDrawingAreaCommand(); - clip_cmd->new_area = m_drawing_area; - sw_renderer->PushCommand(clip_cmd); - - if (IsCLUTValid()) - { - GPUBackendUpdateCLUTCommand* clut_cmd = sw_renderer->NewUpdateCLUTCommand(); - FillBackendCommandParameters(clut_cmd); - clut_cmd->reg.bits = static_cast(m_current_clut_reg_bits); - clut_cmd->clut_is_8bit = m_current_clut_is_8bit; - sw_renderer->PushCommand(clut_cmd); - } - } - - m_sw_renderer = std::move(sw_renderer); -} - -void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const -{ - cmd->params.bits = 0; - cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; - cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; - cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; - cmd->params.interlaced_rendering = m_GPUSTAT.SkipDrawingToActiveField(); -} - -void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const -{ - FillBackendCommandParameters(cmd); - cmd->rc.bits = rc.bits; - cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; - cmd->palette.bits = m_draw_mode.palette_reg.bits; - cmd->window = m_draw_mode.texture_window; -} - -void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ GL_SCOPE_FMT("FillVRAM({},{} => {},{} ({}x{}) with 0x{:08X}", x, y, x + width, y + height, width, height, color); DeactivateROV(); - if (m_sw_renderer) - { - GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - cmd->color = color; - m_sw_renderer->PushCommand(cmd); - } + // if (m_sw_renderer) + // { + // GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand(); + // FillBackendCommandParameters(cmd); + // cmd->x = static_cast(x); + // cmd->y = static_cast(y); + // cmd->width = static_cast(width); + // cmd->height = 
static_cast(height); + // cmd->color = color; + // m_sw_renderer->PushCommand(cmd); + // } GL_INS_FMT("Dirty draw area before: {}", m_vram_dirty_draw_rect); const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); // If TC is enabled, we have to update local memory. - if (m_use_texture_cache && !IsInterlacedRenderingEnabled()) + if (m_use_texture_cache && !params.interlaced_rendering) { AddWrittenRectangle(bounds); GPU_SW_Rasterizer::FillVRAM(x, y, width, height, color, false, 0); @@ -3255,7 +3069,7 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)); g_gpu_device->SetPipeline( - m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(IsInterlacedRenderingEnabled())].get()); + m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(params.interlaced_rendering)].get()); const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetViewportAndScissor(scaled_bounds); @@ -3277,7 +3091,7 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) // drop precision unless true colour is enabled uniforms.u_fill_color = GPUDevice::RGBA8ToFloat(m_true_color ? 
color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color))); - uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); + uniforms.u_interlaced_displayed_field = params.active_line_lsb; g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); @@ -3286,11 +3100,13 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { + FlushRender(); + GL_PUSH_FMT("ReadVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height); - if (m_sw_renderer) + if (ShouldDrawWithSoftwareRenderer()) { - m_sw_renderer->Sync(false); + GL_INS("VRAM is already up to date due to SW draws."); GL_POP(); return; } @@ -3344,8 +3160,10 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) RestoreDeviceContext(); } -void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) +void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) { + FlushRender(); + GL_SCOPE_FMT("UpdateVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height); // TODO: Handle wrapped transfers... 
break them up or something @@ -3353,26 +3171,10 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b DebugAssert(bounds.right <= static_cast(VRAM_WIDTH) && bounds.bottom <= static_cast(VRAM_HEIGHT)); AddWrittenRectangle(bounds); - if (m_sw_renderer) - { - const u32 num_words = width * height; - GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); - FillBackendCommandParameters(cmd); - cmd->params.set_mask_while_drawing = set_mask; - cmd->params.check_mask_before_draw = check_mask; - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - std::memcpy(cmd->data, data, sizeof(u16) * num_words); - m_sw_renderer->PushCommand(cmd); - } - else - { - GPUTextureCache::WriteVRAM(x, y, width, height, data, set_mask, check_mask, bounds); - } + GPUTextureCache::WriteVRAM(x, y, width, height, data, params.set_mask_while_drawing, params.check_mask_before_draw, + bounds); - if (check_mask) + if (params.check_mask_before_draw) { // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; @@ -3387,7 +3189,8 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b } } - UpdateVRAMOnGPU(x, y, width, height, data, sizeof(u16) * width, set_mask, check_mask, bounds); + UpdateVRAMOnGPU(x, y, width, height, data, sizeof(u16) * width, params.set_mask_while_drawing, + params.check_mask_before_draw, bounds); } void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, @@ -3463,8 +3266,11 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da RestoreDeviceContext(); } -void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) { + FlushRender(); + 
GL_SCOPE_FMT("CopyVRAM({}x{} @ {},{} => {},{}", width, height, src_x, src_y, dst_x, dst_y); // masking enabled, oversized, or overlapping @@ -3473,7 +3279,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 const bool intersect_with_draw = m_vram_dirty_draw_rect.rintersects(src_bounds); const bool intersect_with_write = m_vram_dirty_write_rect.rintersects(src_bounds); const bool use_shader = - (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + (params.set_mask_while_drawing || params.check_mask_before_draw || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT) || (!intersect_with_draw && !intersect_with_write); @@ -3482,24 +3288,17 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 if (m_use_texture_cache && !GPUTextureCache::IsRectDrawn(src_bounds)) { GL_INS("Performed in local memory."); - GPUTextureCache::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, m_GPUSTAT.set_mask_while_drawing, - m_GPUSTAT.check_mask_before_draw, src_bounds, dst_bounds); + GPUTextureCache::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params.set_mask_while_drawing, + params.check_mask_before_draw, src_bounds, dst_bounds); UpdateVRAMOnGPU(dst_bounds.left, dst_bounds.top, dst_bounds.width(), dst_bounds.height(), &g_vram[dst_bounds.top * VRAM_WIDTH + dst_bounds.left], VRAM_WIDTH * sizeof(u16), false, false, dst_bounds); return; } - else if (m_sw_renderer) + else if (ShouldDrawWithSoftwareRenderer()) { - GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->src_x = static_cast(src_x); - cmd->src_y = static_cast(src_y); - cmd->dst_x = static_cast(dst_x); - cmd->dst_y = static_cast(dst_y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - 
m_sw_renderer->PushCommand(cmd); + GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params.set_mask_while_drawing, + params.check_mask_before_draw); } if (use_shader || IsUsingMultisampling()) @@ -3533,20 +3332,20 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 static_cast(m_vram_texture->GetWidth()), static_cast(m_vram_texture->GetHeight()), static_cast(m_resolution_scale), - m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, + params.set_mask_while_drawing ? 1u : 0u, GetCurrentNormalizedVertexDepth()}; // VRAM read texture should already be bound. const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetViewportAndScissor(dst_bounds_scaled); g_gpu_device->SetPipeline( - m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && m_write_mask_as_depth)].get()); + m_vram_copy_pipelines[BoolToUInt8(params.check_mask_before_draw && m_write_mask_as_depth)].get()); g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); - if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) + if (params.check_mask_before_draw && !m_pgxp_depth_buffer) m_current_depth++; return; @@ -3581,7 +3380,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 AddUnclampedDrawnRectangle(dst_bounds); } - if (m_GPUSTAT.check_mask_before_draw) + if (params.check_mask_before_draw) { // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; @@ -3594,19 +3393,29 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 m_vram_read_texture->MakeReadyForSampling(); } -void GPU_HW::DispatchRenderCommand() +void GPU_HW::ClearCache() { - const GPURenderCommand rc{m_render_command.bits}; + FlushRender(); + // Force the check below 
to fail. + m_draw_mode.bits = INVALID_DRAW_MODE_BITS; +} + +void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd) +{ // TODO: avoid all this for vertex loading, only do when the type of draw changes - BatchTextureMode texture_mode = rc.IsTexturingEnabled() ? m_batch.texture_mode : BatchTextureMode::Disabled; + BatchTextureMode texture_mode = cmd->rc.IsTexturingEnabled() ? m_batch.texture_mode : BatchTextureMode::Disabled; GPUTextureCache::SourceKey texture_cache_key = m_batch.texture_cache_key; - if (rc.IsTexturingEnabled()) + if (cmd->rc.IsTexturingEnabled()) { // texture page changed - check that the new page doesn't intersect the drawing area - if (m_draw_mode.IsTexturePageChanged() || texture_mode == BatchTextureMode::Disabled) + if (((m_draw_mode.bits ^ cmd->draw_mode.bits) & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK) != 0 || + (cmd->draw_mode.IsUsingPalette() && m_draw_mode.palette_reg.bits != cmd->palette.bits) || + texture_mode == BatchTextureMode::Disabled) + { - m_draw_mode.ClearTexturePageChangedFlag(); + m_draw_mode.mode_reg.bits = cmd->draw_mode.bits; + m_draw_mode.palette_reg.bits = cmd->palette.bits; // start by assuming we can use the TC bool use_texture_cache = m_use_texture_cache; @@ -3682,39 +3491,41 @@ void GPU_HW::DispatchRenderCommand() } } - DebugAssert((rc.IsTexturingEnabled() && (texture_mode == BatchTextureMode::PageTexture && - texture_cache_key.mode == m_draw_mode.mode_reg.texture_mode) || + DebugAssert((cmd->rc.IsTexturingEnabled() && (texture_mode == BatchTextureMode::PageTexture && + texture_cache_key.mode == m_draw_mode.mode_reg.texture_mode) || texture_mode == static_cast( (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit) ? 
GPUTextureMode::Direct16Bit : m_draw_mode.mode_reg.texture_mode)) || - (!rc.IsTexturingEnabled() && texture_mode == BatchTextureMode::Disabled)); + (!cmd->rc.IsTexturingEnabled() && texture_mode == BatchTextureMode::Disabled)); DebugAssert(!(m_texpage_dirty & TEXPAGE_DIRTY_PAGE_RECT) || texture_mode == BatchTextureMode::PageTexture || - !rc.IsTexturingEnabled()); + !cmd->rc.IsTexturingEnabled()); // has any state changed which requires a new batch? // Reverse blending breaks with mixed transparent and opaque pixels, so we have to do one draw per polygon. // If we have fbfetch, we don't need to draw it in two passes. Test case: Suikoden 2 shadows. + // TODO: make this suck less.. somehow. probably arrange the relevant bits in a comparable pattern const GPUTransparencyMode transparency_mode = - rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; - const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; + cmd->rc.transparency_enable ? 
cmd->draw_mode.transparency_mode : GPUTransparencyMode::Disabled; + const bool dithering_enable = (!m_true_color && cmd->draw_mode.dither_enable); if (!IsFlushed()) { if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || - dithering_enable != m_batch.dithering || m_batch_ubo_data.u_texture_window_bits != m_draw_mode.texture_window || + dithering_enable != m_batch.dithering || m_batch_ubo_data.u_texture_window_bits != cmd->window || + m_batch_ubo_data.u_set_mask_while_drawing != BoolToUInt32(cmd->params.set_mask_while_drawing) || (texture_mode == BatchTextureMode::PageTexture && m_batch.texture_cache_key != texture_cache_key)) { FlushRender(); } } - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); if (m_batch_index_count == 0) { // transparency mode change - const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + const bool check_mask_before_draw = cmd->params.check_mask_before_draw; if (transparency_mode != GPUTransparencyMode::Disabled && !m_rov_active && !m_prefer_shader_blend && !NeedsShaderBlending(transparency_mode, texture_mode, check_mask_before_draw)) { @@ -3728,7 +3539,7 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_data.u_dst_alpha_factor = dst_alpha_factor; } - const bool set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + const bool set_mask_while_drawing = cmd->params.set_mask_while_drawing; if (m_batch.check_mask_before_draw != check_mask_before_draw || m_batch.set_mask_while_drawing != set_mask_while_drawing) { @@ -3738,10 +3549,10 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(set_mask_while_drawing); } - m_batch.interlacing = IsInterlacedRenderingEnabled(); + m_batch.interlacing = cmd->params.interlaced_rendering; if (m_batch.interlacing) { - const u32 displayed_field = GetActiveLineLSB(); + const 
u32 displayed_field = cmd->params.active_line_lsb; m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field); m_batch_ubo_data.u_interlaced_displayed_field = displayed_field; } @@ -3752,51 +3563,36 @@ void GPU_HW::DispatchRenderCommand() m_batch.dithering = dithering_enable; m_batch.texture_cache_key = texture_cache_key; - if (m_batch_ubo_data.u_texture_window_bits != m_draw_mode.texture_window) + if (m_batch_ubo_data.u_texture_window_bits != cmd->window) { - m_batch_ubo_data.u_texture_window_bits = m_draw_mode.texture_window; - m_texture_window_active = (m_draw_mode.texture_window != GPUTextureWindow{0xFF, 0xFF, 0x00, 0x00}); - GSVector4i::store(&m_batch_ubo_data.u_texture_window[0], - GSVector4i::load32(&m_draw_mode.texture_window).u8to32()); + m_batch_ubo_data.u_texture_window_bits = cmd->window; + m_texture_window_active = (cmd->window != GPUTextureWindow{{0xFF, 0xFF, 0x00, 0x00}}); + GSVector4i::store(&m_batch_ubo_data.u_texture_window[0], GSVector4i::load32(&cmd->window).u8to32()); m_batch_ubo_dirty = true; } if (m_drawing_area_changed) { m_drawing_area_changed = false; - SetClampedDrawingArea(); SetScissor(); if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) { FlushRender(); CopyAndClearDepthBuffer(); - EnsureVertexBufferSpaceForCurrentCommand(); - } - - if (m_sw_renderer) - { - GPUBackendSetDrawingAreaCommand* cmd = m_sw_renderer->NewSetDrawingAreaCommand(); - cmd->new_area = m_drawing_area; - m_sw_renderer->PushCommand(cmd); + EnsureVertexBufferSpaceForCommand(cmd); } } } - LoadVertices(); + if (cmd->params.check_mask_before_draw) + m_current_depth++; } void GPU_HW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) { - // Not done in HW, but need to forward through to SW if using that for readbacks - if (m_sw_renderer) - { - GPUBackendUpdateCLUTCommand* cmd = m_sw_renderer->NewUpdateCLUTCommand(); - FillBackendCommandParameters(cmd); - cmd->reg.bits = reg.bits; - cmd->clut_is_8bit = clut_is_8bit; - 
m_sw_renderer->PushCommand(cmd); - } + if (ShouldDrawWithSoftwareRenderer()) + GPU_SW_Rasterizer::UpdateCLUT(reg, clut_is_8bit); } void GPU_HW::FlushRender() @@ -3864,7 +3660,13 @@ void GPU_HW::FlushRender() } } -void GPU_HW::UpdateDisplay() +void GPU_HW::DrawingAreaChanged() +{ + m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area); + m_drawing_area_changed = true; +} + +void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { FlushRender(); DeactivateROV(); @@ -3873,7 +3675,7 @@ void GPU_HW::UpdateDisplay() GPUTextureCache::Compact(); - if (g_settings.debugging.show_vram) + if (g_gpu_settings.debugging.show_vram) { if (IsUsingMultisampling()) { @@ -3889,30 +3691,30 @@ void GPU_HW::UpdateDisplay() return; } - const bool interlaced = IsInterlacedDisplayEnabled(); - const u32 interlaced_field = GetInterlacedDisplayField(); - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; - const u32 scaled_vram_offset_x = m_crtc_state.display_vram_left * resolution_scale; - const u32 scaled_vram_offset_y = (m_crtc_state.display_vram_top * resolution_scale) + - ((interlaced && m_GPUSTAT.vertical_resolution) ? interlaced_field : 0); - const u32 scaled_display_width = m_crtc_state.display_vram_width * resolution_scale; - const u32 scaled_display_height = m_crtc_state.display_vram_height * resolution_scale; + const bool interlaced = cmd->interlaced_display_enabled; + const u32 interlaced_field = cmd->interlaced_display_field; + const u32 resolution_scale = cmd->display_24bit ? 1 : m_resolution_scale; + const u32 scaled_vram_offset_x = cmd->display_vram_left * resolution_scale; + const u32 scaled_vram_offset_y = (cmd->display_vram_top * resolution_scale) + + ((interlaced && cmd->interlaced_display_interleaved) ? 
interlaced_field : 0); + const u32 scaled_display_width = cmd->display_vram_width * resolution_scale; + const u32 scaled_display_height = cmd->display_vram_height * resolution_scale; const u32 read_height = interlaced ? (scaled_display_height / 2u) : scaled_display_height; - const u32 line_skip = BoolToUInt32(interlaced && m_GPUSTAT.vertical_resolution); + const u32 line_skip = cmd->interlaced_display_interleaved; bool drew_anything = false; // Don't bother grabbing depth if postfx doesn't need it. - GPUTexture* depth_source = (!m_GPUSTAT.display_area_color_depth_24 && m_pgxp_depth_buffer && - PostProcessing::InternalChain.NeedsDepthBuffer()) ? - (m_depth_was_copied ? m_vram_depth_copy_texture.get() : m_vram_depth_texture.get()) : - nullptr; + GPUTexture* depth_source = + (!cmd->display_24bit && m_pgxp_depth_buffer && PostProcessing::InternalChain.NeedsDepthBuffer()) ? + (m_depth_was_copied ? m_vram_depth_copy_texture.get() : m_vram_depth_texture.get()) : + nullptr; - if (IsDisplayDisabled()) + if (cmd->display_disabled) { ClearDisplayTexture(); return; } - else if (!m_GPUSTAT.display_area_color_depth_24 && !IsUsingMultisampling() && + else if (!cmd->display_24bit && !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight() && !PostProcessing::InternalChain.IsActive()) @@ -3967,14 +3769,14 @@ void GPU_HW::UpdateDisplay() else { g_gpu_device->SetRenderTarget(m_vram_extract_texture.get()); - g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)].get()); + g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(cmd->display_24bit)].get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); } - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 skip_x = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * 
resolution_scale; + const u32 reinterpret_start_x = cmd->X * resolution_scale; + const u32 skip_x = (cmd->display_vram_left - cmd->X) * resolution_scale; GL_INS_FMT("VRAM extract, depth = {}, 24bpp = {}, skip_x = {}, line_skip = {}", depth_source ? "yes" : "no", - m_GPUSTAT.display_area_color_depth_24.GetValue(), skip_x, line_skip); + cmd->display_24bit.GetValue(), skip_x, line_skip); GL_INS_FMT("Source: {},{} => {},{} ({}x{})", reinterpret_start_x, scaled_vram_offset_y, reinterpret_start_x + scaled_display_width, scaled_vram_offset_y + read_height, scaled_display_width, read_height); @@ -4020,7 +3822,7 @@ void GPU_HW::UpdateDisplay() } } - if (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24) + if (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit) { DebugAssert(m_display_texture); DownsampleFramebuffer(); @@ -4231,68 +4033,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, ds_width, ds_height); } -void GPU_HW::DrawRendererStats() +std::unique_ptr GPUBackend::CreateHardwareBackend() { - if (ImGui::CollapsingHeader("Renderer Statistics", ImGuiTreeNodeFlags_DefaultOpen)) - { - static const ImVec4 active_color{1.0f, 1.0f, 1.0f, 1.0f}; - static const ImVec4 inactive_color{0.4f, 0.4f, 0.4f, 1.0f}; - - ImGui::Columns(2); - ImGui::SetColumnWidth(0, 200.0f * ImGuiManager::GetGlobalScale()); - - ImGui::TextUnformatted("Resolution Scale:"); - ImGui::NextColumn(); - ImGui::Text("%u (VRAM %ux%u)", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, - VRAM_HEIGHT * m_resolution_scale); - ImGui::NextColumn(); - - ImGui::TextUnformatted("Effective Display Resolution:"); - ImGui::NextColumn(); - ImGui::Text("%ux%u", m_crtc_state.display_vram_width * m_resolution_scale, - m_crtc_state.display_vram_height * m_resolution_scale); - ImGui::NextColumn(); - - ImGui::TextUnformatted("True Color:"); - 
ImGui::NextColumn(); - ImGui::TextColored(m_true_color ? active_color : inactive_color, m_true_color ? "Enabled" : "Disabled"); - ImGui::NextColumn(); - - const bool scaled_dithering = (m_resolution_scale > 1 && g_settings.gpu_scaled_dithering); - ImGui::TextUnformatted("Scaled Dithering:"); - ImGui::NextColumn(); - ImGui::TextColored(scaled_dithering ? active_color : inactive_color, scaled_dithering ? "Enabled" : "Disabled"); - ImGui::NextColumn(); - - ImGui::TextUnformatted("Texture Filtering:"); - ImGui::NextColumn(); - ImGui::TextColored((m_texture_filtering != GPUTextureFilter::Nearest) ? active_color : inactive_color, "%s", - Settings::GetTextureFilterDisplayName(m_texture_filtering)); - ImGui::NextColumn(); - - ImGui::TextUnformatted("PGXP:"); - ImGui::NextColumn(); - ImGui::TextColored(g_settings.gpu_pgxp_enable ? active_color : inactive_color, "Geom"); - ImGui::SameLine(); - ImGui::TextColored((g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling) ? active_color : inactive_color, - "Cull"); - ImGui::SameLine(); - ImGui::TextColored( - (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_texture_correction) ? active_color : inactive_color, "Tex"); - ImGui::SameLine(); - ImGui::TextColored((g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_vertex_cache) ? 
active_color : inactive_color, - "Cache"); - ImGui::NextColumn(); - - ImGui::Columns(1); - } -} - -std::unique_ptr GPU::CreateHardwareRenderer(Error* error) -{ - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize(error)) - gpu.reset(); - - return gpu; + return std::make_unique(); } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index a35f88bad..280baf400 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -3,7 +3,7 @@ #pragma once -#include "gpu.h" +#include "gpu_backend.h" #include "gpu_hw_texture_cache.h" #include "util/gpu_device.h" @@ -21,7 +21,9 @@ class GPU_SW_Backend; struct GPUBackendCommand; struct GPUBackendDrawCommand; -class GPU_HW final : public GPU +// TODO: Move to cpp +// TODO: Rename to GPUHWBackend, preserved to avoid conflicts. +class GPU_HW final : public GPUBackend { public: enum class BatchRenderMode : u8 @@ -63,21 +65,40 @@ public: GPU_HW(); ~GPU_HW() override; - const Threading::Thread* GetSWThread() const override; bool IsHardwareRenderer() const override; - bool Initialize(Error* error) override; - void Reset(bool clear_vram) override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; + bool Initialize(bool upload_vram, Error* error) override; + + u32 GetResolutionScale() const override; void RestoreDeviceContext() override; +protected: void UpdateSettings(const Settings& old_settings) override; - u32 GetResolutionScale() const override; void UpdateResolutionScale() override; - void UpdateDisplay() override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) override; + void ClearCache() override; + void 
UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; + void OnBufferSwapped() override; + + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; + void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; + void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + + void FlushRender() override; + void DrawingAreaChanged() override; + void ClearVRAM() override; + + void LoadState(const GPUBackendLoadStateCommand* cmd) override; + + void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) override; private: enum : u32 @@ -86,6 +107,7 @@ private: MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) * (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u), NUM_TEXTURE_MODES = static_cast(BatchTextureMode::MaxCount), + INVALID_DRAW_MODE_BITS = 0xFFFFFFFFu, }; enum : u8 { @@ -164,8 +186,6 @@ private: bool CompileResolutionDependentPipelines(Error* error); bool CompileDownsamplePipelines(Error* error); - void LoadVertices(); - void PrintSettingsToLog(); void CheckSettings(); @@ -184,8 +204,10 @@ private: u32 CalculateResolutionScale() const; GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; + bool ShouldDrawWithSoftwareRenderer() const; + bool IsUsingMultisampling() const; - bool IsUsingDownsampling() const; + bool IsUsingDownsampling(const GPUBackendUpdateDisplayCommand* cmd) const; void SetFullVRAMDirtyRectangle(); void ClearVRAMDirtyRectangle(); @@ -195,12 +217,15 @@ private: void AddUnclampedDrawnRectangle(const GSVector4i rect); void SetTexPageChangedOnOverlap(const GSVector4i update_rect); - void CheckForTexPageOverlap(GSVector4i uv_rect); + void CheckForTexPageOverlap(const GPUBackendDrawCommand* cmd, GSVector4i uv_rect); bool ShouldCheckForTexPageOverlap() const; bool IsFlushed() const; void EnsureVertexBufferSpace(u32 
required_vertices, u32 required_indices); - void EnsureVertexBufferSpaceForCurrentCommand(); + void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd); + void PrepareDraw(const GPUBackendDrawCommand* cmd); + void FinishPolygonDraw(const GPUBackendDrawCommand* cmd, std::array& vertices, u32 num_vertices, + bool is_3d); void ResetBatchVertexDepth(); /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. @@ -212,20 +237,6 @@ private: /// Returns true if the draw is going to use shader blending/framebuffer fetch. bool NeedsShaderBlending(GPUTransparencyMode transparency, BatchTextureMode texture, bool check_mask) const; - void FillBackendCommandParameters(GPUBackendCommand* cmd) const; - void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; - void UpdateSoftwareRenderer(bool copy_vram_from_hw); - - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void DispatchRenderCommand() override; - void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; - void FlushRender() override; - void DrawRendererStats() override; - void OnBufferSwapped() override; - void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, bool check_mask, const GSVector4i bounds); bool BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacementImage* tex, u32 dst_x, u32 dst_y, u32 width, @@ -235,17 +246,17 @@ private: void DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth); /// Handles quads with flipped texture coordinate directions. 
- void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); + void HandleFlippedQuadTextureCoordinates(const GPUBackendDrawCommand* cmd, BatchVertex* vertices); bool IsPossibleSpritePolygon(const BatchVertex* vertices) const; bool ExpandLineTriangles(BatchVertex* vertices); /// Computes polygon U/V boundaries, and for overlap with the current texture page. - void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices); + void ComputePolygonUVLimits(const GPUBackendDrawCommand* cmd, BatchVertex* vertices, u32 num_vertices); /// Sets the depth test flag for PGXP depth buffering. - void SetBatchDepthBuffer(bool enabled); - void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices); - void SetBatchSpriteMode(bool enabled); + void SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled); + void CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices); + void SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled); void UpdateDownsamplingLevels(); @@ -264,8 +275,6 @@ private: std::unique_ptr m_vram_upload_buffer; std::unique_ptr m_vram_write_texture; - std::unique_ptr m_sw_renderer; - BatchVertex* m_batch_vertex_ptr = nullptr; u16* m_batch_index_ptr = nullptr; u32 m_batch_base_vertex = 0; @@ -307,18 +316,32 @@ private: u8 m_texpage_dirty = 0; bool m_batch_ubo_dirty = true; + bool m_drawing_area_changed = true; BatchConfig m_batch; // Changed state BatchUBOData m_batch_ubo_data = {}; // Bounding box of VRAM area that the GPU has drawn into. + GSVector4i m_clamped_drawing_area = {}; GSVector4i m_vram_dirty_draw_rect = INVALID_RECT; GSVector4i m_vram_dirty_write_rect = INVALID_RECT; // TODO: Don't use in TC mode, should be kept at zero. GSVector4i m_current_uv_rect = INVALID_RECT; GSVector4i m_current_draw_rect = INVALID_RECT; s32 m_current_texture_page_offset[2] = {}; + union + { + struct + { + // NOTE: Only the texture-related bits should be used here, the others are not validated. 
+ GPUDrawModeReg mode_reg; + GPUTexturePaletteReg palette_reg; + }; + + u32 bits = INVALID_DRAW_MODE_BITS; + } m_draw_mode = {}; + std::unique_ptr m_wireframe_pipeline; // [wrapped][interlaced] diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index 630456b78..5777b6add 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -49,6 +49,9 @@ static constexpr const GSVector4i& INVALID_RECT = GPU_HW::INVALID_RECT; static constexpr const GPUTexture::Format REPLACEMENT_TEXTURE_FORMAT = GPUTexture::Format::RGBA8; static constexpr const char LOCAL_CONFIG_FILENAME[] = "config.yaml"; +static constexpr u32 STATE_PALETTE_RECORD_SIZE = + sizeof(GSVector4i) + sizeof(SourceKey) + sizeof(PaletteRecordFlags) + sizeof(HashType) + sizeof(u16) * MAX_CLUT_SIZE; + // Has to be public because it's referenced in Source. struct HashCacheEntry { @@ -517,6 +520,7 @@ static std::unique_ptr s_replacement_texture_render_target; static std::unique_ptr s_replacement_draw_pipeline; // copies alpha as-is static std::unique_ptr s_replacement_semitransparent_draw_pipeline; // inverts alpha (i.e. 
semitransparent) +static GPU_HW* s_hw_backend = nullptr; // TODO:FIXME: remove me static bool s_track_vram_writes = false; static std::string s_game_id; @@ -551,8 +555,10 @@ bool GPUTextureCache::IsDumpingVRAMWriteTextures() return (g_settings.texture_replacements.dump_textures && !s_config.dump_texture_pages); } -bool GPUTextureCache::Initialize() +bool GPUTextureCache::Initialize(GPU_HW* backend) { + s_hw_backend = backend; + LoadLocalConfiguration(false, false); UpdateVRAMTrackingState(); if (!CompilePipelines()) @@ -599,134 +605,164 @@ void GPUTextureCache::UpdateSettings(bool use_texture_cache, const Settings& old } } -bool GPUTextureCache::DoState(StateWrapper& sw, bool skip) +bool GPUTextureCache::GetStateSize(StateWrapper& sw, u32* size) { if (sw.GetVersion() < 73) { - if (!skip) - WARNING_LOG("Texture cache not in save state due to old version."); - - Invalidate(); + *size = 0; return true; } - if (!sw.DoMarker("GPUTextureCache")) + const size_t start = sw.GetPosition(); + if (!sw.DoMarker("GPUTextureCache")) [[unlikely]] return false; - if (sw.IsReading()) + u32 num_vram_writes = 0; + sw.Do(&num_vram_writes); + + for (u32 i = 0; i < num_vram_writes; i++) { - if (!skip) - Invalidate(); + sw.SkipBytes(sizeof(GSVector4i) * 2 + sizeof(HashType)); - u32 num_vram_writes = 0; - sw.Do(&num_vram_writes); - - const bool skip_writes = (skip || !s_track_vram_writes); - - for (u32 i = 0; i < num_vram_writes; i++) - { - static constexpr u32 PALETTE_RECORD_SIZE = sizeof(GSVector4i) + sizeof(SourceKey) + sizeof(PaletteRecordFlags) + - sizeof(HashType) + sizeof(u16) * MAX_CLUT_SIZE; - - if (skip_writes) - { - sw.SkipBytes(sizeof(GSVector4i) * 2 + sizeof(HashType)); - - u32 num_palette_records = 0; - sw.Do(&num_palette_records); - sw.SkipBytes(num_palette_records * PALETTE_RECORD_SIZE); - } - else - { - VRAMWrite* vrw = new VRAMWrite(); - DoStateVector(sw, &vrw->active_rect); - DoStateVector(sw, &vrw->write_rect); - sw.Do(&vrw->hash); - - u32 num_palette_records = 0; - 
sw.Do(&num_palette_records); - - // Skip palette records if we're not dumping now. - if (g_settings.texture_replacements.dump_textures) - { - vrw->palette_records.reserve(num_palette_records); - for (u32 j = 0; j < num_palette_records; j++) - { - VRAMWrite::PaletteRecord& rec = vrw->palette_records.emplace_back(); - DoStateVector(sw, &rec.rect); - sw.DoBytes(&rec.key, sizeof(rec.key)); - sw.Do(&rec.flags); - sw.Do(&rec.palette_hash); - sw.DoBytes(rec.palette, sizeof(rec.palette)); - } - } - else - { - sw.SkipBytes(num_palette_records * PALETTE_RECORD_SIZE); - } - - if (sw.HasError()) - { - delete vrw; - Invalidate(); - return false; - } - - vrw->num_page_refs = 0; - LoopRectPages(vrw->active_rect, [vrw](u32 pn) { - DebugAssert(vrw->num_page_refs < MAX_PAGE_REFS_PER_WRITE); - ListAppend(&s_pages[pn].writes, vrw, &vrw->page_refs[vrw->num_page_refs++]); - return true; - }); - } - } + u32 num_palette_records = 0; + sw.Do(&num_palette_records); + sw.SkipBytes(num_palette_records * STATE_PALETTE_RECORD_SIZE); } - else + + if (sw.HasError()) [[unlikely]] + return false; + + *size = static_cast(sw.GetPosition() - start); + return true; +} + +void GPUTextureCache::LoadState(std::span data, u32 data_version) +{ + Invalidate(); + + if (data.empty()) { - s_temp_vram_write_list.clear(); + WARNING_LOG("Texture cache not in save state due to old version."); + return; + } - if (!skip && s_track_vram_writes) + // Don't need anything if we're not tracking VRAM writes. 
+ if (!s_track_vram_writes) + return; + + StateWrapper sw(data, StateWrapper::Mode::Read, data_version); + + if (!sw.DoMarker("GPUTextureCache")) [[unlikely]] + { + WARNING_LOG("Invalid save state data."); + return; + } + + u32 num_vram_writes = 0; + sw.Do(&num_vram_writes); + + for (u32 i = 0; i < num_vram_writes; i++) + { + if (!s_track_vram_writes) { - for (PageEntry& page : s_pages) - { - ListIterate(page.writes, [](VRAMWrite* vrw) { - if (std::find(s_temp_vram_write_list.begin(), s_temp_vram_write_list.end(), vrw) != - s_temp_vram_write_list.end()) - { - return; - } + sw.SkipBytes(sizeof(GSVector4i) * 2 + sizeof(HashType)); - // try not to lose data... pull it from the sources - if (g_settings.texture_replacements.dump_textures) - SyncVRAMWritePaletteRecords(vrw); - - s_temp_vram_write_list.push_back(vrw); - }); - } + u32 num_palette_records = 0; + sw.Do(&num_palette_records); + sw.SkipBytes(num_palette_records * STATE_PALETTE_RECORD_SIZE); } - - u32 num_vram_writes = static_cast(s_temp_vram_write_list.size()); - sw.Do(&num_vram_writes); - for (VRAMWrite* vrw : s_temp_vram_write_list) + else { + VRAMWrite* vrw = new VRAMWrite(); DoStateVector(sw, &vrw->active_rect); DoStateVector(sw, &vrw->write_rect); sw.Do(&vrw->hash); - u32 num_palette_records = static_cast(vrw->palette_records.size()); + u32 num_palette_records = 0; sw.Do(&num_palette_records); - for (VRAMWrite::PaletteRecord& rec : vrw->palette_records) + + // Skip palette records if we're not dumping now. 
+ if (g_settings.texture_replacements.dump_textures) { - DoStateVector(sw, &rec.rect); - sw.DoBytes(&rec.key, sizeof(rec.key)); - sw.Do(&rec.flags); - sw.Do(&rec.palette_hash); - sw.DoBytes(rec.palette, sizeof(rec.palette)); + vrw->palette_records.reserve(num_palette_records); + for (u32 j = 0; j < num_palette_records; j++) + { + VRAMWrite::PaletteRecord& rec = vrw->palette_records.emplace_back(); + DoStateVector(sw, &rec.rect); + sw.DoBytes(&rec.key, sizeof(rec.key)); + sw.Do(&rec.flags); + sw.Do(&rec.palette_hash); + sw.DoBytes(rec.palette, sizeof(rec.palette)); + } } + else + { + sw.SkipBytes(num_palette_records * STATE_PALETTE_RECORD_SIZE); + } + + if (sw.HasError()) + { + WARNING_LOG("Invalid save state data."); + delete vrw; + Invalidate(); + return; + } + + vrw->num_page_refs = 0; + LoopRectPages(vrw->active_rect, [vrw](u32 pn) { + DebugAssert(vrw->num_page_refs < MAX_PAGE_REFS_PER_WRITE); + ListAppend(&s_pages[pn].writes, vrw, &vrw->page_refs[vrw->num_page_refs++]); + return true; + }); + } + } +} + +void GPUTextureCache::SaveState(StateWrapper& sw) +{ + sw.DoMarker("GPUTextureCache"); + + s_temp_vram_write_list.clear(); + + if (s_track_vram_writes) + { + for (PageEntry& page : s_pages) + { + ListIterate(page.writes, [](VRAMWrite* vrw) { + if (std::find(s_temp_vram_write_list.begin(), s_temp_vram_write_list.end(), vrw) != + s_temp_vram_write_list.end()) + { + return; + } + + // try not to lose data... 
pull it from the sources + if (g_settings.texture_replacements.dump_textures) + SyncVRAMWritePaletteRecords(vrw); + + s_temp_vram_write_list.push_back(vrw); + }); } } - return !sw.HasError(); + u32 num_vram_writes = static_cast(s_temp_vram_write_list.size()); + sw.Do(&num_vram_writes); + for (VRAMWrite* vrw : s_temp_vram_write_list) + { + DoStateVector(sw, &vrw->active_rect); + DoStateVector(sw, &vrw->write_rect); + sw.Do(&vrw->hash); + + u32 num_palette_records = static_cast(vrw->palette_records.size()); + sw.Do(&num_palette_records); + for (VRAMWrite::PaletteRecord& rec : vrw->palette_records) + { + DoStateVector(sw, &rec.rect); + sw.DoBytes(&rec.key, sizeof(rec.key)); + sw.Do(&rec.flags); + sw.Do(&rec.palette_hash); + sw.DoBytes(rec.palette, sizeof(rec.palette)); + } + } } void GPUTextureCache::Shutdown() @@ -737,6 +773,7 @@ void GPUTextureCache::Shutdown() s_replacement_texture_render_target.reset(); s_hash_cache_purge_list = {}; s_temp_vram_write_list = {}; + s_hw_backend = nullptr; s_track_vram_writes = false; s_replacement_image_cache.clear(); @@ -3305,5 +3342,5 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, g_gpu_device->RecycleTexture(std::move(entry->texture)); entry->texture = std::move(replacement_tex); - g_gpu->RestoreDeviceContext(); + s_hw_backend->RestoreDeviceContext(); } \ No newline at end of file diff --git a/src/core/gpu_hw_texture_cache.h b/src/core/gpu_hw_texture_cache.h index dd629a40f..a44e257b6 100644 --- a/src/core/gpu_hw_texture_cache.h +++ b/src/core/gpu_hw_texture_cache.h @@ -10,6 +10,7 @@ class RGBA8Image; class StateWrapper; struct Settings; +class GPU_HW; ////////////////////////////////////////////////////////////////////////// // Texture Cache @@ -102,9 +103,13 @@ struct Source TListNode hash_cache_ref; }; -bool Initialize(); +bool Initialize(GPU_HW* backend); void UpdateSettings(bool use_texture_cache, const Settings& old_settings); -bool DoState(StateWrapper& sw, bool skip); + +bool 
GetStateSize(StateWrapper& sw, u32* size); +void LoadState(std::span data, u32 data_version); +void SaveState(StateWrapper& sw); + void Shutdown(); void Invalidate(); diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 10eb5a5bc..480f09745 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "gpu_sw.h" -#include "gpu_hw_texture_cache.h" +#include "gpu.h" +#include "gpu_sw_rasterizer.h" #include "settings.h" #include "system.h" @@ -10,8 +11,7 @@ #include "common/align.h" #include "common/assert.h" -#include "common/gsvector.h" -#include "common/gsvector_formatter.h" +#include "common/intrin.h" #include "common/log.h" #include @@ -20,27 +20,149 @@ LOG_CHANNEL(GPU_SW); GPU_SW::GPU_SW() = default; -GPU_SW::~GPU_SW() -{ - g_gpu_device->RecycleTexture(std::move(m_upload_texture)); - m_backend.Shutdown(); -} - -const Threading::Thread* GPU_SW::GetSWThread() const -{ - return m_backend.GetThread(); -} +GPU_SW::~GPU_SW() = default; bool GPU_SW::IsHardwareRenderer() const { return false; } -bool GPU_SW::Initialize(Error* error) +u32 GPU_SW::GetResolutionScale() const { - if (!GPU::Initialize(error) || !m_backend.Initialize(g_settings.gpu_use_thread)) + return 1u; +} + +bool GPU_SW::Initialize(bool upload_vram, Error* error) +{ + if (!GPUBackend::Initialize(upload_vram, error)) return false; + // if we're using "new" vram, clear it out here + if (!upload_vram) + std::memset(g_vram, 0, sizeof(g_vram)); + + SetDisplayTextureFormat(); + return true; +} + +void GPU_SW::ClearVRAM() +{ + std::memset(g_vram, 0, sizeof(g_vram)); + std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); +} + +void GPU_SW::UpdateResolutionScale() +{ +} + +void GPU_SW::LoadState(const GPUBackendLoadStateCommand* cmd) +{ + std::memcpy(g_vram, cmd->vram_data, sizeof(g_vram)); + std::memcpy(g_gpu_clut, cmd->clut_data, sizeof(g_gpu_clut)); +} + +void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ +} + +void 
GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) +{ + GPU_SW_Rasterizer::FillVRAM(x, y, width, height, color, params.interlaced_rendering, params.active_line_lsb); +} + +void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) +{ + GPU_SW_Rasterizer::WriteVRAM(x, y, width, height, data, params.set_mask_while_drawing, params.check_mask_before_draw); +} + +void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) +{ + GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params.set_mask_while_drawing, + params.check_mask_before_draw); +} + +void GPU_SW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); + + DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); + if (cmd->num_vertices > 3) + DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); +} + +void GPU_SW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); + + // Need to cut out the irrelevant bits. + // TODO: In _theory_ we could use the fixed-point parts here. 
+ GPUBackendDrawPolygonCommand::Vertex vertices[4]; + for (u32 i = 0; i < cmd->num_vertices; i++) + { + const GPUBackendDrawPrecisePolygonCommand::Vertex& src = cmd->vertices[i]; + vertices[i] = GPUBackendDrawPolygonCommand::Vertex{ + .x = src.native_x, .y = src.native_y, .color = src.color, .texcoord = src.texcoord}; + } + + DrawFunction(cmd, &vertices[0], &vertices[1], &vertices[2]); + if (cmd->num_vertices > 3) + DrawFunction(cmd, &vertices[2], &vertices[1], &vertices[3]); +} + +void GPU_SW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + + const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); + + DrawFunction(cmd); +} + +void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable); + + for (u16 i = 0; i < cmd->num_vertices; i += 2) + DrawFunction(cmd, &cmd->vertices[i], &cmd->vertices[i + 1]); +} + +void GPU_SW::DrawingAreaChanged() +{ + // GPU_SW_Rasterizer::g_drawing_area set by base class. 
+} + +void GPU_SW::ClearCache() +{ +} + +void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + GPU_SW_Rasterizer::UpdateCLUT(reg, clut_is_8bit); +} + +void GPU_SW::OnBufferSwapped() +{ +} + +void GPU_SW::FlushRender() +{ +} + +void GPU_SW::RestoreDeviceContext() +{ +} + +void GPU_SW::SetDisplayTextureFormat() +{ static constexpr const std::array formats_for_16bit = {GPUTexture::Format::RGB565, GPUTexture::Format::RGBA5551, GPUTexture::Format::RGBA8, GPUTexture::Format::BGRA8}; static constexpr const std::array formats_for_24bit = {GPUTexture::Format::RGBA8, GPUTexture::Format::BGRA8, @@ -61,35 +183,6 @@ bool GPU_SW::Initialize(Error* error) break; } } - - return true; -} - -bool GPU_SW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) -{ - // need to ensure the worker thread is done - m_backend.Sync(true); - - // ignore the host texture for software mode, since we want to save vram here - if (!GPU::DoState(sw, nullptr, update_display)) - return false; - - // need to still call the TC, to toss any data in the state - return GPUTextureCache::DoState(sw, true); -} - -void GPU_SW::Reset(bool clear_vram) -{ - GPU::Reset(clear_vram); - - m_backend.Reset(); -} - -void GPU_SW::UpdateSettings(const Settings& old_settings) -{ - GPU::UpdateSettings(old_settings); - if (g_settings.gpu_use_thread != old_settings.gpu_use_thread) - m_backend.SetThreadEnabled(g_settings.gpu_use_thread); } GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format) @@ -427,32 +520,28 @@ bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u3 } } -void GPU_SW::UpdateDisplay() +void GPU_SW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { - // fill display texture - m_backend.Sync(true); - if (!g_settings.debugging.show_vram) { - if (IsDisplayDisabled()) + if (cmd->display_disabled) { ClearDisplayTexture(); return; } - const bool is_24bit = m_GPUSTAT.display_area_color_depth_24; - const bool 
interlaced = IsInterlacedDisplayEnabled(); - const u32 field = GetInterlacedDisplayField(); - const u32 vram_offset_x = is_24bit ? m_crtc_state.regs.X : m_crtc_state.display_vram_left; - const u32 vram_offset_y = - m_crtc_state.display_vram_top + ((interlaced && m_GPUSTAT.vertical_resolution) ? field : 0); - const u32 skip_x = is_24bit ? (m_crtc_state.display_vram_left - m_crtc_state.regs.X) : 0; - const u32 read_width = m_crtc_state.display_vram_width; - const u32 read_height = interlaced ? (m_crtc_state.display_vram_height / 2) : m_crtc_state.display_vram_height; + const bool is_24bit = cmd->display_24bit; + const bool interlaced = cmd->interlaced_display_enabled; + const u32 field = cmd->interlaced_display_field; + const u32 vram_offset_x = is_24bit ? cmd->X : cmd->display_vram_left; + const u32 vram_offset_y = cmd->display_vram_top + ((interlaced && cmd->interlaced_display_interleaved) ? field : 0); + const u32 skip_x = is_24bit ? (cmd->display_vram_left - cmd->X) : 0; + const u32 read_width = cmd->display_vram_width; + const u32 read_height = interlaced ? 
(cmd->display_vram_height / 2) : cmd->display_vram_height; - if (IsInterlacedDisplayEnabled()) + if (cmd->interlaced_display_enabled) { - const u32 line_skip = m_GPUSTAT.vertical_resolution; + const u32 line_skip = cmd->interlaced_display_interleaved; if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, line_skip, is_24bit)) { SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height); @@ -484,351 +573,7 @@ void GPU_SW::UpdateDisplay() } } -void GPU_SW::FillBackendCommandParameters(GPUBackendCommand* cmd) const +std::unique_ptr GPUBackend::CreateSoftwareBackend() { - cmd->params.bits = 0; - cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; - cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; - cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; - cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled(); -} - -void GPU_SW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const -{ - FillBackendCommandParameters(cmd); - cmd->rc.bits = rc.bits; - cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; - cmd->draw_mode.dither_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; - cmd->palette.bits = m_draw_mode.palette_reg.bits; - cmd->window = m_draw_mode.texture_window; -} - -void GPU_SW::DispatchRenderCommand() -{ - if (m_drawing_area_changed) - { - GPUBackendSetDrawingAreaCommand* cmd = m_backend.NewSetDrawingAreaCommand(); - cmd->new_area = m_drawing_area; - GSVector4i::store(cmd->new_clamped_area, m_clamped_drawing_area); - m_backend.PushCommand(cmd); - m_drawing_area_changed = false; - } - - const GPURenderCommand rc{m_render_command.bits}; - - switch (rc.primitive) - { - case GPUPrimitive::Polygon: - { - const u32 num_vertices = rc.quad_polygon ? 
4 : 3; - GPUBackendDrawPolygonCommand* cmd = m_backend.NewDrawPolygonCommand(num_vertices); - FillDrawCommand(cmd, rc); - - std::array positions; - const u32 first_color = rc.color_for_first_vertex; - const bool shaded = rc.shading_enable; - const bool textured = rc.texture_enable; - for (u32 i = 0; i < num_vertices; i++) - { - GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; - vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - const u64 maddr_and_pos = m_fifo.Pop(); - const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; - vert->x = m_drawing_offset.x + vp.x; - vert->y = m_drawing_offset.y + vp.y; - vert->texcoord = textured ? Truncate16(FifoPop()) : 0; - positions[i] = GSVector2i::load(&vert->x); - } - - // Cull polygons which are too large. - const GSVector2i min_pos_12 = positions[1].min_s32(positions[2]); - const GSVector2i max_pos_12 = positions[1].max_s32(positions[2]); - const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min_s32(positions[0])) - .upl64(GSVector4i(max_pos_12.max_s32(positions[0]))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const bool first_tri_culled = - (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || - !m_clamped_drawing_area.rintersects(draw_rect_012)); - if (first_tri_culled) - { - DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, - cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); - - if (!rc.quad_polygon) - return; - } - else - { - AddDrawTriangleTicks(positions[0], positions[1], positions[2], rc.shading_enable, rc.texture_enable, - rc.transparency_enable); - } - - // quads - if (rc.quad_polygon) - { - const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(positions[3])) - .upl64(GSVector4i(max_pos_12.max_s32(positions[3]))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - - // Cull polygons which are too large. 
- const bool second_tri_culled = - (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - !m_clamped_drawing_area.rintersects(draw_rect_123)); - if (second_tri_culled) - { - DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, - cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y); - - if (first_tri_culled) - return; - } - else - { - AddDrawTriangleTicks(positions[2], positions[1], positions[3], rc.shading_enable, rc.texture_enable, - rc.transparency_enable); - } - } - - m_backend.PushCommand(cmd); - } - break; - - case GPUPrimitive::Rectangle: - { - GPUBackendDrawRectangleCommand* cmd = m_backend.NewDrawRectangleCommand(); - FillDrawCommand(cmd, rc); - cmd->color = rc.color_for_first_vertex; - - const GPUVertexPosition vp{FifoPop()}; - cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); - cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); - - if (rc.texture_enable) - { - const u32 texcoord_and_palette = FifoPop(); - cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); - cmd->texcoord = Truncate16(texcoord_and_palette); - } - else - { - cmd->palette.bits = 0; - cmd->texcoord = 0; - } - - switch (rc.rectangle_size) - { - case GPUDrawRectangleSize::R1x1: - cmd->width = 1; - cmd->height = 1; - break; - case GPUDrawRectangleSize::R8x8: - cmd->width = 8; - cmd->height = 8; - break; - case GPUDrawRectangleSize::R16x16: - cmd->width = 16; - cmd->height = 16; - break; - default: - { - const u32 width_and_height = FifoPop(); - cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); - cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); - } - break; - } - - const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); - const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); - if (clamped_rect.rempty()) [[unlikely]] - { - 
DEBUG_LOG("Culling off-screen rectangle {}", rect); - return; - } - - AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); - - m_backend.PushCommand(cmd); - } - break; - - case GPUPrimitive::Line: - { - if (!rc.polyline) - { - GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(2); - FillDrawCommand(cmd, rc); - cmd->palette.bits = 0; - - if (rc.shading_enable) - { - cmd->vertices[0].color = rc.color_for_first_vertex; - const GPUVertexPosition start_pos{FifoPop()}; - cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; - cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; - - cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); - const GPUVertexPosition end_pos{FifoPop()}; - cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; - cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; - } - else - { - cmd->vertices[0].color = rc.color_for_first_vertex; - cmd->vertices[1].color = rc.color_for_first_vertex; - - const GPUVertexPosition start_pos{FifoPop()}; - cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; - cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; - - const GPUVertexPosition end_pos{FifoPop()}; - cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; - cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; - } - - const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x); - const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x); - const GSVector4i rect = v0.min_s32(v1).xyxy(v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].y, cmd->vertices[0].y, - cmd->vertices[1].x, cmd->vertices[1].y); - return; - } - - AddDrawLineTicks(clamped_rect, rc.shading_enable); - - m_backend.PushCommand(cmd); - } - else - { - const u32 num_vertices = 
GetPolyLineVertexCount(); - - GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand((num_vertices - 1) * 2); - FillDrawCommand(cmd, m_render_command); - - u32 buffer_pos = 0; - const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - const GSVector2i draw_offset = GSVector2i::load(&m_drawing_offset.x); - GSVector2i start_pos = GSVector2i(start_vp.x, start_vp.y).add32(draw_offset); - u32 start_color = m_render_command.color_for_first_vertex; - - const bool shaded = m_render_command.shading_enable; - u32 out_vertex_count = 0; - for (u32 i = 1; i < num_vertices; i++) - { - const u32 end_color = - shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; - const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; - const GSVector2i end_pos = GSVector2i(vp.x, vp.y).add32(draw_offset); - - const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[i - 1].x, - cmd->vertices[i - 1].y, cmd->vertices[i].x, cmd->vertices[i].y); - } - else - { - AddDrawLineTicks(clamped_rect, rc.shading_enable); - - GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count]; - out_vertex_count += 2; - - GSVector2i::store(&out_vertex[0].x, start_pos); - out_vertex[0].color = start_color; - GSVector2i::store(&out_vertex[1].x, end_pos); - out_vertex[1].color = end_color; - } - - start_pos = end_pos; - start_color = end_color; - } - - if (out_vertex_count > 0) - { - DebugAssert(out_vertex_count <= cmd->num_vertices); - cmd->num_vertices = Truncate16(out_vertex_count); - m_backend.PushCommand(cmd); - } - } - } - break; - - default: - UnreachableCode(); - break; - } -} - -void 
GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ - m_backend.Sync(false); -} - -void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - GPUBackendFillVRAMCommand* cmd = m_backend.NewFillVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - cmd->color = color; - m_backend.PushCommand(cmd); -} - -void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) -{ - const u32 num_words = width * height; - GPUBackendUpdateVRAMCommand* cmd = m_backend.NewUpdateVRAMCommand(num_words); - FillBackendCommandParameters(cmd); - cmd->params.set_mask_while_drawing = set_mask; - cmd->params.check_mask_before_draw = check_mask; - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - std::memcpy(cmd->data, data, sizeof(u16) * num_words); - m_backend.PushCommand(cmd); -} - -void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - GPUBackendCopyVRAMCommand* cmd = m_backend.NewCopyVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->src_x = static_cast(src_x); - cmd->src_y = static_cast(src_y); - cmd->dst_x = static_cast(dst_x); - cmd->dst_y = static_cast(dst_y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - m_backend.PushCommand(cmd); -} - -void GPU_SW::FlushRender() -{ -} - -void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) -{ - GPUBackendUpdateCLUTCommand* cmd = m_backend.NewUpdateCLUTCommand(); - FillBackendCommandParameters(cmd); - cmd->reg.bits = reg.bits; - cmd->clut_is_8bit = clut_is_8bit; - m_backend.PushCommand(cmd); -} - -std::unique_ptr GPU::CreateSoftwareRenderer(Error* error) -{ - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize(error)) - gpu.reset(); - - return gpu; + return std::make_unique(); } 
diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 2251843aa..9be0930e5 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -4,7 +4,7 @@ #pragma once #include "gpu.h" -#include "gpu_sw_backend.h" +#include "gpu_backend.h" #include "util/gpu_device.h" @@ -12,36 +12,49 @@ #include -namespace Threading { -class Thread; -} - -class GPUTexture; - -class GPU_SW final : public GPU +// TODO: Move to cpp +// TODO: Rename to GPUSWBackend, preserved to avoid conflicts. +class GPU_SW final : public GPUBackend { public: GPU_SW(); ~GPU_SW() override; - ALWAYS_INLINE const GPU_SW_Backend& GetBackend() const { return m_backend; } - - const Threading::Thread* GetSWThread() const override; bool IsHardwareRenderer() const override; - bool Initialize(Error* error) override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; - void Reset(bool clear_vram) override; - void UpdateSettings(const Settings& old_settings) override; + bool Initialize(bool upload_vram, Error* error) override; + + void RestoreDeviceContext() override; + + u32 GetResolutionScale() const override; protected: void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void FlushRender() override; - void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) override; + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) 
override; + void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override; + void DrawingAreaChanged() override; + void ClearCache() override; + void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; + void OnBufferSwapped() override; + + void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) override; + + void ClearVRAM() override; + + void FlushRender() override; + + void UpdateResolutionScale() override; + + void LoadState(const GPUBackendLoadStateCommand* cmd) override; + +private: template bool CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip); @@ -50,19 +63,11 @@ protected: bool CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit); - void UpdateDisplay() override; - - void DispatchRenderCommand() override; - - void FillBackendCommandParameters(GPUBackendCommand* cmd) const; - void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; - + void SetDisplayTextureFormat(); GPUTexture* GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format); FixedHeapArray m_upload_buffer; GPUTexture::Format m_16bit_display_format = GPUTexture::Format::RGB565; GPUTexture::Format m_24bit_display_format = GPUTexture::Format::RGBA8; std::unique_ptr m_upload_texture; - - GPU_SW_Backend m_backend; }; diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp deleted file mode 100644 index 0ab2e68e8..000000000 --- a/src/core/gpu_sw_backend.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#include "gpu_sw_backend.h" -#include "gpu.h" -#include "gpu_sw_rasterizer.h" -#include "system.h" - -#include "util/gpu_device.h" - -#include - -GPU_SW_Backend::GPU_SW_Backend() = default; - -GPU_SW_Backend::~GPU_SW_Backend() = 
default; - -bool GPU_SW_Backend::Initialize(bool use_thread) -{ - return GPUBackend::Initialize(use_thread); -} - -void GPU_SW_Backend::Reset() -{ - GPUBackend::Reset(); -} - -void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) -{ - const GPURenderCommand rc{cmd->rc.bits}; - - const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( - rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); - - DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); - if (rc.quad_polygon) - DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); -} - -void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) -{ - const GPURenderCommand rc{cmd->rc.bits}; - - const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = - GPU_SW_Rasterizer::GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); - - DrawFunction(cmd); -} - -void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd) -{ - const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = - GPU_SW_Rasterizer::GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable); - - for (u16 i = 1; i < cmd->num_vertices; i++) - DrawFunction(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); -} - -void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) -{ - GPU_SW_Rasterizer::FillVRAM(x, y, width, height, color, params.interlaced_rendering, params.active_line_lsb); -} - -void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, - GPUBackendCommandParameters params) -{ - GPU_SW_Rasterizer::WriteVRAM(x, y, width, height, data, params.set_mask_while_drawing, params.check_mask_before_draw); -} - -void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, - GPUBackendCommandParameters params) -{ - 
GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params.set_mask_while_drawing, - params.check_mask_before_draw); -} - -void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) -{ - GPU::ReadCLUT(g_gpu_clut, reg, clut_is_8bit); -} - -void GPU_SW_Backend::DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) -{ - GPU_SW_Rasterizer::g_drawing_area = new_drawing_area; -} - -void GPU_SW_Backend::FlushRender() -{ -} diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h deleted file mode 100644 index 7f2c492ca..000000000 --- a/src/core/gpu_sw_backend.h +++ /dev/null @@ -1,32 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#pragma once - -#include "gpu.h" -#include "gpu_backend.h" - -#include - -class GPU_SW_Backend final : public GPUBackend -{ -public: - GPU_SW_Backend(); - ~GPU_SW_Backend() override; - - bool Initialize(bool use_thread) override; - void Reset() override; - -protected: - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, - GPUBackendCommandParameters params) override; - - void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; - void DrawLine(const GPUBackendDrawLineCommand* cmd) override; - void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override; - void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) override; - void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; - void FlushRender() override; -}; diff --git a/src/core/gpu_sw_rasterizer.cpp b/src/core/gpu_sw_rasterizer.cpp index f2648dcd0..bcd1e4267 100644 --- a/src/core/gpu_sw_rasterizer.cpp 
+++ b/src/core/gpu_sw_rasterizer.cpp @@ -45,6 +45,31 @@ CopyVRAMFunction CopyVRAM = nullptr; GPUDrawingArea g_drawing_area = {}; } // namespace GPU_SW_Rasterizer +void GPU_SW_Rasterizer::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + const u16* const src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH]; + const u32 start_x = reg.GetXBase(); + if (!clut_is_8bit) + { + // Wraparound can't happen in 4-bit mode. + std::memcpy(g_gpu_clut, &src_row[start_x], sizeof(u16) * 16); + } + else + { + if ((start_x + 256) > VRAM_WIDTH) [[unlikely]] + { + const u32 end = VRAM_WIDTH - start_x; + const u32 start = 256 - end; + std::memcpy(g_gpu_clut, &src_row[start_x], sizeof(u16) * end); + std::memcpy(g_gpu_clut + end, src_row, sizeof(u16) * start); + } + else + { + std::memcpy(g_gpu_clut, &src_row[start_x], sizeof(u16) * 256); + } + } +} + // Default scalar implementation definitions. namespace GPU_SW_Rasterizer::Scalar { namespace { diff --git a/src/core/gpu_sw_rasterizer.h b/src/core/gpu_sw_rasterizer.h index cdc6e9d5e..69e89e65f 100644 --- a/src/core/gpu_sw_rasterizer.h +++ b/src/core/gpu_sw_rasterizer.h @@ -18,12 +18,15 @@ static constexpr u32 DITHER_LUT_SIZE = 512; using DitherLUT = std::array, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>; extern const DitherLUT g_dither_lut; +// TODO: Pack in struct extern GPUDrawingArea g_drawing_area; +extern void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit); + using DrawRectangleFunction = void (*)(const GPUBackendDrawRectangleCommand* cmd); typedef const DrawRectangleFunction DrawRectangleFunctionTable[2][2][2]; -using DrawTriangleFunction = void (*)(const GPUBackendDrawPolygonCommand* cmd, +using DrawTriangleFunction = void (*)(const GPUBackendDrawCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2); diff --git a/src/core/gpu_sw_rasterizer.inl b/src/core/gpu_sw_rasterizer.inl index 0a1ed95e3..64a3f18a8 100644 --- 
a/src/core/gpu_sw_rasterizer.inl +++ b/src/core/gpu_sw_rasterizer.inl @@ -966,7 +966,7 @@ struct TrianglePart #ifndef USE_VECTOR template -static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, +static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep) { s32 width = x_bound - x_start; @@ -1006,7 +1006,7 @@ static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start } template -ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCommand* cmd, const TrianglePart& tp, +ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand* cmd, const TrianglePart& tp, const UVStepper& uv, const UVSteps& uvstep, const RGBStepper& rgb, const RGBSteps& rgbstep) { @@ -1143,7 +1143,7 @@ struct TriangleVectors : PixelVectors } // namespace template -static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, +static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep, const TriangleVectors& tv) { @@ -1248,7 +1248,7 @@ static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start } template -ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCommand* cmd, const TrianglePart& tp, +ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand* cmd, const TrianglePart& tp, const UVStepper& uv, const UVSteps& uvstep, const RGBStepper& rgb, const RGBSteps& rgbstep) { @@ -1347,7 +1347,7 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo #endif // USE_VECTOR template -static void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, +static void DrawTriangle(const 
GPUBackendDrawCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2) { #ifdef CHECK_VECTOR diff --git a/src/core/gpu_thread.cpp b/src/core/gpu_thread.cpp new file mode 100644 index 000000000..fba927789 --- /dev/null +++ b/src/core/gpu_thread.cpp @@ -0,0 +1,1173 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#include "gpu_thread.h" +#include "fullscreen_ui.h" +#include "gpu_backend.h" +#include "gpu_types.h" +#include "host.h" +#include "imgui_overlays.h" +#include "performance_counters.h" +#include "settings.h" +#include "shader_cache_version.h" +#include "system.h" +#include "system_private.h" + +#include "util/gpu_device.h" +#include "util/imgui_manager.h" +#include "util/input_manager.h" +#include "util/postprocessing.h" +#include "util/state_wrapper.h" + +#include "common/align.h" +#include "common/error.h" +#include "common/log.h" +#include "common/threading.h" +#include "common/timer.h" + +#include "IconsEmoji.h" +#include "IconsFontAwesome5.h" +#include "fmt/format.h" +#include "imgui.h" + +#include + +LOG_CHANNEL(GPUThread); + +// TODO: Runahead/rewind textures. +// TODO: SW renderer for readback flag in class. +// TODO: Smaller settings struct. +// TODO: Remove g_gpu pointer. +// TODO: Auto size video capture. +// TODO: Smooth loady bar for achievements. +// TODO: Tidy up gpu_backend headers. +// TODO: Test that loading new states in old version works. +// TODO: Disable thread when debug windows are enabled. +// TODO: Fullscreen UI without thread active, locks up. 
+ +namespace GPUThread { +enum : u32 +{ + COMMAND_QUEUE_SIZE = 16 * 1024 * 1024, + THRESHOLD_TO_WAKE_GPU = 65536, + MAX_SKIPPED_PRESENT_COUNT = 50 +}; + +static constexpr s32 THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING = 0x40000000; // CPU thread needs waking +static constexpr s32 THREAD_WAKE_COUNT_SLEEPING = -1; + +// Use a slightly longer spin time on ARM64 due to power management. +#ifndef _M_ARM64 +static constexpr u32 THREAD_SPIN_TIME_US = 50; +#else +static constexpr u32 THREAD_SPIN_TIME_US = 200; +#endif + +static bool Reconfigure(std::optional renderer, bool upload_vram, std::optional fullscreen, + std::optional start_fullscreen_ui, bool recreate_device, Error* error); + +static u32 GetPendingCommandSize(); +static void ResetCommandFIFO(); +static void WakeGPUThread(); +static void SyncGPUThread(bool spin); +static bool SleepGPUThread(bool allow_sleep); + +static bool CreateDeviceOnThread(RenderAPI api, bool fullscreen, Error* error); +static void DestroyDeviceOnThread(); +static void ResizeDisplayWindowOnThread(u32 width, u32 height, float scale); +static void UpdateDisplayWindowOnThread(bool fullscreen); +static void DisplayWindowResizedOnThread(); +static void HandleGPUDeviceLost(); +static void HandleExclusiveFullscreenLost(); + +static void ReconfigureOnThread(GPUThreadReconfigureCommand* cmd); +static bool CreateGPUBackendOnThread(GPURenderer renderer, bool upload_vram, Error* error); +static void DestroyGPUBackendOnThread(); + +static void UpdateSettingsOnThread(const Settings& old_settings); + +static void SleepUntilPresentTime(Common::Timer::Value present_time); + +namespace { + +struct ALIGN_TO_CACHE_LINE State +{ + // Owned by CPU thread. 
+  ALIGN_TO_CACHE_LINE Common::Timer::Value thread_spin_time = 0;
+  Threading::ThreadHandle gpu_thread;
+  Common::unique_aligned_ptr command_fifo_data;
+  WindowInfo render_window_info;
+  // NOTE(review): CreateGPUBackendOnThread() also assigns this on its software-fallback path, so it is not
+  // written exclusively by the CPU thread - confirm the intended ownership.
+  std::optional requested_renderer; // TODO: Non-thread-safe access of this.
+  bool use_gpu_thread = false;
+
+  // Hot variables between both threads.
+  ALIGN_TO_CACHE_LINE std::atomic command_fifo_write_ptr{0};
+  std::atomic thread_wake_count{0}; // <0 = sleeping, >= 0 = has work
+  Threading::KernelSemaphore thread_wake_semaphore;
+  Threading::KernelSemaphore thread_is_done_semaphore;
+
+  // Owned by GPU thread.
+  ALIGN_TO_CACHE_LINE std::unique_ptr gpu_backend;
+  std::atomic command_fifo_read_ptr{0};
+  u32 skipped_present_count = 0;
+  bool run_idle_flag = false;
+  GPUVSyncMode requested_vsync = GPUVSyncMode::Disabled;
+  bool requested_allow_present_throttle = false;
+  bool requested_fullscreen_ui = false;
+};
+
+} // namespace
+
+static State s_state;
+
+} // namespace GPUThread
+
+// Returns the handle recorded in s_state.gpu_thread (assigned at the top of GPUThreadEntryPoint()).
+const Threading::ThreadHandle& GPUThread::Internal::GetThreadHandle()
+{
+  return s_state.gpu_thread;
+}
+
+// Rewinds both FIFO pointers to offset zero.
+// Asserts that idle-running is off and that the FIFO is empty (read pointer == write pointer) beforehand.
+void GPUThread::ResetCommandFIFO()
+{
+  Assert(!s_state.run_idle_flag && s_state.command_fifo_read_ptr.load(std::memory_order_acquire) ==
+                                     s_state.command_fifo_write_ptr.load(std::memory_order_relaxed));
+  s_state.command_fifo_write_ptr.store(0, std::memory_order_release);
+  s_state.command_fifo_read_ptr.store(0, std::memory_order_release);
+}
+
+// Switches between threaded and inline command execution. No-op when the requested state is already active.
+void GPUThread::Internal::SetThreadEnabled(bool enabled)
+{
+  if (s_state.use_gpu_thread == enabled)
+    return;
+
+  // Currently threaded: wait (without spinning) for the GPU thread to finish its queued work first.
+  if (s_state.use_gpu_thread)
+  {
+    SyncGPUThread(false);
+    std::atomic_thread_fence(std::memory_order_acquire);
+  }
+
+  // Was anything active?
+  if (!g_gpu_device)
+  {
+    // Thread should be idle. Just reset the FIFO.
+ s_state.use_gpu_thread = enabled; + ResetCommandFIFO(); + return; + } + + const bool fullscreen = Host::IsFullscreen(); + const bool requested_fullscreen_ui = s_state.requested_fullscreen_ui; + const std::optional requested_renderer = s_state.requested_renderer; + + // Force VRAM download, we're recreating. + if (requested_renderer.has_value()) + { + GPUBackendReadVRAMCommand* cmd = GPUBackend::NewReadVRAMCommand(); + cmd->x = 0; + cmd->y = 0; + cmd->width = VRAM_WIDTH; + cmd->height = VRAM_HEIGHT; + PushCommand(cmd); + } + + // Shutdown reconfigure. + Reconfigure(std::nullopt, false, std::nullopt, std::nullopt, false, nullptr); + + // Thread should be idle at this point. Reset the FIFO. + ResetCommandFIFO(); + + // Update state and reconfigure again. + s_state.use_gpu_thread = enabled; + + Error error; + if (!Reconfigure(requested_renderer, requested_renderer.has_value(), fullscreen, requested_fullscreen_ui, true, + &error)) + { + ERROR_LOG("Reconfigure failed: {}", error.GetDescription()); + Panic("Failed to reconfigure when changing thread state."); + } +} + +void GPUThread::Internal::ProcessStartup() +{ + s_state.thread_spin_time = Common::Timer::ConvertNanosecondsToValue(THREAD_SPIN_TIME_US * 1000.0); + s_state.command_fifo_data = Common::make_unique_aligned_for_overwrite(HOST_CACHE_LINE_SIZE, COMMAND_QUEUE_SIZE); + s_state.use_gpu_thread = g_settings.gpu_use_thread; +} + +void GPUThread::Internal::RequestShutdown() +{ + INFO_LOG("Shutting down GPU thread..."); + if (GetPendingCommandSize() > 0) + { + WakeGPUThread(); + SyncGPUThread(false); + } + + // Thread must be enabled to shut it down. 
+  SetThreadEnabled(true);
+  PushCommandAndWakeThread(AllocateCommand(GPUBackendCommandType::Shutdown, sizeof(GPUThreadCommand)));
+}
+
+// Fills a Reconfigure command from the arguments plus the current effective vsync/present-throttle settings.
+// The command is executed inline when the GPU thread is disabled; otherwise it is pushed and this call blocks
+// (without spinning) until the GPU thread has gone idle. Returns the result field stored in the command.
+bool GPUThread::Reconfigure(std::optional renderer, bool upload_vram, std::optional fullscreen,
+                            std::optional start_fullscreen_ui, bool recreate_device, Error* error)
+{
+  INFO_LOG("Reconfiguring GPU thread.");
+
+  GPUThreadReconfigureCommand* cmd = static_cast(
+    AllocateCommand(GPUBackendCommandType::Reconfigure, sizeof(GPUThreadReconfigureCommand)));
+  cmd->renderer = renderer;
+  cmd->fullscreen = fullscreen;
+  cmd->start_fullscreen_ui = start_fullscreen_ui;
+  cmd->vsync_mode = System::GetEffectiveVSyncMode();
+  cmd->allow_present_throttle = System::ShouldAllowPresentThrottle();
+  cmd->force_recreate_device = recreate_device;
+  cmd->upload_vram = upload_vram;
+  cmd->error_ptr = error;
+
+  if (!s_state.use_gpu_thread) [[unlikely]]
+    ReconfigureOnThread(cmd);
+  else
+    PushCommandAndSync(cmd, false);
+
+  return cmd->result;
+}
+
+// Requests that the fullscreen UI be available.
+// With a valid system only the requested flag is raised on the GPU thread; otherwise a reconfigure is issued
+// with start_fullscreen_ui = true and its result is returned.
+bool GPUThread::StartFullscreenUI(bool fullscreen, Error* error)
+{
+  // Don't need to reconfigure if we already have a system.
+  if (System::IsValid())
+  {
+    RunOnThread([]() { s_state.requested_fullscreen_ui = true; });
+    return true;
+  }
+
+  return Reconfigure(std::nullopt, false, fullscreen, true, false, error);
+}
+
+// Withdraws the fullscreen UI request; the counterpart of StartFullscreenUI().
+// With a valid system only the requested flag is cleared on the GPU thread; otherwise a reconfigure is issued
+// with start_fullscreen_ui = false.
+void GPUThread::StopFullscreenUI()
+{
+  // Don't need to reconfigure if we already have a system.
+  if (System::IsValid())
+  {
+    // Must clear the flag here: leaving it set would make a later ReconfigureOnThread() with no renderer
+    // still treat the fullscreen UI as requested.
+    RunOnThread([]() { s_state.requested_fullscreen_ui = false; });
+    return;
+  }
+
+  Reconfigure(std::nullopt, false, std::nullopt, false, false, nullptr);
+}
+
+// Returns the renderer most recently stored in s_state.requested_renderer, if any.
+std::optional GPUThread::GetRequestedRenderer()
+{
+  return s_state.requested_renderer;
+}
+
+// Records the requested renderer, then reconfigures to create the matching backend.
+bool GPUThread::CreateGPUBackend(GPURenderer renderer, bool upload_vram, bool fullscreen, bool force_recreate_device,
+                                 Error* error)
+{
+  s_state.requested_renderer = renderer;
+  return Reconfigure(renderer, upload_vram, fullscreen ?
std::optional(true) : std::nullopt, std::nullopt,
+                     force_recreate_device, error);
+}
+
+// Issues a reconfigure with no renderer requested, then forgets the requested renderer.
+void GPUThread::DestroyGPUBackend()
+{
+  Reconfigure(std::nullopt, false, std::nullopt, std::nullopt, false, nullptr);
+  s_state.requested_renderer.reset();
+}
+
+// Reserves space for a command of `size` bytes at the current write offset of the command FIFO and fills in
+// its type and size. The write pointer is NOT advanced here; the PushCommand*() functions do that.
+// Waits (repeatedly waking the GPU thread) while the reader is ahead of the writer and the gap is too small,
+// and emits a Wraparound dummy command when the request does not fit in the tail of the buffer.
+GPUThreadCommand* GPUThread::AllocateCommand(GPUBackendCommandType command, u32 size)
+{
+  // Ensure size is a multiple of 4 so we don't end up with an unaligned command.
+  size = Common::AlignUpPow2(size, 4);
+
+  for (;;)
+  {
+    u32 read_ptr = s_state.command_fifo_read_ptr.load(std::memory_order_acquire);
+    u32 write_ptr = s_state.command_fifo_write_ptr.load(std::memory_order_relaxed);
+    if (read_ptr > write_ptr)
+    {
+      // Writer has wrapped and the reader is still ahead of it: only the gap up to the reader is usable.
+      // NOTE(review): the slack here is sizeof(GPUBackendCommandType) but sizeof(GPUBackendCommand) in the
+      // branch below - confirm the difference is intentional.
+      u32 available_size = read_ptr - write_ptr;
+      while (available_size < (size + sizeof(GPUBackendCommandType)))
+      {
+        WakeGPUThread();
+        read_ptr = s_state.command_fifo_read_ptr.load(std::memory_order_acquire);
+        // NOTE(review): once the reader wraps (read_ptr <= write_ptr) this becomes the tail size, which never
+        // grows while we wait here. If the tail is smaller than the request this loop looks unable to make
+        // progress, and the wraparound check below is not re-run - verify.
+        available_size = (read_ptr > write_ptr) ? (read_ptr - write_ptr) : (COMMAND_QUEUE_SIZE - write_ptr);
+      }
+    }
+    else
+    {
+      // Reader is at or behind the writer: the usable space is the tail of the buffer.
+      const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr;
+      if ((size + sizeof(GPUBackendCommand)) > available_size)
+      {
+        // allocate a dummy command to wrap the buffer around
+        GPUBackendCommand* dummy_cmd = reinterpret_cast(&s_state.command_fifo_data[write_ptr]);
+        dummy_cmd->type = GPUBackendCommandType::Wraparound;
+        dummy_cmd->size = available_size;
+        dummy_cmd->params.bits = 0;
+        s_state.command_fifo_write_ptr.store(0, std::memory_order_release);
+        // Re-evaluate from the top with the write pointer back at offset zero.
+        continue;
+      }
+    }
+
+    GPUThreadCommand* cmd = reinterpret_cast(&s_state.command_fifo_data[write_ptr]);
+    cmd->type = command;
+    cmd->size = size;
+    return cmd;
+  }
+}
+
+// Returns the number of bytes between the read and write pointers, accounting for wraparound.
+u32 GPUThread::GetPendingCommandSize()
+{
+  const u32 read_ptr = s_state.command_fifo_read_ptr.load();
+  const u32 write_ptr = s_state.command_fifo_write_ptr.load();
+  return (write_ptr >= read_ptr) ?
(write_ptr - read_ptr) : (COMMAND_QUEUE_SIZE - read_ptr + write_ptr); +} + +void GPUThread::PushCommand(GPUThreadCommand* cmd) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + return; + } + + const u32 new_write_ptr = s_state.command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size; + DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); + UNREFERENCED_VARIABLE(new_write_ptr); + if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU) // TODO:FIXME: maybe purge this? + WakeGPUThread(); +} + +void GPUThread::PushCommandAndWakeThread(GPUThreadCommand* cmd) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + return; + } + + const u32 new_write_ptr = s_state.command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size; + DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); + UNREFERENCED_VARIABLE(new_write_ptr); + WakeGPUThread(); +} + +void GPUThread::PushCommandAndSync(GPUThreadCommand* cmd, bool spin) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + return; + } + + const u32 new_write_ptr = s_state.command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size; + DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); + UNREFERENCED_VARIABLE(new_write_ptr); + WakeGPUThread(); + SyncGPUThread(spin); +} + +void GPUThread::PushCommandAndFrame(GPUBackendUpdateDisplayCommand* cmd) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + return; + } + + const bool drain_one = cmd->present_frame && s_state.gpu_backend->BeginQueueFrame(); + + PushCommandAndWakeThread(cmd); + + if (drain_one) + s_state.gpu_backend->WaitForOneQueuedFrame(); +} + +ALWAYS_INLINE s32 GetThreadWakeCount(s32 state) +{ + return 
(state & ~GPUThread::THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING); +} + +void GPUThread::WakeGPUThread() +{ + // If sleeping, state will be <0, otherwise this will increment the pending work count. + // We add 2 so that there's a positive work count if we were sleeping, otherwise the thread would go to sleep. + if (s_state.thread_wake_count.fetch_add(2, std::memory_order_release) < 0) + s_state.thread_wake_semaphore.Post(); +} + +void GPUThread::SyncGPUThread(bool spin) +{ + DebugAssert(s_state.use_gpu_thread); + if (spin) + { + // Check if the GPU thread is done/sleeping. + if (GetThreadWakeCount(s_state.thread_wake_count.load(std::memory_order_acquire)) < 0) + return; + + Common::Timer::Value start_time = Common::Timer::GetCurrentValue(); + Common::Timer::Value current_time; + do + { + // Check if the GPU thread is done/sleeping. + if (GetThreadWakeCount(s_state.thread_wake_count.load(std::memory_order_acquire)) < 0) + return; + + // Hopefully ought to be enough. + MultiPause(); + + current_time = Common::Timer::GetCurrentValue(); + } while ((current_time - start_time) < s_state.thread_spin_time); + } + + // s_thread_wake_count |= THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING if not zero + s32 value; + do + { + // Check if the GPU thread is done/sleeping. + value = s_state.thread_wake_count.load(std::memory_order_acquire); + if (GetThreadWakeCount(value) < 0) + return; + } while (!s_state.thread_wake_count.compare_exchange_weak(value, value | THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING, + std::memory_order_acq_rel, std::memory_order_relaxed)); + s_state.thread_is_done_semaphore.Wait(); +} + +bool GPUThread::SleepGPUThread(bool allow_sleep) +{ + DebugAssert(!allow_sleep || s_state.thread_wake_count.load(std::memory_order_relaxed) >= 0); + for (;;) + { + // Acknowledge any work that has been queued, but preserve the waiting flag if there is any, since we're not done + // yet. 
+ s32 old_state, new_state; + do + { + old_state = s_state.thread_wake_count.load(std::memory_order_relaxed); + new_state = (GetThreadWakeCount(old_state) > 0) ? (old_state & THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING) : + (allow_sleep ? THREAD_WAKE_COUNT_SLEEPING : 0); + } while (!s_state.thread_wake_count.compare_exchange_weak(old_state, new_state, std::memory_order_acq_rel, + std::memory_order_relaxed)); + + // Are we not done yet? + if (GetThreadWakeCount(old_state) > 0) + return true; + + // We're done, so wake the CPU thread if it's waiting. + if (old_state & THREAD_WAKE_COUNT_CPU_THREAD_IS_WAITING) + s_state.thread_is_done_semaphore.Post(); + + // Sleep until more work is queued. + if (allow_sleep) + s_state.thread_wake_semaphore.Wait(); + else + return false; + } +} + +void GPUThread::Internal::GPUThreadEntryPoint() +{ + s_state.gpu_thread = Threading::ThreadHandle::GetForCallingThread(); + Threading::SetNameOfCurrentThread("GPU Thread"); + + // Take a local copy of the FIFO, that way it's not ping-ponging between the threads. + u8* const command_fifo_data = s_state.command_fifo_data.get(); + + for (;;) + { + u32 write_ptr = s_state.command_fifo_write_ptr.load(std::memory_order_acquire); + u32 read_ptr = s_state.command_fifo_read_ptr.load(std::memory_order_relaxed); + if (read_ptr == write_ptr) + { + if (SleepGPUThread(!s_state.run_idle_flag)) + { + // sleep => wake, need to reload pointers + continue; + } + else + { + Internal::PresentFrame(false, 0); + if (!g_gpu_device->GetMainSwapChain()->IsVSyncModeBlocking()) + g_gpu_device->GetMainSwapChain()->ThrottlePresentation(); + + continue; + } + } + + write_ptr = (write_ptr < read_ptr) ? 
COMMAND_QUEUE_SIZE : write_ptr; + while (read_ptr < write_ptr) + { + GPUThreadCommand* cmd = reinterpret_cast(&command_fifo_data[read_ptr]); + DebugAssert((read_ptr + cmd->size) <= COMMAND_QUEUE_SIZE); + read_ptr += cmd->size; + + if (cmd->type > GPUBackendCommandType::Shutdown) [[likely]] + { + DebugAssert(s_state.gpu_backend); + s_state.gpu_backend->HandleCommand(cmd); + continue; + } + + switch (cmd->type) + { + case GPUBackendCommandType::Wraparound: + { + DebugAssert(read_ptr == COMMAND_QUEUE_SIZE); + write_ptr = s_state.command_fifo_write_ptr.load(std::memory_order_acquire); + read_ptr = 0; + + // let the CPU thread know as early as possible that we're here + s_state.command_fifo_read_ptr.store(read_ptr, std::memory_order_release); + } + break; + + case GPUBackendCommandType::AsyncCall: + { + GPUThreadAsyncCallCommand* acmd = static_cast(cmd); + acmd->func(); + acmd->~GPUThreadAsyncCallCommand(); + } + break; + + case GPUBackendCommandType::Reconfigure: + { + ReconfigureOnThread(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::Shutdown: + { + // Should have consumed everything, and be shutdown. 
+          DebugAssert(read_ptr == write_ptr);
+          // Publish the final read position before the thread function exits.
+          s_state.command_fifo_read_ptr.store(read_ptr, std::memory_order_release);
+          return;
+        }
+        break;
+
+          DefaultCaseIsUnreachable();
+      }
+    }
+
+    // Batch consumed: publish the new read position so the CPU thread can reuse the space.
+    s_state.command_fifo_read_ptr.store(read_ptr, std::memory_order_release);
+  }
+}
+
+// Creates g_gpu_device for the given render API, acquires the render window and initializes ImGui
+// (plus the fullscreen UI when it has been requested). Expects no device to exist on entry.
+// Returns false and fills `error` on failure.
+bool GPUThread::CreateDeviceOnThread(RenderAPI api, bool fullscreen, Error* error)
+{
+  DebugAssert(!g_gpu_device);
+
+  INFO_LOG("Trying to create a {} GPU device...", GPUDevice::RenderAPIToString(api));
+  g_gpu_device = GPUDevice::CreateDeviceForAPI(api);
+
+  // Only look up an exclusive fullscreen mode when fullscreen was requested and the device supports it.
+  std::optional fullscreen_mode;
+  if (fullscreen && g_gpu_device && g_gpu_device->SupportsExclusiveFullscreen())
+  {
+    fullscreen_mode =
+      GPUDevice::ExclusiveFullscreenMode::Parse(Host::GetTinyStringSettingValue("GPU", "FullscreenMode", ""));
+  }
+  // Left empty when the setting is Automatic; otherwise true for Allowed and false for any other value.
+  std::optional exclusive_fullscreen_control;
+  if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic)
+  {
+    exclusive_fullscreen_control =
+      (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed);
+  }
+
+  // Collect the device features the user has explicitly disabled.
+  u32 disabled_features = 0;
+  if (g_settings.gpu_disable_dual_source_blend)
+    disabled_features |= GPUDevice::FEATURE_MASK_DUAL_SOURCE_BLEND;
+  if (g_settings.gpu_disable_framebuffer_fetch)
+    disabled_features |= GPUDevice::FEATURE_MASK_FRAMEBUFFER_FETCH;
+  if (g_settings.gpu_disable_texture_buffers)
+    disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_BUFFERS;
+  if (g_settings.gpu_disable_memory_import)
+    disabled_features |= GPUDevice::FEATURE_MASK_MEMORY_IMPORT;
+  if (g_settings.gpu_disable_raster_order_views)
+    disabled_features |= GPUDevice::FEATURE_MASK_RASTER_ORDER_VIEWS;
+
+  // Only dump shaders on Android in debug builds; release-build users will complain about storage...
+#if !defined(__ANDROID__) || defined(_DEBUG) + const std::string_view shader_dump_directory(EmuFolders::DataRoot); +#else + const std::string_view shader_dump_directory; +#endif + + Error create_error; + std::optional wi; + if (!g_gpu_device || + !(wi = Host::AcquireRenderWindow(api, fullscreen, fullscreen_mode.has_value(), &create_error)).has_value() || + !g_gpu_device->Create( + g_settings.gpu_adapter, static_cast(disabled_features), shader_dump_directory, + g_settings.gpu_disable_shader_cache ? std::string_view() : std::string_view(EmuFolders::Cache), + SHADER_CACHE_VERSION, g_settings.gpu_use_debug_device, wi.value(), s_state.requested_vsync, + s_state.requested_allow_present_throttle, fullscreen_mode.has_value() ? &fullscreen_mode.value() : nullptr, + exclusive_fullscreen_control, &create_error)) + { + ERROR_LOG("Failed to create GPU device: {}", create_error.GetDescription()); + if (g_gpu_device) + g_gpu_device->Destroy(); + g_gpu_device.reset(); + if (wi.has_value()) + Host::ReleaseRenderWindow(); + + Error::SetStringFmt( + error, + TRANSLATE_FS("System", "Failed to create render device:\n\n{0}\n\nThis may be due to your GPU not supporting the " + "chosen renderer ({1}), or because your graphics drivers need to be updated."), + create_error.GetDescription(), GPUDevice::RenderAPIToString(api)); + + return false; + } + + if (!ImGuiManager::Initialize(g_settings.display_osd_scale / 100.0f, g_settings.display_osd_margin, &create_error) || + (s_state.requested_fullscreen_ui && !FullscreenUI::Initialize())) + { + ERROR_LOG("Failed to initialize ImGuiManager: {}", create_error.GetDescription()); + Error::SetStringFmt(error, "Failed to initialize ImGuiManager: {}", create_error.GetDescription()); + FullscreenUI::Shutdown(); + ImGuiManager::Shutdown(); + g_gpu_device->Destroy(); + g_gpu_device.reset(); + if (wi.has_value()) + Host::ReleaseRenderWindow(); + return false; + } + + InputManager::SetDisplayWindowSize(ImGuiManager::GetWindowWidth(), 
ImGuiManager::GetWindowHeight()); + + if (const GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain()) + s_state.render_window_info = swap_chain->GetWindowInfo(); + else + s_state.render_window_info = WindowInfo(); + + std::atomic_thread_fence(std::memory_order_release); + + return true; +} + +void GPUThread::DestroyDeviceOnThread() +{ + if (!g_gpu_device) + return; + + const bool has_window = g_gpu_device->HasMainSwapChain(); + + ImGuiManager::DestroyOverlayTextures(); + FullscreenUI::Shutdown(); + ImGuiManager::Shutdown(); + + INFO_LOG("Destroying {} GPU device...", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); + g_gpu_device->Destroy(); + g_gpu_device.reset(); + if (has_window) + Host::ReleaseRenderWindow(); + + s_state.render_window_info = WindowInfo(); + std::atomic_thread_fence(std::memory_order_release); +} + +void GPUThread::HandleGPUDeviceLost() +{ + static Common::Timer::Value s_last_gpu_reset_time = 0; + static constexpr float MIN_TIME_BETWEEN_RESETS = 15.0f; + + // If we're constantly crashing on something in particular, we don't want to end up in an + // endless reset loop.. that'd probably end up leaking memory and/or crashing us for other + // reasons. So just abort in such case. + const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); + if (s_last_gpu_reset_time != 0 && + Common::Timer::ConvertValueToSeconds(current_time - s_last_gpu_reset_time) < MIN_TIME_BETWEEN_RESETS) + { + Panic("Host GPU lost too many times, device is probably completely wedged."); + } + s_last_gpu_reset_time = current_time; + + const bool is_fullscreen = Host::IsFullscreen(); + + // Device lost, something went really bad. + // Let's just toss out everything, and try to hobble on. 
+ DestroyGPUBackendOnThread(); + DestroyDeviceOnThread(); + + Error error; + if (!CreateDeviceOnThread( + Settings::GetRenderAPIForRenderer(s_state.requested_renderer.value_or(g_gpu_settings.gpu_renderer)), + is_fullscreen, &error) || + (s_state.requested_renderer.has_value() && + !CreateGPUBackendOnThread(s_state.requested_renderer.value(), true, &error))) + { + ERROR_LOG("Failed to recreate GPU device after loss: {}", error.GetDescription()); + Panic("Failed to recreate GPU device after loss."); + return; + } + + // First frame after reopening is definitely going to be trash, so skip it. + Host::AddIconOSDWarning( + "HostGPUDeviceLost", ICON_EMOJI_WARNING, + TRANSLATE_STR("System", "Host GPU device encountered an error and has recovered. This may cause broken rendering."), + Host::OSD_CRITICAL_ERROR_DURATION); +} + +void GPUThread::HandleExclusiveFullscreenLost() +{ + WARNING_LOG("Lost exclusive fullscreen."); + Host::SetFullscreen(false); +} + +bool GPUThread::CreateGPUBackendOnThread(GPURenderer renderer, bool upload_vram, Error* error) +{ + const bool is_hardware = (renderer != GPURenderer::Software); + + if (is_hardware) + s_state.gpu_backend = GPUBackend::CreateHardwareBackend(); + else + s_state.gpu_backend = GPUBackend::CreateSoftwareBackend(); + + Error local_error; + bool okay = s_state.gpu_backend->Initialize(upload_vram, &local_error); + if (!okay) + { + ERROR_LOG("Failed to create {} renderer: {}", Settings::GetRendererName(renderer), local_error.GetDescription()); + + if (is_hardware) + { + Host::AddIconOSDMessage( + "GPUBackendCreationFailed", ICON_FA_PAINT_ROLLER, + fmt::format(TRANSLATE_FS("OSDMessage", "Failed to initialize {} renderer, falling back to software renderer."), + Settings::GetRendererName(s_state.requested_renderer.value())), + Host::OSD_CRITICAL_ERROR_DURATION); + + s_state.requested_renderer = GPURenderer::Software; + s_state.gpu_backend = GPUBackend::CreateSoftwareBackend(); + okay = s_state.gpu_backend->Initialize(upload_vram, 
&local_error);
+    }
+
+    if (!okay)
+    {
+      if (error)
+        *error = local_error;
+      return false;
+    }
+  }
+
+  g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage);
+  ImGuiManager::UpdateDebugWindowConfig();
+  std::atomic_thread_fence(std::memory_order_release);
+  return true;
+}
+
+// Applies a Reconfigure command: stores the requested presentation state, tears down and/or recreates the
+// GPU device and backend as needed, and reports the outcome through cmd->result (and cmd->error_ptr on
+// failure).
+void GPUThread::ReconfigureOnThread(GPUThreadReconfigureCommand* cmd)
+{
+  // Default to success. Reconfigure() returns cmd->result unconditionally, but the command storage comes from
+  // AllocateCommand(), which only fills in type and size, and the shutdown path below never wrote a result.
+  // Every failure path overwrites this.
+  cmd->result = true;
+
+  // Store state.
+  s_state.requested_vsync = cmd->vsync_mode;
+  s_state.requested_allow_present_throttle = cmd->allow_present_throttle;
+  s_state.requested_fullscreen_ui = cmd->start_fullscreen_ui.value_or(s_state.requested_fullscreen_ui);
+
+  // Are we shutting down everything?
+  if (!cmd->renderer.has_value() && !s_state.requested_fullscreen_ui)
+  {
+    DestroyGPUBackendOnThread();
+    DestroyDeviceOnThread();
+    return;
+  }
+
+  // TODO: Make this suck less.
+  g_gpu_settings = g_settings;
+
+  // Readback old VRAM for hardware renderers.
+  if (s_state.gpu_backend && cmd->renderer.has_value() && cmd->upload_vram)
+  {
+    GPUBackendReadVRAMCommand read_cmd;
+    read_cmd.type = GPUBackendCommandType::ReadVRAM;
+    // Size of the command itself, not of the `cmd` pointer.
+    read_cmd.size = sizeof(read_cmd);
+    read_cmd.x = 0;
+    read_cmd.y = 0;
+    read_cmd.width = VRAM_WIDTH;
+    read_cmd.height = VRAM_HEIGHT;
+    s_state.gpu_backend->HandleCommand(&read_cmd);
+  }
+
+  if (s_state.gpu_backend)
+    DestroyGPUBackendOnThread();
+
+  // Device recreation?
+  // A software renderer keeps whatever device API is already active; otherwise use the API for the
+  // requested (or configured) renderer.
+  const RenderAPI current_api = g_gpu_device ? g_gpu_device->GetRenderAPI() : RenderAPI::None;
+  const RenderAPI expected_api =
+    (cmd->renderer.has_value() && cmd->renderer.value() == GPURenderer::Software && current_api != RenderAPI::None) ?
+ current_api : + Settings::GetRenderAPIForRenderer(s_state.requested_renderer.value_or(g_gpu_settings.gpu_renderer)); + if (cmd->force_recreate_device || !GPUDevice::IsSameRenderAPI(current_api, expected_api)) + { + const bool fullscreen = cmd->fullscreen.value_or(Host::IsFullscreen()); + DestroyDeviceOnThread(); + + Error local_error; + if (!CreateDeviceOnThread(expected_api, fullscreen, &local_error)) + { + Host::AddIconOSDMessage( + "DeviceSwitchFailed", ICON_FA_PAINT_ROLLER, + fmt::format(TRANSLATE_FS("OSDMessage", "Failed to create {} GPU device, reverting to {}.\n{}"), + GPUDevice::RenderAPIToString(expected_api), GPUDevice::RenderAPIToString(current_api), + local_error.GetDescription()), + Host::OSD_CRITICAL_ERROR_DURATION); + + Host::ReleaseRenderWindow(); + if (current_api == RenderAPI::None || !CreateDeviceOnThread(current_api, fullscreen, &local_error)) + { + if (cmd->error_ptr) + *cmd->error_ptr = local_error; + + cmd->result = false; + return; + } + } + } + + if (cmd->renderer.has_value()) + { + // Do we want a renderer? + cmd->result = CreateGPUBackendOnThread(cmd->renderer.value(), cmd->upload_vram, cmd->error_ptr); + } + else if (s_state.requested_fullscreen_ui) + { + if (!g_gpu_device && !CreateDeviceOnThread(expected_api, cmd->fullscreen.value_or(false), cmd->error_ptr)) + { + cmd->result = false; + return; + } + + // Don't need timing to run FSUI. + g_gpu_device->SetGPUTimingEnabled(false); + + cmd->result = FullscreenUI::IsInitialized() || FullscreenUI::Initialize(); + if (!cmd->result) + Error::SetStringView(cmd->error_ptr, "Failed to initialize FullscreenUI."); + } + else + { + // Device is no longer needed. 
+ DestroyDeviceOnThread(); + } +} + +void GPUThread::DestroyGPUBackendOnThread() +{ + if (!s_state.gpu_backend) + return; + + VERBOSE_LOG("Shutting down GPU backend..."); + + ImGuiManager::DestroyAllDebugWindows(); + PostProcessing::Shutdown(); + s_state.gpu_backend.reset(); +} + +void GPUThread::UpdateSettingsOnThread(const Settings& old_settings) +{ + DebugAssert(s_state.gpu_backend); + if (g_gpu_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage) + g_gpu_device->SetGPUTimingEnabled(g_gpu_settings.display_show_gpu_usage); + + s_state.gpu_backend->UpdateSettings(old_settings); + if (ImGuiManager::UpdateDebugWindowConfig()) + Internal::PresentFrame(false, 0); +} + +void GPUThread::RunOnThread(AsyncCallType func) +{ + if (!s_state.use_gpu_thread) [[unlikely]] + { + func(); + return; + } + + GPUThreadAsyncCallCommand* cmd = static_cast( + AllocateCommand(GPUBackendCommandType::AsyncCall, sizeof(GPUThreadAsyncCallCommand))); + new (cmd) GPUThreadAsyncCallCommand; + cmd->func = std::move(func); + PushCommandAndWakeThread(cmd); +} + +void GPUThread::UpdateSettings(bool gpu_settings_changed) +{ + if (gpu_settings_changed) + { + RunOnThread([settings = g_settings]() { + VERBOSE_LOG("Updating GPU settings on thread..."); + + Settings old_settings = std::move(g_gpu_settings); + g_gpu_settings = std::move(settings); + + if (s_state.gpu_backend) + UpdateSettingsOnThread(old_settings); + }); + } + else + { + RunOnThread([]() { + if (s_state.gpu_backend && ImGuiManager::UpdateDebugWindowConfig()) + Internal::PresentFrame(false, 0); + }); + } +} + +void GPUThread::ResizeDisplayWindow(s32 width, s32 height, float scale) +{ + RunOnThread([width, height, scale]() { ResizeDisplayWindowOnThread(width, height, scale); }); +} + +void GPUThread::ResizeDisplayWindowOnThread(u32 width, u32 height, float scale) +{ + // We should _not_ be getting this without a device, since we should have shut down. 
+ if (!g_gpu_device || !g_gpu_device->HasMainSwapChain()) + return; + + DEV_LOG("Display window resized to {}x{}", width, height); + + Error error; + if (!g_gpu_device->GetMainSwapChain()->ResizeBuffers(width, height, scale, &error)) + { + ERROR_LOG("Failed to resize main swap chain: {}", error.GetDescription()); + UpdateDisplayWindowOnThread(Host::IsFullscreen()); + return; + } + + DisplayWindowResizedOnThread(); +} + +void GPUThread::UpdateDisplayWindow(bool fullscreen) +{ + RunOnThread([fullscreen]() { UpdateDisplayWindowOnThread(fullscreen); }); +} + +void GPUThread::UpdateDisplayWindowOnThread(bool fullscreen) +{ + // In case we get the event late. + if (!g_gpu_device) + return; + + std::optional fullscreen_mode; + if (fullscreen && g_gpu_device->SupportsExclusiveFullscreen()) + { + fullscreen_mode = + GPUDevice::ExclusiveFullscreenMode::Parse(Host::GetTinyStringSettingValue("GPU", "FullscreenMode", "")); + } + std::optional exclusive_fullscreen_control; + if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) + { + exclusive_fullscreen_control = + (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); + } + + g_gpu_device->DestroyMainSwapChain(); + + Error error; + std::optional wi = + Host::AcquireRenderWindow(g_gpu_device->GetRenderAPI(), fullscreen, fullscreen_mode.has_value(), &error); + if (!wi.has_value()) + { + Host::ReportFatalError("Failed to get render window after update", error.GetDescription()); + return; + } + + // if surfaceless, just leave it + if (!wi->IsSurfaceless()) + { + if (!g_gpu_device->RecreateMainSwapChain( + wi.value(), s_state.requested_vsync, s_state.requested_allow_present_throttle, + fullscreen_mode.has_value() ? 
&fullscreen_mode.value() : nullptr, exclusive_fullscreen_control, &error)) + { + Host::ReportFatalError("Failed to change window after update", error.GetDescription()); + return; + } + } + + DisplayWindowResizedOnThread(); +} + +void GPUThread::DisplayWindowResizedOnThread() +{ + const GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain(); + if (swap_chain) + s_state.render_window_info = swap_chain->GetWindowInfo(); + else + s_state.render_window_info = WindowInfo(); + std::atomic_thread_fence(std::memory_order_release); + + // surfaceless is usually temporary, so just ignore it + if (!swap_chain) + return; + + const float f_width = static_cast(swap_chain->GetWidth()); + const float f_height = static_cast(swap_chain->GetHeight()); + ImGuiManager::WindowResized(f_width, f_height); + InputManager::SetDisplayWindowSize(f_width, f_height); + + if (s_state.gpu_backend) + { + Host::RunOnCPUThread([width = swap_chain->GetWidth(), height = swap_chain->GetHeight()]() { + System::DisplayWindowResized(width, height); + }); + + // If we're paused, re-present the current frame at the new window size. + if (System::IsPaused()) + { + // Hackity hack, on some systems, presenting a single frame isn't enough to actually get it + // displayed. Two seems to be good enough. Maybe something to do with direct scanout. + Internal::PresentFrame(false, 0); + Internal::PresentFrame(false, 0); + } + + if (g_gpu_settings.gpu_resolution_scale == 0) + s_state.gpu_backend->UpdateResolutionScale(); + } +} + +const WindowInfo& GPUThread::GetRenderWindowInfo() +{ + // This is infrequently used, so we can get away with a full barrier. 
+ std::atomic_thread_fence(std::memory_order_acquire); + return s_state.render_window_info; +} + +void GPUThread::SetVSync(GPUVSyncMode mode, bool allow_present_throttle) +{ + RunOnThread([mode, allow_present_throttle]() { + if (s_state.requested_vsync == mode && s_state.requested_allow_present_throttle == allow_present_throttle) + return; + + s_state.requested_vsync = mode; + s_state.requested_allow_present_throttle = allow_present_throttle; + + if (!g_gpu_device->HasMainSwapChain()) + return; + + Error error; + if (!g_gpu_device->GetMainSwapChain()->SetVSyncMode(s_state.requested_vsync, + s_state.requested_allow_present_throttle, &error)) + { + ERROR_LOG("Failed to update vsync mode: {}", error.GetDescription()); + } + }); +} + +void GPUThread::PresentCurrentFrame() +{ + RunOnThread([]() { + if (s_state.run_idle_flag) + { + // If we're running idle, we're going to re-present anyway. + return; + } + + Internal::PresentFrame(false, 0); + }); +} + +void GPUThread::SleepUntilPresentTime(Common::Timer::Value present_time) +{ + // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. + // Linux also seems to do a much better job of waking up at the requested time. + +#if !defined(__linux__) && !defined(__ANDROID__) + Common::Timer::SleepUntil(present_time, true); +#else + Common::Timer::SleepUntil(present_time, false); +#endif +} + +void GPUThread::Internal::PresentFrame(bool allow_skip_present, u64 present_time) +{ + const bool skip_present = (!g_gpu_device->HasMainSwapChain() || + (allow_skip_present && g_gpu_device->GetMainSwapChain()->ShouldSkipPresentingFrame() && + s_state.skipped_present_count < MAX_SKIPPED_PRESENT_COUNT)); + + if (!skip_present) + { + // acquire for IO.MousePos and system state. 
+ std::atomic_thread_fence(std::memory_order_acquire); + + FullscreenUI::Render(); + + if (s_state.gpu_backend && System::IsValid()) + ImGuiManager::RenderTextOverlays(s_state.gpu_backend.get()); + + ImGuiManager::RenderOSDMessages(); + + if (s_state.gpu_backend && System::GetState() == System::State::Running) + ImGuiManager::RenderSoftwareCursors(); + + ImGuiManager::RenderOverlayWindows(); + ImGuiManager::RenderDebugWindows(); + } + + const GPUDevice::PresentResult pres = + skip_present ? GPUDevice::PresentResult::SkipPresent : + (s_state.gpu_backend ? s_state.gpu_backend->PresentDisplay() : + g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain())); + if (pres == GPUDevice::PresentResult::OK) + { + s_state.skipped_present_count = 0; + + g_gpu_device->RenderImGui(g_gpu_device->GetMainSwapChain()); + + const GPUDevice::Features features = g_gpu_device->GetFeatures(); + const bool scheduled_present = (present_time != 0); + const bool explicit_present = (scheduled_present && (features.explicit_present && !features.timed_present)); + const bool timed_present = (scheduled_present && features.timed_present); + + if (scheduled_present && !explicit_present) + { + // No explicit present support, simulate it with Flush. + g_gpu_device->FlushCommands(); + SleepUntilPresentTime(present_time); + } + + g_gpu_device->EndPresent(g_gpu_device->GetMainSwapChain(), explicit_present, timed_present ? 
present_time : 0); + + if (g_gpu_device->IsGPUTimingEnabled()) + PerformanceCounters::AccumulateGPUTime(); + + if (explicit_present) + { + SleepUntilPresentTime(present_time); + g_gpu_device->SubmitPresent(g_gpu_device->GetMainSwapChain()); + } + } + else + { + s_state.skipped_present_count++; + + if (pres == GPUDevice::PresentResult::DeviceLost) [[unlikely]] + HandleGPUDeviceLost(); + else if (pres == GPUDevice::PresentResult::ExclusiveFullscreenLost) + HandleExclusiveFullscreenLost(); + else if (!skip_present) + g_gpu_device->FlushCommands(); + + // Still need to kick ImGui or it gets cranky. + ImGui::EndFrame(); + } + + ImGuiManager::NewFrame(); + + if (s_state.gpu_backend) + s_state.gpu_backend->RestoreDeviceContext(); +} + +bool GPUThread::GetRunIdleOnThread() +{ + // Read from both threads. + return s_state.run_idle_flag; +} + +void GPUThread::SetRunIdleOnThread(bool enabled) +{ + // Should only be called on GPU thread. + s_state.run_idle_flag = enabled; + DEV_LOG("GPU thread now {} idle", enabled ? "running" : "NOT running"); +} diff --git a/src/core/gpu_thread.h b/src/core/gpu_thread.h new file mode 100644 index 000000000..05e706d22 --- /dev/null +++ b/src/core/gpu_thread.h @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#pragma once + +#include "common/types.h" + +#include +#include + +class Error; +struct WindowInfo; + +namespace Threading { +class ThreadHandle; +} + +enum class RenderAPI : u8; +enum class GPUVSyncMode : u8; + +enum class GPURenderer : u8; +enum class GPUBackendCommandType : u8; + +struct GPUThreadCommand; +struct GPUBackendUpdateDisplayCommand; + +namespace GPUThread { +using AsyncCallType = std::function; + +/// Starts Big Picture UI. +bool StartFullscreenUI(bool fullscreen, Error* error); +void StopFullscreenUI(); + +/// Backend control. 
+std::optional GetRequestedRenderer(); +bool CreateGPUBackend(GPURenderer renderer, bool upload_vram, bool fullscreen, bool force_recreate_device, + Error* error); +void DestroyGPUBackend(); + +/// Re-presents the current frame. Call when things like window resizes happen to re-display +/// the current frame with the correct proportions. Should only be called from the CPU thread. +void PresentCurrentFrame(); + +/// Handles fullscreen transitions and such. +void UpdateDisplayWindow(bool fullscreen); + +/// Called when the window is resized. +void ResizeDisplayWindow(s32 width, s32 height, float scale); + +/// Access to main window size from CPU thread. +const WindowInfo& GetRenderWindowInfo(); + +void UpdateSettings(bool gpu_settings_changed); + +void RunOnThread(AsyncCallType func); +void SetVSync(GPUVSyncMode mode, bool allow_present_throttle); + +bool GetRunIdleOnThread(); +void SetRunIdleOnThread(bool enabled); + +GPUThreadCommand* AllocateCommand(GPUBackendCommandType command, u32 size); +void PushCommand(GPUThreadCommand* cmd); +void PushCommandAndWakeThread(GPUThreadCommand* cmd); +void PushCommandAndSync(GPUThreadCommand* cmd, bool spin); +void PushCommandAndFrame(GPUBackendUpdateDisplayCommand* cmd); + +// NOTE: Only called by GPUBackend +namespace Internal { +const Threading::ThreadHandle& GetThreadHandle(); +void ProcessStartup(); +void SetThreadEnabled(bool enabled); +void RequestShutdown(); +void GPUThreadEntryPoint(); +void PresentFrame(bool allow_skip_present, u64 present_time); +} // namespace Internal +} // namespace GPUThread diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index 137264ec5..e2c93b6f5 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -5,12 +5,24 @@ #include "types.h" +#include "util/gpu_texture.h" + #include "common/bitfield.h" #include "common/bitutils.h" #include "common/gsvector.h" #include #include +#include +#include + +class Error; + +class StateWrapper; + +class MediaCapture; + +enum class GPUVSyncMode : 
u8; enum : u32 { @@ -308,12 +320,17 @@ union GPUTexturePaletteReg ALWAYS_INLINE constexpr u32 GetYBase() const { return static_cast(y); } }; -struct GPUTextureWindow +union GPUTextureWindow { - u8 and_x; - u8 and_y; - u8 or_x; - u8 or_y; + struct + { + u8 and_x; + u8 and_y; + u8 or_x; + u8 or_y; + }; + + u32 bits; ALWAYS_INLINE bool operator==(const GPUTextureWindow& rhs) const { @@ -453,17 +470,149 @@ static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{- enum class GPUBackendCommandType : u8 { Wraparound, - Sync, + AsyncCall, + Reconfigure, + Shutdown, + ClearVRAM, + ClearDisplay, + UpdateDisplay, + BufferSwapped, + UpdateResolutionScale, + RenderScreenshotToBuffer, + RenderScreenshotToFile, + LoadState, + SaveState, + LoadMemoryState, + SaveMemoryState, + ReadVRAM, FillVRAM, UpdateVRAM, CopyVRAM, SetDrawingArea, UpdateCLUT, + ClearCache, DrawPolygon, + DrawPrecisePolygon, DrawRectangle, DrawLine, }; +struct GPUThreadCommand +{ + u32 size; + GPUBackendCommandType type; +}; + +struct GPUThreadReconfigureCommand : public GPUThreadCommand +{ + Error* error_ptr; + std::optional renderer; + std::optional fullscreen; + std::optional start_fullscreen_ui; + GPUVSyncMode vsync_mode; + bool allow_present_throttle; + bool force_recreate_device; + bool upload_vram; + bool result; +}; + +struct GPUThreadAsyncCallCommand : public GPUThreadCommand +{ + std::function func; +}; + +struct GPUThreadRenderScreenshotToBufferCommand : public GPUThreadCommand +{ + u32 width; + u32 height; + u32* out_width; + u32* out_height; + std::vector* out_pixels; + u32* out_stride; + GPUTexture::Format* out_format; + bool* out_result; + bool postfx; +}; + +struct GPUThreadRenderScreenshotToFileCommand : public GPUThreadCommand +{ + DisplayScreenshotMode mode; + u8 quality; + bool compress_on_thread; + bool show_osd_message; + u32 path_length; + char path[0]; +}; + +struct GPUBackendLoadStateCommand : public GPUThreadCommand +{ + GPUDrawingArea drawing_area; + u16 
vram_data[VRAM_WIDTH * VRAM_HEIGHT]; + u16 clut_data[GPU_CLUT_SIZE]; + u32 texture_cache_state_version; + u32 texture_cache_state_size; + u8 texture_cache_state[0]; // texture_cache_state_size +}; + +struct GPUBackendSaveStateCommand : public GPUThreadCommand +{ + StateWrapper* sw; +}; + +struct GPUBackendLoadMemoryStateCommand : public GPUThreadCommand +{ +}; + +struct GPUBackendSaveMemoryStateCommand : public GPUThreadCommand +{ +}; + +struct GPUBackendUpdateDisplayCommand : public GPUThreadCommand +{ + u32 frame_number; + u32 internal_frame_number; + + u16 display_width; + u16 display_height; + u16 display_origin_left; + u16 display_origin_top; + u16 display_vram_left; + u16 display_vram_top; + u16 display_vram_width; + u16 display_vram_height; + + u16 X; // TODO: Can we get rid of this? + + union + { + u16 bits; + + BitField interlaced_display_enabled; + BitField interlaced_display_field; + BitField interlaced_display_interleaved; + BitField display_24bit; + BitField display_disabled; + + BitField allow_present_skip; + BitField present_frame; + + BitField is_frame; + }; + + float display_aspect_ratio; + + u64 present_time; + MediaCapture* media_capture; +}; + +struct GPUBackendReadVRAMCommand : public GPUThreadCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; +}; + union GPUBackendCommandParameters { u8 bits; @@ -489,18 +638,12 @@ union GPUBackendCommandParameters } }; -struct GPUBackendCommand +// TODO: Merge this into the other structs, saves padding bytes +struct GPUBackendCommand : public GPUThreadCommand { - u32 size; - GPUBackendCommandType type; GPUBackendCommandParameters params; }; -struct GPUBackendSyncCommand : public GPUBackendCommand -{ - bool allow_sleep; -}; - struct GPUBackendFillVRAMCommand : public GPUBackendCommand { u16 x; @@ -532,7 +675,6 @@ struct GPUBackendCopyVRAMCommand : public GPUBackendCommand struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand { GPUDrawingArea new_area; - s32 new_clamped_area[4]; }; struct 
GPUBackendUpdateCLUTCommand : public GPUBackendCommand @@ -541,8 +683,10 @@ struct GPUBackendUpdateCLUTCommand : public GPUBackendCommand bool clut_is_8bit; }; +// TODO: Pack texpage struct GPUBackendDrawCommand : public GPUBackendCommand { + // TODO: Cut this down GPUDrawModeReg draw_mode; GPURenderCommand rc; GPUTexturePaletteReg palette; @@ -551,7 +695,7 @@ struct GPUBackendDrawCommand : public GPUBackendCommand struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand { - u16 num_vertices; + u8 num_vertices; struct Vertex { @@ -572,14 +716,22 @@ struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand }; u16 texcoord; }; + }; - ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u16 texcoord_) - { - x = x_; - y = y_; - color = color_; - texcoord = texcoord_; - } + Vertex vertices[0]; +}; + +struct GPUBackendDrawPrecisePolygonCommand : public GPUBackendDrawCommand +{ + u8 num_vertices; + bool valid_w; + + struct Vertex + { + float x, y, w; + s32 native_x, native_y; + u32 color; + u16 texcoord; }; Vertex vertices[0]; @@ -587,9 +739,9 @@ struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand { - s32 x, y; u16 width, height; u16 texcoord; + s32 x, y; u32 color; }; diff --git a/src/core/host.cpp b/src/core/host.cpp index 1d26548be..0c530fde4 100644 --- a/src/core/host.cpp +++ b/src/core/host.cpp @@ -2,19 +2,13 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "host.h" -#include "fullscreen_ui.h" #include "gpu.h" -#include "imgui_overlays.h" -#include "shader_cache_version.h" #include "system.h" #include "system_private.h" #include "scmversion/scmversion.h" #include "util/compress_helpers.h" -#include "util/gpu_device.h" -#include "util/imgui_manager.h" -#include "util/input_manager.h" #include "common/assert.h" #include "common/error.h" @@ -342,175 +336,3 @@ std::string Host::GetHTTPUserAgent() { return fmt::format("DuckStation for {} ({}) {}", TARGET_OS_STR, 
CPU_ARCH_STR, g_scm_tag_str); } - -bool Host::CreateGPUDevice(RenderAPI api, bool fullscreen, Error* error) -{ - DebugAssert(!g_gpu_device); - - INFO_LOG("Trying to create a {} GPU device...", GPUDevice::RenderAPIToString(api)); - g_gpu_device = GPUDevice::CreateDeviceForAPI(api); - - std::optional fullscreen_mode; - if (fullscreen && g_gpu_device && g_gpu_device->SupportsExclusiveFullscreen()) - { - fullscreen_mode = - GPUDevice::ExclusiveFullscreenMode::Parse(Host::GetTinyStringSettingValue("GPU", "FullscreenMode", "")); - } - std::optional exclusive_fullscreen_control; - if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) - { - exclusive_fullscreen_control = - (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); - } - - u32 disabled_features = 0; - if (g_settings.gpu_disable_dual_source_blend) - disabled_features |= GPUDevice::FEATURE_MASK_DUAL_SOURCE_BLEND; - if (g_settings.gpu_disable_framebuffer_fetch) - disabled_features |= GPUDevice::FEATURE_MASK_FRAMEBUFFER_FETCH; - if (g_settings.gpu_disable_texture_buffers) - disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_BUFFERS; - if (g_settings.gpu_disable_memory_import) - disabled_features |= GPUDevice::FEATURE_MASK_MEMORY_IMPORT; - if (g_settings.gpu_disable_raster_order_views) - disabled_features |= GPUDevice::FEATURE_MASK_RASTER_ORDER_VIEWS; - - // Don't dump shaders on debug builds for Android, users will complain about storage... 
-#if !defined(__ANDROID__) || defined(_DEBUG) - const std::string_view shader_dump_directory(EmuFolders::DataRoot); -#else - const std::string_view shader_dump_directory; -#endif - - Error create_error; - std::optional wi; - if (!g_gpu_device || - !(wi = Host::AcquireRenderWindow(api, fullscreen, fullscreen_mode.has_value(), &create_error)).has_value() || - !g_gpu_device->Create( - g_settings.gpu_adapter, static_cast(disabled_features), shader_dump_directory, - g_settings.gpu_disable_shader_cache ? std::string_view() : std::string_view(EmuFolders::Cache), - SHADER_CACHE_VERSION, g_settings.gpu_use_debug_device, wi.value(), System::GetEffectiveVSyncMode(), - System::ShouldAllowPresentThrottle(), fullscreen_mode.has_value() ? &fullscreen_mode.value() : nullptr, - exclusive_fullscreen_control, &create_error)) - { - ERROR_LOG("Failed to create GPU device: {}", create_error.GetDescription()); - if (g_gpu_device) - g_gpu_device->Destroy(); - g_gpu_device.reset(); - if (wi.has_value()) - Host::ReleaseRenderWindow(); - - Error::SetStringFmt( - error, - TRANSLATE_FS("System", "Failed to create render device:\n\n{0}\n\nThis may be due to your GPU not supporting the " - "chosen renderer ({1}), or because your graphics drivers need to be updated."), - create_error.GetDescription(), GPUDevice::RenderAPIToString(api)); - return false; - } - - if (!ImGuiManager::Initialize(g_settings.display_osd_scale / 100.0f, g_settings.display_osd_margin, &create_error)) - { - ERROR_LOG("Failed to initialize ImGuiManager: {}", create_error.GetDescription()); - Error::SetStringFmt(error, "Failed to initialize ImGuiManager: {}", create_error.GetDescription()); - g_gpu_device->Destroy(); - g_gpu_device.reset(); - Host::ReleaseRenderWindow(); - return false; - } - - InputManager::SetDisplayWindowSize(ImGuiManager::GetWindowWidth(), ImGuiManager::GetWindowHeight()); - return true; -} - -void Host::UpdateDisplayWindow(bool fullscreen) -{ - if (!g_gpu_device) - return; - - const GPUVSyncMode 
vsync_mode = System::GetEffectiveVSyncMode(); - const bool allow_present_throttle = System::ShouldAllowPresentThrottle(); - std::optional fullscreen_mode; - if (fullscreen && g_gpu_device->SupportsExclusiveFullscreen()) - { - fullscreen_mode = - GPUDevice::ExclusiveFullscreenMode::Parse(Host::GetTinyStringSettingValue("GPU", "FullscreenMode", "")); - } - std::optional exclusive_fullscreen_control; - if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) - { - exclusive_fullscreen_control = - (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); - } - - g_gpu_device->DestroyMainSwapChain(); - - Error error; - std::optional wi = - Host::AcquireRenderWindow(g_gpu_device->GetRenderAPI(), fullscreen, fullscreen_mode.has_value(), &error); - if (!wi.has_value()) - { - Host::ReportFatalError("Failed to get render window after update", error.GetDescription()); - return; - } - - // if surfaceless, just leave it - if (wi->IsSurfaceless()) - return; - - if (!g_gpu_device->RecreateMainSwapChain(wi.value(), vsync_mode, allow_present_throttle, - fullscreen_mode.has_value() ? 
&fullscreen_mode.value() : nullptr, - exclusive_fullscreen_control, &error)) - { - Host::ReportFatalError("Failed to change window after update", error.GetDescription()); - return; - } - - const u32 new_width = g_gpu_device->GetMainSwapChain()->GetWidth(); - const u32 new_height = g_gpu_device->GetMainSwapChain()->GetHeight(); - const float f_width = static_cast(new_width); - const float f_height = static_cast(new_height); - ImGuiManager::WindowResized(f_width, f_height); - InputManager::SetDisplayWindowSize(f_width, f_height); - System::DisplayWindowResized(new_width, new_height); -} - -void Host::ResizeDisplayWindow(s32 width, s32 height, float scale) -{ - if (!g_gpu_device || !g_gpu_device->HasMainSwapChain()) - return; - - DEV_LOG("Display window resized to {}x{}", width, height); - - Error error; - if (!g_gpu_device->GetMainSwapChain()->ResizeBuffers(width, height, scale, &error)) - { - ERROR_LOG("Failed to resize main swap chain: {}", error.GetDescription()); - UpdateDisplayWindow(Host::IsFullscreen()); - return; - } - - const u32 new_width = g_gpu_device->GetMainSwapChain()->GetWidth(); - const u32 new_height = g_gpu_device->GetMainSwapChain()->GetHeight(); - const float f_width = static_cast(new_width); - const float f_height = static_cast(new_height); - ImGuiManager::WindowResized(f_width, f_height); - InputManager::SetDisplayWindowSize(f_width, f_height); - System::DisplayWindowResized(new_width, new_height); -} - -void Host::ReleaseGPUDevice() -{ - if (!g_gpu_device) - return; - - ImGuiManager::DestroyAllDebugWindows(); - ImGuiManager::DestroyOverlayTextures(); - FullscreenUI::Shutdown(); - ImGuiManager::Shutdown(); - - INFO_LOG("Destroying {} GPU device...", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); - g_gpu_device->Destroy(); - g_gpu_device.reset(); - - Host::ReleaseRenderWindow(); -} diff --git a/src/core/host.h b/src/core/host.h index 41896a522..498971245 100644 --- a/src/core/host.h +++ b/src/core/host.h @@ -96,21 +96,6 @@ bool 
IsFullscreen(); /// Alters fullscreen state of hosting application. void SetFullscreen(bool enabled); -/// Attempts to create the rendering device backend. -bool CreateGPUDevice(RenderAPI api, bool fullscreen, Error* error); - -/// Handles fullscreen transitions and such. -void UpdateDisplayWindow(bool fullscreen); - -/// Called when the window is resized. -void ResizeDisplayWindow(s32 width, s32 height, float scale); - -/// Destroys any active rendering device. -void ReleaseGPUDevice(); - -/// Called at the end of the frame, before presentation. -void FrameDone(); - namespace Internal { /// Returns true if the host should use portable mode. diff --git a/src/core/hotkeys.cpp b/src/core/hotkeys.cpp index 6fc333f4d..55ad2d5d2 100644 --- a/src/core/hotkeys.cpp +++ b/src/core/hotkeys.cpp @@ -8,6 +8,7 @@ #include "fullscreen_ui.h" #include "gpu.h" #include "gpu_hw_texture_cache.h" +#include "gpu_thread.h" #include "host.h" #include "imgui_overlays.h" #include "settings.h" @@ -58,8 +59,7 @@ static void HotkeyModifyResolutionScale(s32 increment) if (System::IsValid()) { - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); + GPUThread::UpdateSettings(true); System::ClearMemorySaveStates(); } } @@ -386,11 +386,10 @@ DEFINE_HOTKEY("TogglePGXP", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOO [](s32 pressed) { if (!pressed && System::IsValid()) { - Settings old_settings = g_settings; g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); System::ClearMemorySaveStates(); + GPUThread::UpdateSettings(true); + Host::AddKeyedOSDMessage("TogglePGXP", g_settings.gpu_pgxp_enable ? 
TRANSLATE_STR("OSDMessage", "PGXP is now enabled.") : @@ -459,12 +458,11 @@ DEFINE_HOTKEY("TogglePGXPDepth", TRANSLATE_NOOP("Hotkeys", "Graphics"), if (!g_settings.gpu_pgxp_enable) return; - const Settings old_settings = g_settings; g_settings.gpu_pgxp_depth_buffer = !g_settings.gpu_pgxp_depth_buffer; - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); System::ClearMemorySaveStates(); + GPUThread::UpdateSettings(true); + Host::AddKeyedOSDMessage("TogglePGXPDepth", g_settings.gpu_pgxp_depth_buffer ? TRANSLATE_STR("OSDMessage", "PGXP Depth Buffer is now enabled.") : @@ -480,12 +478,11 @@ DEFINE_HOTKEY("TogglePGXPCPU", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_ if (!g_settings.gpu_pgxp_enable) return; - const Settings old_settings = g_settings; g_settings.gpu_pgxp_cpu = !g_settings.gpu_pgxp_cpu; - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); + // GPU thread is unchanged System::ClearMemorySaveStates(); + Host::AddKeyedOSDMessage("TogglePGXPCPU", g_settings.gpu_pgxp_cpu ? 
TRANSLATE_STR("OSDMessage", "PGXP CPU mode is now enabled.") : @@ -595,29 +592,29 @@ DEFINE_HOTKEY("AudioVolumeDown", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_N DEFINE_HOTKEY("LoadSelectedSaveState", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Load From Selected Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread(SaveStateSelectorUI::LoadCurrentSlot); + GPUThread::RunOnThread(SaveStateSelectorUI::LoadCurrentSlot); }) DEFINE_HOTKEY("SaveSelectedSaveState", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Save To Selected Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread(SaveStateSelectorUI::SaveCurrentSlot); + GPUThread::RunOnThread(SaveStateSelectorUI::SaveCurrentSlot); }) DEFINE_HOTKEY("SelectPreviousSaveStateSlot", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Select Previous Save Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread([]() { SaveStateSelectorUI::SelectPreviousSlot(true); }); + GPUThread::RunOnThread([]() { SaveStateSelectorUI::SelectPreviousSlot(true); }); }) DEFINE_HOTKEY("SelectNextSaveStateSlot", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Select Next Save Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread([]() { SaveStateSelectorUI::SelectNextSlot(true); }); + GPUThread::RunOnThread([]() { SaveStateSelectorUI::SelectNextSlot(true); }); }) DEFINE_HOTKEY("SaveStateAndSelectNextSlot", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Save State and Select Next Slot"), [](s32 pressed) { if (!pressed && System::IsValid()) { SaveStateSelectorUI::SaveCurrentSlot(); - SaveStateSelectorUI::SelectNextSlot(false); + GPUThread::RunOnThread([]() { SaveStateSelectorUI::SelectNextSlot(false); }); } }) diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index 47556c01d..11a50fdc4 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -9,6 +9,8 @@ #include 
"dma.h" #include "fullscreen_ui.h" #include "gpu.h" +#include "gpu_backend.h" +#include "gpu_thread.h" #include "host.h" #include "mdec.h" #include "performance_counters.h" @@ -70,10 +72,10 @@ struct DebugWindowInfo } // namespace static void FormatProcessorStat(SmallStringBase& text, double usage, double time); -static void DrawPerformanceOverlay(float& position_y, float scale, float margin, float spacing); +static void DrawPerformanceOverlay(const GPUBackend* gpu, float& position_y, float scale, float margin, float spacing); static void DrawMediaCaptureOverlay(float& position_y, float scale, float margin, float spacing); static void DrawFrameTimeOverlay(float& position_y, float scale, float margin, float spacing); -static void DrawEnhancementsOverlay(); +static void DrawEnhancementsOverlay(const GPUBackend* gpu); static void DrawInputsOverlay(); #ifndef __ANDROID__ @@ -284,26 +286,25 @@ void ImGuiManager::DestroyAllDebugWindows() #endif } -void ImGuiManager::RenderTextOverlays() +void ImGuiManager::RenderTextOverlays(const GPUBackend* gpu) { + // NOTE: Racey read. 
const System::State state = System::GetState(); - if (state != System::State::Shutdown) - { - const float scale = ImGuiManager::GetGlobalScale(); - const float f_margin = ImGuiManager::GetScreenMargin() * scale; - const float margin = ImCeil(ImGuiManager::GetScreenMargin() * scale); - const float spacing = ImCeil(5.0f * scale); - float position_y = ImFloor(f_margin); - DrawPerformanceOverlay(position_y, scale, margin, spacing); - DrawFrameTimeOverlay(position_y, scale, margin, spacing); - DrawMediaCaptureOverlay(position_y, scale, margin, spacing); - if (g_settings.display_show_enhancements && state != System::State::Paused) - DrawEnhancementsOverlay(); + const float scale = ImGuiManager::GetGlobalScale(); + const float f_margin = ImGuiManager::GetScreenMargin() * scale; + const float margin = ImCeil(ImGuiManager::GetScreenMargin() * scale); + const float spacing = ImCeil(5.0f * scale); + float position_y = ImFloor(f_margin); + DrawPerformanceOverlay(gpu, position_y, scale, margin, spacing); + DrawFrameTimeOverlay(position_y, scale, margin, spacing); + DrawMediaCaptureOverlay(position_y, scale, margin, spacing); - if (g_settings.display_show_inputs && state != System::State::Paused) - DrawInputsOverlay(); - } + if (g_gpu_settings.display_show_enhancements && state != System::State::Paused) + DrawEnhancementsOverlay(gpu); + + if (g_gpu_settings.display_show_inputs && state != System::State::Paused) + DrawInputsOverlay(); } void ImGuiManager::FormatProcessorStat(SmallStringBase& text, double usage, double time) @@ -317,11 +318,12 @@ void ImGuiManager::FormatProcessorStat(SmallStringBase& text, double usage, doub text.append_format("{:.1f}% ({:.2f}ms)", usage, time); } -void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float margin, float spacing) +void ImGuiManager::DrawPerformanceOverlay(const GPUBackend* gpu, float& position_y, float scale, float margin, + float spacing) { - if (!(g_settings.display_show_fps || g_settings.display_show_speed 
|| g_settings.display_show_gpu_stats || - g_settings.display_show_resolution || g_settings.display_show_cpu_usage || - (g_settings.display_show_status_indicators && + if (!(g_gpu_settings.display_show_fps || g_gpu_settings.display_show_speed || g_gpu_settings.display_show_gpu_stats || + g_gpu_settings.display_show_resolution || g_gpu_settings.display_show_cpu_usage || + (g_gpu_settings.display_show_status_indicators && (System::IsPaused() || System::IsFastForwardEnabled() || System::IsTurboEnabled())))) { return; @@ -352,9 +354,9 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float if (state == System::State::Running) { const float speed = PerformanceCounters::GetEmulationSpeed(); - if (g_settings.display_show_fps) + if (g_gpu_settings.display_show_fps) text.append_format("G: {:.2f} | V: {:.2f}", PerformanceCounters::GetFPS(), PerformanceCounters::GetVPS()); - if (g_settings.display_show_speed) + if (g_gpu_settings.display_show_speed) { text.append_format("{}{}%", text.empty() ? 
"" : " | ", static_cast(std::round(speed))); @@ -377,19 +379,19 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float DRAW_LINE(fixed_font, text, color); } - if (g_settings.display_show_gpu_stats) + if (g_gpu_settings.display_show_gpu_stats) { - g_gpu->GetStatsString(text); + gpu->GetStatsString(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); - g_gpu->GetMemoryStatsString(text); + gpu->GetMemoryStatsString(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_resolution) + if (g_gpu_settings.display_show_resolution) { - const u32 resolution_scale = g_gpu->GetResolutionScale(); - const auto [display_width, display_height] = g_gpu->GetFullDisplayResolution(); + const u32 resolution_scale = gpu->GetResolutionScale(); + const auto [display_width, display_height] = gpu->GetFullDisplayResolution(); const bool interlaced = g_gpu->IsInterlacedDisplayEnabled(); const bool pal = g_gpu->IsInPALMode(); text.format("{}x{} {} {} [{}x]", display_width * resolution_scale, display_height * resolution_scale, @@ -397,13 +399,13 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_latency_stats) + if (g_gpu_settings.display_show_latency_stats) { System::FormatLatencyStats(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_cpu_usage) + if (g_gpu_settings.display_show_cpu_usage) { text.format("{:.2f}ms | {:.2f}ms | {:.2f}ms", PerformanceCounters::GetMinimumFrameTime(), PerformanceCounters::GetAverageFrameTime(), PerformanceCounters::GetMaximumFrameTime()); @@ -459,11 +461,11 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float PerformanceCounters::GetCPUThreadAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); - if (g_gpu->GetSWThread()) + if (g_gpu_settings.gpu_use_thread) { - 
text.assign("SW: "); - FormatProcessorStat(text, PerformanceCounters::GetSWThreadUsage(), - PerformanceCounters::GetSWThreadAverageTime()); + text.assign("RNDR: "); + FormatProcessorStat(text, PerformanceCounters::GetGPUThreadUsage(), + PerformanceCounters::GetGPUThreadAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } @@ -477,14 +479,14 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float #endif } - if (g_settings.display_show_gpu_usage && g_gpu_device->IsGPUTimingEnabled()) + if (g_gpu_settings.display_show_gpu_usage && g_gpu_device->IsGPUTimingEnabled()) { text.assign("GPU: "); FormatProcessorStat(text, PerformanceCounters::GetGPUUsage(), PerformanceCounters::GetGPUAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_status_indicators) + if (g_gpu_settings.display_show_status_indicators) { const bool rewinding = System::IsRewinding(); if (rewinding || System::IsFastForwardEnabled() || System::IsTurboEnabled()) @@ -494,7 +496,7 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float } } } - else if (g_settings.display_show_status_indicators && state == System::State::Paused && + else if (g_gpu_settings.display_show_status_indicators && state == System::State::Paused && !FullscreenUI::HasActiveWindow()) { text.assign(ICON_EMOJI_PAUSE); @@ -504,12 +506,12 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float #undef DRAW_LINE } -void ImGuiManager::DrawEnhancementsOverlay() +void ImGuiManager::DrawEnhancementsOverlay(const GPUBackend* gpu) { LargeString text; text.append_format("{} {}-{}", Settings::GetConsoleRegionName(System::GetRegion()), GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), - g_gpu->IsHardwareRenderer() ? "HW" : "SW"); + gpu->IsHardwareRenderer() ? 
"HW" : "SW"); if (g_settings.rewind_enable) text.append_format(" RW={}/{}", g_settings.rewind_save_frequency, g_settings.rewind_save_slots); @@ -953,7 +955,10 @@ void SaveStateSelectorUI::ClearList() for (ListEntry& li : s_slots) { if (li.preview_texture) - g_gpu_device->RecycleTexture(std::move(li.preview_texture)); + { + GPUThread::RunOnThread( + [tex = li.preview_texture.release()]() { g_gpu_device->RecycleTexture(std::unique_ptr(tex)); }); + } } s_slots.clear(); } @@ -1273,7 +1278,7 @@ void SaveStateSelectorUI::LoadCurrentSlot() } } - Close(); + GPUThread::RunOnThread(&Close); } void SaveStateSelectorUI::SaveCurrentSlot() @@ -1290,7 +1295,7 @@ void SaveStateSelectorUI::SaveCurrentSlot() } } - Close(); + GPUThread::RunOnThread(&Close); } void SaveStateSelectorUI::ShowSlotOSDMessage() @@ -1314,7 +1319,7 @@ void SaveStateSelectorUI::ShowSlotOSDMessage() void ImGuiManager::RenderOverlayWindows() { const System::State state = System::GetState(); - if (state != System::State::Shutdown) + if (state == System::State::Paused || state == System::State::Running) { if (SaveStateSelectorUI::s_open) SaveStateSelectorUI::Draw(); diff --git a/src/core/imgui_overlays.h b/src/core/imgui_overlays.h index 7c9f26540..dee60e426 100644 --- a/src/core/imgui_overlays.h +++ b/src/core/imgui_overlays.h @@ -7,8 +7,10 @@ #include +class GPUBackend; + namespace ImGuiManager { -void RenderTextOverlays(); +void RenderTextOverlays(const GPUBackend* gpu); void RenderDebugWindows(); bool UpdateDebugWindowConfig(); void DestroyAllDebugWindows(); diff --git a/src/core/performance_counters.cpp b/src/core/performance_counters.cpp index 5a6ad2a20..4c68f1935 100644 --- a/src/core/performance_counters.cpp +++ b/src/core/performance_counters.cpp @@ -3,6 +3,8 @@ #include "performance_counters.h" #include "gpu.h" +#include "gpu_backend.h" +#include "gpu_thread.h" #include "system.h" #include "system_private.h" @@ -45,9 +47,9 @@ struct State float cpu_thread_usage; float cpu_thread_time; - u64 
last_sw_time; - float sw_thread_usage; - float sw_thread_time; + u64 last_gpu_thread_time; + float gpu_thread_usage; + float gpu_thread_time; float average_gpu_time; float accumulated_gpu_time; @@ -105,14 +107,14 @@ float PerformanceCounters::GetCPUThreadAverageTime() return s_state.cpu_thread_time; } -float PerformanceCounters::GetSWThreadUsage() +float PerformanceCounters::GetGPUThreadUsage() { - return s_state.sw_thread_usage; + return s_state.gpu_thread_usage; } -float PerformanceCounters::GetSWThreadAverageTime() +float PerformanceCounters::GetGPUThreadAverageTime() { - return s_state.sw_thread_time; + return s_state.gpu_thread_time; } float PerformanceCounters::GetGPUUsage() @@ -150,17 +152,16 @@ void PerformanceCounters::Reset() s_state.last_frame_number = System::GetFrameNumber(); s_state.last_internal_frame_number = System::GetInternalFrameNumber(); s_state.last_cpu_time = System::GetCPUThreadHandle().GetCPUTime(); - if (const Threading::Thread* sw_thread = g_gpu->GetSWThread(); sw_thread) - s_state.last_sw_time = sw_thread->GetCPUTime(); - else - s_state.last_sw_time = 0; + s_state.last_gpu_thread_time = GPUThread::Internal::GetThreadHandle().GetCPUTime(); s_state.average_frame_time_accumulator = 0.0f; s_state.minimum_frame_time_accumulator = 0.0f; s_state.maximum_frame_time_accumulator = 0.0f; + + std::atomic_thread_fence(std::memory_order_release); } -void PerformanceCounters::Update(u32 frame_number, u32 internal_frame_number) +void PerformanceCounters::Update(GPUBackend* gpu, u32 frame_number, u32 internal_frame_number) { const Common::Timer::Value now_ticks = Common::Timer::GetCurrentValue(); @@ -177,7 +178,7 @@ void PerformanceCounters::Update(u32 frame_number, u32 internal_frame_number) // update fps counter const Common::Timer::Value ticks_diff = now_ticks - s_state.last_update_time; const float time = static_cast(Common::Timer::ConvertValueToSeconds(ticks_diff)); - if (time < PERFORMANCE_COUNTER_UPDATE_INTERVAL) + if (time < 
PERFORMANCE_COUNTER_UPDATE_INTERVAL || s_state.last_frame_number == frame_number) return; s_state.last_update_time = now_ticks; @@ -202,18 +203,17 @@ void PerformanceCounters::Update(u32 frame_number, u32 internal_frame_number) s_state.fps = static_cast(internal_frames_run) / time; s_state.speed = (s_state.vps / System::GetVideoFrameRate()) * 100.0f; - const Threading::Thread* sw_thread = g_gpu->GetSWThread(); const u64 cpu_time = System::GetCPUThreadHandle().GetCPUTime(); - const u64 sw_time = sw_thread ? sw_thread->GetCPUTime() : 0; + const u64 gpu_thread_time = GPUThread::Internal::GetThreadHandle().GetCPUTime(); const u64 cpu_delta = cpu_time - s_state.last_cpu_time; - const u64 sw_delta = sw_time - s_state.last_sw_time; + const u64 gpu_thread_delta = gpu_thread_time - s_state.last_gpu_thread_time; s_state.last_cpu_time = cpu_time; - s_state.last_sw_time = sw_time; + s_state.last_gpu_thread_time = gpu_thread_time; s_state.cpu_thread_usage = static_cast(static_cast(cpu_delta) * pct_divider); s_state.cpu_thread_time = static_cast(static_cast(cpu_delta) * time_divider); - s_state.sw_thread_usage = static_cast(static_cast(sw_delta) * pct_divider); - s_state.sw_thread_time = static_cast(static_cast(sw_delta) * time_divider); + s_state.gpu_thread_usage = static_cast(static_cast(gpu_thread_delta) * pct_divider); + s_state.gpu_thread_time = static_cast(static_cast(gpu_thread_delta) * time_divider); if (MediaCapture* cap = System::GetMediaCapture()) cap->UpdateCaptureThreadUsage(pct_divider, time_divider); @@ -228,13 +228,13 @@ void PerformanceCounters::Update(u32 frame_number, u32 internal_frame_number) s_state.presents_since_last_update = 0; if (g_settings.display_show_gpu_stats) - g_gpu->UpdateStatistics(frames_run); + gpu->UpdateStatistics(frames_run); - VERBOSE_LOG("FPS: {:.2f} VPS: {:.2f} CPU: {:.2f} GPU: {:.2f} Avg: {:.2f}ms Min: {:.2f}ms Max: {:.2f}ms", - s_state.fps, s_state.vps, s_state.cpu_thread_usage, s_state.gpu_usage, s_state.average_frame_time, - 
s_state.minimum_frame_time, s_state.maximum_frame_time); + VERBOSE_LOG("FPS: {:.2f} VPS: {:.2f} CPU: {:.2f} RNDR: {:.2f} GPU: {:.2f} Avg: {:.2f}ms Min: {:.2f}ms Max: {:.2f}ms", + s_state.fps, s_state.vps, s_state.cpu_thread_usage, s_state.gpu_thread_usage, s_state.gpu_usage, + s_state.average_frame_time, s_state.minimum_frame_time, s_state.maximum_frame_time); - Host::OnPerformanceCountersUpdated(); + Host::OnPerformanceCountersUpdated(gpu); } void PerformanceCounters::AccumulateGPUTime() diff --git a/src/core/performance_counters.h b/src/core/performance_counters.h index db3827b5b..fbdfc86cc 100644 --- a/src/core/performance_counters.h +++ b/src/core/performance_counters.h @@ -5,6 +5,8 @@ #include "common/types.h" +class GPUBackend; + namespace PerformanceCounters { static constexpr u32 NUM_FRAME_TIME_SAMPLES = 150; @@ -18,8 +20,8 @@ float GetMinimumFrameTime(); float GetMaximumFrameTime(); float GetCPUThreadUsage(); float GetCPUThreadAverageTime(); -float GetSWThreadUsage(); -float GetSWThreadAverageTime(); +float GetGPUThreadUsage(); +float GetGPUThreadAverageTime(); float GetGPUUsage(); float GetGPUAverageTime(); const FrameTimeHistory& GetFrameTimeHistory(); @@ -27,7 +29,7 @@ u32 GetFrameTimeHistoryPos(); void Clear(); void Reset(); -void Update(u32 frame_number, u32 internal_frame_number); +void Update(GPUBackend* gpu, u32 frame_number, u32 internal_frame_number); void AccumulateGPUTime(); } // namespace Host diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 401525463..1b2b9df02 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -28,7 +28,8 @@ LOG_CHANNEL(Settings); -Settings g_settings; +ALIGN_TO_CACHE_LINE Settings g_settings; +ALIGN_TO_CACHE_LINE Settings g_gpu_settings; const char* SettingInfo::StringDefaultValue() const { @@ -204,6 +205,7 @@ void Settings::Load(const SettingsInterface& si, const SettingsInterface& contro gpu_disable_raster_order_views = si.GetBoolValue("GPU", "DisableRasterOrderViews", false); 
gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true); + gpu_max_queued_frames = static_cast(si.GetUIntValue("GPU", "MaxQueuedFrames", DEFAULT_GPU_MAX_QUEUED_FRAMES)); gpu_use_software_renderer_for_readbacks = si.GetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", true); @@ -533,6 +535,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const } si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); + si.SetUIntValue("GPU", "MaxQueuedFrames", gpu_max_queued_frames); si.SetBoolValue("GPU", "UseThread", gpu_use_thread); si.SetBoolValue("GPU", "UseSoftwareRendererForReadbacks", gpu_use_software_renderer_for_readbacks); si.SetBoolValue("GPU", "TrueColor", gpu_true_color); diff --git a/src/core/settings.h b/src/core/settings.h index d30b40767..08cabf14f 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -98,6 +98,7 @@ struct Settings std::string gpu_adapter; u8 gpu_resolution_scale = 1; u8 gpu_multisamples = 1; + u8 gpu_max_queued_frames = 2; bool gpu_use_thread : 1 = true; bool gpu_use_software_renderer_for_readbacks : 1 = false; bool gpu_use_debug_device : 1 = false; @@ -486,6 +487,8 @@ struct Settings static constexpr ConsoleRegion DEFAULT_CONSOLE_REGION = ConsoleRegion::Auto; static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f; static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f; + static constexpr u8 DEFAULT_GPU_MAX_QUEUED_FRAMES = 2; // TODO: Maybe lower? But that means fast CPU threads would + // always stall, could be a problem for power management. // Prefer oldrec over newrec for now. Except on RISC-V, where there is no oldrec. #if defined(CPU_ARCH_RISCV64) @@ -552,7 +555,9 @@ struct Settings #endif }; -extern Settings g_settings; +// TODO: Use smaller copy for GPU thread copy. 
+ALIGN_TO_CACHE_LINE extern Settings g_settings; // CPU thread copy. +ALIGN_TO_CACHE_LINE extern Settings g_gpu_settings; // GPU thread copy. namespace EmuFolders { extern std::string AppRoot; diff --git a/src/core/system.cpp b/src/core/system.cpp index f522add4e..c6f4319a2 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -16,8 +16,10 @@ #include "game_database.h" #include "game_list.h" #include "gpu.h" +#include "gpu_backend.h" #include "gpu_dump.h" #include "gpu_hw_texture_cache.h" +#include "gpu_thread.h" #include "gte.h" #include "host.h" #include "host_interface_progress_callback.h" @@ -167,11 +169,10 @@ static void ClearRunningGame(); static void DestroySystem(); static void JoinTaskThreads(); -static bool CreateGPU(GPURenderer renderer, bool is_switching, bool fullscreen, Error* error); -static bool RecreateGPU(GPURenderer renderer, bool force_recreate_device = false, bool update_display = true); -static void HandleHostGPUDeviceLost(); -static void HandleExclusiveFullscreenLost(); +static void RecreateGPU(GPURenderer renderer, bool force_recreate_device = false, bool update_display = true); +static void SetGTEAspectRatioFromRenderWindow(); static std::string GetScreenshotPath(const char* extension); +static void StopMediaCapture(std::unique_ptr cap); /// Returns true if boot is being fast forwarded. static bool IsFastForwardingBoot(); @@ -181,7 +182,7 @@ static void UpdateThrottlePeriod(); static void ResetThrottler(); /// Throttles the system, i.e. sleeps until it's time to execute the next frame. 
-static void Throttle(Common::Timer::Value current_time); +static void Throttle(Common::Timer::Value current_time, Common::Timer::Value sleep_until); static void AccumulatePreFrameSleepTime(Common::Timer::Value current_time); static void UpdateDisplayVSync(); @@ -304,7 +305,6 @@ struct ALIGN_TO_CACHE_LINE StateVars GameHash running_game_hash; bool running_game_custom_title = false; - bool keep_gpu_device_on_shutdown = false; std::atomic_bool startup_cancelled{false}; bool rewinding_first_save = false; @@ -517,6 +517,8 @@ bool System::CPUThreadInitialize(Error* error) LogStartupInformation(); + GPUThread::Internal::ProcessStartup(); + if (g_settings.achievements_enabled) Achievements::Initialize(); @@ -569,16 +571,6 @@ System::State System::GetState() return s_state.state; } -void System::SetState(State new_state) -{ - if (s_state.state == new_state) - return; - - Assert(s_state.state == State::Paused || s_state.state == State::Running); - Assert(new_state == State::Paused || new_state == State::Running); - s_state.state = new_state; -} - bool System::IsRunning() { return s_state.state == State::Running; @@ -1186,120 +1178,17 @@ std::string System::GetInputProfilePath(std::string_view name) return Path::Combine(EmuFolders::InputProfiles, fmt::format("{}.ini", name)); } -bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_device, bool update_display /* = true*/) +void System::RecreateGPU(GPURenderer renderer, bool force_recreate_device, bool update_display /* = true*/) { ClearMemorySaveStates(); - g_gpu->RestoreDeviceContext(); - - // save current state - DynamicHeapArray state_data(GetMaxSaveStateSize()); - { - StateWrapper sw(state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); - if (!g_gpu->DoState(sw, nullptr, false) || !TimingEvents::DoState(sw)) - { - ERROR_LOG("Failed to save old GPU state when switching renderers"); - state_data.deallocate(); - } - } - - // create new renderer - g_gpu.reset(); - if (force_recreate_device) - { - 
PostProcessing::Shutdown(); - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - } + StopMediaCapture(); Error error; - if (!CreateGPU(renderer, true, Host::IsFullscreen(), &error)) + if (!GPUThread::CreateGPUBackend(renderer, true, false, force_recreate_device, &error)) { - if (!IsStartupCancelled()) - Host::ReportErrorAsync("Error", error.GetDescription()); - - DestroySystem(); - return false; + ERROR_LOG("Failed to switch to {} renderer: {}", Settings::GetRendererName(renderer), error.GetDescription()); + Panic("Failed to switch renderer."); } - - if (!state_data.empty()) - { - StateWrapper sw(state_data.span(), StateWrapper::Mode::Read, SAVE_STATE_VERSION); - g_gpu->RestoreDeviceContext(); - g_gpu->DoState(sw, nullptr, update_display); - TimingEvents::DoState(sw); - } - - if (force_recreate_device) - { - ImGuiManager::UpdateDebugWindowConfig(); - InvalidateDisplay(); - } - - // fix up vsync etc - UpdateSpeedLimiterState(); - return true; -} - -void System::HandleHostGPUDeviceLost() -{ - static Common::Timer::Value s_last_gpu_reset_time = 0; - static constexpr float MIN_TIME_BETWEEN_RESETS = 15.0f; - - // If we're constantly crashing on something in particular, we don't want to end up in an - // endless reset loop.. that'd probably end up leaking memory and/or crashing us for other - // reasons. So just abort in such case. - const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); - if (s_last_gpu_reset_time != 0 && - Common::Timer::ConvertValueToSeconds(current_time - s_last_gpu_reset_time) < MIN_TIME_BETWEEN_RESETS) - { - Panic("Host GPU lost too many times, device is probably completely wedged."); - } - s_last_gpu_reset_time = current_time; - - if (g_gpu) - { - // Little bit janky, but because the device is lost, the VRAM readback is going to give us garbage. - // So back up what we have, it's probably missing bits, but whatever... 
- DynamicHeapArray vram_backup(VRAM_SIZE); - std::memcpy(vram_backup.data(), g_vram, VRAM_SIZE); - - // Device lost, something went really bad. - // Let's just toss out everything, and try to hobble on. - if (!RecreateGPU(g_gpu->IsHardwareRenderer() ? g_settings.gpu_renderer : GPURenderer::Software, true, false)) - { - Panic("Failed to recreate GPU device after loss."); - return; - } - - // Restore backed-up VRAM. - std::memcpy(g_vram, vram_backup.data(), VRAM_SIZE); - } - else - { - // Only big picture mode was running. - const bool fsui_running = FullscreenUI::IsInitialized(); - const bool fullscreen = Host::IsFullscreen(); - const RenderAPI api = g_gpu_device->GetRenderAPI(); - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - if (!Host::CreateGPUDevice(api, fullscreen, nullptr) || (fsui_running && !FullscreenUI::Initialize())) - { - Panic("Failed to recreate GPU device after loss."); - return; - } - } - - // First frame after reopening is definitely going to be trash, so skip it. - Host::AddIconOSDWarning( - "HostGPUDeviceLost", ICON_EMOJI_WARNING, - TRANSLATE_STR("System", "Host GPU device encountered an error and has recovered. This may cause broken rendering."), - Host::OSD_CRITICAL_ERROR_DURATION); -} - -void System::HandleExclusiveFullscreenLost() -{ - WARNING_LOG("Lost exclusive fullscreen."); - Host::SetFullscreen(false); } void System::LoadSettings(bool display_osd_messages) @@ -1579,16 +1468,12 @@ void System::PauseSystem(bool paused) if (paused == IsPaused() || !IsValid()) return; - SetState(paused ? State::Paused : State::Running); + s_state.state = (paused ? State::Paused : State::Running); + std::atomic_thread_fence(std::memory_order_release); SPU::GetOutputStream()->SetPaused(paused); if (paused) { - // Make sure the GPU is flushed, otherwise the VB might still be mapped. 
- g_gpu->FlushRender(); - - FullscreenUI::OnSystemPaused(); - InputManager::PauseVibration(); InputManager::UpdateHostMouseMode(); @@ -1602,9 +1487,8 @@ void System::PauseSystem(bool paused) #endif Host::OnSystemPaused(); - Host::OnIdleStateChanged(); UpdateDisplayVSync(); - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); } else { @@ -1622,8 +1506,6 @@ void System::PauseSystem(bool paused) #endif Host::OnSystemResumed(); - Host::OnIdleStateChanged(); - UpdateDisplayVSync(); PerformanceCounters::Reset(); ResetThrottler(); @@ -1660,8 +1542,8 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) Assert(s_state.state == State::Shutdown); s_state.state = State::Starting; s_state.startup_cancelled.store(false, std::memory_order_relaxed); - s_state.keep_gpu_device_on_shutdown = static_cast(g_gpu_device); s_state.region = g_settings.region; + std::atomic_thread_fence(std::memory_order_release); Host::OnSystemStarting(); // Load CD image up and detect region. @@ -1836,6 +1718,7 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) // Good to go. 
s_state.state = State::Running; + std::atomic_thread_fence(std::memory_order_release); SPU::GetOutputStream()->SetPaused(false); FullscreenUI::OnSystemStarted(); @@ -1851,7 +1734,6 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) #endif Host::OnSystemStarted(); - Host::OnIdleStateChanged(); // try to load the state, if it fails, bail out if (!parameters.save_state.empty() && !LoadState(parameters.save_state.c_str(), error, false)) @@ -1872,7 +1754,6 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) PauseSystem(true); UpdateSpeedLimiterState(); - ImGuiManager::UpdateDebugWindowConfig(); PerformanceCounters::Reset(); ResetThrottler(); return true; @@ -1910,11 +1791,17 @@ bool System::Initialize(std::unique_ptr disc, DiscRegion disc_region, b !CDROM::InsertMedia(std::move(disc), disc_region, s_state.running_game_serial, s_state.running_game_title, error)) return false; - if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer, false, fullscreen, error)) - return false; + // TODO: Drop pointer + g_gpu = std::make_unique(); + g_gpu->Initialize(); - if (GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain()) - GTE::UpdateAspectRatio(swap_chain->GetWidth(), swap_chain->GetHeight()); + if (!GPUThread::CreateGPUBackend(force_software_renderer ? 
GPURenderer::Software : g_settings.gpu_renderer, false, + fullscreen, false, error)) + { + return false; + } + + SetGTEAspectRatioFromRenderWindow(); if (g_settings.gpu_pgxp_enable) CPU::PGXP::Initialize(); @@ -1956,8 +1843,6 @@ void System::DestroySystem() if (s_state.media_capture) StopMediaCapture(); - ImGuiManager::DestroyAllDebugWindows(); - s_state.gpu_dump_player.reset(); s_state.undo_load_state.reset(); @@ -1972,7 +1857,6 @@ void System::DestroySystem() PostProcessing::Shutdown(); SaveStateSelectorUI::Clear(); - FullscreenUI::OnSystemDestroyed(); InputManager::PauseVibration(); InputManager::UpdateHostMouseMode(); @@ -1999,17 +1883,7 @@ void System::DestroySystem() TimingEvents::Shutdown(); GPUTextureCache::Shutdown(); ClearRunningGame(); - - // Restore present-all-frames behavior. - if (s_state.keep_gpu_device_on_shutdown && g_gpu_device) - { - g_gpu_device->SetGPUTimingEnabled(false); - UpdateDisplayVSync(); - } - else - { - Host::ReleaseGPUDevice(); - } + GPUThread::DestroyGPUBackend(); s_state.taints = 0; s_state.bios_hash = {}; @@ -2018,9 +1892,12 @@ void System::DestroySystem() s_state.boot_mode = BootMode::None; s_state.state = State::Shutdown; + std::atomic_thread_fence(std::memory_order_release); + + // NOTE: Must come after DestroyGPUBackend(), otherwise landing page will display. + FullscreenUI::OnSystemDestroyed(); Host::OnSystemDestroyed(); - Host::OnIdleStateChanged(); } void System::ClearRunningGame() @@ -2050,8 +1927,6 @@ void System::Execute() { s_state.system_executing = true; - // TODO: Purge reset/restore - g_gpu->RestoreDeviceContext(); TimingEvents::CommitLeftoverTicks(); if (s_state.gpu_dump_player) [[unlikely]] @@ -2080,9 +1955,6 @@ void System::Execute() void System::FrameDone() { - // Vertex buffer is shared, need to flush what we have. - g_gpu->FlushRender(); - // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. 
// TODO: when running ahead, we can skip this (and the flush above) if (!IsReplayingGPUDump()) [[likely]] @@ -2104,8 +1976,6 @@ void System::FrameDone() s_state.socket_multiplexer->PollEventsWithTimeout(0); #endif - Host::FrameDone(); - if (s_state.frame_step_request) { s_state.frame_step_request = false; @@ -2135,7 +2005,6 @@ void System::FrameDone() // counter-acts that. Host::PumpMessagesOnCPUThread(); InputManager::PollSources(); - g_gpu->RestoreDeviceContext(); CheckForAndExitExecution(); } @@ -2148,29 +2017,6 @@ void System::FrameDone() SaveRunaheadState(); } - // Kick off media capture early, might take a while. - if (s_state.media_capture && s_state.media_capture->IsCapturingVideo()) [[unlikely]] - { - if (s_state.media_capture->GetVideoFPS() != s_state.video_frame_rate) [[unlikely]] - { - const std::string next_capture_path = s_state.media_capture->GetNextCapturePath(); - INFO_LOG("Video frame rate changed, switching to new capture file {}", Path::GetFileName(next_capture_path)); - - const bool was_capturing_audio = s_state.media_capture->IsCapturingAudio(); - StopMediaCapture(); - if (StartMediaCapture(std::move(next_capture_path), true, was_capturing_audio) && - !g_gpu->SendDisplayToMediaCapture(s_state.media_capture.get())) [[unlikely]] - { - StopMediaCapture(); - } - } - else - { - if (!g_gpu->SendDisplayToMediaCapture(s_state.media_capture.get())) [[unlikely]] - StopMediaCapture(); - } - } - Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); // pre-frame sleep accounting (input lag reduction) @@ -2179,55 +2025,6 @@ void System::FrameDone() if (s_state.pre_frame_sleep) AccumulatePreFrameSleepTime(current_time); - // explicit present (frame pacing) - const bool is_unique_frame = (s_state.last_presented_internal_frame_number != s_state.internal_frame_number); - s_state.last_presented_internal_frame_number = s_state.internal_frame_number; - - const bool skip_this_frame = - (((s_state.skip_presenting_duplicate_frames && !is_unique_frame 
&& - s_state.skipped_frame_count < MAX_SKIPPED_DUPLICATE_FRAME_COUNT) || - (!s_state.optimal_frame_pacing && current_time > s_state.next_frame_time && - s_state.skipped_frame_count < MAX_SKIPPED_TIMEOUT_FRAME_COUNT) || - (g_gpu_device->HasMainSwapChain() && g_gpu_device->GetMainSwapChain()->ShouldSkipPresentingFrame())) && - !s_state.syncing_to_host_with_vsync && !IsExecutionInterrupted()); - if (!skip_this_frame) - { - s_state.skipped_frame_count = 0; - - const bool scheduled_present = - (s_state.optimal_frame_pacing && s_state.throttler_enabled && !IsExecutionInterrupted()); - const GPUDevice::Features features = g_gpu_device->GetFeatures(); - if (scheduled_present && features.timed_present) - { - PresentDisplay(false, s_state.next_frame_time); - Throttle(current_time); - } - else if (scheduled_present && features.explicit_present) - { - const bool do_present = PresentDisplay(true, 0); - Throttle(current_time); - if (do_present) - g_gpu_device->SubmitPresent(g_gpu_device->GetMainSwapChain()); - } - else - { - if (scheduled_present) - Throttle(current_time); - - PresentDisplay(false, 0); - - if (!scheduled_present && s_state.throttler_enabled && !IsExecutionInterrupted()) - Throttle(current_time); - } - } - else - { - DEBUG_LOG("Skipping displaying frame"); - s_state.skipped_frame_count++; - if (s_state.throttler_enabled) - Throttle(current_time); - } - // pre-frame sleep (input lag reduction) current_time = Common::Timer::GetCurrentValue(); if (s_state.pre_frame_sleep) @@ -2236,10 +2033,15 @@ void System::FrameDone() if (pre_frame_sleep_until > current_time && Common::Timer::ConvertValueToMilliseconds(pre_frame_sleep_until - current_time) >= 1) { - Common::Timer::SleepUntil(pre_frame_sleep_until, true); + Throttle(current_time, pre_frame_sleep_until); current_time = Common::Timer::GetCurrentValue(); } } + else + { + if (s_state.throttler_enabled) + Throttle(current_time, s_state.next_frame_time); + } s_state.frame_start_time = current_time; @@ -2250,13 +2052,40 
@@ void System::FrameDone() InputManager::PollSources(); CheckForAndExitExecution(); } +} - g_gpu->RestoreDeviceContext(); +void System::GetFramePresentationDetails(bool* is_frame, bool* present_frame, bool* allow_present_skip, + Common::Timer::Value* present_time) +{ + const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); - // Update perf counters *after* throttling, we want to measure from start-of-frame - // to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different - // amounts of computation happening in each frame). - PerformanceCounters::Update(s_state.frame_number, s_state.internal_frame_number); + // explicit present (frame pacing) + const bool is_unique_frame = (s_state.last_presented_internal_frame_number != s_state.internal_frame_number); + s_state.last_presented_internal_frame_number = s_state.internal_frame_number; + + const bool is_duplicate_frame = (s_state.skip_presenting_duplicate_frames && !is_unique_frame && + s_state.skipped_frame_count < MAX_SKIPPED_DUPLICATE_FRAME_COUNT); + const bool skip_this_frame = + ((is_duplicate_frame || (!s_state.optimal_frame_pacing && current_time > s_state.next_frame_time && + s_state.skipped_frame_count < MAX_SKIPPED_TIMEOUT_FRAME_COUNT)) && + !s_state.syncing_to_host_with_vsync && !IsExecutionInterrupted()); + const bool should_allow_present_skip = !s_state.syncing_to_host_with_vsync && !s_state.optimal_frame_pacing; + *is_frame = !is_duplicate_frame; + *present_frame = !skip_this_frame; + *allow_present_skip = should_allow_present_skip; + *present_time = (s_state.optimal_frame_pacing && s_state.throttler_enabled && !IsExecutionInterrupted()) ? 
+ s_state.next_frame_time : + 0; + + if (!skip_this_frame) + { + s_state.skipped_frame_count = 0; + } + else + { + DEBUG_LOG("Skipping displaying frame"); + s_state.skipped_frame_count++; + } } float System::GetVideoFrameRate() @@ -2296,12 +2125,12 @@ void System::ResetThrottler() s_state.pre_frame_sleep_time = 0; } -void System::Throttle(Common::Timer::Value current_time) +void System::Throttle(Common::Timer::Value current_time, Common::Timer::Value sleep_until) { // If we're running too slow, advance the next frame time based on the time we lost. Effectively skips // running those frames at the intended time, because otherwise if we pause in the debugger, we'll run // hundreds of frames when we resume. - if (current_time > s_state.next_frame_time) + if (current_time > sleep_until) { const Common::Timer::Value diff = static_cast(current_time) - static_cast(s_state.next_frame_time); s_state.next_frame_time += (diff / s_state.frame_period) * s_state.frame_period + s_state.frame_period; @@ -2316,11 +2145,10 @@ void System::Throttle(Common::Timer::Value current_time) Common::Timer::Value poll_start_time = current_time; for (;;) { - const u32 sleep_ms = - static_cast(Common::Timer::ConvertValueToMilliseconds(s_state.next_frame_time - poll_start_time)); + const u32 sleep_ms = static_cast(Common::Timer::ConvertValueToMilliseconds(sleep_until - poll_start_time)); s_state.socket_multiplexer->PollEventsWithTimeout(sleep_ms); poll_start_time = Common::Timer::GetCurrentValue(); - if (poll_start_time >= s_state.next_frame_time || (!g_settings.display_optimal_frame_pacing && sleep_ms == 0)) + if (poll_start_time >= sleep_until || (!g_settings.display_optimal_frame_pacing && sleep_ms == 0)) break; } } @@ -2329,14 +2157,14 @@ void System::Throttle(Common::Timer::Value current_time) // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. // Linux also seems to do a much better job of waking up at the requested time. 
#if !defined(__linux__) - Common::Timer::SleepUntil(s_state.next_frame_time, g_settings.display_optimal_frame_pacing); + Common::Timer::SleepUntil(sleep_until, g_settings.display_optimal_frame_pacing); #else - Common::Timer::SleepUntil(s_state.next_frame_time, false); + Common::Timer::SleepUntil(sleep_until, false); #endif } #else // No spinwait on Android, see above. - Common::Timer::SleepUntil(s_state.next_frame_time, false); + Common::Timer::SleepUntil(sleep_until, false); #endif #if 0 @@ -2380,65 +2208,6 @@ void System::IncrementInternalFrameNumber() s_state.internal_frame_number++; } -bool System::CreateGPU(GPURenderer renderer, bool is_switching, bool fullscreen, Error* error) -{ - const RenderAPI api = Settings::GetRenderAPIForRenderer(renderer); - - if (!g_gpu_device || - (renderer != GPURenderer::Software && !GPUDevice::IsSameRenderAPI(g_gpu_device->GetRenderAPI(), api))) - { - if (g_gpu_device) - { - WARNING_LOG("Recreating GPU device, expecting {} got {}", GPUDevice::RenderAPIToString(api), - GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); - PostProcessing::Shutdown(); - } - - Host::ReleaseGPUDevice(); - if (!Host::CreateGPUDevice(api, fullscreen, error)) - { - Host::ReleaseRenderWindow(); - return false; - } - - if (is_switching) - PostProcessing::Initialize(); - } - - if (renderer == GPURenderer::Software) - g_gpu = GPU::CreateSoftwareRenderer(error); - else - g_gpu = GPU::CreateHardwareRenderer(error); - - if (!g_gpu) - { - ERROR_LOG("Failed to initialize {} renderer, falling back to software renderer", - Settings::GetRendererName(renderer)); - Host::AddOSDMessage( - fmt::format(TRANSLATE_FS("System", "Failed to initialize {} renderer, falling back to software renderer."), - Settings::GetRendererName(renderer)), - Host::OSD_CRITICAL_ERROR_DURATION); - g_gpu.reset(); - g_gpu = GPU::CreateSoftwareRenderer(error); - if (!g_gpu) - { - ERROR_LOG("Failed to create fallback software renderer."); - if (!s_state.keep_gpu_device_on_shutdown) - { - 
PostProcessing::Shutdown(); - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - } - return false; - } - } - - if (g_settings.display_show_gpu_usage) - g_gpu_device->SetGPUTimingEnabled(true); - - return true; -} - bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state) { if (!sw.DoMarker("System")) @@ -2504,8 +2273,7 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di if (!sw.DoMarker("InterruptController") || !InterruptController::DoState(sw)) return false; - g_gpu->RestoreDeviceContext(); - if (!sw.DoMarker("GPU") || !g_gpu->DoState(sw, host_texture, update_display)) + if (!sw.DoMarker("GPU") || !g_gpu->DoState(sw, update_display)) return false; if (!sw.DoMarker("CDROM") || !CDROM::DoState(sw)) @@ -2811,7 +2579,7 @@ bool System::LoadStateFromBuffer(const SaveStateBuffer& buffer, Error* error, bo ResetThrottler(); if (update_display_if_paused && IsPaused()) - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); return true; } @@ -3065,23 +2833,14 @@ bool System::SaveStateToBuffer(SaveStateBuffer* buffer, Error* error, u32 screen // save screenshot if (screenshot_size > 0) { - // assume this size is the width - GSVector4i screenshot_display_rect, screenshot_draw_rect; - g_gpu->CalculateDrawRect(screenshot_size, screenshot_size, true, true, &screenshot_display_rect, - &screenshot_draw_rect); - - const u32 screenshot_width = static_cast(screenshot_display_rect.width()); - const u32 screenshot_height = static_cast(screenshot_display_rect.height()); - screenshot_draw_rect = screenshot_draw_rect.sub32(screenshot_display_rect.xyxy()); - screenshot_display_rect = screenshot_display_rect.sub32(screenshot_display_rect.xyxy()); - VERBOSE_LOG("Saving {}x{} screenshot for state", screenshot_width, screenshot_height); - + u32 screenshot_width; + u32 screenshot_height; std::vector screenshot_buffer; u32 screenshot_stride; GPUTexture::Format screenshot_format; - if 
(g_gpu->RenderScreenshotToBuffer(screenshot_width, screenshot_height, screenshot_display_rect, - screenshot_draw_rect, false, &screenshot_buffer, &screenshot_stride, - &screenshot_format) && + if (GPUBackend::RenderScreenshotToBuffer(screenshot_size, screenshot_size, false, &screenshot_width, + &screenshot_height, &screenshot_buffer, &screenshot_stride, + &screenshot_format) && GPUTexture::ConvertTextureDataToRGBA8(screenshot_width, screenshot_height, screenshot_buffer, screenshot_stride, screenshot_format)) { @@ -3103,8 +2862,8 @@ bool System::SaveStateToBuffer(SaveStateBuffer* buffer, Error* error, u32 screen } else { - WARNING_LOG("Failed to save {}x{} screenshot for save state due to render/conversion failure", screenshot_width, - screenshot_height); + WARNING_LOG("Failed to save {}x{} screenshot for save state due to render/conversion failure", screenshot_size, + screenshot_size); } } @@ -3112,7 +2871,6 @@ bool System::SaveStateToBuffer(SaveStateBuffer* buffer, Error* error, u32 screen if (buffer->state_data.empty()) buffer->state_data.resize(GetMaxSaveStateSize()); - g_gpu->RestoreDeviceContext(); StateWrapper sw(buffer->state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); if (!DoState(sw, nullptr, false, false)) { @@ -3350,10 +3108,9 @@ void System::UpdateSpeedLimiterState() s_state.syncing_to_host = false; s_state.syncing_to_host_with_vsync = false; - if (g_settings.sync_to_host_refresh_rate && g_gpu_device->HasMainSwapChain()) + if (g_settings.sync_to_host_refresh_rate) { - const float host_refresh_rate = g_gpu_device->GetMainSwapChain()->GetWindowInfo().surface_refresh_rate; - if (host_refresh_rate > 0.0f) + if (const float host_refresh_rate = GPUThread::GetRenderWindowInfo().surface_refresh_rate; host_refresh_rate > 0.0f) { const float ratio = host_refresh_rate / s_state.video_frame_rate; s_state.can_sync_to_host = (ratio >= 0.95f && ratio <= 1.05f); @@ -3405,32 +3162,15 @@ void System::UpdateSpeedLimiterState() void 
System::UpdateDisplayVSync() { - static constexpr std::array(GPUVSyncMode::Count)> vsync_modes = {{ - "Disabled", - "FIFO", - "Mailbox", - }}; - // Avoid flipping vsync on and off by manually throttling when vsync is on. const GPUVSyncMode vsync_mode = GetEffectiveVSyncMode(); const bool allow_present_throttle = ShouldAllowPresentThrottle(); - if (!g_gpu_device->HasMainSwapChain() || - (g_gpu_device->GetMainSwapChain()->GetVSyncMode() == vsync_mode && - g_gpu_device->GetMainSwapChain()->IsPresentThrottleAllowed() == allow_present_throttle)) - { - return; - } - VERBOSE_LOG("VSync: {}{}{}", vsync_modes[static_cast(vsync_mode)], + VERBOSE_LOG("VSync: {}{}{}", GPUDevice::VSyncModeToString(vsync_mode), s_state.syncing_to_host_with_vsync ? " (for throttling)" : "", allow_present_throttle ? " (present throttle allowed)" : ""); - Error error; - if (!g_gpu_device->GetMainSwapChain()->SetVSyncMode(vsync_mode, allow_present_throttle, &error)) - { - ERROR_LOG("Failed to update vsync mode to {}: {}", vsync_modes[static_cast(vsync_mode)], - error.GetDescription()); - } + GPUThread::SetVSync(vsync_mode, allow_present_throttle); } GPUVSyncMode System::GetEffectiveVSyncMode() @@ -3881,7 +3621,6 @@ bool System::DumpVRAM(const char* filename) if (!IsValid()) return false; - g_gpu->RestoreDeviceContext(); return g_gpu->DumpVRAMToFile(filename); } @@ -4057,10 +3796,11 @@ void System::UpdateRunningGame(const std::string_view path, CDImage* image, bool if (s_state.running_game_serial != prev_serial) UpdateSessionTime(prev_serial); + // TODO GPU-THREAD: Racey... 
if (SaveStateSelectorUI::IsOpen()) - SaveStateSelectorUI::RefreshList(s_state.running_game_serial); - else - SaveStateSelectorUI::ClearList(); + { + GPUThread::RunOnThread([serial = s_state.running_game_serial]() { SaveStateSelectorUI::RefreshList(serial); }); + } UpdateRichPresence(booting); @@ -4316,7 +4056,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale || g_settings.gpu_multisamples != old_settings.gpu_multisamples || g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading || - g_settings.gpu_use_thread != old_settings.gpu_use_thread || + g_settings.gpu_max_queued_frames != old_settings.gpu_max_queued_frames || g_settings.gpu_use_software_renderer_for_readbacks != old_settings.gpu_use_software_renderer_for_readbacks || g_settings.gpu_fifo_size != old_settings.gpu_fifo_size || g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead || @@ -4355,9 +4095,14 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.texture_replacements.dump_textures != old_settings.texture_replacements.dump_textures || g_settings.texture_replacements.config != old_settings.texture_replacements.config) { - g_gpu->UpdateSettings(old_settings); + GPUThread::UpdateSettings(true); if (IsPaused()) - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); + } + else + { + // still need to update debug windows + GPUThread::UpdateSettings(false); } if (g_settings.gpu_widescreen_hack != old_settings.gpu_widescreen_hack || @@ -4366,8 +4111,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) (g_settings.display_aspect_ratio_custom_numerator != old_settings.display_aspect_ratio_custom_numerator || g_settings.display_aspect_ratio_custom_denominator != old_settings.display_aspect_ratio_custom_denominator))) { - if (GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain()) - GTE::UpdateAspectRatio(swap_chain->GetWidth(), 
swap_chain->GetHeight()); + SetGTEAspectRatioFromRenderWindow(); } if (g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable || @@ -4386,9 +4130,6 @@ void System::CheckForSettingsChanges(const Settings& old_settings) InterruptExecution(); } - if (g_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) - g_gpu->ResetStatistics(); - if (g_settings.cdrom_readahead_sectors != old_settings.cdrom_readahead_sectors) CDROM::SetReadaheadSectors(g_settings.cdrom_readahead_sectors); @@ -4449,9 +4190,6 @@ void System::CheckForSettingsChanges(const Settings& old_settings) PostProcessing::UpdateSettings(); - if (ImGuiManager::UpdateDebugWindowConfig()) - InvalidateDisplay(); - #ifdef ENABLE_GDB_SERVER if (g_settings.debugging.enable_gdb_server != old_settings.debugging.enable_gdb_server || g_settings.debugging.gdb_server_port != old_settings.debugging.gdb_server_port) @@ -4508,6 +4246,9 @@ void System::CheckForSettingsChanges(const Settings& old_settings) Panic("Failed to reallocate memory map. 
The log may contain more information."); } } + + if (g_settings.gpu_use_thread != old_settings.gpu_use_thread) [[unlikely]] + GPUThread::Internal::SetThreadEnabled(g_settings.gpu_use_thread); } void System::SetTaintsFromSettings() @@ -4742,6 +4483,9 @@ void System::CalculateRewindMemoryUsage(u32 num_saves, u32 resolution_scale, u64 void System::ClearMemorySaveStates() { + if (!s_state.rewind_states.empty() || !s_state.runahead_states.empty()) + Panic("FIXME TEXTURE CLEAR"); + s_state.rewind_states.clear(); s_state.runahead_states.clear(); } @@ -4924,11 +4668,12 @@ void System::DoRewind() s_state.rewind_load_counter--; } - InvalidateDisplay(); + // TODO FIXME InvalidateDisplay(); + Host::PumpMessagesOnCPUThread(); IdlePollUpdate(); - Throttle(Common::Timer::GetCurrentValue()); + Throttle(Common::Timer::GetCurrentValue(), s_state.next_frame_time); } void System::SaveRunaheadState() @@ -5047,6 +4792,7 @@ void System::ShutdownSystem(bool save_resume_state) } s_state.state = State::Stopping; + std::atomic_thread_fence(std::memory_order_release); if (!s_state.system_executing) DestroySystem(); } @@ -5163,17 +4909,17 @@ std::string System::GetScreenshotPath(const char* extension) return path; } -bool System::SaveScreenshot(const char* path, DisplayScreenshotMode mode, DisplayScreenshotFormat format, u8 quality, +void System::SaveScreenshot(const char* path, DisplayScreenshotMode mode, DisplayScreenshotFormat format, u8 quality, bool compress_on_thread) { if (!IsValid()) - return false; + return; std::string auto_path; if (!path) path = (auto_path = GetScreenshotPath(Settings::GetDisplayScreenshotFormatExtension(format))).c_str(); - return g_gpu->RenderScreenshotToFile(path, mode, quality, compress_on_thread, true); + GPUBackend::RenderScreenshotToFile(path, mode, quality, compress_on_thread, true); } bool System::StartRecordingGPUDump(const char* path /*= nullptr*/, u32 num_frames /*= 0*/) @@ -5245,17 +4991,20 @@ bool System::StartMediaCapture(std::string path, bool 
capture_video, bool captur Host::GetUIntSettingValue("MediaCapture", "VideoWidth", Settings::DEFAULT_MEDIA_CAPTURE_VIDEO_WIDTH); u32 capture_height = Host::GetUIntSettingValue("MediaCapture", "VideoHeight", Settings::DEFAULT_MEDIA_CAPTURE_VIDEO_HEIGHT); + const WindowInfo& main_window_info = GPUThread::GetRenderWindowInfo(); const GPUTexture::Format capture_format = - g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; + main_window_info.IsSurfaceless() ? GPUTexture::Format::RGBA8 : main_window_info.surface_format; if (capture_video) { - // TODO: This will be a mess with GPU thread. +#if 0 + // TODO:FIXME: This will be a mess with GPU thread. The start will have to be moved there. if (Host::GetBoolSettingValue("MediaCapture", "VideoAutoSize", false)) { GSVector4i unused_display_rect, unused_draw_rect; g_gpu->CalculateScreenshotSize(DisplayScreenshotMode::InternalResolution, &capture_width, &capture_height, &unused_display_rect, &unused_draw_rect); } +#endif MediaCapture::AdjustVideoSize(&capture_width, &capture_height); } @@ -5303,7 +5052,7 @@ bool System::StartMediaCapture(std::string path, bool capture_video, bool captur return false; } - Host::AddIconOSDMessage("MediaCapture", ICON_FA_CAMERA, + Host::AddIconOSDMessage(fmt::format("MediaCapture_{}", s_state.media_capture->GetPath()), ICON_FA_CAMERA, fmt::format(TRANSLATE_FS("System", "Starting {0} to '{1}'."), GetCaptureTypeForMessage(s_state.media_capture->IsCapturingVideo(), s_state.media_capture->IsCapturingAudio()), @@ -5319,30 +5068,45 @@ void System::StopMediaCapture() if (!s_state.media_capture) return; - const bool was_capturing_audio = s_state.media_capture->IsCapturingAudio(); - const bool was_capturing_video = s_state.media_capture->IsCapturingVideo(); + if (s_state.media_capture->IsCapturingVideo()) + { + // If we're capturing video, we need to finish the capture on the GPU thread. 
+ // This is because it owns texture objects, and OpenGL is not thread-safe. + GPUThread::RunOnThread( + [cap = s_state.media_capture.release()]() mutable { StopMediaCapture(std::unique_ptr(cap)); }); + } + else + { + // Otherwise, we can do it on the CPU thread. + StopMediaCapture(std::move(s_state.media_capture)); + } + + Host::OnMediaCaptureStopped(); +} + +void System::StopMediaCapture(std::unique_ptr cap) +{ + const bool was_capturing_audio = cap->IsCapturingAudio(); + const bool was_capturing_video = cap->IsCapturingVideo(); Error error; - if (s_state.media_capture->EndCapture(&error)) + std::string osd_key = fmt::format("MediaCapture_{}", cap->GetPath()); + if (cap->EndCapture(&error)) { - Host::AddIconOSDMessage("MediaCapture", ICON_FA_CAMERA, + Host::AddIconOSDMessage(std::move(osd_key), ICON_FA_CAMERA, fmt::format(TRANSLATE_FS("System", "Stopped {0} to '{1}'."), GetCaptureTypeForMessage(was_capturing_video, was_capturing_audio), - Path::GetFileName(s_state.media_capture->GetPath())), + Path::GetFileName(cap->GetPath())), Host::OSD_INFO_DURATION); } else { - Host::AddIconOSDWarning("MediaCapture", ICON_FA_EXCLAMATION_TRIANGLE, + Host::AddIconOSDWarning(std::move(osd_key), ICON_FA_EXCLAMATION_TRIANGLE, fmt::format(TRANSLATE_FS("System", "Stopped {0}: {1}."), - GetCaptureTypeForMessage(s_state.media_capture->IsCapturingVideo(), - s_state.media_capture->IsCapturingAudio()), + GetCaptureTypeForMessage(was_capturing_video, was_capturing_audio), error.GetDescription()), Host::OSD_INFO_DURATION); } - s_state.media_capture.reset(); - - Host::OnMediaCaptureStopped(); } std::string System::GetGameSaveStateFileName(std::string_view serial, s32 slot) @@ -5615,8 +5379,7 @@ void System::ToggleWidescreen() Settings::GetDisplayAspectRatioDisplayName(g_settings.display_aspect_ratio), 5.0f)); } - if (GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain()) - GTE::UpdateAspectRatio(swap_chain->GetWidth(), swap_chain->GetHeight()); + SetGTEAspectRatioFromRenderWindow(); } 
void System::ToggleSoftwareRendering() @@ -5624,18 +5387,15 @@ void System::ToggleSoftwareRendering() if (IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software) return; - const GPURenderer new_renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer; + const GPURenderer new_renderer = + GPUBackend::IsUsingHardwareBackend() ? GPURenderer::Software : g_settings.gpu_renderer; Host::AddIconOSDMessage("SoftwareRendering", ICON_FA_PAINT_ROLLER, fmt::format(TRANSLATE_FS("OSDMessage", "Switching to {} renderer..."), Settings::GetRendererDisplayName(new_renderer)), Host::OSD_QUICK_DURATION); - RecreateGPU(new_renderer); - // TODO: GPU-THREAD: Drop this - PerformanceCounters::Reset(); - - g_gpu->UpdateResolutionScale(); + RecreateGPU(new_renderer, false, IsPaused()); } void System::RequestDisplaySize(float scale /*= 0.0f*/) @@ -5644,7 +5404,7 @@ void System::RequestDisplaySize(float scale /*= 0.0f*/) return; if (scale == 0.0f) - scale = g_gpu->IsHardwareRenderer() ? static_cast(g_settings.gpu_resolution_scale) : 1.0f; + scale = GPUBackend::IsUsingHardwareBackend() ? static_cast(g_settings.gpu_resolution_scale) : 1.0f; const float y_scale = (static_cast(g_gpu->GetCRTCDisplayWidth()) / static_cast(g_gpu->GetCRTCDisplayHeight())) / @@ -5668,75 +5428,13 @@ void System::DisplayWindowResized(u32 width, u32 height) return; if (g_settings.gpu_widescreen_hack && g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow) - GTE::UpdateAspectRatio(width, height); - - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateResolutionScale(); - - // If we're paused, re-present the current frame at the new window size. - if (IsPaused()) - { - // Hackity hack, on some systems, presenting a single frame isn't enough to actually get it - // displayed. Two seems to be good enough. Maybe something to do with direct scanout. 
- InvalidateDisplay(); - InvalidateDisplay(); - } + SetGTEAspectRatioFromRenderWindow(); } -bool System::PresentDisplay(bool explicit_present, u64 present_time) +void System::SetGTEAspectRatioFromRenderWindow() { - // acquire for IO.MousePos. - std::atomic_thread_fence(std::memory_order_acquire); - - FullscreenUI::Render(); - ImGuiManager::RenderTextOverlays(); - ImGuiManager::RenderOSDMessages(); - - if (s_state.state == State::Running) - ImGuiManager::RenderSoftwareCursors(); - - // Debug windows are always rendered, otherwise mouse input breaks on skip. - ImGuiManager::RenderOverlayWindows(); - - if (IsValid()) - ImGuiManager::RenderDebugWindows(); - - const GPUDevice::PresentResult pres = - g_gpu_device->HasMainSwapChain() ? - (g_gpu ? g_gpu->PresentDisplay() : g_gpu_device->BeginPresent(g_gpu_device->GetMainSwapChain())) : - GPUDevice::PresentResult::SkipPresent; - if (pres == GPUDevice::PresentResult::OK) - { - g_gpu_device->RenderImGui(g_gpu_device->GetMainSwapChain()); - g_gpu_device->EndPresent(g_gpu_device->GetMainSwapChain(), explicit_present, present_time); - - if (g_gpu_device->IsGPUTimingEnabled()) - PerformanceCounters::AccumulateGPUTime(); - } - else - { - if (pres == GPUDevice::PresentResult::DeviceLost) [[unlikely]] - HandleHostGPUDeviceLost(); - else if (pres == GPUDevice::PresentResult::ExclusiveFullscreenLost) - HandleExclusiveFullscreenLost(); - else - g_gpu_device->FlushCommands(); - - // Still need to kick ImGui or it gets cranky. 
- ImGui::EndFrame(); - } - - ImGuiManager::NewFrame(); - - return (pres == GPUDevice::PresentResult::OK); -} - -void System::InvalidateDisplay() -{ - PresentDisplay(false, 0); - - if (g_gpu) - g_gpu->RestoreDeviceContext(); + if (const WindowInfo& main_window_info = GPUThread::GetRenderWindowInfo(); !main_window_info.IsSurfaceless()) + GTE::UpdateAspectRatio(main_window_info.surface_width, main_window_info.surface_height); } bool System::OpenGPUDump(std::string path, Error* error) diff --git a/src/core/system.h b/src/core/system.h index 7f8fadc5a..ff8fcaf49 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -159,7 +159,6 @@ std::string GetGameSettingsPath(std::string_view game_serial); std::string GetInputProfilePath(std::string_view name); State GetState(); -void SetState(State new_state); bool IsRunning(); bool IsPaused(); bool IsShutdown(); @@ -272,6 +271,8 @@ bool IsRunningAtNonStandardSpeed(); float GetVideoFrameRate(); void SetVideoFrameRate(float frequency); +void GetFramePresentationDetails(bool* is_frame, bool* present_frame, bool* allow_present_skip, u64* present_time); + // Access controllers for simulating input. Controller* GetController(u32 slot); void UpdateMemoryCardTypes(); @@ -375,7 +376,7 @@ s32 GetAudioOutputVolume(); void UpdateVolume(); /// Saves a screenshot to the specified file. If no file name is provided, one will be generated automatically. -bool SaveScreenshot(const char* path = nullptr, DisplayScreenshotMode mode = g_settings.display_screenshot_mode, +void SaveScreenshot(const char* path = nullptr, DisplayScreenshotMode mode = g_settings.display_screenshot_mode, DisplayScreenshotFormat format = g_settings.display_screenshot_format, u8 quality = g_settings.display_screenshot_quality, bool compress_on_thread = true); @@ -404,10 +405,6 @@ void ToggleSoftwareRendering(); /// If the scale is set to 0, the internal resolution will be used, otherwise it is treated as a multiplier to 1x. 
void RequestDisplaySize(float scale = 0.0f); -/// Renders the display. -bool PresentDisplay(bool explicit_present, u64 present_time); -void InvalidateDisplay(); - ////////////////////////////////////////////////////////////////////////// // Memory Save States (Rewind and Runahead) ////////////////////////////////////////////////////////////////////////// diff --git a/src/core/system_private.h b/src/core/system_private.h index e47baf4c0..5e41623b4 100644 --- a/src/core/system_private.h +++ b/src/core/system_private.h @@ -86,11 +86,8 @@ void OnSystemPaused(); /// Called when the VM is resumed after being paused. void OnSystemResumed(); -/// Called when the pause state changes, or fullscreen UI opens. -void OnIdleStateChanged(); - /// Called when performance metrics are updated, approximately once a second. -void OnPerformanceCountersUpdated(); +void OnPerformanceCountersUpdated(const GPUBackend* gpu_backend); /// Provided by the host; called when the running executable changes. void OnGameChanged(const std::string& disc_path, const std::string& game_serial, const std::string& game_name); diff --git a/src/duckstation-qt/graphicssettingswidget.cpp b/src/duckstation-qt/graphicssettingswidget.cpp index c0b52fd92..4c0865a5a 100644 --- a/src/duckstation-qt/graphicssettingswidget.cpp +++ b/src/duckstation-qt/graphicssettingswidget.cpp @@ -604,8 +604,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* tr("Draws a wireframe outline of the triangles rendered by the console's GPU, either as a " "replacement or an overlay.")); dialog->registerWidgetHelp(m_ui.gpuThread, tr("Threaded Rendering"), tr("Checked"), - tr("Uses a second thread for drawing graphics. Currently only available for the software " - "renderer, but can provide a significant speed improvement, and is safe to use.")); + tr("Uses a second thread for drawing graphics. 
Provides a significant speed improvement " + "particularly with the software renderer, and is safe to use.")); dialog->registerWidgetHelp( m_ui.useDebugDevice, tr("Use Debug Device"), tr("Unchecked"), @@ -807,8 +807,6 @@ void GraphicsSettingsWidget::updateRendererDependentOptions() m_ui.blitSwapChain->setEnabled(render_api == RenderAPI::D3D11); #endif - m_ui.gpuThread->setEnabled(!is_hardware); - m_ui.exclusiveFullscreenLabel->setEnabled(render_api == RenderAPI::D3D11 || render_api == RenderAPI::D3D12 || render_api == RenderAPI::Vulkan); m_ui.exclusiveFullscreenControl->setEnabled(render_api == RenderAPI::Vulkan); diff --git a/src/duckstation-qt/mainwindow.cpp b/src/duckstation-qt/mainwindow.cpp index 8f04c8e8f..43b8f55fa 100644 --- a/src/duckstation-qt/mainwindow.cpp +++ b/src/duckstation-qt/mainwindow.cpp @@ -83,6 +83,7 @@ static bool s_use_central_widget = false; // UI thread VM validity. static bool s_system_valid = false; static bool s_system_paused = false; +static bool s_fullscreen_ui_started = false; static std::atomic_uint32_t s_system_locked{false}; static QString s_current_game_title; static QString s_current_game_serial; @@ -762,7 +763,7 @@ void MainWindow::recreate() { g_emu_thread->setSurfaceless(false); g_main_window->updateEmulationActions(false, System::IsValid(), Achievements::IsHardcoreModeActive()); - g_main_window->onFullscreenUIStateChange(g_emu_thread->isRunningFullscreenUI()); + g_main_window->onFullscreenUIStartedOrStopped(s_fullscreen_ui_started); } if (controller_settings_window_pos.has_value()) @@ -1252,8 +1253,9 @@ void MainWindow::onStartFullscreenUITriggered() g_emu_thread->startFullscreenUI(); } -void MainWindow::onFullscreenUIStateChange(bool running) +void MainWindow::onFullscreenUIStartedOrStopped(bool running) { + s_fullscreen_ui_started = running; m_ui.actionStartFullscreenUI->setText(running ? tr("Stop Big Picture Mode") : tr("Start Big Picture Mode")); m_ui.actionStartFullscreenUI2->setText(running ? 
tr("Exit Big Picture") : tr("Big Picture")); } @@ -1999,7 +2001,7 @@ void MainWindow::connectSignals() connect(g_emu_thread, &EmuThread::mediaCaptureStarted, this, &MainWindow::onMediaCaptureStarted); connect(g_emu_thread, &EmuThread::mediaCaptureStopped, this, &MainWindow::onMediaCaptureStopped); connect(g_emu_thread, &EmuThread::mouseModeRequested, this, &MainWindow::onMouseModeRequested); - connect(g_emu_thread, &EmuThread::fullscreenUIStateChange, this, &MainWindow::onFullscreenUIStateChange); + connect(g_emu_thread, &EmuThread::fullscreenUIStartedOrStopped, this, &MainWindow::onFullscreenUIStartedOrStopped); connect(g_emu_thread, &EmuThread::achievementsLoginRequested, this, &MainWindow::onAchievementsLoginRequested); connect(g_emu_thread, &EmuThread::achievementsChallengeModeChanged, this, &MainWindow::onAchievementsChallengeModeChanged); @@ -2452,7 +2454,7 @@ bool MainWindow::requestShutdown(bool allow_confirm /* = true */, bool allow_sav // reshow the main window during display updates, because otherwise fullscreen transitions and renderer switches // would briefly show and then hide the main window. So instead, we do it on shutdown, here. Except if we're in // batch mode, when we're going to exit anyway. - if (!isRenderingToMain() && isHidden() && !QtHost::InBatchMode() && !g_emu_thread->isRunningFullscreenUI()) + if (!isRenderingToMain() && isHidden() && !QtHost::InBatchMode() && !s_fullscreen_ui_started) updateWindowState(true); // Now we can actually shut down the VM. 
diff --git a/src/duckstation-qt/mainwindow.h b/src/duckstation-qt/mainwindow.h index 32551795f..00b605b62 100644 --- a/src/duckstation-qt/mainwindow.h +++ b/src/duckstation-qt/mainwindow.h @@ -167,7 +167,7 @@ private Q_SLOTS: void onCheatsActionTriggered(); void onCheatsMenuAboutToShow(); void onStartFullscreenUITriggered(); - void onFullscreenUIStateChange(bool running); + void onFullscreenUIStartedOrStopped(bool running); void onRemoveDiscActionTriggered(); void onViewToolbarActionToggled(bool checked); void onViewLockToolbarActionToggled(bool checked); diff --git a/src/duckstation-qt/qthost.cpp b/src/duckstation-qt/qthost.cpp index e8a55a015..db1f661f2 100644 --- a/src/duckstation-qt/qthost.cpp +++ b/src/duckstation-qt/qthost.cpp @@ -19,7 +19,9 @@ #include "core/game_list.h" #include "core/gdb_server.h" #include "core/gpu.h" +#include "core/gpu_backend.h" #include "core/gpu_hw_texture_cache.h" +#include "core/gpu_thread.h" #include "core/host.h" #include "core/imgui_overlays.h" #include "core/memory_card.h" @@ -87,6 +89,20 @@ static constexpr u32 GDB_SERVER_POLLING_INTERVAL = 1; // Local function declarations ////////////////////////////////////////////////////////////////////////// namespace QtHost { + +namespace { + +class GPUThread : public QThread +{ +public: + GPUThread(QObject* parent = nullptr); + ~GPUThread() override; + + void run() override; +}; + +} // namespace + static bool PerformEarlyHardwareChecks(); static bool EarlyProcessStartup(); static void RegisterTypes(); @@ -565,13 +581,8 @@ void Host::LoadSettings(const SettingsInterface& si, std::unique_lock params) @@ -867,7 +859,7 @@ void EmuThread::onDisplayWindowMouseWheelEvent(const QPoint& delta_angle) void EmuThread::onDisplayWindowResized(int width, int height, float scale) { - Host::ResizeDisplayWindow(width, height, scale); + GPUThread::ResizeDisplayWindow(width, height, scale); } void EmuThread::redrawDisplayWindow() @@ -878,10 +870,10 @@ void EmuThread::redrawDisplayWindow() return; } - if 
(!g_gpu_device || System::IsShutdown()) + if (System::IsShutdown()) return; - System::InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); } void EmuThread::toggleFullscreen() @@ -909,7 +901,7 @@ void EmuThread::setFullscreen(bool fullscreen, bool allow_render_to_main) m_is_fullscreen = fullscreen; m_is_rendering_to_main = allow_render_to_main && shouldRenderToMain(); - Host::UpdateDisplayWindow(fullscreen); + GPUThread::UpdateDisplayWindow(fullscreen); } bool Host::IsFullscreen() @@ -938,7 +930,7 @@ void EmuThread::setSurfaceless(bool surfaceless) return; m_is_surfaceless = surfaceless; - Host::UpdateDisplayWindow(false); + GPUThread::UpdateDisplayWindow(false); } void EmuThread::requestDisplaySize(float scale) @@ -995,6 +987,7 @@ void Host::OnSystemStarting() void Host::OnSystemStarted() { g_emu_thread->stopBackgroundControllerPollTimer(); + g_emu_thread->wakeThread(); emit g_emu_thread->systemStarted(); } @@ -1012,6 +1005,7 @@ void Host::OnSystemResumed() g_emu_thread->setSurfaceless(false); emit g_emu_thread->systemResumed(); + g_emu_thread->wakeThread(); g_emu_thread->stopBackgroundControllerPollTimer(); } @@ -1023,9 +1017,14 @@ void Host::OnSystemDestroyed() emit g_emu_thread->systemDestroyed(); } -void Host::OnIdleStateChanged() +void Host::OnFullscreenUIStartedOrStopped(bool started) { - g_emu_thread->wakeThread(); + g_emu_thread->setFullscreenUIStarted(started); +} + +void Host::OnFullscreenUIActiveChanged(bool is_active) +{ + g_emu_thread->setFullscreenUIActive(is_active); } void EmuThread::reloadInputSources() @@ -1679,7 +1678,8 @@ void Host::DestroyAuxiliaryRenderWindow(AuxiliaryRenderWindowHandle handle, s32* *height = size.height(); // eat all pending events, to make sure we're not going to write input events back to a dead pointer - g_emu_thread->getEventLoop()->processEvents(QEventLoop::AllEvents); + if (g_emu_thread->isCurrentThread()) + g_emu_thread->getEventLoop()->processEvents(QEventLoop::AllEvents); } void 
EmuThread::queueAuxiliaryRenderWindowInputEvent(Host::AuxiliaryRenderWindowUserData userdata, @@ -1699,10 +1699,12 @@ void EmuThread::processAuxiliaryRenderWindowInputEvent(void* userdata, quint32 e quint32 param3) { DebugAssert(isCurrentThread()); - ImGuiManager::ProcessAuxiliaryRenderWindowInputEvent(userdata, static_cast(event), - Host::AuxiliaryRenderWindowEventParam{.uint_param = param1}, - Host::AuxiliaryRenderWindowEventParam{.uint_param = param2}, - Host::AuxiliaryRenderWindowEventParam{.uint_param = param3}); + GPUThread::RunOnThread([userdata, event, param1, param2, param3]() { + ImGuiManager::ProcessAuxiliaryRenderWindowInputEvent(userdata, static_cast(event), + Host::AuxiliaryRenderWindowEventParam{.uint_param = param1}, + Host::AuxiliaryRenderWindowEventParam{.uint_param = param2}, + Host::AuxiliaryRenderWindowEventParam{.uint_param = param3}); + }); } void EmuThread::doBackgroundControllerPoll() @@ -1731,7 +1733,7 @@ void EmuThread::startBackgroundControllerPollTimer() return; u32 poll_interval = BACKGROUND_CONTROLLER_POLLING_INTERVAL; - if (FullscreenUI::IsInitialized()) + if (m_is_fullscreen_ui_active) poll_interval = FULLSCREEN_UI_CONTROLLER_POLLING_INTERVAL; if (GDBServer::HasAnyClients()) poll_interval = GDB_SERVER_POLLING_INTERVAL; @@ -1747,6 +1749,27 @@ void EmuThread::stopBackgroundControllerPollTimer() m_background_controller_polling_timer->stop(); } +void EmuThread::setFullscreenUIActive(bool active) +{ + m_is_fullscreen_ui_active = active; + + // adjust the timer speed to pick up controller input faster + if (!m_background_controller_polling_timer->isActive()) + return; + + g_emu_thread->stopBackgroundControllerPollTimer(); + g_emu_thread->startBackgroundControllerPollTimer(); +} + +void EmuThread::setFullscreenUIStarted(bool started) +{ + if (m_is_fullscreen_ui_started == started) + return; + + m_is_fullscreen_ui_started = started; + emit fullscreenUIStartedOrStopped(started); +} + void EmuThread::start() { AssertMsg(!g_emu_thread, "Emu 
thread does not exist"); @@ -1790,49 +1813,52 @@ void EmuThread::run() } } - // bind buttons/axises - createBackgroundControllerPollTimer(); - startBackgroundControllerPollTimer(); - - // main loop - while (!m_shutdown_flag) { - if (System::IsRunning()) + // kick off GPU thread + QtHost::GPUThread gpu_thread; + gpu_thread.start(); + + // bind buttons/axises + createBackgroundControllerPollTimer(); + startBackgroundControllerPollTimer(); + + // main loop + while (!m_shutdown_flag) { - System::Execute(); - } - else - { - // we want to keep rendering the UI when paused and fullscreen UI is enabled - if (!FullscreenUI::HasActiveWindow() && !System::IsRunning()) - { - // wait until we have a system before running + if (System::IsRunning()) + System::Execute(); + else m_event_loop->exec(); - continue; - } - - m_event_loop->processEvents(QEventLoop::AllEvents); - System::IdlePollUpdate(); - if (g_gpu_device && g_gpu_device->HasMainSwapChain()) - { - System::PresentDisplay(false, 0); - if (!g_gpu_device->GetMainSwapChain()->IsVSyncModeBlocking()) - g_gpu_device->GetMainSwapChain()->ThrottlePresentation(); - } } + + if (System::IsValid()) + System::ShutdownSystem(false); + + destroyBackgroundControllerPollTimer(); + + // tell GPU thread to exit + GPUThread::Internal::RequestShutdown(); + + // and tidy up everything left + System::CPUThreadShutdown(); } - if (System::IsValid()) - System::ShutdownSystem(false); - - destroyBackgroundControllerPollTimer(); - System::CPUThreadShutdown(); - // move back to UI thread moveToThread(m_ui_thread); } -void Host::FrameDone() +QtHost::GPUThread::GPUThread(QObject* parent) : QThread(parent) +{ +} + +QtHost::GPUThread::~GPUThread() = default; + +void QtHost::GPUThread::run() +{ + ::GPUThread::Internal::GPUThreadEntryPoint(); +} + +void Host::FrameDoneOnGPUThread(GPUBackend* gpu_backend, u32 frame_number) { } @@ -1921,7 +1947,7 @@ void Host::OnInputDeviceConnected(std::string_view identifier, std::string_view { emit 
g_emu_thread->onInputDeviceConnected(std::string(identifier), std::string(device_name)); - if (System::IsValid() || g_emu_thread->isRunningFullscreenUI()) + if (System::IsValid() || g_emu_thread->isFullscreenUIActive()) { Host::AddIconOSDMessage(fmt::format("ControllerConnected{}", identifier), ICON_FA_GAMEPAD, fmt::format(TRANSLATE_FS("QtHost", "Controller {} connected."), identifier), @@ -1947,7 +1973,7 @@ void Host::OnInputDeviceDisconnected(InputBindingKey key, std::string_view ident Host::AddIconOSDMessage(fmt::format("ControllerConnected{}", identifier), ICON_FA_GAMEPAD, std::move(message), Host::OSD_WARNING_DURATION); } - else if (System::IsValid() || g_emu_thread->isRunningFullscreenUI()) + else if (System::IsValid() || g_emu_thread->isFullscreenUIActive()) { Host::AddIconOSDMessage(fmt::format("ControllerConnected{}", identifier), ICON_FA_GAMEPAD, fmt::format(TRANSLATE_FS("QtHost", "Controller {} disconnected."), identifier), @@ -2012,17 +2038,17 @@ void Host::ReleaseRenderWindow() g_emu_thread->releaseRenderWindow(); } -void EmuThread::updatePerformanceCounters() +void EmuThread::updatePerformanceCounters(const GPUBackend* gpu_backend) { - const RenderAPI render_api = g_gpu_device ? 
g_gpu_device->GetRenderAPI() : RenderAPI::None; - const bool hardware_renderer = g_gpu && g_gpu->IsHardwareRenderer(); + const RenderAPI render_api = g_gpu_device->GetRenderAPI(); + const bool hardware_renderer = gpu_backend->IsHardwareRenderer(); u32 render_width = 0; u32 render_height = 0; - if (g_gpu) + if (gpu_backend) { - const u32 render_scale = g_gpu->GetResolutionScale(); - std::tie(render_width, render_height) = g_gpu->GetFullDisplayResolution(); + const u32 render_scale = gpu_backend->GetResolutionScale(); + std::tie(render_width, render_height) = gpu_backend->GetFullDisplayResolution(); render_width *= render_scale; render_height *= render_scale; } @@ -2085,9 +2111,9 @@ void EmuThread::resetPerformanceCounters() Q_ARG(const QString&, blank)); } -void Host::OnPerformanceCountersUpdated() +void Host::OnPerformanceCountersUpdated(const GPUBackend* gpu_backend) { - g_emu_thread->updatePerformanceCounters(); + g_emu_thread->updatePerformanceCounters(gpu_backend); } void Host::OnGameChanged(const std::string& disc_path, const std::string& game_serial, const std::string& game_name) diff --git a/src/duckstation-qt/qthost.h b/src/duckstation-qt/qthost.h index aa4e71468..7eafe1835 100644 --- a/src/duckstation-qt/qthost.h +++ b/src/duckstation-qt/qthost.h @@ -44,6 +44,8 @@ class INISettingsInterface; enum class RenderAPI : u8; class GPUDevice; +class GPUBackend; + class MainWindow; class DisplayWidget; @@ -91,9 +93,9 @@ public: ALWAYS_INLINE QEventLoop* getEventLoop() const { return m_event_loop; } ALWAYS_INLINE bool isFullscreen() const { return m_is_fullscreen; } + ALWAYS_INLINE bool isFullscreenUIActive() const { return m_is_fullscreen_ui_active; } ALWAYS_INLINE bool isRenderingToMain() const { return m_is_rendering_to_main; } ALWAYS_INLINE bool isSurfaceless() const { return m_is_surfaceless; } - ALWAYS_INLINE bool isRunningFullscreenUI() const { return m_run_fullscreen_ui; } std::optional acquireRenderWindow(RenderAPI render_api, bool fullscreen, bool 
exclusive_fullscreen, Error* error); @@ -102,6 +104,8 @@ public: void startBackgroundControllerPollTimer(); void stopBackgroundControllerPollTimer(); + void setFullscreenUIActive(bool active); + void setFullscreenUIStarted(bool started); void wakeThread(); bool shouldRenderToMain() const; @@ -109,7 +113,7 @@ public: void bootOrLoadState(std::string path); - void updatePerformanceCounters(); + void updatePerformanceCounters(const GPUBackend* gpu_backend); void resetPerformanceCounters(); /// Locks the system by pausing it, while a popup dialog is displayed. @@ -147,7 +151,7 @@ Q_SIGNALS: void runningGameChanged(const QString& filename, const QString& game_serial, const QString& game_title); void inputProfileLoaded(); void mouseModeRequested(bool relative, bool hide_cursor); - void fullscreenUIStateChange(bool running); + void fullscreenUIStartedOrStopped(bool running); void achievementsLoginRequested(Achievements::LoginRequestReason reason); void achievementsRefreshed(quint32 id, const QString& game_info_string); void achievementsChallengeModeChanged(bool enabled); @@ -242,9 +246,10 @@ private: QTimer* m_background_controller_polling_timer = nullptr; bool m_shutdown_flag = false; - bool m_run_fullscreen_ui = false; bool m_is_rendering_to_main = false; bool m_is_fullscreen = false; + bool m_is_fullscreen_ui_started = false; + bool m_is_fullscreen_ui_active = false; bool m_is_surfaceless = false; bool m_save_state_on_shutdown = false; diff --git a/src/duckstation-regtest/regtest_host.cpp b/src/duckstation-regtest/regtest_host.cpp index 5f60d1a7e..50a0ace25 100644 --- a/src/duckstation-regtest/regtest_host.cpp +++ b/src/duckstation-regtest/regtest_host.cpp @@ -5,7 +5,7 @@ #include "core/controller.h" #include "core/fullscreen_ui.h" #include "core/game_list.h" -#include "core/gpu.h" +#include "core/gpu_backend.h" #include "core/host.h" #include "core/system.h" #include "core/system_private.h" @@ -276,7 +276,7 @@ void Host::OnIdleStateChanged() // } -void 
Host::OnPerformanceCountersUpdated() +void Host::OnPerformanceCountersUpdated(const GPUBackend* gpu_backend) { // } @@ -365,14 +365,10 @@ void Host::DestroyAuxiliaryRenderWindow(AuxiliaryRenderWindowHandle handle, s32* { } -void Host::FrameDone() +void Host::FrameDoneOnGPUThread(GPUBackend* gpu_backend, u32 frame_number) { - const u32 frame = System::GetFrameNumber(); - if (s_frame_dump_interval > 0 && (s_frame_dump_interval == 1 || (frame % s_frame_dump_interval) == 0)) - { - std::string dump_filename(RegTestHost::GetFrameDumpFilename(frame)); - g_gpu->WriteDisplayTextureToFile(std::move(dump_filename)); - } + if (s_frame_dump_interval > 0 && (s_frame_dump_interval == 1 || (frame_number % s_frame_dump_interval) == 0)) + gpu_backend->WriteDisplayTextureToFile(RegTestHost::GetFrameDumpFilename(frame_number)); } void Host::OpenURL(std::string_view url) diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index b3b90af66..5bc052c8d 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -343,6 +343,17 @@ const char* GPUDevice::ShaderLanguageToString(GPUShaderLanguage language) } } +const char* GPUDevice::VSyncModeToString(GPUVSyncMode mode) +{ + static constexpr std::array(GPUVSyncMode::Count)> vsync_modes = {{ + "Disabled", + "FIFO", + "Mailbox", + }}; + + return vsync_modes[static_cast(mode)]; +} + bool GPUDevice::IsSameRenderAPI(RenderAPI lhs, RenderAPI rhs) { return (lhs == rhs || ((lhs == RenderAPI::OpenGL || lhs == RenderAPI::OpenGLES) && diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 309b4db39..0859ea6c4 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -601,6 +601,9 @@ public: /// Returns a string representing the specified language. static const char* ShaderLanguageToString(GPUShaderLanguage language); + /// Returns a string representing the specified vsync mode. + static const char* VSyncModeToString(GPUVSyncMode mode); + /// Returns a new device for the specified API. 
static std::unique_ptr CreateDeviceForAPI(RenderAPI api); diff --git a/src/util/state_wrapper.h b/src/util/state_wrapper.h index 9ac2b86d8..bdf581eef 100644 --- a/src/util/state_wrapper.h +++ b/src/util/state_wrapper.h @@ -34,6 +34,8 @@ public: ALWAYS_INLINE bool IsReading() const { return (m_mode == Mode::Read); } ALWAYS_INLINE bool IsWriting() const { return (m_mode == Mode::Write); } ALWAYS_INLINE u32 GetVersion() const { return m_version; } + ALWAYS_INLINE const u8* GetData() const { return m_data; } + ALWAYS_INLINE size_t GetDataSize() const { return m_size; } ALWAYS_INLINE size_t GetPosition() const { return m_pos; } /// Overload for integral or floating-point types. Writes bytes as-is.