diff --git a/src/common/threading.cpp b/src/common/threading.cpp index 8dacce838..88c08de17 100644 --- a/src/common/threading.cpp +++ b/src/common/threading.cpp @@ -1,10 +1,17 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin , 2002-2023 PCSX2 Dev Team +// SPDX-License-Identifier: LGPL-3.0 #include "threading.h" #include "assert.h" +#include "log.h" +#include "timer.h" + #include +#if defined(CPU_ARCH_X86) || defined(CPU_ARCH_X64) +#include +#endif + #if !defined(_WIN32) && !defined(__APPLE__) #ifndef _GNU_SOURCE #define _GNU_SOURCE @@ -14,6 +21,10 @@ #if defined(_WIN32) #include "windows_headers.h" #include + +#if defined(CPU_ARCH_ARM64) && defined(_MSC_VER) +#include +#endif #else #include #include @@ -38,6 +49,8 @@ #endif #endif +Log_SetChannel(Threading); + #ifdef _WIN32 union FileTimeU64Union { @@ -101,6 +114,138 @@ void Threading::Timeslice() #endif } +static void MultiPause() +{ +#if defined(CPU_ARCH_X86) || defined(CPU_ARCH_X64) + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); +#elif defined(CPU_ARCH_ARM64) && defined(_MSC_VER) + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); + __isb(_ARM64_BARRIER_SY); +#elif defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_ARM32) + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); + __asm__ __volatile__("isb"); +#elif defined(CPU_ARCH_RISCV64) + // Probably wrong... 
pause is optional :/ + asm volatile("fence" ::: "memory"); +#else +#pragma warning("Missing implementation") +#endif +} + +// Apple uses a lower tick frequency, so we can't use the dynamic loop below. +#if !defined(_M_ARM64) || defined(__APPLE__) || defined(_WIN32) + +static u32 PAUSE_TIME = 0; + +static u32 MeasurePauseTime() +{ + // GetCPUTicks may have resolution as low as 1us + // One call to MultiPause could take anywhere from 20ns (fast Haswell) to 400ns (slow Skylake) + // We want a measurement of reasonable resolution, but don't want to take too long + // So start at a fairly small number and increase it if it's too fast + for (int testcnt = 64; true; testcnt *= 2) + { + Common::Timer::Value start = Common::Timer::GetCurrentValue(); + for (int i = 0; i < testcnt; i++) + { + MultiPause(); + } + Common::Timer::Value time = Common::Timer::GetCurrentValue() - start; + if (time > 100) + { + const double nanos = Common::Timer::ConvertValueToNanoseconds(time); + return static_cast((nanos / testcnt) + 1); + } + } +} + +NEVER_INLINE static void UpdatePauseTime() +{ + Common::Timer::BusyWait(10000000); + u32 pause = MeasurePauseTime(); + // Take a few measurements in case something weird happens during one + // (e.g. OS interrupt) + for (int i = 0; i < 4; i++) + pause = std::min(pause, MeasurePauseTime()); + PAUSE_TIME = pause; + VERBOSE_LOG("MultiPause time: {}ns", pause); +} + +u32 Threading::ShortSpin() +{ + u32 inc = PAUSE_TIME; + if (inc == 0) [[unlikely]] + { + UpdatePauseTime(); + inc = PAUSE_TIME; + } + + u32 time = 0; + // Sleep for approximately 500ns + for (; time < 500; time += inc) + MultiPause(); + + return time; +} + +#else + +// On ARM, we have big/little cores, and who knows which one we'll measure/run on.. +// TODO: Actually verify this code. 
+const u32 SHORT_SPIN_TIME_TICKS = static_cast<u32>((Common::Timer::GetFrequency() * 500) / 1000000000); + +u32 Threading::ShortSpin() +{ + const Common::Timer::Value start = Common::Timer::GetCurrentValue(); + Common::Timer::Value now = start; + while ((now - start) < SHORT_SPIN_TIME_TICKS) + { + MultiPause(); + now = Common::Timer::GetCurrentValue(); + } + + return static_cast<u32>(Common::Timer::ConvertValueToNanoseconds(now - start)); // elapsed ticks -> ns, as callers accumulate this against SPIN_TIME_NS +} + +#endif + +static u32 GetSpinTime() +{ + if (char* req = std::getenv("WAIT_SPIN_MICROSECONDS")) + { + return 1000 * atoi(req); + } + else + { +#ifndef _M_ARM64 + return 50 * 1000; // 50us +#else + return 200 * 1000; // 200us +#endif + } +} + +const u32 Threading::SPIN_TIME_NS = GetSpinTime(); + Threading::ThreadHandle::ThreadHandle() = default; #ifdef _WIN32 @@ -617,3 +762,130 @@ bool Threading::KernelSemaphore::TryWait() return sem_trywait(&m_sema) == 0; #endif } + +bool Threading::WorkSema::CheckForWork() +{ + s32 value = m_state.load(std::memory_order_relaxed); + DebugAssert(!IsDead(value)); + + // we want to switch to the running state, but preserve the waiting empty bit for RUNNING_N -> RUNNING_0 + // otherwise, we clear the waiting flag (since we're notifying the waiter that we're empty below) + while (!m_state.compare_exchange_weak(value, + IsReadyForSleep(value) ? 
STATE_RUNNING_0 : (value & STATE_FLAG_WAITING_EMPTY), + std::memory_order_acq_rel, std::memory_order_relaxed)) + { + } + + // if we're not empty, we have work to do + if (!IsReadyForSleep(value)) + return true; + + // this means we're empty, so notify any waiters + if (value & STATE_FLAG_WAITING_EMPTY) + m_empty_sema.Post(); + + // no work to do + return false; +} + +void Threading::WorkSema::WaitForWork() +{ + // State change: + // SLEEPING, SPINNING: This is the worker thread and it's clearly not asleep or spinning, so these states should be + // impossible RUNNING_0: Change state to SLEEPING, wake up thread if WAITING_EMPTY RUNNING_N: Change state to + // RUNNING_0 (and preserve WAITING_EMPTY flag) + s32 value = m_state.load(std::memory_order_relaxed); + DebugAssert(!IsDead(value)); + while (!m_state.compare_exchange_weak(value, NextStateWaitForWork(value), std::memory_order_acq_rel, + std::memory_order_relaxed)) + ; + if (IsReadyForSleep(value)) + { + if (value & STATE_FLAG_WAITING_EMPTY) + m_empty_sema.Post(); + m_sema.Wait(); + // Acknowledge any additional work added between wake up request and getting here + m_state.fetch_and(STATE_FLAG_WAITING_EMPTY, std::memory_order_acquire); + } +} + +void Threading::WorkSema::WaitForWorkWithSpin() +{ + s32 value = m_state.load(std::memory_order_relaxed); + DebugAssert(!IsDead(value)); + while (IsReadyForSleep(value)) + { + if (m_state.compare_exchange_weak(value, STATE_SPINNING, std::memory_order_release, std::memory_order_relaxed)) + { + if (value & STATE_FLAG_WAITING_EMPTY) + m_empty_sema.Post(); + value = STATE_SPINNING; + break; + } + } + u32 waited = 0; + while (value < 0) + { + if (waited > SPIN_TIME_NS) + { + if (!m_state.compare_exchange_weak(value, STATE_SLEEPING, std::memory_order_relaxed)) + continue; + m_sema.Wait(); + break; + } + waited += ShortSpin(); + value = m_state.load(std::memory_order_relaxed); + } + // Clear back to STATE_RUNNING_0 (but preserve waiting empty flag) + 
m_state.fetch_and(STATE_FLAG_WAITING_EMPTY, std::memory_order_acquire); +} + +bool Threading::WorkSema::WaitForEmpty() +{ + s32 value = m_state.load(std::memory_order_acquire); + while (true) + { + if (value < 0) + return !IsDead(value); // STATE_SLEEPING or STATE_SPINNING, queue is empty! + // Note: We technically only need memory_order_acquire on *failure* (because that's when we could leave without + // sleeping), but libstdc++ still asserts on failure < success + if (m_state.compare_exchange_weak(value, value | STATE_FLAG_WAITING_EMPTY, std::memory_order_acquire)) + break; + } + DebugAssertMsg(!(value & STATE_FLAG_WAITING_EMPTY), + "Multiple threads attempted to wait for empty (not currently supported)"); + m_empty_sema.Wait(); + return !IsDead(m_state.load(std::memory_order_relaxed)); +} + +bool Threading::WorkSema::WaitForEmptyWithSpin() +{ + s32 value = m_state.load(std::memory_order_acquire); + u32 waited = 0; + while (true) + { + if (value < 0) + return !IsDead(value); // STATE_SLEEPING or STATE_SPINNING, queue is empty! 
+ if (waited > SPIN_TIME_NS && + m_state.compare_exchange_weak(value, value | STATE_FLAG_WAITING_EMPTY, std::memory_order_acquire)) + break; + waited += ShortSpin(); + value = m_state.load(std::memory_order_acquire); + } + DebugAssertMsg(!(value & STATE_FLAG_WAITING_EMPTY), + "Multiple threads attempted to wait for empty (not currently supported)"); + m_empty_sema.Wait(); + return !IsDead(m_state.load(std::memory_order_relaxed)); +} + +void Threading::WorkSema::Kill() +{ + s32 value = m_state.exchange(std::numeric_limits::min(), std::memory_order_release); + if (value & STATE_FLAG_WAITING_EMPTY) + m_empty_sema.Post(); +} + +void Threading::WorkSema::Reset() +{ + m_state = STATE_RUNNING_0; +} diff --git a/src/common/threading.h b/src/common/threading.h index 49686bc36..cf138fa39 100644 --- a/src/common/threading.h +++ b/src/common/threading.h @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin , 2002-2023 PCSX2 Dev Team +// SPDX-License-Identifier: LGPL-3.0 #pragma once #include "types.h" @@ -20,9 +20,16 @@ extern u64 GetThreadTicksPerSecond(); /// Set the name of the current thread extern void SetNameOfCurrentThread(const char* name); -// Releases a timeslice to other threads. +/// Releases a timeslice to other threads. 
extern void Timeslice(); +/// Spin for a short period of time (call while spinning waiting for a lock) +/// Returns the approximate number of ns that passed +extern u32 ShortSpin(); + +/// Number of ns to spin for before sleeping a thread +extern const u32 SPIN_TIME_NS; + // -------------------------------------------------------------------------------------- // ThreadHandle // -------------------------------------------------------------------------------------- @@ -121,4 +128,90 @@ public: bool TryWait(); }; +/// A semaphore for notifying a work-processing thread of new work in a (separate) queue +/// +/// Usage: +/// - Processing thread loops on `WaitForWork()` followed by processing all work in the queue +/// - Threads adding work first add their work to the queue, then call `NotifyOfWork()` +class WorkSema +{ + /// Semaphore for sleeping the worker thread + KernelSemaphore m_sema; + + /// Semaphore for sleeping thread waiting on worker queue empty + KernelSemaphore m_empty_sema; + + /// Current state (see enum below) + std::atomic m_state{0}; + + // Expected call frequency is NotifyOfWork > WaitForWork > WaitForEmpty + // So optimize states for fast NotifyOfWork + enum + { + /* Any <-2 state: STATE_DEAD: Thread has crashed and is awaiting revival */ + STATE_SPINNING = -2, ///< Worker thread is spinning waiting for work + STATE_SLEEPING = -1, ///< Worker thread is sleeping on m_sema + STATE_RUNNING_0 = + 0, ///< Worker thread is processing work, but no work has been added since it last checked for new work + /* Any >0 state: STATE_RUNNING_N: Worker thread is processing work, and work has been added since it last checked + for new work */ + STATE_FLAG_WAITING_EMPTY = + 1 << 30, ///< Flag to indicate that a thread is sleeping on m_empty_sema (can be applied to any STATE_RUNNING) + }; + + bool IsDead(s32 state) { return state < STATE_SPINNING; } + + bool IsReadyForSleep(s32 state) + { + s32 waiting_empty_cleared = state & (STATE_FLAG_WAITING_EMPTY - 1); + return 
waiting_empty_cleared == STATE_RUNNING_0; + } + + s32 NextStateWaitForWork(s32 current) + { + s32 new_state = IsReadyForSleep(current) ? STATE_SLEEPING : STATE_RUNNING_0; + return new_state | (current & STATE_FLAG_WAITING_EMPTY); // Preserve waiting empty flag for RUNNING_N -> RUNNING_0 + } + +public: + /// Notify the worker thread that you've added new work to its queue + void NotifyOfWork() + { + // State change: + // DEAD: Stay in DEAD (starting DEAD state is INT_MIN so we can assume we won't flip over to anything else) + // SPINNING: Change state to RUNNING. Thread will notice and process the new data + // SLEEPING: Change state to RUNNING and wake worker. Thread will wake up and process the new data. + // RUNNING_0: Change state to RUNNING_N. + // RUNNING_N: Stay in RUNNING_N + s32 old = m_state.fetch_add(2, std::memory_order_release); + if (old == STATE_SLEEPING) + m_sema.Post(); + } + + /// Checks if there's any work in the queue + bool CheckForWork(); + + /// Wait for work to be added to the queue + void WaitForWork(); + + /// Wait for work to be added to the queue, spinning for a bit before sleeping the thread + void WaitForWorkWithSpin(); + + /// Wait for the worker thread to finish processing all entries in the queue or die + /// Returns false if the thread is dead + bool WaitForEmpty(); + + /// Wait for the worker thread to finish processing all entries in the queue or die, spinning a bit before sleeping + /// the thread Returns false if the thread is dead + bool WaitForEmptyWithSpin(); + + /// Called by the worker thread to notify others of its death + /// Dead threads don't process work, and WaitForEmpty will return instantly even though there may be work in the queue + void Kill(); + + /// Reset the semaphore to the initial state + /// Should be called by the worker thread if it restarts after dying + void Reset(); +}; + } // namespace Threading \ No newline at end of file diff --git a/src/common/types.h b/src/common/types.h index 
2bc435636..751ae6d38 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -26,6 +26,18 @@ #define ALWAYS_INLINE_RELEASE ALWAYS_INLINE #endif +// Avoid inline helper +#ifndef NEVER_INLINE +#if defined(_MSC_VER) +#define NEVER_INLINE __declspec(noinline) +#elif defined(__GNUC__) || defined(__clang__) +#define NEVER_INLINE __attribute__((noinline)) +#else +#define NEVER_INLINE +#endif +#endif + + // unreferenced parameter macro #ifndef UNREFERENCED_VARIABLE #if defined(__GNUC__) || defined(__clang__) || defined(__EMSCRIPTEN__) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 882659161..62fa58b4b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -55,10 +55,10 @@ add_library(core gpu_shadergen.h gpu_sw.cpp gpu_sw.h - gpu_sw_backend.cpp - gpu_sw_backend.h gpu_sw_rasterizer.cpp gpu_sw_rasterizer.h + gpu_thread.cpp + gpu_thread.h gpu_types.h guncon.cpp guncon.h diff --git a/src/core/achievements.cpp b/src/core/achievements.cpp index 61fbbdd5c..19ecd18ba 100644 --- a/src/core/achievements.cpp +++ b/src/core/achievements.cpp @@ -11,6 +11,7 @@ #include "bus.h" #include "cpu_core.h" #include "fullscreen_ui.h" +#include "gpu_thread.h" #include "host.h" #include "system.h" @@ -1001,7 +1002,7 @@ void Achievements::ClientLoadGameCallback(int result, const char* error_message, // ensure fullscreen UI is ready for notifications if (display_summary) - FullscreenUI::Initialize(); + GPUThread::RunOnThread(&FullscreenUI::Initialize); if (const std::string_view badge_name = info->badge_name; !badge_name.empty()) { @@ -1062,7 +1063,7 @@ void Achievements::ClearGameHash() void Achievements::DisplayAchievementSummary() { - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title; if (IsHardcoreModeActive()) @@ -1087,8 +1088,13 @@ void Achievements::DisplayAchievementSummary() summary = TRANSLATE_STR("Achievements", "This game has no achievements."); } - 
ImGuiFullscreen::AddNotification("achievement_summary", ACHIEVEMENT_SUMMARY_NOTIFICATION_TIME, std::move(title), - std::move(summary), s_game_icon); + GPUThread::RunOnThread([title = std::move(title), summary = std::move(summary), icon = s_game_icon]() mutable { // icon is copied at enqueue time, like the other notification lambdas + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievement_summary", ACHIEVEMENT_SUMMARY_NOTIFICATION_TIME, std::move(title), + std::move(summary), std::move(icon)); + }); } // Technically not going through the resource API, but since we're passing this to something else, we can't. @@ -1098,11 +1104,16 @@ void Achievements::DisplayAchievementSummary() void Achievements::DisplayHardcoreDeferredMessage() { - if (g_settings.achievements_hardcore_mode && !s_hardcore_mode && System::IsValid() && FullscreenUI::Initialize()) + if (g_settings.achievements_hardcore_mode && !s_hardcore_mode && System::IsValid()) { - ImGuiFullscreen::ShowToast(std::string(), - TRANSLATE_STR("Achievements", "Hardcore mode will be enabled on system reset."), - Host::OSD_WARNING_DURATION); + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::ShowToast(std::string(), + TRANSLATE_STR("Achievements", "Hardcore mode will be enabled on system reset."), + Host::OSD_WARNING_DURATION); + }); } } @@ -1124,7 +1135,7 @@ void Achievements::HandleUnlockEvent(const rc_client_event_t* event) INFO_LOG("Achievement {} ({}) for game {} unlocked", cheevo->title, cheevo->id, s_game_id); UpdateGameSummary(); - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title; if (cheevo->category == RC_CLIENT_ACHIEVEMENT_CATEGORY_UNOFFICIAL) @@ -1134,9 +1145,15 @@ void Achievements::HandleUnlockEvent(const rc_client_event_t* event) std::string badge_path = GetAchievementBadgePath(cheevo, cheevo->state); - ImGuiFullscreen::AddNotification(fmt::format("achievement_unlock_{}", cheevo->id), - 
static_cast(g_settings.achievements_notification_duration), - std::move(title), cheevo->description, std::move(badge_path)); + GPUThread::RunOnThread([id = cheevo->id, duration = g_settings.achievements_notification_duration, + title = std::move(title), description = std::string(cheevo->description), + badge_path = std::move(badge_path)]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("achievement_unlock_{}", id), static_cast(duration), + std::move(title), std::move(description), std::move(badge_path)); + }); } if (g_settings.achievements_sound_effects) @@ -1148,7 +1165,7 @@ void Achievements::HandleGameCompleteEvent(const rc_client_event_t* event) INFO_LOG("Game {} complete", s_game_id); UpdateGameSummary(); - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string title = fmt::format(TRANSLATE_FS("Achievements", "Mastered {}"), s_game_title); std::string message = @@ -1157,8 +1174,13 @@ void Achievements::HandleGameCompleteEvent(const rc_client_event_t* event) s_game_summary.num_unlocked_achievements), TRANSLATE_PLURAL_STR("Achievements", "%n points", "Mastery popup", s_game_summary.points_unlocked)); - ImGuiFullscreen::AddNotification("achievement_mastery", GAME_COMPLETE_NOTIFICATION_TIME, std::move(title), - std::move(message), s_game_icon); + GPUThread::RunOnThread([title = std::move(title), message = std::move(message), icon = s_game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievement_mastery", GAME_COMPLETE_NOTIFICATION_TIME, std::move(title), + std::move(message), std::move(icon)); + }); } } @@ -1166,14 +1188,19 @@ void Achievements::HandleLeaderboardStartedEvent(const rc_client_event_t* event) { DEV_LOG("Leaderboard {} ({}) started", event->leaderboard->id, event->leaderboard->title); - if (g_settings.achievements_leaderboard_notifications && 
FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { std::string title = event->leaderboard->title; std::string message = TRANSLATE_STR("Achievements", "Leaderboard attempt started."); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - LEADERBOARD_STARTED_NOTIFICATION_TIME, std::move(title), std::move(message), - s_game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), LEADERBOARD_STARTED_NOTIFICATION_TIME, + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1181,14 +1208,19 @@ void Achievements::HandleLeaderboardFailedEvent(const rc_client_event_t* event) { DEV_LOG("Leaderboard {} ({}) failed", event->leaderboard->id, event->leaderboard->title); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { std::string title = event->leaderboard->title; std::string message = TRANSLATE_STR("Achievements", "Leaderboard attempt failed."); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - LEADERBOARD_FAILED_NOTIFICATION_TIME, std::move(title), std::move(message), - s_game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_game_icon]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), LEADERBOARD_FAILED_NOTIFICATION_TIME, + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1196,7 +1228,7 @@ void Achievements::HandleLeaderboardSubmittedEvent(const rc_client_event_t* even { DEV_LOG("Leaderboard {} ({}) submitted", event->leaderboard->id, event->leaderboard->title); - 
if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { static const char* value_strings[NUM_RC_CLIENT_LEADERBOARD_FORMATS] = { TRANSLATE_NOOP("Achievements", "Your Time: {}{}"), @@ -1212,9 +1244,14 @@ void Achievements::HandleLeaderboardSubmittedEvent(const rc_client_event_t* even event->leaderboard->tracker_value ? event->leaderboard->tracker_value : "Unknown", g_settings.achievements_spectator_mode ? std::string_view() : TRANSLATE_SV("Achievements", " (Submitting)")); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - static_cast(g_settings.achievements_leaderboard_duration), std::move(title), - std::move(message), s_game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_game_icon, duration = g_settings.achievements_leaderboard_duration]() mutable { // settings value is copied at enqueue time, as HandleUnlockEvent does + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), + static_cast(duration), + std::move(title), std::move(message), std::move(icon)); + }); } if (g_settings.achievements_sound_effects) @@ -1226,7 +1263,7 @@ void Achievements::HandleLeaderboardScoreboardEvent(const rc_client_event_t* eve DEV_LOG("Leaderboard {} scoreboard rank {} of {}", event->leaderboard_scoreboard->leaderboard_id, event->leaderboard_scoreboard->new_rank, event->leaderboard_scoreboard->num_entries); - if (g_settings.achievements_leaderboard_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_leaderboard_notifications) { static const char* value_strings[NUM_RC_CLIENT_LEADERBOARD_FORMATS] = { TRANSLATE_NOOP("Achievements", "Your Time: {} (Best: {})"), @@ -1243,9 +1280,15 @@ void Achievements::HandleLeaderboardScoreboardEvent(const rc_client_event_t* eve event->leaderboard_scoreboard->submitted_score, event->leaderboard_scoreboard->best_score), 
event->leaderboard_scoreboard->new_rank, event->leaderboard_scoreboard->num_entries); - ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", event->leaderboard->id), - static_cast(g_settings.achievements_leaderboard_duration), std::move(title), - std::move(message), s_game_icon); + GPUThread::RunOnThread([id = event->leaderboard->id, title = std::move(title), message = std::move(message), + icon = s_game_icon, duration = g_settings.achievements_leaderboard_duration]() mutable { // settings value is copied at enqueue time, as HandleUnlockEvent does + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification(fmt::format("leaderboard_{}", id), + static_cast(duration), + std::move(title), std::move(message), std::move(icon)); + }); } } @@ -1375,26 +1418,30 @@ void Achievements::HandleServerDisconnectedEvent(const rc_client_event_t* event) { WARNING_LOG("Server disconnected."); - if (FullscreenUI::Initialize()) - { + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::ShowToast( TRANSLATE_STR("Achievements", "Achievements Disconnected"), TRANSLATE_STR("Achievements", "An unlock request could not be completed. 
We will keep retrying to submit this request."), Host::OSD_ERROR_DURATION); - } + }); } void Achievements::HandleServerReconnectedEvent(const rc_client_event_t* event) { WARNING_LOG("Server reconnected."); - if (FullscreenUI::Initialize()) - { + GPUThread::RunOnThread([]() { + if (!FullscreenUI::Initialize()) + return; + ImGuiFullscreen::ShowToast(TRANSLATE_STR("Achievements", "Achievements Reconnected"), TRANSLATE_STR("Achievements", "All pending unlock requests have completed."), Host::OSD_INFO_DURATION); - } + }); } void Achievements::ResetClient() @@ -1472,12 +1519,17 @@ void Achievements::SetHardcoreMode(bool enabled, bool force_display_message) // new mode s_hardcore_mode = enabled; - if (System::IsValid() && (HasActiveGame() || force_display_message) && FullscreenUI::Initialize()) + if (System::IsValid() && (HasActiveGame() || force_display_message)) { - ImGuiFullscreen::ShowToast(std::string(), - enabled ? TRANSLATE_STR("Achievements", "Hardcore mode is now enabled.") : - TRANSLATE_STR("Achievements", "Hardcore mode is now disabled."), - Host::OSD_INFO_DURATION); + GPUThread::RunOnThread([enabled]() { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::ShowToast(std::string(), + enabled ? 
TRANSLATE_STR("Achievements", "Hardcore mode is now enabled.") : + TRANSLATE_STR("Achievements", "Hardcore mode is now disabled."), + Host::OSD_INFO_DURATION); + }); } rc_client_set_hardcore_enabled(s_client, enabled); @@ -1806,7 +1858,7 @@ void Achievements::ShowLoginNotification() if (!user) return; - if (g_settings.achievements_notifications && FullscreenUI::Initialize()) + if (g_settings.achievements_notifications) { std::string badge_path = GetLoggedInUserBadgePath(); std::string title = user->display_name; @@ -1815,8 +1867,14 @@ void Achievements::ShowLoginNotification() std::string summary = fmt::format(TRANSLATE_FS("Achievements", "Score: {} ({} softcore)\nUnread messages: {}"), user->score, user->score_softcore, user->num_unread_messages); - ImGuiFullscreen::AddNotification("achievements_login", LOGIN_NOTIFICATION_TIME, std::move(title), - std::move(summary), std::move(badge_path)); + GPUThread::RunOnThread( + [title = std::move(title), summary = std::move(summary), badge_path = std::move(badge_path)]() mutable { + if (!FullscreenUI::Initialize()) + return; + + ImGuiFullscreen::AddNotification("achievements_login", LOGIN_NOTIFICATION_TIME, std::move(title), + std::move(summary), std::move(badge_path)); + }); } } @@ -1913,14 +1971,6 @@ void Achievements::ConfirmHardcoreModeDisableAsync(const char* trigger, std::fun } #endif - if (!FullscreenUI::Initialize()) - { - Host::AddOSDMessage(fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), - Host::OSD_WARNING_DURATION); - callback(false); - return; - } - auto real_callback = [callback = std::move(callback)](bool res) mutable { // don't run the callback in the middle of rendering the UI Host::RunOnCPUThread([callback = std::move(callback), res]() { @@ -1930,13 +1980,25 @@ void Achievements::ConfirmHardcoreModeDisableAsync(const char* trigger, std::fun }); }; - ImGuiFullscreen::OpenConfirmMessageDialog( - TRANSLATE_STR("Achievements", "Confirm Hardcore Mode"), - 
fmt::format(TRANSLATE_FS("Achievements", "{0} cannot be performed while hardcore mode is active. Do you " - "want to disable hardcore mode? {0} will be cancelled if you select No."), - trigger), - std::move(real_callback), fmt::format(ICON_FA_CHECK " {}", TRANSLATE_SV("Achievements", "Yes")), - fmt::format(ICON_FA_TIMES " {}", TRANSLATE_SV("Achievements", "No"))); + GPUThread::RunOnThread([trigger = std::string(trigger), real_callback = std::move(real_callback)]() mutable { + if (!FullscreenUI::Initialize()) + { + Host::AddOSDMessage( + fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), + Host::OSD_WARNING_DURATION); + real_callback(false); + return; + } + + ImGuiFullscreen::OpenConfirmMessageDialog( + TRANSLATE_STR("Achievements", "Confirm Hardcore Mode"), + fmt::format(TRANSLATE_FS("Achievements", + "{0} cannot be performed while hardcore mode is active. Do you " + "want to disable hardcore mode? {0} will be cancelled if you select No."), + trigger), + std::move(real_callback), fmt::format(ICON_FA_CHECK " {}", TRANSLATE_SV("Achievements", "Yes")), + fmt::format(ICON_FA_TIMES " {}", TRANSLATE_SV("Achievements", "No"))); + }); #else Host::AddOSDMessage(fmt::format(TRANSLATE_FS("Achievements", "Cannot {} while hardcode mode is active."), trigger), Host::OSD_WARNING_DURATION); diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index b23b1cbf3..737702eda 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -49,7 +49,6 @@ - AdvancedVectorExtensions2 @@ -57,6 +56,7 @@ true NotUsing + @@ -134,8 +134,8 @@ - + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index d0dd29200..63cd0004f 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -46,7 +46,6 @@ - @@ -70,6 +69,7 @@ + @@ -119,7 +119,6 @@ - @@ -145,6 +144,7 @@ + diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index d0a57b8c8..86b298211 100644 --- a/src/core/fullscreen_ui.cpp +++ 
b/src/core/fullscreen_ui.cpp @@ -12,6 +12,7 @@ #include "cpu_core.h" #include "game_list.h" #include "gpu.h" +#include "gpu_thread.h" #include "host.h" #include "resources.h" #include "settings.h" @@ -594,6 +595,7 @@ bool FullscreenUI::Initialize() s_was_paused_on_quick_menu_open = false; s_about_window_open = false; s_hotkey_list_cache = InputManager::GetHotkeyList(); + GPUThread::SetRunIdle(true); if (!System::IsValid()) SwitchToLanding(); @@ -624,6 +626,7 @@ bool FullscreenUI::AreAnyDialogsOpen() void FullscreenUI::CheckForConfigChanges(const Settings& old_settings) { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; @@ -631,54 +634,114 @@ void FullscreenUI::CheckForConfigChanges(const Settings& old_settings) // That means we're going to be reading achievement state. if (old_settings.achievements_enabled && !g_settings.achievements_enabled) { - if (s_current_main_window == MainWindowType::Achievements || s_current_main_window == MainWindowType::Leaderboards) - ReturnToPreviousWindow(); + GPUThread::RunOnThread([]() { + if (!IsInitialized()) // re-check when the deferred call actually runs, like the other handlers + return; + + if (s_current_main_window == MainWindowType::Achievements || + s_current_main_window == MainWindowType::Leaderboards) + { + ReturnToPreviousWindow(); + } + }); } } void FullscreenUI::OnSystemStarted() { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; - s_current_main_window = MainWindowType::None; - QueueResetFocus(); + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; + + GPUThread::SetRunIdle(false); + + s_current_main_window = MainWindowType::None; + QueueResetFocus(); + }); } void FullscreenUI::OnSystemPaused() { - // noop + // NOTE: Called on CPU thread. 
+ if (!IsInitialized()) + return; + + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; + + GPUThread::SetRunIdle(true); + }); } void FullscreenUI::OnSystemResumed() { - // get rid of pause menu if we unpaused another way - if (s_current_main_window == MainWindowType::PauseMenu) - ClosePauseMenu(); + // NOTE: Called on CPU thread. + if (!IsInitialized()) + return; + + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; + + GPUThread::SetRunIdle(false); + + // get rid of pause menu if we unpaused another way + if (s_current_main_window == MainWindowType::PauseMenu) + ClosePauseMenu(); + }); } void FullscreenUI::OnSystemDestroyed() { + // NOTE: Called on CPU thread. if (!IsInitialized()) return; - s_pause_menu_was_open = false; - s_was_paused_on_quick_menu_open = false; - s_current_pause_submenu = PauseSubMenu::None; - SwitchToLanding(); + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; + + // If we didn't start big picture before the system, shut ourselves down. + if (!GPUThread::WasFullscreenUIRequested()) + { + Shutdown(); + return; + } + + GPUThread::SetRunIdle(true); + s_pause_menu_was_open = false; + s_was_paused_on_quick_menu_open = false; + s_current_pause_submenu = PauseSubMenu::None; + SwitchToLanding(); + }); } void FullscreenUI::OnRunningGameChanged() { + // NOTE: Called on CPU thread. 
if (!IsInitialized()) return; const std::string& path = System::GetDiscPath(); const std::string& serial = System::GetGameSerial(); + + std::string subtitle; if (!serial.empty()) - s_current_game_subtitle = fmt::format("{0} - {1}", serial, Path::GetFileName(path)); + subtitle = fmt::format("{0} - {1}", serial, Path::GetFileName(path)); else - s_current_game_subtitle = {}; + subtitle = {}; + + GPUThread::RunOnThread([subtitle = std::move(subtitle)]() mutable { + if (!IsInitialized()) + return; + + s_current_game_subtitle = std::move(subtitle); + }); } void FullscreenUI::PauseForMenuOpen(bool set_pause_menu_open) @@ -695,15 +758,17 @@ void FullscreenUI::OpenPauseMenu() if (!System::IsValid()) return; - if (!Initialize() || s_current_main_window != MainWindowType::None) - return; + GPUThread::RunOnThread([]() { + if (!Initialize() || s_current_main_window != MainWindowType::None) + return; - PauseForMenuOpen(true); - s_current_main_window = MainWindowType::PauseMenu; - s_current_pause_submenu = PauseSubMenu::None; - QueueResetFocus(); - ForceKeyNavEnabled(); - FixStateIfPaused(); + PauseForMenuOpen(true); + s_current_main_window = MainWindowType::PauseMenu; + s_current_pause_submenu = PauseSubMenu::None; + QueueResetFocus(); + ForceKeyNavEnabled(); + FixStateIfPaused(); + }); } void FullscreenUI::FixStateIfPaused() @@ -713,31 +778,26 @@ void FullscreenUI::FixStateIfPaused() // When we're paused, we won't have trickled the key up event for escape yet. Do it now. ImGui::UpdateInputEvents(false); - - Host::OnIdleStateChanged(); - Host::RunOnCPUThread([]() { - if (System::IsValid()) - { - // Why twice? To clear the "wants keyboard input" flag. 
- System::InvalidateDisplay(); - System::InvalidateDisplay(); - } - }); } void FullscreenUI::ClosePauseMenu() { - if (!IsInitialized() || !System::IsValid()) + if (!System::IsValid()) return; - if (System::GetState() == System::State::Paused && !s_was_paused_on_quick_menu_open) - Host::RunOnCPUThread([]() { System::PauseSystem(false); }); + GPUThread::RunOnThread([]() { + if (!IsInitialized()) + return; - s_current_main_window = MainWindowType::None; - s_current_pause_submenu = PauseSubMenu::None; - s_pause_menu_was_open = false; - QueueResetFocus(); - FixStateIfPaused(); + if (System::GetState() == System::State::Paused && !s_was_paused_on_quick_menu_open) + Host::RunOnCPUThread([]() { System::PauseSystem(false); }); + + s_current_main_window = MainWindowType::None; + s_current_pause_submenu = PauseSubMenu::None; + s_pause_menu_was_open = false; + QueueResetFocus(); + FixStateIfPaused(); + }); } void FullscreenUI::OpenPauseSubMenu(PauseSubMenu submenu) @@ -749,6 +809,8 @@ void FullscreenUI::OpenPauseSubMenu(PauseSubMenu submenu) void FullscreenUI::Shutdown() { + GPUThread::SetRunIdle(false); + Achievements::ClearUIState(); CloseSaveStateSelector(); s_cover_image_map.clear(); @@ -1135,152 +1197,166 @@ void FullscreenUI::DoChangeDiscFromFile() void FullscreenUI::DoChangeDisc() { - ImGuiFullscreen::ChoiceDialogOptions options; + Host::RunOnCPUThread([]() { + ImGuiFullscreen::ChoiceDialogOptions options; - if (System::HasMediaSubImages()) - { - const u32 current_index = System::GetMediaSubImageIndex(); - const u32 count = System::GetMediaSubImageCount(); - options.reserve(count + 1); - options.emplace_back(FSUI_STR("From File..."), false); - - for (u32 i = 0; i < count; i++) - options.emplace_back(System::GetMediaSubImageTitle(i), i == current_index); - - auto callback = [](s32 index, const std::string& title, bool checked) { - if (index == 0) - { - CloseChoiceDialog(); - DoChangeDiscFromFile(); - return; - } - else if (index > 0) - { - 
System::SwitchMediaSubImage(static_cast(index - 1)); - } - - QueueResetFocus(); - CloseChoiceDialog(); - ReturnToPreviousWindow(); - }; - - OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), - std::move(callback)); - - return; - } - - if (const GameDatabase::Entry* entry = System::GetGameDatabaseEntry(); entry && !entry->disc_set_serials.empty()) - { - const auto lock = GameList::GetLock(); - const auto matches = GameList::GetMatchingEntriesForSerial(entry->disc_set_serials); - if (matches.size() > 1) + if (System::HasMediaSubImages()) { - options.reserve(matches.size() + 1); + const u32 current_index = System::GetMediaSubImageIndex(); + const u32 count = System::GetMediaSubImageCount(); + options.reserve(count + 1); options.emplace_back(FSUI_STR("From File..."), false); - std::vector paths; - paths.reserve(matches.size()); + for (u32 i = 0; i < count; i++) + options.emplace_back(System::GetMediaSubImageTitle(i), i == current_index); - const std::string& current_path = System::GetDiscPath(); - for (auto& [title, glentry] : matches) - { - options.emplace_back(std::move(title), current_path == glentry->path); - paths.push_back(glentry->path); - } + GPUThread::RunOnThread([options = std::move(options)]() mutable { + auto callback = [](s32 index, const std::string& title, bool checked) { + if (index == 0) + { + CloseChoiceDialog(); + DoChangeDiscFromFile(); + return; + } + else if (index > 0) + { + Host::RunOnCPUThread([index = static_cast(index - 1)]() { System::SwitchMediaSubImage(index); }); + } - auto callback = [paths = std::move(paths)](s32 index, const std::string& title, bool checked) { - if (index == 0) - { + QueueResetFocus(); CloseChoiceDialog(); - DoChangeDiscFromFile(); - return; - } - else if (index > 0) - { - System::InsertMedia(paths[index - 1].c_str()); - } + ReturnToPreviousWindow(); + }; - QueueResetFocus(); - CloseChoiceDialog(); - ReturnToMainWindow(); - }; - - 
OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), - std::move(callback)); + OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), + std::move(callback)); + }); return; } - } - DoChangeDiscFromFile(); + if (const GameDatabase::Entry* entry = System::GetGameDatabaseEntry(); entry && !entry->disc_set_serials.empty()) + { + const auto lock = GameList::GetLock(); + const auto matches = GameList::GetMatchingEntriesForSerial(entry->disc_set_serials); + if (matches.size() > 1) + { + options.reserve(matches.size() + 1); + options.emplace_back(FSUI_STR("From File..."), false); + + std::vector paths; + paths.reserve(matches.size()); + + const std::string& current_path = System::GetDiscPath(); + for (auto& [title, glentry] : matches) + { + options.emplace_back(std::move(title), current_path == glentry->path); + paths.push_back(glentry->path); + } + + GPUThread::RunOnThread([options = std::move(options), paths = std::move(paths)]() mutable { + auto callback = [paths = std::move(paths)](s32 index, const std::string& title, bool checked) { + if (index == 0) + { + CloseChoiceDialog(); + DoChangeDiscFromFile(); + return; + } + else if (index > 0) + { + Host::RunOnCPUThread([path = std::move(paths[index - 1])]() { System::InsertMedia(path.c_str()); }); + } + + QueueResetFocus(); + CloseChoiceDialog(); + ReturnToMainWindow(); + }; + + OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_COMPACT_DISC, "Select Disc Image"), true, std::move(options), + std::move(callback)); + }); + + return; + } + } + + GPUThread::RunOnThread([]() { DoChangeDiscFromFile(); }); + }); } void FullscreenUI::DoCheatsMenu() { - CheatList* cl = System::GetCheatList(); - if (!cl) - { - if (!System::LoadCheatListFromDatabase() || ((cl = System::GetCheatList()) == nullptr)) - { - Host::AddKeyedOSDMessage("load_cheat_list", - fmt::format(FSUI_FSTR("No cheats found for {}."), System::GetGameTitle()), 10.0f); - ReturnToPreviousWindow(); 
- return; - } - } - - ImGuiFullscreen::ChoiceDialogOptions options; - options.reserve(cl->GetCodeCount()); - for (u32 i = 0; i < cl->GetCodeCount(); i++) - { - const CheatCode& cc = cl->GetCode(i); - options.emplace_back(cc.description.c_str(), cc.enabled); - } - - auto callback = [](s32 index, const std::string& title, bool checked) { - if (index < 0) - { - ReturnToPreviousWindow(); - return; - } - + Host::RunOnCPUThread([]() { CheatList* cl = System::GetCheatList(); if (!cl) - return; + { + if (!System::LoadCheatListFromDatabase() || ((cl = System::GetCheatList()) == nullptr)) + { + Host::AddKeyedOSDMessage("load_cheat_list", + fmt::format(FSUI_FSTR("No cheats found for {}."), System::GetGameTitle()), 10.0f); + ReturnToPreviousWindow(); + return; + } + } - const CheatCode& cc = cl->GetCode(static_cast(index)); - if (cc.activation == CheatCode::Activation::Manual) - cl->ApplyCode(static_cast(index)); - else - System::SetCheatCodeState(static_cast(index), checked); - }; - OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_FROWN, "Cheat List"), true, std::move(options), std::move(callback)); + ImGuiFullscreen::ChoiceDialogOptions options; + options.reserve(cl->GetCodeCount()); + for (u32 i = 0; i < cl->GetCodeCount(); i++) + { + const CheatCode& cc = cl->GetCode(i); + options.emplace_back(cc.description.c_str(), cc.enabled); + } + + GPUThread::RunOnThread([options = std::move(options)]() mutable { + auto callback = [](s32 index, const std::string& title, bool checked) { + if (index < 0) + { + ReturnToPreviousWindow(); + return; + } + + Host::RunOnCPUThread([index, checked]() { + CheatList* cl = System::GetCheatList(); + if (!cl) + return; + + const CheatCode& cc = cl->GetCode(static_cast(index)); + if (cc.activation == CheatCode::Activation::Manual) + cl->ApplyCode(static_cast(index)); + else + System::SetCheatCodeState(static_cast(index), checked); + }); + }; + OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_FROWN, "Cheat List"), true, std::move(options), std::move(callback)); + }); + 
}); } void FullscreenUI::DoToggleAnalogMode() { // hacky way to toggle analog mode - for (u32 i = 0; i < NUM_CONTROLLER_AND_CARD_PORTS; i++) - { - Controller* ctrl = System::GetController(i); - if (!ctrl) - continue; - - const Controller::ControllerInfo* cinfo = Controller::GetControllerInfo(ctrl->GetType()); - if (!cinfo) - continue; - - for (const Controller::ControllerBindingInfo& bi : cinfo->bindings) + Host::RunOnCPUThread([]() { + for (u32 i = 0; i < NUM_CONTROLLER_AND_CARD_PORTS; i++) { - if (std::strcmp(bi.name, "Analog") == 0) + Controller* ctrl = System::GetController(i); + if (!ctrl) + continue; + + const Controller::ControllerInfo* cinfo = Controller::GetControllerInfo(ctrl->GetType()); + if (!cinfo) + continue; + + for (const Controller::ControllerBindingInfo& bi : cinfo->bindings) { - ctrl->SetBindState(bi.bind_index, 1.0f); - ctrl->SetBindState(bi.bind_index, 0.0f); - break; + if (std::strcmp(bi.name, "Analog") == 0) + { + ctrl->SetBindState(bi.bind_index, 1.0f); + ctrl->SetBindState(bi.bind_index, 0.0f); + break; + } } } - } + }); } void FullscreenUI::DoRequestExit() @@ -3720,12 +3796,9 @@ void FullscreenUI::DrawControllerSettingsPage() &Settings::GetMultitapModeName, &Settings::GetMultitapModeDisplayName, MultitapMode::Count); // load mtap settings - MultitapMode mtap_mode = g_settings.multitap_mode; - if (IsEditingGameSettings(bsi)) - { - mtap_mode = Settings::ParseMultitapModeName(bsi->GetTinyStringValue("ControllerPorts", "MultitapMode", "").c_str()) - .value_or(g_settings.multitap_mode); - } + const MultitapMode mtap_mode = + Settings::ParseMultitapModeName(bsi->GetTinyStringValue("ControllerPorts", "MultitapMode", "").c_str()) + .value_or(Settings::DEFAULT_MULTITAP_MODE); const std::array mtap_enabled = { {(mtap_mode == MultitapMode::Port1Only || mtap_mode == MultitapMode::BothPorts), (mtap_mode == MultitapMode::Port2Only || mtap_mode == MultitapMode::BothPorts)}}; @@ -7076,31 +7149,35 @@ void FullscreenUI::DrawAboutWindow() void 
FullscreenUI::OpenAchievementsWindow() { + if (!System::IsValid()) + return; + if (!Achievements::IsActive()) { Host::AddKeyedOSDMessage("achievements_disabled", FSUI_STR("Achievements are not enabled."), Host::OSD_INFO_DURATION); return; } - - if (!System::IsValid() || !Initialize()) - return; - - if (!Achievements::HasAchievements() || !Achievements::PrepareAchievementsWindow()) + else if (!Achievements::HasAchievements()) { ShowToast(std::string(), FSUI_STR("This game has no achievements.")); return; } - if (s_current_main_window != MainWindowType::PauseMenu) - { - PauseForMenuOpen(false); - ForceKeyNavEnabled(); - } + GPUThread::RunOnThread([]() { + if (!Initialize() || !Achievements::PrepareAchievementsWindow()) + return; - s_current_main_window = MainWindowType::Achievements; - QueueResetFocus(); - FixStateIfPaused(); + if (s_current_main_window != MainWindowType::PauseMenu) + { + PauseForMenuOpen(false); + ForceKeyNavEnabled(); + } + + s_current_main_window = MainWindowType::Achievements; + QueueResetFocus(); + FixStateIfPaused(); + }); } bool FullscreenUI::IsAchievementsWindowOpen() @@ -7110,31 +7187,35 @@ bool FullscreenUI::IsAchievementsWindowOpen() void FullscreenUI::OpenLeaderboardsWindow() { + if (!System::IsValid()) + return; + if (!Achievements::IsActive()) { Host::AddKeyedOSDMessage("achievements_disabled", FSUI_STR("Leaderboards are not enabled."), Host::OSD_INFO_DURATION); return; } - - if (!System::IsValid() || !Initialize()) - return; - - if (!Achievements::HasLeaderboards() || !Achievements::PrepareLeaderboardsWindow()) + else if (!Achievements::HasLeaderboards()) { ShowToast(std::string(), FSUI_STR("This game has no leaderboards.")); return; } - if (s_current_main_window != MainWindowType::PauseMenu) - { - PauseForMenuOpen(false); - ForceKeyNavEnabled(); - } + GPUThread::RunOnThread([]() { + if (!Initialize() || !Achievements::PrepareLeaderboardsWindow()) + return; - s_current_main_window = MainWindowType::Leaderboards; - QueueResetFocus(); - 
FixStateIfPaused(); + if (s_current_main_window != MainWindowType::PauseMenu) + { + PauseForMenuOpen(false); + ForceKeyNavEnabled(); + } + + s_current_main_window = MainWindowType::Leaderboards; + QueueResetFocus(); + FixStateIfPaused(); + }); } bool FullscreenUI::IsLeaderboardsWindowOpen() diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 57f33bc34..ccf0e68ee 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -3,6 +3,7 @@ #include "gpu.h" #include "dma.h" +#include "gpu_backend.h" #include "gpu_shadergen.h" #include "host.h" #include "imgui.h" @@ -28,9 +29,6 @@ #include "common/small_string.h" #include "common/string_util.h" -#include "IconsFontAwesome5.h" -#include "fmt/format.h" - #include #include @@ -42,39 +40,17 @@ u16 g_gpu_clut[GPU_CLUT_SIZE]; const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable(); -static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, - u32 texture_data_stride, GPUTexture::Format texture_format, - bool display_osd_message, bool use_thread); -static void JoinScreenshotThreads(); - -static std::deque s_screenshot_threads; -static std::mutex s_screenshot_threads_mutex; - // #define PSX_GPU_STATS #ifdef PSX_GPU_STATS static u64 s_active_gpu_cycles = 0; static u32 s_active_gpu_cycles_frames = 0; #endif -static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; +GPU::GPU() = default; -GPU::GPU() -{ - ResetStatistics(); -} +GPU::~GPU() = default; -GPU::~GPU() -{ - JoinScreenshotThreads(); - DestroyDeinterlaceTextures(); - g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); - - if (g_gpu_device) - g_gpu_device->SetGPUTimingEnabled(false); -} - -bool GPU::Initialize() +void GPU::Initialize() { m_force_progressive_scan = g_settings.gpu_disable_interlacing; m_force_ntsc_timings = 
g_settings.gpu_force_ntsc_timings; @@ -91,26 +67,14 @@ bool GPU::Initialize() m_console_is_pal = System::IsPALRegion(); UpdateCRTCConfig(); - if (!CompileDisplayPipelines(true, true, g_settings.gpu_24bit_chroma_smoothing)) - { - Host::ReportErrorAsync("Error", "Failed to compile base GPU pipelines."); - return false; - } - - g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); - #ifdef PSX_GPU_STATS s_active_gpu_cycles = 0; s_active_gpu_cycles_frames = 0; #endif - - return true; } void GPU::UpdateSettings(const Settings& old_settings) { - FlushRender(); - m_force_progressive_scan = g_settings.gpu_disable_interlacing; m_fifo_size = g_settings.gpu_fifo_size; m_max_run_ahead = g_settings.gpu_max_run_ahead; @@ -124,24 +88,6 @@ void GPU::UpdateSettings(const Settings& old_settings) // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); - - if (g_settings.display_scaling != old_settings.display_scaling || - g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || - g_settings.gpu_24bit_chroma_smoothing != old_settings.gpu_24bit_chroma_smoothing) - { - // Toss buffers on mode change. 
- if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) - DestroyDeinterlaceTextures(); - - if (!CompileDisplayPipelines(g_settings.display_scaling != old_settings.display_scaling, - g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, - g_settings.gpu_24bit_chroma_smoothing != old_settings.gpu_24bit_chroma_smoothing)) - { - Panic("Failed to compile display pipeline on settings change."); - } - } - - g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); } void GPU::CPUClockChanged() @@ -149,20 +95,6 @@ void GPU::CPUClockChanged() UpdateCRTCConfig(); } -void GPU::UpdateResolutionScale() -{ -} - -std::tuple GPU::GetEffectiveDisplayResolution(bool scaled /* = true */) -{ - return std::tie(m_crtc_state.display_vram_width, m_crtc_state.display_vram_height); -} - -std::tuple GPU::GetFullDisplayResolution(bool scaled /* = true */) -{ - return std::tie(m_crtc_state.display_width, m_crtc_state.display_height); -} - void GPU::Reset(bool clear_vram) { m_GPUSTAT.bits = 0x14802000; @@ -177,12 +109,6 @@ void GPU::Reset(bool clear_vram) m_crtc_state.interlaced_field = 0; m_crtc_state.interlaced_display_field = 0; - if (clear_vram) - { - std::memset(g_vram, 0, sizeof(g_vram)); - std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); - } - // Cancel VRAM writes. m_blitter_state = BlitterState::Idle; @@ -191,12 +117,14 @@ void GPU::Reset(bool clear_vram) m_command_tick_event->Deactivate(); SoftReset(); - UpdateDisplay(); + + // Can skip the VRAM clear if it's not a hardware reset. 
+ if (clear_vram) + GPUBackend::PushCommand(GPUBackend::NewClearVRAMCommand()); } void GPU::SoftReset() { - FlushRender(); if (m_blitter_state == BlitterState::WritingVRAM) FinishVRAMWrite(); @@ -244,12 +172,21 @@ void GPU::SoftReset() bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) { - FlushRender(); - if (sw.IsReading()) { // perform a reset to discard all pending draws/fb state - Reset(host_texture == nullptr); + Reset(false); + } + else + { + // Need to ensure our copy of VRAM is good. + // TODO: This can be slightly less sucky for state loads, because we can just queue it. + // This will impact runahead. + GPUBackendDoStateCommand* cmd = GPUBackend::NewDoStateCommand(); + cmd->host_texture = host_texture; + cmd->is_reading = sw.IsReading(); + cmd->update_display = update_display; + GPUBackend::PushCommandAndSync(cmd, true); } sw.Do(&m_GPUSTAT.bits); @@ -349,8 +286,6 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ if (sw.IsReading()) { - m_draw_mode.texture_page_changed = true; - m_draw_mode.texture_window_changed = true; m_drawing_area_changed = true; SetClampedDrawingArea(); UpdateDMARequest(); @@ -366,20 +301,27 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ if (sw.IsReading()) { + m_drawing_area_changed = true; + UpdateCRTCConfig(); if (update_display) - UpdateDisplay(); + UpdateDisplay(true); UpdateCommandTickEvent(); + + GPUBackendDoStateCommand* cmd = GPUBackend::NewDoStateCommand(); + cmd->host_texture = host_texture; + cmd->is_reading = sw.IsReading(); + cmd->update_display = update_display; + if (host_texture) + GPUBackend::PushCommandAndSync(cmd, true); + else + GPUBackend::PushCommand(cmd); } return !sw.HasError(); } -void GPU::RestoreDeviceContext() -{ -} - void GPU::UpdateDMARequest() { switch (m_blitter_state) @@ -1000,8 +942,7 @@ void GPU::CRTCTickEvent(TickCount ticks) // flush any pending draws and "scan out" the image // TODO: move 
present in here I guess - FlushRender(); - UpdateDisplay(); + UpdateDisplay(true); TimingEvents::SetFrameDone(); // switch fields early. this is needed so we draw to the correct one. @@ -1091,7 +1032,8 @@ void GPU::UpdateCommandTickEvent() void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, float* display_y) const { - const GSVector4i draw_rc = CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight(), true); + // TODO: FIXME + const GSVector4i draw_rc = GSVector4i::zero(); // convert coordinates to active display region, then to full display region const float scaled_display_x = (window_x - static_cast(draw_rc.left)) / static_cast(draw_rc.width()); @@ -1281,7 +1223,7 @@ void GPU::WriteGP1(u32 value) SynchronizeCRTC(); m_crtc_state.regs.display_address_start = new_value; UpdateCRTCDisplayParameters(); - OnBufferSwapped(); + GPUBackend::PushCommand(GPUBackend::NewBufferSwappedCommand()); } } break; @@ -1448,9 +1390,14 @@ void GPU::UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut) if ((clut.bits != m_current_clut_reg_bits) || BoolToUInt8(needs_8bit) > BoolToUInt8(m_current_clut_is_8bit)) { DEBUG_LOG("Reloading CLUT from {},{}, {}", clut.GetXBase(), clut.GetYBase(), needs_8bit ? "8-bit" : "4-bit"); - UpdateCLUT(clut, needs_8bit); m_current_clut_reg_bits = clut.bits; m_current_clut_is_8bit = needs_8bit; + + GPUBackendUpdateCLUTCommand* cmd = GPUBackend::NewUpdateCLUTCommand(); + FillBackendCommandParameters(cmd); + cmd->reg.bits = clut.bits; + cmd->clut_is_8bit = needs_8bit; + GPUBackend::PushCommand(cmd); } } @@ -1465,174 +1412,6 @@ bool GPU::IsCLUTValid() const return (m_current_clut_reg_bits != std::numeric_limits::max()); } -void GPU::ClearDisplay() -{ - ClearDisplayTexture(); - - // Just recycle the textures, it'll get re-fetched. 
- DestroyDeinterlaceTextures(); -} - -void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ -} - -void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - const u16 color16 = VRAMRGBA8888ToRGBA5551(color); - if ((x + width) <= VRAM_WIDTH && !IsInterlacedRenderingEnabled()) - { - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16); - } - } - else if (IsInterlacedRenderingEnabled()) - { - // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. - if (IsCRTCScanlinePending()) - SynchronizeCRTC(); - - const u32 active_field = GetActiveLineLSB(); - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - if ((row & u32(1)) == active_field) - continue; - - u16* row_ptr = &g_vram[row * VRAM_WIDTH]; - for (u32 xoffs = 0; xoffs < width; xoffs++) - { - const u32 col = (x + xoffs) % VRAM_WIDTH; - row_ptr[col] = color16; - } - } - } - else - { - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - u16* row_ptr = &g_vram[row * VRAM_WIDTH]; - for (u32 xoffs = 0; xoffs < width; xoffs++) - { - const u32 col = (x + xoffs) % VRAM_WIDTH; - row_ptr[col] = color16; - } - } - } -} - -void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) -{ - // Fast path when the copy is not oversized. - if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask) - { - const u16* src_ptr = static_cast(data); - u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x]; - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - std::copy_n(src_ptr, width, dst_ptr); - src_ptr += width; - dst_ptr += VRAM_WIDTH; - } - } - else - { - // Slow path when we need to handle wrap-around. 
- // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or } - const u16* src_ptr = static_cast(data); - const u16 mask_and = check_mask ? 0x8000 : 0; - const u16 mask_or = set_mask ? 0x8000 : 0; - - for (u32 row = 0; row < height;) - { - u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; - for (u32 col = 0; col < width;) - { - // TODO: Handle unaligned reads... - u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; - if (((*pixel_ptr) & mask_and) == 0) - *pixel_ptr = *(src_ptr++) | mask_or; - } - } - } -} - -void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - // Break up oversized copies. This behavior has not been verified on console. - if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) - { - u32 remaining_rows = height; - u32 current_src_y = src_y; - u32 current_dst_y = dst_y; - while (remaining_rows > 0) - { - const u32 rows_to_copy = - std::min(remaining_rows, std::min(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); - - u32 remaining_columns = width; - u32 current_src_x = src_x; - u32 current_dst_x = dst_x; - while (remaining_columns > 0) - { - const u32 columns_to_copy = - std::min(remaining_columns, std::min(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); - CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy); - current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; - current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; - remaining_columns -= columns_to_copy; - } - - current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; - current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; - remaining_rows -= rows_to_copy; - } - - return; - } - - // This doesn't have a fast path, but do we really need one? It's not common. 
- const u16 mask_and = m_GPUSTAT.GetMaskAND(); - const u16 mask_or = m_GPUSTAT.GetMaskOR(); - - // Copy in reverse when src_x < dst_x, this is verified on console. - if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) - { - for (u32 row = 0; row < height; row++) - { - const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - - for (s32 col = static_cast(width - 1); col >= 0; col--) - { - const u16 src_pixel = src_row_ptr[(src_x + static_cast(col)) % VRAM_WIDTH]; - u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast(col)) % VRAM_WIDTH]; - if ((*dst_pixel_ptr & mask_and) == 0) - *dst_pixel_ptr = src_pixel | mask_or; - } - } - } - else - { - for (u32 row = 0; row < height; row++) - { - const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - - for (u32 col = 0; col < width; col++) - { - const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; - u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; - if ((*dst_pixel_ptr & mask_and) == 0) - *dst_pixel_ptr = src_pixel | mask_or; - } - } - } -} - void GPU::SetClampedDrawingArea() { if (m_drawing_area.left > m_drawing_area.right || m_drawing_area.top > m_drawing_area.bottom) [[unlikely]] @@ -1654,16 +1433,8 @@ void GPU::SetDrawMode(u16 value) if (!m_set_texture_disable_mask) new_mode_reg.texture_disable = false; - if (new_mode_reg.bits == m_draw_mode.mode_reg.bits) - return; - - m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) != - (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK)); m_draw_mode.mode_reg.bits = new_mode_reg.bits; - if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field) - FlushRender(); - // Bits 0..10 are returned in the GPU status register. 
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) | (ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK); @@ -1673,11 +1444,7 @@ void GPU::SetDrawMode(u16 value) void GPU::SetTexturePalette(u16 value) { value &= DrawMode::PALETTE_MASK; - if (m_draw_mode.palette_reg.bits == value) - return; - m_draw_mode.palette_reg.bits = value; - m_draw_mode.texture_page_changed = true; } void GPU::SetTextureWindow(u32 value) @@ -1686,8 +1453,6 @@ void GPU::SetTextureWindow(u32 value) if (m_draw_mode.texture_window_value == value) return; - FlushRender(); - const u8 mask_x = Truncate8(value & UINT32_C(0x1F)); const u8 mask_y = Truncate8((value >> 5) & UINT32_C(0x1F)); const u8 offset_x = Truncate8((value >> 10) & UINT32_C(0x1F)); @@ -1699,1022 +1464,77 @@ void GPU::SetTextureWindow(u32 value) m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u; m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u; m_draw_mode.texture_window_value = value; - m_draw_mode.texture_window_changed = true; } -void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit) +void GPU::ReadVRAM(u16 x, u16 y, u16 width, u16 height) { - const u16* src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH]; - const u32 start_x = reg.GetXBase(); - if (!clut_is_8bit) + GPUBackendReadVRAMCommand* cmd = GPUBackend::NewReadVRAMCommand(); + cmd->x = x; + cmd->y = y; + cmd->width = width; + cmd->height = height; + GPUBackend::PushCommandAndSync(cmd, true); +} + +void GPU::UpdateVRAM(u16 x, u16 y, u16 width, u16 height, const void* data, bool set_mask, bool check_mask) +{ + const u32 num_words = width * height; + GPUBackendUpdateVRAMCommand* cmd = GPUBackend::NewUpdateVRAMCommand(num_words); + cmd->params.bits = 0; + cmd->params.set_mask_while_drawing = set_mask; + cmd->params.check_mask_before_draw = check_mask; + cmd->x = x; + cmd->y = y; + cmd->width = width; + cmd->height = height; + std::memcpy(cmd->data, data, num_words * sizeof(u16)); + GPUBackend::PushCommand(cmd); +} 
+ +void GPU::ClearDisplay() +{ + GPUBackend::PushCommand(GPUBackend::NewClearDisplayCommand()); +} + +void GPU::UpdateDisplay(bool present_frame) +{ + GPUBackendUpdateDisplayCommand* cmd = GPUBackend::NewUpdateDisplayCommand(); + cmd->display_width = m_crtc_state.display_width; + cmd->display_height = m_crtc_state.display_height; + cmd->display_origin_left = m_crtc_state.display_origin_left; + cmd->display_origin_top = m_crtc_state.display_origin_top; + cmd->display_vram_left = m_crtc_state.display_vram_left; + cmd->display_vram_top = m_crtc_state.display_vram_top; + cmd->display_vram_width = m_crtc_state.display_vram_width; + cmd->display_vram_height = m_crtc_state.display_vram_height; + cmd->X = m_crtc_state.regs.X; + cmd->bits = 0; + cmd->interlaced_display_enabled = IsInterlacedDisplayEnabled(); + cmd->interlaced_display_field = GetInterlacedDisplayField(); + cmd->interlaced_display_interleaved = cmd->interlaced_display_enabled && m_GPUSTAT.vertical_resolution; + cmd->display_24bit = m_GPUSTAT.display_area_color_depth_24; + cmd->display_disabled = IsDisplayDisabled(); + cmd->display_aspect_ratio = ComputeDisplayAspectRatio(); + if (present_frame) { - // Wraparound can't happen in 4-bit mode. 
- std::memcpy(dest, &src_row[start_x], sizeof(u16) * 16); + bool should_allow_present_skip; + System::GetFramePresentationDetails(&present_frame, &should_allow_present_skip, &cmd->present_time); + cmd->present_frame = present_frame; + cmd->allow_present_skip = should_allow_present_skip; } else { - if ((start_x + 256) > VRAM_WIDTH) [[unlikely]] - { - const u32 end = VRAM_WIDTH - start_x; - const u32 start = 256 - end; - std::memcpy(dest, &src_row[start_x], sizeof(u16) * end); - std::memcpy(dest + end, src_row, sizeof(u16) * start); - } - else - { - std::memcpy(dest, &src_row[start_x], sizeof(u16) * 256); - } - } -} - -bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing) -{ - GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, - g_gpu_device->GetFeatures().framebuffer_fetch); - - GPUPipeline::GraphicsConfig plconfig; - plconfig.input_layout.vertex_stride = 0; - plconfig.primitive = GPUPipeline::Primitive::Triangles; - plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); - plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); - plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - plconfig.geometry_shader = nullptr; - plconfig.depth_format = GPUTexture::Format::Unknown; - plconfig.samples = 1; - plconfig.per_sample_shading = false; - plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; - - if (display) - { - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? 
g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8); - - std::string vs = shadergen.GenerateDisplayVertexShader(); - std::string fs; - switch (g_settings.display_scaling) - { - case DisplayScalingMode::BilinearSharp: - fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); - break; - - case DisplayScalingMode::BilinearSmooth: - fs = shadergen.GenerateDisplayFragmentShader(true); - break; - - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - default: - fs = shadergen.GenerateDisplayFragmentShader(false); - break; - } - - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs); - std::unique_ptr fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Display Vertex Shader"); - GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", - Settings::GetDisplayScalingName(g_settings.display_scaling)); - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig))) - return false; - GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", - Settings::GetDisplayScalingName(g_settings.display_scaling)); + cmd->present_time = 0; + cmd->present_frame = false; + cmd->allow_present_skip = false; } - if (deinterlace) - { - plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); + const bool drain_one = present_frame && GPUBackend::BeginQueueFrame(); - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), - shadergen.GenerateScreenQuadVertexShader()); - if (!vso) - return false; - GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); + GPUBackend::PushCommandAndWakeThread(cmd); - std::unique_ptr fso; - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateInterleavedFieldExtractFragmentShader()))) - { - return false; - } - 
- GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig))) - return false; - - GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); - - switch (g_settings.display_deinterlacing_mode) - { - case DisplayDeinterlacingMode::Disabled: - break; - - case DisplayDeinterlacingMode::Weave: - { - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateDeinterlaceWeaveFragmentShader()))) - { - return false; - } - - GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); - } - break; - - case DisplayDeinterlacingMode::Blend: - { - if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateDeinterlaceBlendFragmentShader()))) - { - return false; - } - - GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); - } - break; - - case DisplayDeinterlacingMode::Adaptive: - { - fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateFastMADReconstructFragmentShader()); - if (!fso) - return false; - - GL_OBJECT_NAME(fso, "FastMAD Reconstruct 
Fragment Shader"); - - plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; - plconfig.fragment_shader = fso.get(); - if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) - return false; - - GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); - } - break; - - default: - UnreachableCode(); - } - } - - if (chroma_smoothing) - { - m_chroma_smoothing_pipeline.reset(); - g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); - - if (g_settings.gpu_24bit_chroma_smoothing) - { - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); - - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), - shadergen.GenerateScreenQuadVertexShader()); - std::unique_ptr fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateChromaSmoothingFragmentShader()); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); - GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); - - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader = fso.get(); - if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig))) - return false; - GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); - } - } - - return true; -} - -void GPU::ClearDisplayTexture() -{ - m_display_texture = nullptr; - m_display_texture_view_x = 0; - m_display_texture_view_y = 0; - m_display_texture_view_width = 0; - m_display_texture_view_height = 0; -} - -void GPU::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, - s32 view_height) -{ - DebugAssert(texture); - m_display_texture = texture; - m_display_depth_buffer = depth_buffer; - m_display_texture_view_x = view_x; - m_display_texture_view_y = view_y; - m_display_texture_view_width = view_width; - 
m_display_texture_view_height = view_height; -} - -bool GPU::PresentDisplay() -{ - FlushRender(); - - const GSVector4i draw_rect = CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight()); - return RenderDisplay(nullptr, draw_rect, !g_settings.debugging.show_vram); -} - -bool GPU::RenderDisplay(GPUTexture* target, const GSVector4i draw_rect, bool postfx) -{ - GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); - - if (m_display_texture) - m_display_texture->MakeReadyForSampling(); - - // Internal post-processing. - GPUTexture* display_texture = m_display_texture; - s32 display_texture_view_x = m_display_texture_view_x; - s32 display_texture_view_y = m_display_texture_view_y; - s32 display_texture_view_width = m_display_texture_view_width; - s32 display_texture_view_height = m_display_texture_view_height; - if (postfx && display_texture && PostProcessing::InternalChain.IsActive() && - PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width, - display_texture_view_height)) - { - DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 && - static_cast(display_texture->GetWidth()) == display_texture_view_width && - static_cast(display_texture->GetHeight()) == display_texture_view_height); - - // Now we can apply the post chain. - GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); - if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, - GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), - display_texture_view_width, display_texture_view_height, - m_crtc_state.display_width, m_crtc_state.display_height)) - { - display_texture_view_x = 0; - display_texture_view_y = 0; - display_texture = post_output_texture; - display_texture->MakeReadyForSampling(); - } - } - - const GPUTexture::Format hdformat = target ? 
target->GetFormat() : g_gpu_device->GetWindowFormat(); - const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetWindowWidth(); - const u32 target_height = target ? target->GetHeight() : g_gpu_device->GetWindowHeight(); - const bool really_postfx = - (postfx && PostProcessing::DisplayChain.IsActive() && !g_gpu_device->GetWindowInfo().IsSurfaceless() && - hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && - PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); - const GSVector4i real_draw_rect = - g_gpu_device->UsesLowerLeftOrigin() ? GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect; - if (really_postfx) - { - g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), 0); - g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture()); - } - else - { - if (target) - g_gpu_device->SetRenderTarget(target); - else if (!g_gpu_device->BeginPresent(false)) - return false; - } - - if (display_texture) - { - bool texture_filter_linear = false; - - struct Uniforms - { - float src_rect[4]; - float src_size[4]; - float clamp_rect[4]; - float params[4]; - } uniforms; - std::memset(uniforms.params, 0, sizeof(uniforms.params)); - - switch (g_settings.display_scaling) - { - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - break; - - case DisplayScalingMode::BilinearSmooth: - case DisplayScalingMode::BlinearInteger: - texture_filter_linear = true; - break; - - case DisplayScalingMode::BilinearSharp: - { - texture_filter_linear = true; - uniforms.params[0] = std::max( - std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f); - uniforms.params[1] = std::max( - std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f); - uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; - uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; - } - break; - - 
default: - UnreachableCode(); - break; - } - - g_gpu_device->SetPipeline(m_display_pipeline.get()); - g_gpu_device->SetTextureSampler( - 0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); - - // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because - // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. - const float rcp_width = 1.0f / static_cast(display_texture->GetWidth()); - const float rcp_height = 1.0f / static_cast(display_texture->GetHeight()); - uniforms.src_rect[0] = static_cast(display_texture_view_x) * rcp_width; - uniforms.src_rect[1] = static_cast(display_texture_view_y) * rcp_height; - uniforms.src_rect[2] = static_cast(display_texture_view_width) * rcp_width; - uniforms.src_rect[3] = static_cast(display_texture_view_height) * rcp_height; - uniforms.clamp_rect[0] = (static_cast(display_texture_view_x) + 0.5f) * rcp_width; - uniforms.clamp_rect[1] = (static_cast(display_texture_view_y) + 0.5f) * rcp_height; - uniforms.clamp_rect[2] = - (static_cast(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width; - uniforms.clamp_rect[3] = - (static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height; - uniforms.src_size[0] = static_cast(display_texture->GetWidth()); - uniforms.src_size[1] = static_cast(display_texture->GetHeight()); - uniforms.src_size[2] = rcp_width; - uniforms.src_size[3] = rcp_height; - g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); - - g_gpu_device->SetViewportAndScissor(real_draw_rect); - g_gpu_device->Draw(3, 0); - } - - if (really_postfx) - { - DebugAssert(!g_settings.debugging.show_vram); - - // "original size" in postfx includes padding. - const float upscale_x = m_display_texture ? 
static_cast(m_display_texture_view_width) / - static_cast(m_crtc_state.display_vram_width) : - 1.0f; - const float upscale_y = m_display_texture ? static_cast(m_display_texture_view_height) / - static_cast(m_crtc_state.display_vram_height) : - 1.0f; - const s32 orig_width = static_cast(std::ceil(static_cast(m_crtc_state.display_width) * upscale_x)); - const s32 orig_height = static_cast(std::ceil(static_cast(m_crtc_state.display_height) * upscale_y)); - - return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, - real_draw_rect, orig_width, orig_height, m_crtc_state.display_width, - m_crtc_state.display_height); - } - else - return true; -} - -void GPU::DestroyDeinterlaceTextures() -{ - for (std::unique_ptr& tex : m_deinterlace_buffers) - g_gpu_device->RecycleTexture(std::move(tex)); - g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); - m_current_deinterlace_buffer = 0; -} - -bool GPU::Deinterlace(u32 field, u32 line_skip) -{ - GPUTexture* src = m_display_texture; - const u32 x = m_display_texture_view_x; - const u32 y = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; - - switch (g_settings.display_deinterlacing_mode) - { - case DisplayDeinterlacingMode::Disabled: - { - if (line_skip == 0) - return true; - - // Still have to extract the field. 
- if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] - return false; - - SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height); - return true; - } - - case DisplayDeinterlacingMode::Weave: - { - GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); - - const u32 full_height = height * 2; - if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - src->MakeReadyForSampling(); - - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, field, line_skip}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); - return true; - } - - case DisplayDeinterlacingMode::Blend: - { - constexpr u32 NUM_BLEND_BUFFERS = 2; - - GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); - - const u32 this_buffer = m_current_deinterlace_buffer; - m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; - GL_INS_FMT("Current buffer: {}", this_buffer); - if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || - !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - // TODO: could be implemented with alpha blending instead.. 
- - g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height); - return true; - } - - case DisplayDeinterlacingMode::Adaptive: - { - GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, - line_skip); - - const u32 full_height = height * 2; - const u32 this_buffer = m_current_deinterlace_buffer; - m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; - GL_INS_FMT("Current buffer: {}", this_buffer); - if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || - !DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]] - { - ClearDisplayTexture(); - return false; - } - - g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); - g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); - g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), - g_gpu_device->GetNearestSampler()); - g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), - 
g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {field, full_height}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); - g_gpu_device->Draw(3, 0); - - m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); - return true; - } - - default: - UnreachableCode(); - } -} - -bool GPU::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip) -{ - if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width || - m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, false)) [[unlikely]] - { - return false; - } - - GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx); - } - - GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get(); - g_gpu_device->InvalidateRenderTarget(dst); - - // If we're not skipping lines, then we can simply copy the texture. - if (line_skip == 0 && src->GetFormat() == dst->GetFormat()) - { - GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip); - g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height); - } - else - { - GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height, - line_skip); - - // Otherwise, we need to extract every other line from the texture. 
- src->MakeReadyForSampling(); - g_gpu_device->SetRenderTarget(dst); - g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); - g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, line_skip}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - GL_POP(); - } - - dst->MakeReadyForSampling(); - return true; -} - -bool GPU::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) -{ - if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || - m_deinterlace_texture->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, preserve)) [[unlikely]] - { - return false; - } - - GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); - } - - return true; -} - -bool GPU::ApplyChromaSmoothing() -{ - const u32 x = m_display_texture_view_x; - const u32 y = m_display_texture_view_y; - const u32 width = m_display_texture_view_width; - const u32 height = m_display_texture_view_height; - if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || - m_chroma_smoothing_texture->GetHeight() != height) - { - if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, false)) - { - ClearDisplayTexture(); - return false; - } - - GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); - } - - GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); - - m_display_texture->MakeReadyForSampling(); - g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); - g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); - g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); - g_gpu_device->SetTextureSampler(0, 
m_display_texture, g_gpu_device->GetNearestSampler()); - const u32 uniforms[] = {x, y, width - 1, height - 1}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); - g_gpu_device->Draw(3, 0); - - m_chroma_smoothing_texture->MakeReadyForSampling(); - SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); - return true; -} - -GSVector4i GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_aspect_ratio /* = true */) const -{ - const bool integer_scale = (g_settings.display_scaling == DisplayScalingMode::NearestInteger || - g_settings.display_scaling == DisplayScalingMode::BlinearInteger); - const bool show_vram = g_settings.debugging.show_vram; - const float display_aspect_ratio = ComputeDisplayAspectRatio(); - const float window_ratio = static_cast(window_width) / static_cast(window_height); - const float crtc_display_width = static_cast(show_vram ? VRAM_WIDTH : m_crtc_state.display_width); - const float crtc_display_height = static_cast(show_vram ? VRAM_HEIGHT : m_crtc_state.display_height); - const float x_scale = - apply_aspect_ratio ? - (display_aspect_ratio / (static_cast(crtc_display_width) / static_cast(crtc_display_height))) : - 1.0f; - float display_width = crtc_display_width; - float display_height = crtc_display_height; - float active_left = static_cast(show_vram ? 0 : m_crtc_state.display_origin_left); - float active_top = static_cast(show_vram ? 0 : m_crtc_state.display_origin_top); - float active_width = static_cast(show_vram ? VRAM_WIDTH : m_crtc_state.display_vram_width); - float active_height = static_cast(show_vram ? 
VRAM_HEIGHT : m_crtc_state.display_vram_height); - if (!g_settings.display_stretch_vertically) - { - display_width *= x_scale; - active_left *= x_scale; - active_width *= x_scale; - } - else - { - display_height /= x_scale; - active_top /= x_scale; - active_height /= x_scale; - } - - // now fit it within the window - float scale; - float left_padding, top_padding; - if ((display_width / display_height) >= window_ratio) - { - // align in middle vertically - scale = static_cast(window_width) / display_width; - if (integer_scale) - { - scale = std::max(std::floor(scale), 1.0f); - left_padding = std::max((static_cast(window_width) - display_width * scale) / 2.0f, 0.0f); - } - else - { - left_padding = 0.0f; - } - - switch (g_settings.display_alignment) - { - case DisplayAlignment::RightOrBottom: - top_padding = std::max(static_cast(window_height) - (display_height * scale), 0.0f); - break; - - case DisplayAlignment::Center: - top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); - break; - - case DisplayAlignment::LeftOrTop: - default: - top_padding = 0.0f; - break; - } - } - else - { - // align in middle horizontally - scale = static_cast(window_height) / display_height; - if (integer_scale) - { - scale = std::max(std::floor(scale), 1.0f); - top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); - } - else - { - top_padding = 0.0f; - } - - switch (g_settings.display_alignment) - { - case DisplayAlignment::RightOrBottom: - left_padding = std::max(static_cast(window_width) - (display_width * scale), 0.0f); - break; - - case DisplayAlignment::Center: - left_padding = std::max((static_cast(window_width) - (display_width * scale)) / 2.0f, 0.0f); - break; - - case DisplayAlignment::LeftOrTop: - default: - left_padding = 0.0f; - break; - } - } - - // TODO: This should be a float rectangle. But because GL is lame, it only has integer viewports... 
- const s32 left = static_cast(active_left * scale + left_padding); - const s32 top = static_cast(active_top * scale + top_padding); - const s32 right = left + static_cast(active_width * scale); - const s32 bottom = top + static_cast(active_height * scale); - return GSVector4i(left, top, right, bottom); -} - -bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, - u32 texture_data_stride, GPUTexture::Format texture_format, bool display_osd_message, - bool use_thread) -{ - std::string osd_key; - if (display_osd_message) - { - // Use a 60 second timeout to give it plenty of time to actually save. - osd_key = fmt::format("ScreenshotSaver_{}", filename); - Host::AddIconOSDMessage(osd_key, ICON_FA_CAMERA, - fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(filename)), - 60.0f); - } - - static constexpr auto proc = [](u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, - u32 texture_data_stride, GPUTexture::Format texture_format, std::string osd_key, - bool use_thread) { - bool result; - - const char* extension = std::strrchr(filename.c_str(), '.'); - if (extension) - { - if (GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) - { - if (clear_alpha) - { - for (u32& pixel : texture_data) - pixel |= 0xFF000000u; - } - - if (flip_y) - GPUTexture::FlipTextureDataRGBA8(width, height, reinterpret_cast(texture_data.data()), - texture_data_stride); - - Assert(texture_data_stride == sizeof(u32) * width); - RGBA8Image image(width, height, std::move(texture_data)); - if (image.SaveToFile(filename.c_str(), fp.get(), quality)) - { - result = true; - } - else - { - ERROR_LOG("Unknown extension in filename '{}' or save error: '{}'", filename, extension); - result = false; - } - } - 
else - { - result = false; - } - } - else - { - ERROR_LOG("Unable to determine file extension for '{}'", filename); - result = false; - } - - if (!osd_key.empty()) - { - Host::AddIconOSDMessage(std::move(osd_key), ICON_FA_CAMERA, - fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : - TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), - Path::GetFileName(filename), - result ? Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); - } - - if (use_thread) - { - // remove ourselves from the list, if the GS thread is waiting for us, we won't be in there - const auto this_id = std::this_thread::get_id(); - std::unique_lock lock(s_screenshot_threads_mutex); - for (auto it = s_screenshot_threads.begin(); it != s_screenshot_threads.end(); ++it) - { - if (it->get_id() == this_id) - { - it->detach(); - s_screenshot_threads.erase(it); - break; - } - } - } - - return result; - }; - - if (!use_thread) - { - return proc(width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y, - std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread); - } - - std::thread thread(proc, width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y, - std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread); - std::unique_lock lock(s_screenshot_threads_mutex); - s_screenshot_threads.push_back(std::move(thread)); - return true; -} - -void JoinScreenshotThreads() -{ - std::unique_lock lock(s_screenshot_threads_mutex); - while (!s_screenshot_threads.empty()) - { - std::thread save_thread(std::move(s_screenshot_threads.front())); - s_screenshot_threads.pop_front(); - lock.unlock(); - save_thread.join(); - lock.lock(); - } -} - -bool GPU::WriteDisplayTextureToFile(std::string filename, bool compress_on_thread /* = false */) -{ - if (!m_display_texture) - return false; - - const u32 read_x = static_cast(m_display_texture_view_x); - const u32 read_y = 
static_cast(m_display_texture_view_y); - const u32 read_width = static_cast(m_display_texture_view_width); - const u32 read_height = static_cast(m_display_texture_view_height); - - const u32 texture_data_stride = - Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4); - std::vector texture_data((texture_data_stride * read_height) / sizeof(u32)); - - std::unique_ptr dltex; - if (g_gpu_device->GetFeatures().memory_import) - { - dltex = - g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), texture_data.data(), - texture_data.size() * sizeof(u32), texture_data_stride); - } - if (!dltex) - { - if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) - { - ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, - GPUTexture::GetFormatName(m_display_texture->GetFormat())); - return false; - } - } - - dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); - if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride)) - { - RestoreDeviceContext(); - return false; - } - - RestoreDeviceContext(); - - Error error; - auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); - if (!fp) - { - ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); - return false; - } - - constexpr bool clear_alpha = true; - const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); - - return CompressAndWriteTextureToFile( - read_width, read_height, std::move(filename), std::move(fp), g_settings.display_screenshot_quality, clear_alpha, - flip_y, std::move(texture_data), texture_data_stride, m_display_texture->GetFormat(), false, compress_on_thread); -} - -bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i draw_rect, bool postfx, - std::vector* out_pixels, u32* out_stride, 
GPUTexture::Format* out_format) -{ - const GPUTexture::Format hdformat = - g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8; - - auto render_texture = - g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); - if (!render_texture) - return false; - - g_gpu_device->ClearRenderTarget(render_texture.get(), 0); - - // TODO: this should use copy shader instead. - RenderDisplay(render_texture.get(), draw_rect, postfx); - - const u32 stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(hdformat) * width, sizeof(u32)); - out_pixels->resize((height * stride) / sizeof(u32)); - - std::unique_ptr dltex; - if (g_gpu_device->GetFeatures().memory_import) - { - dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(), - out_pixels->size() * sizeof(u32), stride); - } - if (!dltex) - { - if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat))) - { - ERROR_LOG("Failed to create {}x{} download texture", width, height); - return false; - } - } - - dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); - if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride)) - { - RestoreDeviceContext(); - return false; - } - - *out_stride = stride; - *out_format = hdformat; - RestoreDeviceContext(); - return true; -} - -bool GPU::RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, - bool show_osd_message) -{ - u32 width = g_gpu_device->GetWindowWidth(); - u32 height = g_gpu_device->GetWindowHeight(); - GSVector4i draw_rect = CalculateDrawRect(width, height, true); - - const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_settings.debugging.show_vram); - if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) - { - if (mode == DisplayScreenshotMode::InternalResolution) - { - const 
u32 draw_width = static_cast(draw_rect.width()); - const u32 draw_height = static_cast(draw_rect.height()); - - // If internal res, scale the computed draw rectangle to the internal res. - // We re-use the draw rect because it's already been AR corrected. - const float sar = - static_cast(m_display_texture_view_width) / static_cast(m_display_texture_view_height); - const float dar = static_cast(draw_width) / static_cast(draw_height); - if (sar >= dar) - { - // stretch height, preserve width - const float scale = static_cast(m_display_texture_view_width) / static_cast(draw_width); - width = m_display_texture_view_width; - height = static_cast(std::round(static_cast(draw_height) * scale)); - } - else - { - // stretch width, preserve height - const float scale = static_cast(m_display_texture_view_height) / static_cast(draw_height); - width = static_cast(std::round(static_cast(draw_width) * scale)); - height = m_display_texture_view_height; - } - - // DX11 won't go past 16K texture size. - const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); - if (width > max_texture_size) - { - height = static_cast(static_cast(height) / - (static_cast(width) / static_cast(max_texture_size))); - width = max_texture_size; - } - if (height > max_texture_size) - { - height = max_texture_size; - width = static_cast(static_cast(width) / - (static_cast(height) / static_cast(max_texture_size))); - } - } - else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) - { - width = m_display_texture_view_width; - height = m_display_texture_view_height; - } - - // Remove padding, it's not part of the framebuffer. 
- draw_rect = GSVector4i(0, 0, static_cast(width), static_cast(height)); - } - if (width == 0 || height == 0) - return false; - - std::vector pixels; - u32 pixels_stride; - GPUTexture::Format pixels_format; - if (!RenderScreenshotToBuffer(width, height, draw_rect, !internal_resolution, &pixels, &pixels_stride, - &pixels_format)) - { - ERROR_LOG("Failed to render {}x{} screenshot", width, height); - return false; - } - - Error error; - auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); - if (!fp) - { - ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); - return false; - } - - return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), quality, true, - g_gpu_device->UsesLowerLeftOrigin(), std::move(pixels), pixels_stride, - pixels_format, show_osd_message, compress_on_thread); + if (drain_one) + GPUBackend::WaitForOneQueuedFrame(); } bool GPU::DumpVRAMToFile(const char* filename) @@ -2772,7 +1592,8 @@ void GPU::DrawDebugStateWindow() return; } - DrawRendererStats(); + // TODO: FIXME + // DrawRendererStats(is_idle_frame); if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen)) { @@ -2829,73 +1650,3 @@ void GPU::DrawDebugStateWindow() ImGui::End(); } - -void GPU::DrawRendererStats() -{ -} - -void GPU::OnBufferSwapped() -{ -} - -void GPU::GetStatsString(SmallStringBase& str) -{ - if (IsHardwareRenderer()) - { - str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", - GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives, - m_stats.host_num_draws, m_stats.host_num_barriers, m_stats.host_num_render_passes, - m_stats.host_num_downloads, m_stats.num_copies, m_stats.num_writes); - } - else - { - str.format("{} SW | {} P | {} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), - m_stats.num_primitives, m_stats.num_reads, m_stats.num_copies, m_stats.num_writes); - } -} - -void 
GPU::GetMemoryStatsString(SmallStringBase& str) -{ - const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); - const u32 stream_kb = static_cast((m_stats.host_buffer_streamed + (1024 - 1)) / 1024); - - str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, m_stats.host_num_copies, - m_stats.host_num_uploads); -} - -void GPU::ResetStatistics() -{ - m_counters = {}; - g_gpu_device->ResetStatistics(); -} - -void GPU::UpdateStatistics(u32 frame_count) -{ - const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); - const u32 round = (frame_count - 1); - -#define UPDATE_COUNTER(x) m_stats.x = (m_counters.x + round) / frame_count -#define UPDATE_GPU_STAT(x) m_stats.host_##x = (stats.x + round) / frame_count - - UPDATE_COUNTER(num_reads); - UPDATE_COUNTER(num_writes); - UPDATE_COUNTER(num_copies); - UPDATE_COUNTER(num_vertices); - UPDATE_COUNTER(num_primitives); - - // UPDATE_COUNTER(num_read_texture_updates); - // UPDATE_COUNTER(num_ubo_updates); - - UPDATE_GPU_STAT(buffer_streamed); - UPDATE_GPU_STAT(num_draws); - UPDATE_GPU_STAT(num_barriers); - UPDATE_GPU_STAT(num_render_passes); - UPDATE_GPU_STAT(num_copies); - UPDATE_GPU_STAT(num_downloads); - UPDATE_GPU_STAT(num_uploads); - -#undef UPDATE_GPU_STAT -#undef UPDATE_COUNTER - - ResetStatistics(); -} diff --git a/src/core/gpu.h b/src/core/gpu.h index 019138b28..6f4329a2f 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -28,14 +28,11 @@ class GPUDevice; class GPUTexture; class GPUPipeline; +class GPUBackend; struct Settings; class TimingEvent; -namespace Threading { -class Thread; -} - -class GPU +class GPU final { public: enum class BlitterState : u8 @@ -60,7 +57,6 @@ public: DOT_TIMER_INDEX = 0, HBLANK_TIMER_INDEX = 1, MAX_RESOLUTION_SCALE = 32, - DEINTERLACE_BUFFER_COUNT = 4, DRAWING_AREA_COORD_MASK = 1023, }; @@ -86,25 +82,14 @@ public: // Base class constructor. 
GPU(); - virtual ~GPU(); + ~GPU(); - virtual const Threading::Thread* GetSWThread() const = 0; - virtual bool IsHardwareRenderer() const = 0; - - virtual bool Initialize(); - virtual void Reset(bool clear_vram); - virtual bool DoState(StateWrapper& sw, GPUTexture** save_to_texture, bool update_display); - - // Graphics API state reset/restore - call when drawing the UI etc. - // TODO: replace with "invalidate cached state" - virtual void RestoreDeviceContext(); + void Initialize(); + void Reset(bool clear_vram); + bool DoState(StateWrapper& sw, GPUTexture** save_to_texture, bool update_display); // Render statistics debug window. void DrawDebugStateWindow(); - void GetStatsString(SmallStringBase& str); - void GetMemoryStatsString(SmallStringBase& str); - void ResetStatistics(); - void UpdateStatistics(u32 frame_count); void CPUClockChanged(); @@ -160,24 +145,12 @@ public: void SynchronizeCRTC(); /// Recompile shaders/recreate framebuffers when needed. - virtual void UpdateSettings(const Settings& old_settings); - - /// Updates the resolution scale when it's set to automatic. - virtual void UpdateResolutionScale(); - - /// Returns the effective display resolution of the GPU. - virtual std::tuple GetEffectiveDisplayResolution(bool scaled = true); - - /// Returns the full display resolution of the GPU, including padding. - virtual std::tuple GetFullDisplayResolution(bool scaled = true); + void UpdateSettings(const Settings& old_settings); float ComputeHorizontalFrequency() const; float ComputeVerticalFrequency() const; float ComputeDisplayAspectRatio() const; - static std::unique_ptr CreateHardwareRenderer(); - static std::unique_ptr CreateSoftwareRenderer(); - // Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns. void ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, float* display_y) const; @@ -203,30 +176,7 @@ public: // Dumps raw VRAM to a file. 
bool DumpVRAMToFile(const char* filename); - // Ensures all buffered vertices are drawn. - virtual void FlushRender() = 0; - - /// Helper function for computing the draw rectangle in a larger window. - GSVector4i CalculateDrawRect(s32 window_width, s32 window_height, bool apply_aspect_ratio = true) const; - - /// Helper function to save current display texture to PNG. - bool WriteDisplayTextureToFile(std::string filename, bool compress_on_thread = false); - - /// Renders the display, optionally with postprocessing to the specified image. - bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i draw_rect, bool postfx, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format); - - /// Helper function to save screenshot to PNG. - bool RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, - bool show_osd_message); - - /// Draws the current display texture, with any post-processing. - bool PresentDisplay(); - - /// Reads the CLUT from the specified coordinates, accounting for wrap-around. 
- static void ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit); - -protected: +private: TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const; TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const; @@ -237,16 +187,6 @@ protected: } ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; } - static constexpr std::tuple UnpackTexcoord(u16 texcoord) - { - return std::make_tuple(static_cast(texcoord), static_cast(texcoord >> 8)); - } - - static constexpr std::tuple UnpackColorRGB24(u32 rgb24) - { - return std::make_tuple(static_cast(rgb24), static_cast(rgb24 >> 8), static_cast(rgb24 >> 16)); - } - static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha); @@ -270,10 +210,10 @@ protected: void CommandTickEvent(TickCount ticks); /// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...). - ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); } + ALWAYS_INLINE u8 GetInterlacedDisplayField() const { return m_crtc_state.interlaced_field; } /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. - ALWAYS_INLINE u32 GetActiveLineLSB() const { return ZeroExtend32(m_crtc_state.active_line_lsb); } + ALWAYS_INLINE u8 GetActiveLineLSB() const { return m_crtc_state.active_line_lsb; } /// Updates drawing area that's suitablef or clamping. 
void SetClampedDrawingArea(); @@ -308,16 +248,15 @@ protected: void InvalidateCLUT(); bool IsCLUTValid() const; - // Rendering in the backend - virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height); - virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); - virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask); - virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); - virtual void DispatchRenderCommand() = 0; - virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; - virtual void UpdateDisplay() = 0; - virtual void DrawRendererStats(); - virtual void OnBufferSwapped(); + void ReadVRAM(u16 x, u16 y, u16 width, u16 height); + void UpdateVRAM(u16 x, u16 y, u16 width, u16 height, const void* data, bool set_mask, bool check_mask); + + void UpdateDisplay(bool present_frame); + + void PrepareForDraw(); + void FinishPolyline(); + void FillBackendCommandParameters(GPUBackendCommand* cmd) const; + void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(GSVector2i v1, GSVector2i v2, GSVector2i v3, bool shaded, bool textured, bool semitransparent) @@ -443,19 +382,10 @@ protected: u32 texture_window_value; // decoded values + // TODO: Make this a command GPUTextureWindow texture_window; bool texture_x_flip; bool texture_y_flip; - bool texture_page_changed; - bool texture_window_changed; - - ALWAYS_INLINE bool IsTexturePageChanged() const { return texture_page_changed; } - ALWAYS_INLINE void SetTexturePageChanged() { texture_page_changed = true; } - ALWAYS_INLINE void ClearTexturePageChangedFlag() { texture_page_changed = false; } - - ALWAYS_INLINE bool IsTextureWindowChanged() const { return texture_window_changed; } - ALWAYS_INLINE void SetTextureWindowChanged() { texture_window_changed = true; } - ALWAYS_INLINE void ClearTextureWindowChangedFlag() { texture_window_changed = 
false; } } m_draw_mode = {}; GPUDrawingArea m_drawing_area = {}; @@ -587,64 +517,7 @@ protected: TickCount m_max_run_ahead = 128; u32 m_fifo_size = 128; - void ClearDisplayTexture(); - void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_texture, s32 view_x, s32 view_y, s32 view_width, - s32 view_height); - - bool RenderDisplay(GPUTexture* target, const GSVector4i draw_rect, bool postfx); - - bool Deinterlace(u32 field, u32 line_skip); - bool DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip); - bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); - void DestroyDeinterlaceTextures(); - bool ApplyChromaSmoothing(); - - u32 m_current_deinterlace_buffer = 0; - std::unique_ptr m_deinterlace_pipeline; - std::unique_ptr m_deinterlace_extract_pipeline; - std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; - std::unique_ptr m_deinterlace_texture; - - std::unique_ptr m_chroma_smoothing_pipeline; - std::unique_ptr m_chroma_smoothing_texture; - - std::unique_ptr m_display_pipeline; - GPUTexture* m_display_texture = nullptr; - GPUTexture* m_display_depth_buffer = nullptr; - s32 m_display_texture_view_x = 0; - s32 m_display_texture_view_y = 0; - s32 m_display_texture_view_width = 0; - s32 m_display_texture_view_height = 0; - - struct Counters - { - u32 num_reads; - u32 num_writes; - u32 num_copies; - u32 num_vertices; - u32 num_primitives; - - // u32 num_read_texture_updates; - // u32 num_ubo_updates; - }; - - struct Stats : Counters - { - size_t host_buffer_streamed; - u32 host_num_draws; - u32 host_num_barriers; - u32 host_num_render_passes; - u32 host_num_copies; - u32 host_num_downloads; - u32 host_num_uploads; - }; - - Counters m_counters = {}; - Stats m_stats = {}; - private: - bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing); - using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandlerTable = std::array; static GP0CommandHandlerTable 
GenerateGP0CommandHandlerTable(); diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index 57508ff78..75eb85eca 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -1,288 +1,415 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_backend.h" -#include "common/align.h" -#include "common/log.h" -#include "common/timer.h" +#include "gpu.h" +#include "gpu_shadergen.h" +#include "gpu_thread.h" +#include "host.h" #include "settings.h" + +#include "util/gpu_device.h" +#include "util/image.h" +#include "util/imgui_manager.h" +#include "util/postprocessing.h" #include "util/state_wrapper.h" + +#include "common/align.h" +#include "common/error.h" +#include "common/file_system.h" +#include "common/gsvector_formatter.h" +#include "common/log.h" +#include "common/path.h" +#include "common/small_string.h" +#include "common/string_util.h" +#include "common/timer.h" + +#include "IconsFontAwesome5.h" +#include "fmt/format.h" + +#include + Log_SetChannel(GPUBackend); -std::unique_ptr g_gpu_backend; - -GPUBackend::GPUBackend() = default; - -GPUBackend::~GPUBackend() = default; - -bool GPUBackend::Initialize(bool force_thread) +namespace { +struct Counters { - if (force_thread || g_settings.gpu_use_thread) - StartGPUThread(); + u32 num_reads; + u32 num_writes; + u32 num_copies; + u32 num_vertices; + u32 num_primitives; + // u32 num_read_texture_updates; + // u32 num_ubo_updates; +}; + +// TODO: This is probably wrong/racy... 
+struct Stats : Counters +{ + size_t host_buffer_streamed; + u32 host_num_draws; + u32 host_num_barriers; + u32 host_num_render_passes; + u32 host_num_copies; + u32 host_num_downloads; + u32 host_num_uploads; +}; +} // namespace + +static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, + u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, + u32 texture_data_stride, GPUTexture::Format texture_format, + bool display_osd_message, bool use_thread); +static void JoinScreenshotThreads(); + +// TODO: Pack state... + +static std::atomic s_queued_frames; +static std::atomic_bool s_waiting_for_gpu_thread; +static Threading::KernelSemaphore s_gpu_thread_wait; + +static std::tuple s_last_display_source_size; + +static std::deque s_screenshot_threads; +static std::mutex s_screenshot_threads_mutex; + +static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; + +static Counters s_counters = {}; +static Stats s_stats = {}; + +GPUBackend::GPUBackend() +{ + ResetStatistics(); +} + +GPUBackend::~GPUBackend() +{ + JoinScreenshotThreads(); + DestroyDeinterlaceTextures(); + g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); + g_gpu_device->SetGPUTimingEnabled(false); +} + +bool GPUBackend::Initialize(bool clear_vram, Error* error) +{ + if (!CompileDisplayPipelines(true, true, g_gpu_settings.gpu_24bit_chroma_smoothing)) + { + Error::SetStringView(error, "Failed to compile base GPU pipelines."); + return false; + } + + g_gpu_device->SetGPUTimingEnabled(g_gpu_settings.display_show_gpu_usage); return true; } -void GPUBackend::Reset() +void GPUBackend::UpdateSettings(const Settings& old_settings) { - Sync(true); - DrawingAreaChanged(GPUDrawingArea{0, 0, 0, 0}, GSVector4i::zero()); -} + FlushRender(); -void GPUBackend::UpdateSettings() -{ - Sync(true); + if (g_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage) + 
g_gpu_device->SetGPUTimingEnabled(g_gpu_settings.display_show_gpu_usage); - if (m_use_gpu_thread != g_settings.gpu_use_thread) + if (g_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) + GPUBackend::ResetStatistics(); + + if (g_gpu_settings.display_scaling != old_settings.display_scaling || + g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || + g_gpu_settings.gpu_24bit_chroma_smoothing != old_settings.gpu_24bit_chroma_smoothing) { - if (!g_settings.gpu_use_thread) - StopGPUThread(); - else - StartGPUThread(); + // Toss buffers on mode change. + if (g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) + DestroyDeinterlaceTextures(); + + if (!CompileDisplayPipelines(g_gpu_settings.display_scaling != old_settings.display_scaling, + g_gpu_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, + g_gpu_settings.gpu_24bit_chroma_smoothing != old_settings.gpu_24bit_chroma_smoothing)) + { + Panic("Failed to compile display pipeline on settings change."); + } } } -void GPUBackend::Shutdown() +void GPUBackend::RestoreDeviceContext() { - StopGPUThread(); +} + +GPUThreadCommand* GPUBackend::NewClearVRAMCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearVRAM, sizeof(GPUThreadCommand))); +} + +GPUBackendDoStateCommand* GPUBackend::NewDoStateCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::DoState, sizeof(GPUBackendDoStateCommand))); +} + +GPUThreadCommand* GPUBackend::NewClearDisplayCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearDisplay, sizeof(GPUThreadCommand))); +} + +GPUBackendUpdateDisplayCommand* GPUBackend::NewUpdateDisplayCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::UpdateDisplay, sizeof(GPUBackendUpdateDisplayCommand))); +} + +GPUThreadCommand* GPUBackend::NewClearCacheCommand() +{ + return 
static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ClearCache, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUBackend::NewBufferSwappedCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::BufferSwapped, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUBackend::NewFlushRenderCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::FlushRender, sizeof(GPUThreadCommand))); +} + +GPUThreadCommand* GPUBackend::NewUpdateResolutionScaleCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::UpdateResolutionScale, sizeof(GPUThreadCommand))); +} + +GPUBackendReadVRAMCommand* GPUBackend::NewReadVRAMCommand() +{ + return static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::ReadVRAM, sizeof(GPUBackendReadVRAMCommand))); } GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::FillVRAM, sizeof(GPUBackendFillVRAMCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::FillVRAM, sizeof(GPUBackendFillVRAMCommand))); } GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words) { const u32 size = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16)); GPUBackendUpdateVRAMCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::UpdateVRAM, size)); + static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::UpdateVRAM, size)); return cmd; } GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::CopyVRAM, sizeof(GPUBackendCopyVRAMCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::CopyVRAM, sizeof(GPUBackendCopyVRAMCommand))); } GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::SetDrawingArea, sizeof(GPUBackendSetDrawingAreaCommand))); + 
GPUThread::AllocateCommand(GPUBackendCommandType::SetDrawingArea, sizeof(GPUBackendSetDrawingAreaCommand))); } GPUBackendUpdateCLUTCommand* GPUBackend::NewUpdateCLUTCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::UpdateCLUT, sizeof(GPUBackendUpdateCLUTCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::UpdateCLUT, sizeof(GPUBackendUpdateCLUTCommand))); } GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex)); GPUBackendDrawPolygonCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::DrawPolygon, size)); - cmd->num_vertices = Truncate16(num_vertices); + static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::DrawPolygon, size)); + cmd->num_vertices = Truncate8(num_vertices); + return cmd; +} + +GPUBackendDrawPrecisePolygonCommand* GPUBackend::NewDrawPrecisePolygonCommand(u32 num_vertices) +{ + const u32 size = + sizeof(GPUBackendDrawPrecisePolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + GPUBackendDrawPrecisePolygonCommand* cmd = static_cast( + GPUThread::AllocateCommand(GPUBackendCommandType::DrawPrecisePolygon, size)); + cmd->num_vertices = Truncate8(num_vertices); return cmd; } GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand() { return static_cast( - AllocateCommand(GPUBackendCommandType::DrawRectangle, sizeof(GPUBackendDrawRectangleCommand))); + GPUThread::AllocateCommand(GPUBackendCommandType::DrawRectangle, sizeof(GPUBackendDrawRectangleCommand))); } GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex)); GPUBackendDrawLineCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::DrawLine, size)); + 
static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::DrawLine, size)); cmd->num_vertices = Truncate16(num_vertices); return cmd; } -void* GPUBackend::AllocateCommand(GPUBackendCommandType command, u32 size) +void GPUBackend::PushCommand(GPUThreadCommand* cmd) { - // Ensure size is a multiple of 4 so we don't end up with an unaligned command. - size = Common::AlignUpPow2(size, 4); - - for (;;) - { - u32 read_ptr = m_command_fifo_read_ptr.load(); - u32 write_ptr = m_command_fifo_write_ptr.load(); - if (read_ptr > write_ptr) - { - u32 available_size = read_ptr - write_ptr; - while (available_size < (size + sizeof(GPUBackendCommandType))) - { - WakeGPUThread(); - read_ptr = m_command_fifo_read_ptr.load(); - available_size = (read_ptr > write_ptr) ? (read_ptr - write_ptr) : (COMMAND_QUEUE_SIZE - write_ptr); - } - } - else - { - const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr; - if ((size + sizeof(GPUBackendCommand)) > available_size) - { - // allocate a dummy command to wrap the buffer around - GPUBackendCommand* dummy_cmd = reinterpret_cast(&m_command_fifo_data[write_ptr]); - dummy_cmd->type = GPUBackendCommandType::Wraparound; - dummy_cmd->size = available_size; - dummy_cmd->params.bits = 0; - m_command_fifo_write_ptr.store(0); - continue; - } - } - - GPUBackendCommand* cmd = reinterpret_cast(&m_command_fifo_data[write_ptr]); - cmd->type = command; - cmd->size = size; - return cmd; - } + GPUThread::PushCommand(cmd); } -u32 GPUBackend::GetPendingCommandSize() const +void GPUBackend::PushCommandAndWakeThread(GPUThreadCommand* cmd) { - const u32 read_ptr = m_command_fifo_read_ptr.load(); - const u32 write_ptr = m_command_fifo_write_ptr.load(); - return (write_ptr >= read_ptr) ? 
(write_ptr - read_ptr) : (COMMAND_QUEUE_SIZE - read_ptr + write_ptr); + GPUThread::PushCommandAndWakeThread(cmd); } -void GPUBackend::PushCommand(GPUBackendCommand* cmd) +void GPUBackend::PushCommandAndSync(GPUThreadCommand* cmd, bool spin) { - if (!m_use_gpu_thread) - { - // single-thread mode - if (cmd->type != GPUBackendCommandType::Sync) - HandleCommand(cmd); - } - else - { - const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size; - DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); - UNREFERENCED_VARIABLE(new_write_ptr); - if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU) - WakeGPUThread(); - } + GPUThread::PushCommandAndSync(cmd, spin); } -void GPUBackend::WakeGPUThread() +bool GPUBackend::IsUsingHardwareBackend() { - std::unique_lock lock(m_sync_mutex); - if (!m_gpu_thread_sleeping.load()) - return; - - m_wake_gpu_thread_cv.notify_one(); + return (GPUThread::GetRequestedRenderer().value_or(GPURenderer::Software) != GPURenderer::Software); } -void GPUBackend::StartGPUThread() +bool GPUBackend::BeginQueueFrame() { - m_gpu_loop_done.store(false); - m_use_gpu_thread = true; - m_gpu_thread.Start([this]() { RunGPULoop(); }); - INFO_LOG("GPU thread started."); + const u32 queued_frames = s_queued_frames.fetch_add(1, std::memory_order_acq_rel) + 1; + if (queued_frames < g_settings.gpu_max_queued_frames) + return false; + + DEV_LOG("<-- {} queued frames, {} max, blocking CPU thread", queued_frames, g_settings.gpu_max_queued_frames); + s_waiting_for_gpu_thread.store(true, std::memory_order_release); + return true; } -void GPUBackend::StopGPUThread() +void GPUBackend::WaitForOneQueuedFrame() { - if (!m_use_gpu_thread) - return; - - m_gpu_loop_done.store(true); - WakeGPUThread(); - m_gpu_thread.Join(); - m_use_gpu_thread = false; - INFO_LOG("GPU thread stopped."); + s_gpu_thread_wait.Wait(); } -void GPUBackend::Sync(bool allow_sleep) +bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i draw_rect, bool postfx, 
+ std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) { - if (!m_use_gpu_thread) - return; + bool result; - GPUBackendSyncCommand* cmd = - static_cast(AllocateCommand(GPUBackendCommandType::Sync, sizeof(GPUBackendSyncCommand))); - cmd->allow_sleep = allow_sleep; - PushCommand(cmd); - WakeGPUThread(); + GPUThreadRenderScreenshotToBufferCommand* cmd = + static_cast(GPUThread::AllocateCommand( + GPUBackendCommandType::RenderScreenshotToBuffer, sizeof(GPUThreadRenderScreenshotToBufferCommand))); + cmd->width = width; + cmd->height = height; + GSVector4i::store(cmd->draw_rect, draw_rect); + cmd->postfx = postfx; + cmd->out_pixels = out_pixels; + cmd->out_stride = out_stride; + cmd->out_format = out_format; + cmd->out_result = &result; + PushCommandAndSync(cmd, false); - m_sync_semaphore.Wait(); + return result; } -void GPUBackend::RunGPULoop() +std::tuple GPUBackend::GetLastDisplaySourceSize() { - static constexpr double SPIN_TIME_NS = 1 * 1000000; - Common::Timer::Value last_command_time = 0; - - for (;;) - { - u32 write_ptr = m_command_fifo_write_ptr.load(); - u32 read_ptr = m_command_fifo_read_ptr.load(); - if (read_ptr == write_ptr) - { - const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); - if (Common::Timer::ConvertValueToNanoseconds(current_time - last_command_time) < SPIN_TIME_NS) - continue; - - std::unique_lock lock(m_sync_mutex); - m_gpu_thread_sleeping.store(true); - m_wake_gpu_thread_cv.wait(lock, [this]() { return m_gpu_loop_done.load() || GetPendingCommandSize() > 0; }); - m_gpu_thread_sleeping.store(false); - - if (m_gpu_loop_done.load()) - break; - else - continue; - } - - if (write_ptr < read_ptr) - write_ptr = COMMAND_QUEUE_SIZE; - - bool allow_sleep = false; - while (read_ptr < write_ptr) - { - const GPUBackendCommand* cmd = reinterpret_cast(&m_command_fifo_data[read_ptr]); - read_ptr += cmd->size; - - switch (cmd->type) - { - case GPUBackendCommandType::Wraparound: - { - DebugAssert(read_ptr == 
COMMAND_QUEUE_SIZE); - write_ptr = m_command_fifo_write_ptr.load(); - read_ptr = 0; - } - break; - - case GPUBackendCommandType::Sync: - { - DebugAssert(read_ptr == write_ptr); - m_sync_semaphore.Post(); - allow_sleep = static_cast(cmd)->allow_sleep; - } - break; - - default: - HandleCommand(cmd); - break; - } - } - - last_command_time = allow_sleep ? 0 : Common::Timer::GetCurrentValue(); - m_command_fifo_read_ptr.store(read_ptr); - } + std::atomic_thread_fence(std::memory_order_acquire); + return s_last_display_source_size; } -void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) +void GPUBackend::HandleCommand(const GPUThreadCommand* cmd) { switch (cmd->type) { - case GPUBackendCommandType::FillVRAM: + case GPUBackendCommandType::ClearVRAM: + { + ClearVRAM(); + } + break; + + case GPUBackendCommandType::DoState: + { + const GPUBackendDoStateCommand* ccmd = static_cast(cmd); + DoState(ccmd->host_texture, ccmd->is_reading, ccmd->update_display); + } + break; + + case GPUBackendCommandType::ClearDisplay: + { + ClearDisplay(); + } + break; + + case GPUBackendCommandType::UpdateDisplay: + { + const GPUBackendUpdateDisplayCommand* ccmd = static_cast(cmd); + m_display_width = ccmd->display_width; + m_display_height = ccmd->display_height; + m_display_origin_left = ccmd->display_origin_left; + m_display_origin_top = ccmd->display_origin_top; + m_display_vram_width = ccmd->display_vram_width; + m_display_vram_height = ccmd->display_vram_height; + m_display_aspect_ratio = ccmd->display_aspect_ratio; + + UpdateDisplay(ccmd); + + if (ccmd->present_frame) + { + GPUThread::Internal::PresentFrame(true, ccmd->present_time); + + s_queued_frames.fetch_sub(1); + + bool expected = true; + if (s_waiting_for_gpu_thread.compare_exchange_strong(expected, false)) + { + DEV_LOG("--> Unblocking CPU thread"); + s_gpu_thread_wait.Post(); + } + } + } + break; + + case GPUBackendCommandType::ClearCache: + { + ClearCache(); + } + break; + + case GPUBackendCommandType::BufferSwapped: + { 
+ OnBufferSwapped(); + } + break; + + case GPUBackendCommandType::FlushRender: { FlushRender(); + } + break; + + case GPUBackendCommandType::UpdateResolutionScale: + { + UpdateResolutionScale(); + } + break; + + case GPUBackendCommandType::RenderScreenshotToBuffer: + { + HandleRenderScreenshotToBuffer(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::ReadVRAM: + { + const GPUBackendReadVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_reads++; + ReadVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height)); + } + break; + + case GPUBackendCommandType::FillVRAM: + { const GPUBackendFillVRAMCommand* ccmd = static_cast(cmd); FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->color, ccmd->params); @@ -291,8 +418,8 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) case GPUBackendCommandType::UpdateVRAM: { - FlushRender(); const GPUBackendUpdateVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_writes++; UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->data, ccmd->params); } @@ -300,8 +427,8 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) case GPUBackendCommandType::CopyVRAM: { - FlushRender(); const GPUBackendCopyVRAMCommand* ccmd = static_cast(cmd); + s_counters.num_copies++; CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x), ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params); } @@ -324,23 +451,1272 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) case GPUBackendCommandType::DrawPolygon: { - DrawPolygon(static_cast(cmd)); + const GPUBackendDrawPolygonCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives++; + DrawPolygon(ccmd); + } + break; + + case 
GPUBackendCommandType::DrawPrecisePolygon: + { + const GPUBackendDrawPolygonCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives++; + DrawPrecisePolygon(static_cast(cmd)); } break; case GPUBackendCommandType::DrawRectangle: { - DrawRectangle(static_cast(cmd)); + const GPUBackendDrawRectangleCommand* ccmd = static_cast(cmd); + s_counters.num_vertices++; + s_counters.num_primitives++; + DrawSprite(ccmd); } break; case GPUBackendCommandType::DrawLine: { - DrawLine(static_cast(cmd)); + const GPUBackendDrawLineCommand* ccmd = static_cast(cmd); + s_counters.num_vertices += ccmd->num_vertices; + s_counters.num_primitives += ccmd->num_vertices / 2; + DrawLine(ccmd); } break; + DefaultCaseIsUnreachable(); + } +} + +void GPUBackend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) +{ + const u16 color16 = VRAMRGBA8888ToRGBA5551(color); + if ((x + width) <= VRAM_WIDTH && !params.interlaced_rendering) + { + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + const u32 row = (y + yoffs) % VRAM_HEIGHT; + std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16); + } + } + else if (params.interlaced_rendering) + { + // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. 
+ const u32 active_field = params.active_line_lsb; + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + const u32 row = (y + yoffs) % VRAM_HEIGHT; + if ((row & u32(1)) == active_field) + continue; + + u16* row_ptr = &g_vram[row * VRAM_WIDTH]; + for (u32 xoffs = 0; xoffs < width; xoffs++) + { + const u32 col = (x + xoffs) % VRAM_WIDTH; + row_ptr[col] = color16; + } + } + } + else + { + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + const u32 row = (y + yoffs) % VRAM_HEIGHT; + u16* row_ptr = &g_vram[row * VRAM_WIDTH]; + for (u32 xoffs = 0; xoffs < width; xoffs++) + { + const u32 col = (x + xoffs) % VRAM_WIDTH; + row_ptr[col] = color16; + } + } + } +} + +void GPUBackend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) +{ + // Fast path when the copy is not oversized. + if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled()) + { + const u16* src_ptr = static_cast(data); + u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x]; + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + std::copy_n(src_ptr, width, dst_ptr); + src_ptr += width; + dst_ptr += VRAM_WIDTH; + } + } + else + { + // Slow path when we need to handle wrap-around. + // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or } + const u16* src_ptr = static_cast(data); + const u16 mask_and = params.GetMaskAND(); + const u16 mask_or = params.GetMaskOR(); + + for (u32 row = 0; row < height;) + { + u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; + for (u32 col = 0; col < width;) + { + // TODO: Handle unaligned reads... + u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; + if (((*pixel_ptr) & mask_and) == 0) + *pixel_ptr = *(src_ptr++) | mask_or; + } + } + } +} + +void GPUBackend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) +{ + // Break up oversized copies. 
This behavior has not been verified on console. + if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) + { + u32 remaining_rows = height; + u32 current_src_y = src_y; + u32 current_dst_y = dst_y; + while (remaining_rows > 0) + { + const u32 rows_to_copy = + std::min(remaining_rows, std::min(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); + + u32 remaining_columns = width; + u32 current_src_x = src_x; + u32 current_dst_x = dst_x; + while (remaining_columns > 0) + { + const u32 columns_to_copy = + std::min(remaining_columns, std::min(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); + CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy, params); + current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; + current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; + remaining_columns -= columns_to_copy; + } + + current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; + current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; + remaining_rows -= rows_to_copy; + } + + return; + } + + // This doesn't have a fast path, but do we really need one? It's not common. + const u16 mask_and = params.GetMaskAND(); + const u16 mask_or = params.GetMaskOR(); + + // Copy in reverse when src_x < dst_x, this is verified on console. 
+ if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) + { + for (u32 row = 0; row < height; row++) + { + const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + + for (s32 col = static_cast(width - 1); col >= 0; col--) + { + const u16 src_pixel = src_row_ptr[(src_x + static_cast(col)) % VRAM_WIDTH]; + u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast(col)) % VRAM_WIDTH]; + if ((*dst_pixel_ptr & mask_and) == 0) + *dst_pixel_ptr = src_pixel | mask_or; + } + } + } + else + { + for (u32 row = 0; row < height; row++) + { + const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + + for (u32 col = 0; col < width; col++) + { + const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; + u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; + if ((*dst_pixel_ptr & mask_and) == 0) + *dst_pixel_ptr = src_pixel | mask_or; + } + } + } +} + +bool GPUBackend::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing) +{ + GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, + g_gpu_device->GetFeatures().framebuffer_fetch); + + GPUPipeline::GraphicsConfig plconfig; + plconfig.input_layout.vertex_stride = 0; + plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.geometry_shader = nullptr; + plconfig.depth_format = GPUTexture::Format::Unknown; + plconfig.samples = 1; + plconfig.per_sample_shading = false; + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; + + if (display) + { + plconfig.layout = 
GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8); + + std::string vs = shadergen.GenerateDisplayVertexShader(); + std::string fs; + switch (g_settings.display_scaling) + { + case DisplayScalingMode::BilinearSharp: + fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); + break; + + case DisplayScalingMode::BilinearSmooth: + fs = shadergen.GenerateDisplayFragmentShader(true); + break; + + case DisplayScalingMode::Nearest: + case DisplayScalingMode::NearestInteger: + default: + fs = shadergen.GenerateDisplayFragmentShader(false); + break; + } + + std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs); + std::unique_ptr fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs); + if (!vso || !fso) + return false; + GL_OBJECT_NAME(vso, "Display Vertex Shader"); + GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", + Settings::GetDisplayScalingName(g_settings.display_scaling)); + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", + Settings::GetDisplayScalingName(g_settings.display_scaling)); + } + + if (deinterlace) + { + plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); + + std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), + shadergen.GenerateScreenQuadVertexShader()); + if (!vso) + return false; + GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); + + std::unique_ptr fso; + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateInterleavedFieldExtractFragmentShader()))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); + + plconfig.layout = 
GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); + + switch (g_settings.display_deinterlacing_mode) + { + case DisplayDeinterlacingMode::Disabled: + break; + + case DisplayDeinterlacingMode::Weave: + { + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateDeinterlaceWeaveFragmentShader()))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); + } + break; + + case DisplayDeinterlacingMode::Blend: + { + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateDeinterlaceBlendFragmentShader()))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); + } + break; + + case DisplayDeinterlacingMode::Adaptive: + { + fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateFastMADReconstructFragmentShader()); + if (!fso) + return false; + + GL_OBJECT_NAME(fso, "FastMAD Reconstruct Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; 
+ plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); + } + break; + + default: + UnreachableCode(); + } + } + + if (chroma_smoothing) + { + m_chroma_smoothing_pipeline.reset(); + g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); + + if (g_settings.gpu_24bit_chroma_smoothing) + { + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); + + std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), + shadergen.GenerateScreenQuadVertexShader()); + std::unique_ptr fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateChromaSmoothingFragmentShader()); + if (!vso || !fso) + return false; + GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); + GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); + + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); + } + } + + return true; +} + +void GPUBackend::ClearDisplay() +{ + ClearDisplayTexture(); + + // Just recycle the textures, it'll get re-fetched. 
+ DestroyDeinterlaceTextures(); +} + +void GPUBackend::ClearDisplayTexture() +{ + m_display_texture = nullptr; + m_display_texture_view_x = 0; + m_display_texture_view_y = 0; + m_display_texture_view_width = 0; + m_display_texture_view_height = 0; + s_last_display_source_size = {}; + std::atomic_thread_fence(std::memory_order_release); +} + +void GPUBackend::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_texture, s32 view_x, s32 view_y, + s32 view_width, s32 view_height) +{ + DebugAssert(texture); + m_display_texture = texture; + m_display_depth_buffer = depth_texture; + m_display_texture_view_x = view_x; + m_display_texture_view_y = view_y; + m_display_texture_view_width = view_width; + m_display_texture_view_height = view_height; + s_last_display_source_size = {static_cast(view_width), static_cast(view_height)}; + std::atomic_thread_fence(std::memory_order_release); +} + +bool GPUBackend::PresentDisplay() +{ + if (!HasDisplayTexture()) + return g_gpu_device->BeginPresent(false); + + const GSVector4i draw_rect = CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight()); + return RenderDisplay(nullptr, draw_rect, !g_gpu_settings.debugging.show_vram); +} + +bool GPUBackend::RenderDisplay(GPUTexture* target, const GSVector4i draw_rect, bool postfx) +{ + GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); + + if (m_display_texture) + m_display_texture->MakeReadyForSampling(); + + // Internal post-processing. 
+ GPUTexture* display_texture = m_display_texture; + s32 display_texture_view_x = m_display_texture_view_x; + s32 display_texture_view_y = m_display_texture_view_y; + s32 display_texture_view_width = m_display_texture_view_width; + s32 display_texture_view_height = m_display_texture_view_height; + if (postfx && display_texture && PostProcessing::InternalChain.IsActive() && + PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width, + display_texture_view_height)) + { + DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 && + static_cast(display_texture->GetWidth()) == display_texture_view_width && + static_cast(display_texture->GetHeight()) == display_texture_view_height); + + // Now we can apply the post chain. + GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); + if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, + GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), + display_texture_view_width, display_texture_view_height, m_display_width, + m_display_height)) + { + display_texture_view_x = 0; + display_texture_view_y = 0; + display_texture = post_output_texture; + display_texture->MakeReadyForSampling(); + } + } + + const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetWindowFormat(); + const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetWindowWidth(); + const u32 target_height = target ? target->GetHeight() : g_gpu_device->GetWindowHeight(); + const bool really_postfx = + (postfx && PostProcessing::DisplayChain.IsActive() && !g_gpu_device->GetWindowInfo().IsSurfaceless() && + hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && + PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); + const GSVector4i real_draw_rect = + g_gpu_device->UsesLowerLeftOrigin() ? 
GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect; + if (really_postfx) + { + g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), 0); + g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture()); + } + else + { + if (target) + g_gpu_device->SetRenderTarget(target); + else if (!g_gpu_device->BeginPresent(false)) + return false; + } + + if (display_texture) + { + bool texture_filter_linear = false; + + struct Uniforms + { + float src_rect[4]; + float src_size[4]; + float clamp_rect[4]; + float params[4]; + } uniforms; + std::memset(uniforms.params, 0, sizeof(uniforms.params)); + + switch (g_settings.display_scaling) + { + case DisplayScalingMode::Nearest: + case DisplayScalingMode::NearestInteger: + break; + + case DisplayScalingMode::BilinearSmooth: + case DisplayScalingMode::BlinearInteger: + texture_filter_linear = true; + break; + + case DisplayScalingMode::BilinearSharp: + { + texture_filter_linear = true; + uniforms.params[0] = std::max( + std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f); + uniforms.params[1] = std::max( + std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f); + uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; + uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; + } + break; + + default: + UnreachableCode(); + break; + } + + g_gpu_device->SetPipeline(m_display_pipeline.get()); + g_gpu_device->SetTextureSampler( + 0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); + + // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because + // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. 
+ const float rcp_width = 1.0f / static_cast(display_texture->GetWidth()); + const float rcp_height = 1.0f / static_cast(display_texture->GetHeight()); + uniforms.src_rect[0] = static_cast(display_texture_view_x) * rcp_width; + uniforms.src_rect[1] = static_cast(display_texture_view_y) * rcp_height; + uniforms.src_rect[2] = static_cast(display_texture_view_width) * rcp_width; + uniforms.src_rect[3] = static_cast(display_texture_view_height) * rcp_height; + uniforms.clamp_rect[0] = (static_cast(display_texture_view_x) + 0.5f) * rcp_width; + uniforms.clamp_rect[1] = (static_cast(display_texture_view_y) + 0.5f) * rcp_height; + uniforms.clamp_rect[2] = + (static_cast(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width; + uniforms.clamp_rect[3] = + (static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height; + uniforms.src_size[0] = static_cast(display_texture->GetWidth()); + uniforms.src_size[1] = static_cast(display_texture->GetHeight()); + uniforms.src_size[2] = rcp_width; + uniforms.src_size[3] = rcp_height; + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + + g_gpu_device->SetViewportAndScissor(real_draw_rect); + g_gpu_device->Draw(3, 0); + } + + if (really_postfx) + { + DebugAssert(!g_settings.debugging.show_vram); + + // "original size" in postfx includes padding. + const float upscale_x = + m_display_texture ? static_cast(m_display_texture_view_width) / static_cast(m_display_vram_width) : + 1.0f; + const float upscale_y = m_display_texture ? 
static_cast(m_display_texture_view_height) / + static_cast(m_display_vram_height) : + 1.0f; + const s32 orig_width = static_cast(std::ceil(static_cast(m_display_width) * upscale_x)); + const s32 orig_height = static_cast(std::ceil(static_cast(m_display_height) * upscale_y)); + + return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, + real_draw_rect, orig_width, orig_height, m_display_width, + m_display_height); + } + else + return true; +} + +void GPUBackend::DestroyDeinterlaceTextures() +{ + for (std::unique_ptr& tex : m_deinterlace_buffers) + g_gpu_device->RecycleTexture(std::move(tex)); + g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); + m_current_deinterlace_buffer = 0; +} + +bool GPUBackend::Deinterlace(u32 field, u32 line_skip) +{ + GPUTexture* src = m_display_texture; + const u32 x = m_display_texture_view_x; + const u32 y = m_display_texture_view_y; + const u32 width = m_display_texture_view_width; + const u32 height = m_display_texture_view_height; + + switch (g_settings.display_deinterlacing_mode) + { + case DisplayDeinterlacingMode::Disabled: + { + if (line_skip == 0) + return true; + + // Still have to extract the field. 
+ if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] + return false; + + SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height); + return true; + } + + case DisplayDeinterlacingMode::Weave: + { + GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); + + const u32 full_height = height * 2; + if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + src->MakeReadyForSampling(); + + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, field, line_skip}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); + return true; + } + + case DisplayDeinterlacingMode::Blend: + { + constexpr u32 NUM_BLEND_BUFFERS = 2; + + GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); + + const u32 this_buffer = m_current_deinterlace_buffer; + m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; + GL_INS_FMT("Current buffer: {}", this_buffer); + if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || + !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + // TODO: could be implemented with alpha blending instead.. 
+ + g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height); + return true; + } + + case DisplayDeinterlacingMode::Adaptive: + { + GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, + line_skip); + + const u32 full_height = height * 2; + const u32 this_buffer = m_current_deinterlace_buffer; + m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; + GL_INS_FMT("Current buffer: {}", this_buffer); + if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || + !DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), + 
g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {field, full_height}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); + return true; + } + default: UnreachableCode(); } } + +bool GPUBackend::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, + u32 line_skip) +{ + if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width || + m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, false)) [[unlikely]] + { + return false; + } + + GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx); + } + + GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get(); + g_gpu_device->InvalidateRenderTarget(dst); + + // If we're not skipping lines, then we can simply copy the texture. + if (line_skip == 0 && src->GetFormat() == dst->GetFormat()) + { + GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip); + g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height); + } + else + { + GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height, + line_skip); + + // Otherwise, we need to extract every other line from the texture. 
+ src->MakeReadyForSampling(); + g_gpu_device->SetRenderTarget(dst); + g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); + g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, line_skip}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + GL_POP(); + } + + dst->MakeReadyForSampling(); + return true; +} + +bool GPUBackend::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) +{ + if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || + m_deinterlace_texture->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, preserve)) [[unlikely]] + { + return false; + } + + GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); + } + + return true; +} + +bool GPUBackend::ApplyChromaSmoothing() +{ + const u32 x = m_display_texture_view_x; + const u32 y = m_display_texture_view_y; + const u32 width = m_display_texture_view_width; + const u32 height = m_display_texture_view_height; + if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || + m_chroma_smoothing_texture->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, false)) + { + ClearDisplayTexture(); + return false; + } + + GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); + } + + GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); + + m_display_texture->MakeReadyForSampling(); + g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); + g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); + g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); + g_gpu_device->SetTextureSampler(0, 
m_display_texture, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, width - 1, height - 1}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + m_chroma_smoothing_texture->MakeReadyForSampling(); + SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); + return true; +} + +void GPUBackend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ +} + +GSVector4i GPUBackend::CalculateDrawRect(s32 window_width, s32 window_height, + bool apply_aspect_ratio /* = true */) const +{ + const bool integer_scale = (g_gpu_settings.display_scaling == DisplayScalingMode::NearestInteger || + g_gpu_settings.display_scaling == DisplayScalingMode::BlinearInteger); + const bool show_vram = g_gpu_settings.debugging.show_vram; + const float display_aspect_ratio = m_display_aspect_ratio; + const float window_ratio = static_cast(window_width) / static_cast(window_height); + const float crtc_display_width = static_cast(show_vram ? VRAM_WIDTH : m_display_width); + const float crtc_display_height = static_cast(show_vram ? VRAM_HEIGHT : m_display_height); + const float x_scale = + apply_aspect_ratio ? + (display_aspect_ratio / (static_cast(crtc_display_width) / static_cast(crtc_display_height))) : + 1.0f; + float display_width = crtc_display_width; + float display_height = crtc_display_height; + float active_left = static_cast(show_vram ? 0 : m_display_origin_left); + float active_top = static_cast(show_vram ? 0 : m_display_origin_top); + float active_width = static_cast(show_vram ? VRAM_WIDTH : m_display_vram_width); + float active_height = static_cast(show_vram ? 
VRAM_HEIGHT : m_display_vram_height); + if (!g_gpu_settings.display_stretch_vertically) + { + display_width *= x_scale; + active_left *= x_scale; + active_width *= x_scale; + } + else + { + display_height /= x_scale; + active_top /= x_scale; + active_height /= x_scale; + } + + // now fit it within the window + float scale; + float left_padding, top_padding; + if ((display_width / display_height) >= window_ratio) + { + // align in middle vertically + scale = static_cast(window_width) / display_width; + if (integer_scale) + { + scale = std::max(std::floor(scale), 1.0f); + left_padding = std::max((static_cast(window_width) - display_width * scale) / 2.0f, 0.0f); + } + else + { + left_padding = 0.0f; + } + + switch (g_gpu_settings.display_alignment) + { + case DisplayAlignment::RightOrBottom: + top_padding = std::max(static_cast(window_height) - (display_height * scale), 0.0f); + break; + + case DisplayAlignment::Center: + top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); + break; + + case DisplayAlignment::LeftOrTop: + default: + top_padding = 0.0f; + break; + } + } + else + { + // align in middle horizontally + scale = static_cast(window_height) / display_height; + if (integer_scale) + { + scale = std::max(std::floor(scale), 1.0f); + top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); + } + else + { + top_padding = 0.0f; + } + + switch (g_gpu_settings.display_alignment) + { + case DisplayAlignment::RightOrBottom: + left_padding = std::max(static_cast(window_width) - (display_width * scale), 0.0f); + break; + + case DisplayAlignment::Center: + left_padding = std::max((static_cast(window_width) - (display_width * scale)) / 2.0f, 0.0f); + break; + + case DisplayAlignment::LeftOrTop: + default: + left_padding = 0.0f; + break; + } + } + + // TODO: This should be a float rectangle. But because GL is lame, it only has integer viewports... 
+ const s32 left = static_cast(active_left * scale + left_padding); + const s32 top = static_cast(active_top * scale + top_padding); + const s32 right = left + static_cast(active_width * scale); + const s32 bottom = top + static_cast(active_height * scale); + return GSVector4i(left, top, right, bottom); +} + +bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, + u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, + u32 texture_data_stride, GPUTexture::Format texture_format, bool display_osd_message, + bool use_thread) +{ + std::string osd_key; + if (display_osd_message) + { + // Use a 60 second timeout to give it plenty of time to actually save. + osd_key = fmt::format("ScreenshotSaver_{}", filename); + Host::AddIconOSDMessage(osd_key, ICON_FA_CAMERA, + fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(filename)), + 60.0f); + } + + static constexpr auto proc = [](u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, + u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, + u32 texture_data_stride, GPUTexture::Format texture_format, std::string osd_key, + bool use_thread) { + bool result; + + const char* extension = std::strrchr(filename.c_str(), '.'); + if (extension) + { + if (GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) + { + if (clear_alpha) + { + for (u32& pixel : texture_data) + pixel |= 0xFF000000u; + } + + if (flip_y) + GPUTexture::FlipTextureDataRGBA8(width, height, reinterpret_cast(texture_data.data()), + texture_data_stride); + + Assert(texture_data_stride == sizeof(u32) * width); + RGBA8Image image(width, height, std::move(texture_data)); + if (image.SaveToFile(filename.c_str(), fp.get(), quality)) + { + result = true; + } + else + { + ERROR_LOG("Unknown extension in filename '{}' or save error: '{}'", filename, extension); + result = false; + } + } + 
else + { + result = false; + } + } + else + { + ERROR_LOG("Unable to determine file extension for '{}'", filename); + result = false; + } + + if (!osd_key.empty()) + { + // Replace the "Saving..." message under the same key. The duration is the last argument of AddIconOSDMessage(), not a format argument of the message text. + Host::AddIconOSDMessage(std::move(osd_key), ICON_FA_CAMERA, + fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : + TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), + Path::GetFileName(filename)), + result ? Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION); + } + + if (use_thread) + { + // remove ourselves from the list, if the GS thread is waiting for us, we won't be in there + const auto this_id = std::this_thread::get_id(); + std::unique_lock lock(s_screenshot_threads_mutex); + for (auto it = s_screenshot_threads.begin(); it != s_screenshot_threads.end(); ++it) + { + if (it->get_id() == this_id) + { + it->detach(); + s_screenshot_threads.erase(it); + break; + } + } + } + + return result; + }; + + if (!use_thread) + { + return proc(width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y, + std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread); + } + + std::thread thread(proc, width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y, + std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread); + std::unique_lock lock(s_screenshot_threads_mutex); + s_screenshot_threads.push_back(std::move(thread)); + return true; +} + +// Joins every screenshot thread still in the list. The mutex is released around each join() because a finishing worker locks the same mutex while looking for itself in the list. +void JoinScreenshotThreads() +{ + std::unique_lock lock(s_screenshot_threads_mutex); + while (!s_screenshot_threads.empty()) + { + std::thread save_thread(std::move(s_screenshot_threads.front())); + s_screenshot_threads.pop_front(); + lock.unlock(); + save_thread.join(); + lock.lock(); + } +} + +bool GPUBackend::WriteDisplayTextureToFile(std::string filename, bool compress_on_thread /* = false */) +{ + if (!m_display_texture) + return false; + + const u32 read_x = static_cast(m_display_texture_view_x); + const u32 read_y = 
static_cast(m_display_texture_view_y); + const u32 read_width = static_cast(m_display_texture_view_width); + const u32 read_height = static_cast(m_display_texture_view_height); + + const u32 texture_data_stride = + Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4); + std::vector texture_data((texture_data_stride * read_height) / sizeof(u32)); + + std::unique_ptr dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = + g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), texture_data.data(), + texture_data.size() * sizeof(u32), texture_data_stride); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) + { + ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, + GPUTexture::GetFormatName(m_display_texture->GetFormat())); + return false; + } + } + + dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); + if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride)) + { + RestoreDeviceContext(); + return false; + } + + RestoreDeviceContext(); + + Error error; + auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); + if (!fp) + { + ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); + return false; + } + + constexpr bool clear_alpha = true; + const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); + + return CompressAndWriteTextureToFile( + read_width, read_height, std::move(filename), std::move(fp), g_settings.display_screenshot_quality, clear_alpha, + flip_y, std::move(texture_data), texture_data_stride, m_display_texture->GetFormat(), false, compress_on_thread); +} + +void GPUBackend::HandleRenderScreenshotToBuffer(const GPUThreadRenderScreenshotToBufferCommand* cmd) +{ + const u32 width = cmd->width; + const 
u32 height = cmd->height; + const GSVector4i draw_rect = GSVector4i::load(cmd->draw_rect); + const GPUTexture::Format hdformat = + g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8; + + auto render_texture = + g_gpu_device->FetchAutoRecycleTexture(cmd->width, cmd->height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); + if (!render_texture) + { + *cmd->out_result = false; + return; + } + + g_gpu_device->ClearRenderTarget(render_texture.get(), 0); + + // TODO: this should use copy shader instead. + RenderDisplay(render_texture.get(), draw_rect, cmd->postfx); + + const u32 stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(hdformat) * width, sizeof(u32)); + cmd->out_pixels->resize((height * stride) / sizeof(u32)); + + std::unique_ptr dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, cmd->out_pixels->data(), + cmd->out_pixels->size() * sizeof(u32), stride); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat))) + { + ERROR_LOG("Failed to create {}x{} download texture", width, height); + *cmd->out_result = false; + return; + } + } + + dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); + if (!dltex->ReadTexels(0, 0, width, height, cmd->out_pixels->data(), stride)) + { + RestoreDeviceContext(); + *cmd->out_result = false; + return; + } + + *cmd->out_stride = stride; + *cmd->out_format = hdformat; + *cmd->out_result = true; + RestoreDeviceContext(); +} + +bool GPUBackend::RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mode, u8 quality, + bool compress_on_thread, bool show_osd_message) +{ + u32 width = g_gpu_device->GetWindowWidth(); + u32 height = g_gpu_device->GetWindowHeight(); + GSVector4i draw_rect = CalculateDrawRect(width, height, true); + + const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || 
g_settings.debugging.show_vram); + if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) + { + if (mode == DisplayScreenshotMode::InternalResolution) + { + const u32 draw_width = static_cast(draw_rect.width()); + const u32 draw_height = static_cast(draw_rect.height()); + + // If internal res, scale the computed draw rectangle to the internal res. + // We re-use the draw rect because it's already been AR corrected. + const float sar = + static_cast(m_display_texture_view_width) / static_cast(m_display_texture_view_height); + const float dar = static_cast(draw_width) / static_cast(draw_height); + if (sar >= dar) + { + // stretch height, preserve width + const float scale = static_cast(m_display_texture_view_width) / static_cast(draw_width); + width = m_display_texture_view_width; + height = static_cast(std::round(static_cast(draw_height) * scale)); + } + else + { + // stretch width, preserve height + const float scale = static_cast(m_display_texture_view_height) / static_cast(draw_height); + width = static_cast(std::round(static_cast(draw_width) * scale)); + height = m_display_texture_view_height; + } + + // DX11 won't go past 16K texture size. + const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); + if (width > max_texture_size) + { + height = static_cast(static_cast(height) / + (static_cast(width) / static_cast(max_texture_size))); + width = max_texture_size; + } + if (height > max_texture_size) + { + // Scale width by the pre-clamp height first; clamping height beforehand makes the ratio 1 and leaves width unscaled. + width = static_cast(static_cast(width) / + (static_cast(height) / static_cast(max_texture_size))); + height = max_texture_size; + } + } + else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) + { + width = m_display_texture_view_width; + height = m_display_texture_view_height; + } + + // Remove padding, it's not part of the framebuffer. 
+ draw_rect = GSVector4i(0, 0, static_cast(width), static_cast(height)); + } + if (width == 0 || height == 0) + return false; + + std::vector pixels; + u32 pixels_stride; + GPUTexture::Format pixels_format; + if (!RenderScreenshotToBuffer(width, height, draw_rect, !internal_resolution, &pixels, &pixels_stride, + &pixels_format)) + { + ERROR_LOG("Failed to render {}x{} screenshot", width, height); + return false; + } + + Error error; + auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); + if (!fp) + { + ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); + return false; + } + + return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), quality, true, + g_gpu_device->UsesLowerLeftOrigin(), std::move(pixels), pixels_stride, + pixels_format, show_osd_message, compress_on_thread); +} + +void GPUBackend::GetStatsString(SmallStringBase& str) +{ + if (IsUsingHardwareBackend()) + { + str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", + GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), s_stats.num_primitives, + s_stats.host_num_draws, s_stats.host_num_barriers, s_stats.host_num_render_passes, + s_stats.host_num_downloads, s_stats.num_copies, s_stats.num_writes); + } + else + { + str.format("{} SW | {} P | {} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), + s_stats.num_primitives, s_stats.num_reads, s_stats.num_copies, s_stats.num_writes); + } +} + +void GPUBackend::GetMemoryStatsString(SmallStringBase& str) +{ + const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); + const u32 stream_kb = static_cast((s_stats.host_buffer_streamed + (1024 - 1)) / 1024); + + str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, s_stats.host_num_copies, + s_stats.host_num_uploads); +} + +void GPUBackend::ResetStatistics() +{ + s_counters = {}; + g_gpu_device->ResetStatistics(); 
+} + +void GPUBackend::UpdateStatistics(u32 frame_count) +{ + const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); + const u32 round = (frame_count - 1); + +#define UPDATE_COUNTER(x) s_stats.x = (s_counters.x + round) / frame_count +#define UPDATE_GPU_STAT(x) s_stats.host_##x = (stats.x + round) / frame_count + + UPDATE_COUNTER(num_reads); + UPDATE_COUNTER(num_writes); + UPDATE_COUNTER(num_copies); + UPDATE_COUNTER(num_vertices); + UPDATE_COUNTER(num_primitives); + + // UPDATE_COUNTER(num_read_texture_updates); + // UPDATE_COUNTER(num_ubo_updates); + + UPDATE_GPU_STAT(buffer_streamed); + UPDATE_GPU_STAT(num_draws); + UPDATE_GPU_STAT(num_barriers); + UPDATE_GPU_STAT(num_render_passes); + UPDATE_GPU_STAT(num_copies); + UPDATE_GPU_STAT(num_downloads); + UPDATE_GPU_STAT(num_uploads); + +#undef UPDATE_GPU_STAT +#undef UPDATE_COUNTER + + ResetStatistics(); +} diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index f6e44e311..babbdd49c 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -2,91 +2,182 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "common/heap_array.h" -#include "common/threading.h" -#include "gpu_types.h" -#include -#include -#include -#include -#include -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4324) // warning C4324: 'GPUBackend': structure was padded due to alignment specifier -#endif +#include "gpu_types.h" + +#include "util/gpu_texture.h" + +#include + +class Error; +class SmallStringBase; + +class GPUFramebuffer; +class GPUPipeline; + +struct Settings; +class StateWrapper; + +// DESIGN NOTE: Only static methods should be called on the CPU thread. +// You specifically don't have a global pointer available for this reason. 
class GPUBackend { +public: + static GPUThreadCommand* NewClearVRAMCommand(); + static GPUBackendDoStateCommand* NewDoStateCommand(); + static GPUThreadCommand* NewClearDisplayCommand(); + static GPUBackendUpdateDisplayCommand* NewUpdateDisplayCommand(); + static GPUThreadCommand* NewClearCacheCommand(); + static GPUThreadCommand* NewBufferSwappedCommand(); + static GPUThreadCommand* NewFlushRenderCommand(); + static GPUThreadCommand* NewUpdateResolutionScaleCommand(); + static GPUBackendReadVRAMCommand* NewReadVRAMCommand(); + static GPUBackendFillVRAMCommand* NewFillVRAMCommand(); + static GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); + static GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); + static GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); + static GPUBackendUpdateCLUTCommand* NewUpdateCLUTCommand(); + static GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); + static GPUBackendDrawPrecisePolygonCommand* NewDrawPrecisePolygonCommand(u32 num_vertices); + static GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); + static GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + static void PushCommand(GPUThreadCommand* cmd); + static void PushCommandAndWakeThread(GPUThreadCommand* cmd); + static void PushCommandAndSync(GPUThreadCommand* cmd, bool spin); + + static bool IsUsingHardwareBackend(); + + static std::unique_ptr CreateHardwareBackend(); + static std::unique_ptr CreateSoftwareBackend(); + + static bool BeginQueueFrame(); + static void WaitForOneQueuedFrame(); + + static bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i draw_rect, bool postfx, + std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format); + + static std::tuple GetLastDisplaySourceSize(); + + static void GetStatsString(SmallStringBase& str); + static void GetMemoryStatsString(SmallStringBase& str); + static void ResetStatistics(); + static void UpdateStatistics(u32 frame_count); + 
public: GPUBackend(); virtual ~GPUBackend(); - ALWAYS_INLINE const Threading::Thread* GetThread() const { return m_use_gpu_thread ? &m_gpu_thread : nullptr; } + ALWAYS_INLINE const void* GetDisplayTextureHandle() const { return m_display_texture; } + ALWAYS_INLINE s32 GetDisplayWidth() const { return m_display_width; } + ALWAYS_INLINE s32 GetDisplayHeight() const { return m_display_height; } + ALWAYS_INLINE s32 GetDisplayViewWidth() const { return m_display_texture_view_width; } + ALWAYS_INLINE s32 GetDisplayViewHeight() const { return m_display_texture_view_height; } + ALWAYS_INLINE float GetDisplayAspectRatio() const { return m_display_aspect_ratio; } + ALWAYS_INLINE bool HasDisplayTexture() const { return static_cast(m_display_texture); } - virtual bool Initialize(bool force_thread); - virtual void UpdateSettings(); - virtual void Reset(); - virtual void Shutdown(); + virtual bool Initialize(bool clear_vram, Error* error); - GPUBackendFillVRAMCommand* NewFillVRAMCommand(); - GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); - GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); - GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); - GPUBackendUpdateCLUTCommand* NewUpdateCLUTCommand(); - GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); - GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); - GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + virtual void ClearVRAM() = 0; + virtual bool DoState(GPUTexture** host_texture, bool is_reading, bool update_display) = 0; - void PushCommand(GPUBackendCommand* cmd); - void Sync(bool allow_sleep); - - /// Processes all pending GPU commands. 
- void RunGPULoop(); - -protected: - void* AllocateCommand(GPUBackendCommandType command, u32 size); - u32 GetPendingCommandSize() const; - void WakeGPUThread(); - void StartGPUThread(); - void StopGPUThread(); - - virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0; - virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, - GPUBackendCommandParameters params) = 0; + virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0; + virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params); + virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, - GPUBackendCommandParameters params) = 0; + GPUBackendCommandParameters params); + virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0; - virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0; + virtual void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) = 0; + virtual void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) = 0; virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; - virtual void FlushRender() = 0; + virtual void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) = 0; virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; + virtual void ClearCache() = 0; + virtual void OnBufferSwapped() = 0; - void HandleCommand(const GPUBackendCommand* cmd); + virtual void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) = 0; - Threading::KernelSemaphore m_sync_semaphore; - std::atomic_bool m_gpu_thread_sleeping{false}; - std::atomic_bool m_gpu_loop_done{false}; - Threading::Thread m_gpu_thread; - bool m_use_gpu_thread = false; + virtual void UpdateSettings(const Settings& old_settings); - std::mutex 
m_sync_mutex; - std::condition_variable m_sync_cpu_thread_cv; - std::condition_variable m_wake_gpu_thread_cv; - bool m_sync_done = false; + /// Returns the effective display resolution of the GPU. + virtual std::tuple GetEffectiveDisplayResolution(bool scaled = true) const = 0; + /// Returns the full display resolution of the GPU, including padding. + virtual std::tuple GetFullDisplayResolution(bool scaled = true) const = 0; + + /// TODO: Updates the resolution scale when it's set to automatic. + virtual void UpdateResolutionScale() = 0; + + /// Ensures all pending draws are flushed to the host GPU. + virtual void FlushRender() = 0; + + // Graphics API state reset/restore - call when drawing the UI etc. + // TODO: replace with "invalidate cached state" + virtual void RestoreDeviceContext() = 0; + + void HandleCommand(const GPUThreadCommand* cmd); + + /// Draws the current display texture, with any post-processing. + bool PresentDisplay(); + +protected: enum : u32 { - COMMAND_QUEUE_SIZE = 4 * 1024 * 1024, - THRESHOLD_TO_WAKE_GPU = 256 + DEINTERLACE_BUFFER_COUNT = 4, }; - FixedHeapArray m_command_fifo_data; - alignas(HOST_CACHE_LINE_SIZE) std::atomic m_command_fifo_read_ptr{0}; - alignas(HOST_CACHE_LINE_SIZE) std::atomic m_command_fifo_write_ptr{0}; -}; + /// Helper function for computing the draw rectangle in a larger window. + GSVector4i CalculateDrawRect(s32 window_width, s32 window_height, bool apply_aspect_ratio = true) const; -#ifdef _MSC_VER -#pragma warning(pop) -#endif + /// Helper function to save current display texture to PNG. + bool WriteDisplayTextureToFile(std::string filename, bool compress_on_thread = false); + + /// Renders the display, optionally with postprocessing to the specified image. + void HandleRenderScreenshotToBuffer(const GPUThreadRenderScreenshotToBufferCommand* cmd); + + /// Helper function to save screenshot to PNG. 
+ bool RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, + bool show_osd_message); + + bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing); + + void ClearDisplay(); + void ClearDisplayTexture(); + void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_texture, s32 view_x, s32 view_y, s32 view_width, + s32 view_height); + + bool RenderDisplay(GPUTexture* target, const GSVector4i draw_rect, bool postfx); + + bool Deinterlace(u32 field, u32 line_skip); + bool DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip); + bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); + void DestroyDeinterlaceTextures(); + bool ApplyChromaSmoothing(); + + s32 m_display_width = 0; + s32 m_display_height = 0; + s32 m_display_origin_left = 0; + s32 m_display_origin_top = 0; + s32 m_display_vram_width = 0; + s32 m_display_vram_height = 0; + float m_display_aspect_ratio = 1.0f; + + u32 m_current_deinterlace_buffer = 0; + std::unique_ptr m_deinterlace_pipeline; + std::unique_ptr m_deinterlace_extract_pipeline; + std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; + std::unique_ptr m_deinterlace_texture; + + std::unique_ptr m_chroma_smoothing_pipeline; + std::unique_ptr m_chroma_smoothing_texture; + + std::unique_ptr m_display_pipeline; + GPUTexture* m_display_texture = nullptr; + GPUTexture* m_display_depth_buffer = nullptr; + s32 m_display_texture_view_x = 0; + s32 m_display_texture_view_y = 0; + s32 m_display_texture_view_width = 0; + s32 m_display_texture_view_height = 0; +}; diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 3955d813e..379a3797d 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -1,13 +1,18 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR 
CC-BY-NC-ND-4.0) -#include "common/assert.h" -#include "common/log.h" -#include "common/string_util.h" +#include "cpu_pgxp.h" #include "gpu.h" +#include "gpu_backend.h" #include "interrupt_controller.h" #include "system.h" #include "texture_replacements.h" + +#include "common/assert.h" +#include "common/gsvector_formatter.h" +#include "common/log.h" +#include "common/string_util.h" + Log_SetChannel(GPU); #define CHECK_COMMAND_SIZE(num_words) \ @@ -90,7 +95,7 @@ void GPU::TryExecuteCommands() // drop terminator m_fifo.RemoveOne(); DEBUG_LOG("Drawing poly-line with {} vertices", GetPolyLineVertexCount()); - DispatchRenderCommand(); + FinishPolyline(); m_blit_buffer.clear(); EndCommand(); continue; @@ -197,8 +202,8 @@ bool GPU::HandleNOPCommand() bool GPU::HandleClearCacheCommand() { DEBUG_LOG("GP0 clear cache"); - m_draw_mode.SetTexturePageChanged(); InvalidateCLUT(); + GPUBackend::PushCommand(GPUBackend::NewClearCacheCommand()); m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); @@ -245,8 +250,6 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand() DEBUG_LOG("Set drawing area top-left: ({}, {})", left, top); if (m_drawing_area.left != left || m_drawing_area.top != top) { - FlushRender(); - m_drawing_area.left = left; m_drawing_area.top = top; m_drawing_area_changed = true; @@ -267,8 +270,6 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand() DEBUG_LOG("Set drawing area bottom-right: ({}, {})", m_drawing_area.right, m_drawing_area.bottom); if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) { - FlushRender(); - m_drawing_area.right = right; m_drawing_area.bottom = bottom; m_drawing_area_changed = true; @@ -288,8 +289,6 @@ bool GPU::HandleSetDrawingOffsetCommand() DEBUG_LOG("Set drawing offset ({}, {})", m_drawing_offset.x, m_drawing_offset.y); if (m_drawing_offset.x != x || m_drawing_offset.y != y) { - FlushRender(); - m_drawing_offset.x = x; m_drawing_offset.y = y; } @@ -305,11 +304,7 @@ bool GPU::HandleSetMaskBitCommand() constexpr u32 
gpustat_mask = (1 << 11) | (1 << 12); const u32 gpustat_bits = (param & 0x03) << 11; - if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits) - { - FlushRender(); - m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; - } + m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; DEBUG_LOG("Set mask bit {} {}", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); @@ -318,6 +313,36 @@ bool GPU::HandleSetMaskBitCommand() return true; } +void GPU::PrepareForDraw() +{ + if (m_drawing_area_changed) + { + m_drawing_area_changed = false; + GPUBackendSetDrawingAreaCommand* cmd = GPUBackend::NewSetDrawingAreaCommand(); + cmd->new_area = m_drawing_area; + GSVector4i::store(cmd->new_clamped_area, m_clamped_drawing_area); + GPUBackend::PushCommand(cmd); + } +} + +void GPU::FillBackendCommandParameters(GPUBackendCommand* cmd) const +{ + cmd->params.bits = 0; + cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; + cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled(); +} + +void GPU::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const +{ + FillBackendCommandParameters(cmd); + cmd->rc.bits = rc.bits; + cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; + cmd->palette.bits = m_draw_mode.palette_reg.bits; + cmd->window = m_draw_mode.texture_window; +} + bool GPU::HandleRenderPolygonCommand() { const GPURenderCommand rc{FifoPeek(0)}; @@ -343,6 +368,7 @@ bool GPU::HandleRenderPolygonCommand() words_per_vertex, setup_ticks); // set draw state up + // TODO: Get rid of SetTexturePalette() and just fill it as needed if (rc.texture_enable) { const u16 texpage_attribute = Truncate16((rc.shading_enable ? 
FifoPeek(5) : FifoPeek(4)) >> 16); @@ -352,12 +378,219 @@ bool GPU::HandleRenderPolygonCommand() UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg); } - m_counters.num_vertices += num_vertices; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + + if (g_gpu_settings.gpu_pgxp_enable) + { + GPUBackendDrawPrecisePolygonCommand* cmd = GPUBackend::NewDrawPrecisePolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + + const u32 first_color = rc.color_for_first_vertex; + const bool shaded = rc.shading_enable; + const bool textured = rc.texture_enable; + bool valid_w = g_gpu_settings.gpu_pgxp_texture_correction; + for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPrecisePolygonCommand::Vertex* vert = &cmd->vertices[i]; + vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + vert->native_x = m_drawing_offset.x + vp.x; + vert->native_y = m_drawing_offset.y + vp.y; + vert->texcoord = textured ? Truncate16(FifoPop()) : 0; + + valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->native_x, vert->native_y, + m_drawing_offset.x, m_drawing_offset.y, &vert->x, &vert->y, &vert->w); + } + + cmd->valid_w = valid_w; + if (!valid_w) + { + if (g_settings.gpu_pgxp_disable_2d) + { + // NOTE: This reads uninitialized data, but it's okay, it doesn't get used. + for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPrecisePolygonCommand::Vertex& v = cmd->vertices[i]; + GSVector2::store(&v.x, GSVector2(GSVector2i::load(&v.native_x))); + v.w = 1.0f; + } + } + else + { + for (u32 i = 0; i < num_vertices; i++) + cmd->vertices[i].w = 1.0f; + } + } + + // Cull polygons which are too large. 
+ const GSVector2 v0f = GSVector2::load(&cmd->vertices[0].x); + const GSVector2 v1f = GSVector2::load(&cmd->vertices[1].x); + const GSVector2 v2f = GSVector2::load(&cmd->vertices[2].x); + const GSVector2 min_pos_12 = v1f.min(v2f); + const GSVector2 max_pos_12 = v1f.max(v2f); + const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + const bool first_tri_culled = + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_012.rintersects(m_clamped_drawing_area)); + if (first_tri_culled) + { + // TODO: GPU events... somehow. + DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x, + cmd->vertices[0].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y, + cmd->vertices[2].native_x, cmd->vertices[2].native_y); + + if (!rc.quad_polygon) + { + EndCommand(); + return true; + } + } + else + { + AddDrawTriangleTicks(GSVector2i::load(&cmd->vertices[0].native_x), GSVector2i::load(&cmd->vertices[1].native_x), + GSVector2i::load(&cmd->vertices[2].native_x), rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + } + + // quads + if (rc.quad_polygon) + { + const GSVector2 v3f = GSVector2::load(&cmd->vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + + // Cull polygons which are too large. 
+ const bool second_tri_culled = + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_123.rintersects(m_clamped_drawing_area)); + if (second_tri_culled) + { + DEBUG_LOG("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", + cmd->vertices[2].native_x, cmd->vertices[2].native_y, cmd->vertices[1].native_x, + cmd->vertices[1].native_y, cmd->vertices[0].native_x, cmd->vertices[0].native_y); + + if (first_tri_culled) + { + EndCommand(); + return true; + } + + // Remove second part of quad. + cmd->num_vertices = 3; + } + else + { + AddDrawTriangleTicks(GSVector2i::load(&cmd->vertices[2].native_x), GSVector2i::load(&cmd->vertices[1].native_x), + GSVector2i::load(&cmd->vertices[3].native_x), rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + + // If first part was culled, move the second part to the first. + if (first_tri_culled) + { + std::memcpy(&cmd->vertices[0], &cmd->vertices[2], sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + std::memcpy(&cmd->vertices[2], &cmd->vertices[3], sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); + cmd->num_vertices = 3; + } + } + } + + GPUBackend::PushCommand(cmd); + } + else + { + GPUBackendDrawPolygonCommand* cmd = GPUBackend::NewDrawPolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + + const u32 first_color = rc.color_for_first_vertex; + const bool shaded = rc.shading_enable; + const bool textured = rc.texture_enable; + for (u32 i = 0; i < num_vertices; i++) + { + GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; + vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + vert->x = m_drawing_offset.x + vp.x; + vert->y = m_drawing_offset.y + vp.y; + vert->texcoord = textured ? Truncate16(FifoPop()) : 0; + } + + // Cull polygons which are too large. 
+ const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); + const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].x); + const GSVector2i v2 = GSVector2i::load(&cmd->vertices[2].x); + const GSVector2i min_pos_12 = v1.min_i32(v2); + const GSVector2i max_pos_12 = v1.max_i32(v2); + const GSVector4i draw_rect_012 = + GSVector4i(min_pos_12.min_i32(v0)).upl64(GSVector4i(max_pos_12.max_i32(v0))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const bool first_tri_culled = + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_012.rintersects(m_clamped_drawing_area)); + if (first_tri_culled) + { + DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); + + if (!rc.quad_polygon) + { + EndCommand(); + return true; + } + + } + else + { + AddDrawTriangleTicks(v0, v1, v2, rc.shading_enable, rc.texture_enable, rc.transparency_enable); + } + + // quads + if (rc.quad_polygon) + { + const GSVector2i v3 = GSVector2i::load(&cmd->vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_i32(v3)) + .upl64(GSVector4i(max_pos_12.max_i32(v3))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + + // Cull polygons which are too large. + const bool second_tri_culled = + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || + !draw_rect_123.rintersects(m_clamped_drawing_area)); + if (second_tri_culled) + { + DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, + cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y); + + if (first_tri_culled) + { + EndCommand(); + return true; + } + + // Remove second part of quad. 
+ cmd->num_vertices = 3; + } + else + { + AddDrawTriangleTicks(v2, v1, v3, rc.shading_enable, rc.texture_enable, rc.transparency_enable); + + // If first part was culled, move the second part to the first. + if (first_tri_culled) + { + std::memcpy(&cmd->vertices[0], &cmd->vertices[2], sizeof(GPUBackendDrawPolygonCommand::Vertex)); + std::memcpy(&cmd->vertices[2], &cmd->vertices[3], sizeof(GPUBackendDrawPolygonCommand::Vertex)); + cmd->num_vertices = 3; + } + } + } + + GPUBackend::PushCommand(cmd); + } + EndCommand(); return true; } @@ -386,12 +619,65 @@ bool GPU::HandleRenderRectangleCommand() rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", total_words, setup_ticks); - m_counters.num_vertices++; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + GPUBackendDrawRectangleCommand* cmd = GPUBackend::NewDrawRectangleCommand(); + FillDrawCommand(cmd, rc); + cmd->color = rc.color_for_first_vertex; + + const GPUVertexPosition vp{FifoPop()}; + cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); + cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); + + if (rc.texture_enable) + { + const u32 texcoord_and_palette = FifoPop(); + cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); + cmd->texcoord = Truncate16(texcoord_and_palette); + } + else + { + cmd->palette.bits = 0; + cmd->texcoord = 0; + } + + switch (rc.rectangle_size) + { + case GPUDrawRectangleSize::R1x1: + cmd->width = 1; + cmd->height = 1; + break; + case GPUDrawRectangleSize::R8x8: + cmd->width = 8; + cmd->height = 8; + break; + case GPUDrawRectangleSize::R16x16: + cmd->width = 16; + cmd->height = 16; + break; + default: + { + const u32 width_and_height = FifoPop(); + cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); + cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); + } + 
break; + } + + const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + if (clamped_rect.rempty()) [[unlikely]] + { + DEBUG_LOG("Culling off-screen rectangle {}", rect); + EndCommand(); + return true; + } + + AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); + + GPUBackend::PushCommand(cmd); EndCommand(); return true; } @@ -408,12 +694,55 @@ bool GPU::HandleRenderLineCommand() TRACE_LOG("Render {} {} line ({} total words)", rc.transparency_enable ? "semi-transparent" : "opaque", rc.shading_enable ? "shaded" : "monochrome", total_words); - m_counters.num_vertices += 2; - m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + PrepareForDraw(); + GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand(2); + FillDrawCommand(cmd, rc); + cmd->palette.bits = 0; + + if (rc.shading_enable) + { + cmd->vertices[0].color = rc.color_for_first_vertex; + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + else + { + cmd->vertices[0].color = rc.color_for_first_vertex; + cmd->vertices[1].color = rc.color_for_first_vertex; + + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + + const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x); + const GSVector4i v1 = 
GSVector4i::loadl(&cmd->vertices[1].x); + const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y); + EndCommand(); + return true; + } + + AddDrawLineTicks(clamped_rect, rc.shading_enable); + GPUBackend::PushCommand(cmd); EndCommand(); return true; } @@ -450,6 +779,48 @@ bool GPU::HandleRenderPolyLineCommand() return true; } +void GPU::FinishPolyline() +{ + PrepareForDraw(); + + const u32 num_vertices = GetPolyLineVertexCount(); + + GPUBackendDrawLineCommand* cmd = GPUBackend::NewDrawLineCommand(num_vertices); + FillDrawCommand(cmd, m_render_command); + + u32 buffer_pos = 0; + const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; + cmd->vertices[0].x = start_vp.x + m_drawing_offset.x; + cmd->vertices[0].y = start_vp.y + m_drawing_offset.y; + cmd->vertices[0].color = m_render_command.color_for_first_vertex; + + const bool shaded = m_render_command.shading_enable; + for (u32 i = 1; i < num_vertices; i++) + { + cmd->vertices[i].color = + shaded ?
(m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; + const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; + cmd->vertices[i].x = m_drawing_offset.x + vp.x; + cmd->vertices[i].y = m_drawing_offset.y + vp.y; + + const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[i - 1].x); // start of the current segment + const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[i].x); // end of the current segment + const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + { + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[i - 1].x, cmd->vertices[i - 1].y, + cmd->vertices[i].x, cmd->vertices[i].y); + return; + } + else + { + AddDrawLineTicks(clamped_rect, m_render_command.shading_enable); + } + } +} + bool GPU::HandleFillRectangleCommand() { CHECK_COMMAND_SIZE(3); @@ -457,8 +828,6 @@ bool GPU::HandleFillRectangleCommand() if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - FlushRender(); - const u32 color = FifoPop() & 0x00FFFFFF; const u32 dst_x = FifoPeek() & 0x3F0; const u32 dst_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; @@ -468,9 +837,17 @@ bool GPU::HandleFillRectangleCommand() DEBUG_LOG("Fill VRAM rectangle offset=({},{}), size=({},{})", dst_x, dst_y, width, height); if (width > 0 && height > 0) - FillVRAM(dst_x, dst_y, width, height, color); + { + GPUBackendFillVRAMCommand* cmd = GPUBackend::NewFillVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->x = static_cast(dst_x); + cmd->y = static_cast(dst_y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + cmd->color = color; + GPUBackend::PushCommand(cmd); + } - m_counters.num_writes++; AddCommandTicks(46 + ((width / 8) + 9) * height); EndCommand(); return true; @@ -520,8 +897,6 @@ void GPU::FinishVRAMWrite() if
(IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - FlushRender(); - if (m_blit_remaining_words == 0) { if (g_settings.debugging.dump_cpu_to_vram_copies) @@ -554,18 +929,18 @@ void GPU::FinishVRAMWrite() const u8* blit_ptr = reinterpret_cast(m_blit_buffer.data()); if (transferred_full_rows > 0) { - UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, transferred_full_rows, blit_ptr, - m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); + UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, static_cast(transferred_full_rows), + blit_ptr, m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); blit_ptr += (ZeroExtend32(m_vram_transfer.width) * transferred_full_rows) * sizeof(u16); } if (transferred_width_last_row > 0) { - UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y + transferred_full_rows, transferred_width_last_row, 1, blit_ptr, - m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); + UpdateVRAM(m_vram_transfer.x, static_cast(m_vram_transfer.y + transferred_full_rows), + static_cast(transferred_width_last_row), 1, blit_ptr, m_GPUSTAT.set_mask_while_drawing, + m_GPUSTAT.check_mask_before_draw); } } - m_counters.num_writes++; m_blit_buffer.clear(); m_vram_transfer = {}; m_blitter_state = BlitterState::Idle; @@ -585,9 +960,6 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() m_vram_transfer.width, m_vram_transfer.height); DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0); - // all rendering should be done first... 
- FlushRender(); - // ensure VRAM shadow is up to date ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); @@ -599,7 +971,6 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() } // switch to pixel-by-pixel read state - m_counters.num_reads++; m_blitter_state = BlitterState::ReadingVRAM; m_command_total_words = 0; return true; @@ -625,10 +996,15 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand() width == 0 || height == 0 || (src_x == dst_x && src_y == dst_y && !m_GPUSTAT.set_mask_while_drawing); if (!skip_copy) { - m_counters.num_copies++; - - FlushRender(); - CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + GPUBackendCopyVRAMCommand* cmd = GPUBackend::NewCopyVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->src_x = static_cast(src_x); + cmd->src_y = static_cast(src_y); + cmd->dst_x = static_cast(dst_x); + cmd->dst_y = static_cast(dst_y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + GPUBackend::PushCommand(cmd); } AddCommandTicks(width * height * 2); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 50e6bdda6..a24899d8d 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -4,8 +4,8 @@ #include "gpu_hw.h" #include "cpu_core.h" #include "cpu_pgxp.h" +#include "gpu.h" #include "gpu_hw_shadergen.h" -#include "gpu_sw_backend.h" #include "host.h" #include "settings.h" #include "system.h" @@ -16,6 +16,7 @@ #include "common/align.h" #include "common/assert.h" +#include "common/error.h" #include "common/gsvector_formatter.h" #include "common/log.h" #include "common/scoped_guard.h" @@ -31,6 +32,8 @@ Log_SetChannel(GPU_HW); +// TODO FIXME: CMR2.0 state 1 has semitransparent strip + // TODO: instead of full state restore, only restore what changed static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8; @@ -62,7 +65,7 @@ ALWAYS_INLINE static u32 GetMaxResolutionScale() ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale) { - 
u32 scale = std::min(resolution_scale, g_settings.gpu_downsample_scale); + u32 scale = std::min(resolution_scale, g_gpu_settings.gpu_downsample_scale); while ((resolution_scale % scale) != 0) scale--; return scale; @@ -71,19 +74,21 @@ ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale) ALWAYS_INLINE static bool ShouldClampUVs(GPUTextureFilter texture_filter) { // We only need UV limits if PGXP is enabled, or texture filtering is enabled. - return g_settings.gpu_pgxp_enable || texture_filter != GPUTextureFilter::Nearest; + return g_gpu_settings.gpu_pgxp_enable || texture_filter != GPUTextureFilter::Nearest; } ALWAYS_INLINE static bool ShouldAllowSpriteMode(u8 resolution_scale, GPUTextureFilter texture_filter, GPUTextureFilter sprite_texture_filter) { // Use sprite shaders/mode when texcoord rounding is forced, or if the filters are different. - return (sprite_texture_filter != texture_filter || (resolution_scale > 1 && g_settings.gpu_force_round_texcoords)); + return (sprite_texture_filter != texture_filter || + (resolution_scale > 1 && g_gpu_settings.gpu_force_round_texcoords)); } ALWAYS_INLINE static bool ShouldDisableColorPerspective() { - return g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_texture_correction && !g_settings.gpu_pgxp_color_correction; + return g_gpu_settings.gpu_pgxp_enable && g_gpu_settings.gpu_pgxp_texture_correction && + !g_gpu_settings.gpu_pgxp_color_correction; } /// Returns true if the specified texture filtering mode requires dual-source blending. 
@@ -157,7 +162,7 @@ private: }; } // namespace -GPU_HW::GPU_HW() : GPU() +GPU_HW::GPU_HW() : GPUBackend() { #ifdef _DEBUG s_draw_number = 0; @@ -166,11 +171,15 @@ GPU_HW::GPU_HW() : GPU() GPU_HW::~GPU_HW() { - if (m_sw_renderer) - { - m_sw_renderer->Shutdown(); - m_sw_renderer.reset(); - } + DestroyBuffers(); + DestroyPipelines(); + + // TODO FIXME + // if (m_sw_renderer) + //{ + // m_sw_renderer->Shutdown(); + // m_sw_renderer.reset(); + //} } ALWAYS_INLINE void GPU_HW::BatchVertex::Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, @@ -203,34 +212,24 @@ ALWAYS_INLINE void GPU_HW::BatchVertex::SetUVLimits(u32 min_u, u32 max_u, u32 mi uv_limits = PackUVLimits(min_u, max_u, min_v, max_v); } -const Threading::Thread* GPU_HW::GetSWThread() const +bool GPU_HW::Initialize(bool clear_vram, Error* error) { - return m_sw_renderer ? m_sw_renderer->GetThread() : nullptr; -} - -bool GPU_HW::IsHardwareRenderer() const -{ - return true; -} - -bool GPU_HW::Initialize() -{ - if (!GPU::Initialize()) + if (!GPUBackend::Initialize(clear_vram, error)) return false; const GPUDevice::Features features = g_gpu_device->GetFeatures(); m_resolution_scale = Truncate8(CalculateResolutionScale()); - m_multisamples = Truncate8(std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); - m_texture_filtering = g_settings.gpu_texture_filter; - m_sprite_texture_filtering = g_settings.gpu_sprite_texture_filter; - m_line_detect_mode = (m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; + m_multisamples = Truncate8(std::min(g_gpu_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); + m_texture_filtering = g_gpu_settings.gpu_texture_filter; + m_sprite_texture_filtering = g_gpu_settings.gpu_sprite_texture_filter; + m_line_detect_mode = (m_resolution_scale > 1) ? 
g_gpu_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; m_downsample_mode = GetDownsampleMode(m_resolution_scale); - m_wireframe_mode = g_settings.gpu_wireframe_mode; + m_wireframe_mode = g_gpu_settings.gpu_wireframe_mode; m_supports_dual_source_blend = features.dual_source_blend; m_supports_framebuffer_fetch = features.framebuffer_fetch; - m_true_color = g_settings.gpu_true_color; - m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); + m_true_color = g_gpu_settings.gpu_true_color; + m_pgxp_depth_buffer = g_gpu_settings.UsingPGXPDepthBuffer(); m_clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering); @@ -243,56 +242,58 @@ bool GPU_HW::Initialize() if (!CompilePipelines()) { - ERROR_LOG("Failed to compile pipelines"); + Error::SetStringView(error, "Failed to compile pipelines"); return false; } if (!CreateBuffers()) { - ERROR_LOG("Failed to create framebuffer"); + Error::SetStringView(error, "Failed to create framebuffer"); return false; } UpdateDownsamplingLevels(); - RestoreDeviceContext(); + + // If we're not initializing VRAM, need to upload it here. Implies RestoreDeviceContext(). + if (!clear_vram) + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); + else + RestoreDeviceContext(); + return true; } -void GPU_HW::Reset(bool clear_vram) +void GPU_HW::ClearVRAM() { + // Don't need to finish the current draw. 
if (m_batch_vertex_ptr) UnmapGPUBuffer(0, 0); - GPU::Reset(clear_vram); + m_texpage_dirty = false; + m_compute_uv_range = m_clamp_uvs; - if (m_sw_renderer) - m_sw_renderer->Reset(); + // if (m_sw_renderer) + // m_sw_renderer->Reset(clear_vram); - m_batch = {}; - m_batch_ubo_data = {}; - m_batch_ubo_dirty = true; - m_current_depth = 1; - SetClampedDrawingArea(); - - if (clear_vram) - ClearFramebuffer(); + ClearFramebuffer(); } -bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) +bool GPU_HW::DoState(GPUTexture** host_texture, bool is_reading, bool update_display) { +#if 0 // Need to download local VRAM copy before calling the base class, because it serializes this. if (m_sw_renderer) m_sw_renderer->Sync(true); else if (sw.IsWriting() && !host_texture) +#else + if (!is_reading && !host_texture) ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - - if (!GPU::DoState(sw, host_texture, update_display)) - return false; +#endif if (host_texture) { GPUTexture* tex = *host_texture; - if (sw.IsReading()) + if (is_reading) { if (tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() || tex->GetSamples() != m_vram_texture->GetSamples()) @@ -324,14 +325,14 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di tex->GetHeight()); } } - else if (sw.IsReading()) + else if (is_reading) { // Need to update the VRAM copy on the GPU with the state data. 
UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); } // invalidate the whole VRAM read texture when loading state - if (sw.IsReading()) + if (is_reading) { DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); ClearVRAMDirtyRectangle(); @@ -353,43 +354,42 @@ void GPU_HW::RestoreDeviceContext() void GPU_HW::UpdateSettings(const Settings& old_settings) { - GPU::UpdateSettings(old_settings); + GPUBackend::UpdateSettings(old_settings); const GPUDevice::Features features = g_gpu_device->GetFeatures(); const u8 resolution_scale = Truncate8(CalculateResolutionScale()); - const u8 multisamples = Truncate8(std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); + const u8 multisamples = Truncate8(std::min(g_gpu_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); const bool clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); - const bool framebuffer_changed = - (m_resolution_scale != resolution_scale || m_multisamples != multisamples || - (static_cast(m_vram_depth_texture) != (g_settings.UsingPGXPDepthBuffer() || !m_supports_framebuffer_fetch))); + const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || + (static_cast(m_vram_depth_texture) != + (g_gpu_settings.UsingPGXPDepthBuffer() || !m_supports_framebuffer_fetch))); const bool shaders_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || - m_true_color != g_settings.gpu_true_color || g_settings.gpu_debanding != old_settings.gpu_debanding || - (multisamples > 0 && g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) || - (resolution_scale > 1 && g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || - (resolution_scale > 1 && g_settings.gpu_texture_filter == GPUTextureFilter::Nearest && - g_settings.gpu_force_round_texcoords != 
old_settings.gpu_force_round_texcoords) || - m_texture_filtering != g_settings.gpu_texture_filter || - m_sprite_texture_filtering != g_settings.gpu_sprite_texture_filter || m_clamp_uvs != clamp_uvs || - (resolution_scale > 1 && (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || + m_true_color != g_gpu_settings.gpu_true_color || g_gpu_settings.gpu_debanding != old_settings.gpu_debanding || + (multisamples > 0 && g_gpu_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) || + (resolution_scale > 1 && g_gpu_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || + (resolution_scale > 1 && g_gpu_settings.gpu_texture_filter == GPUTextureFilter::Nearest && + g_gpu_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords) || + m_texture_filtering != g_gpu_settings.gpu_texture_filter || + m_sprite_texture_filtering != g_gpu_settings.gpu_sprite_texture_filter || m_clamp_uvs != clamp_uvs || + (resolution_scale > 1 && (g_gpu_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || (m_downsample_mode == GPUDownsampleMode::Box && - g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale))) || - (features.geometry_shaders && g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode) || - m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || + g_gpu_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale))) || + (features.geometry_shaders && g_gpu_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode) || + m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer() || (features.noperspective_interpolation && ShouldDisableColorPerspective() != old_settings.gpu_pgxp_color_correction) || - m_allow_sprite_mode != - ShouldAllowSpriteMode(m_resolution_scale, g_settings.gpu_texture_filter, g_settings.gpu_sprite_texture_filter)); + m_allow_sprite_mode != ShouldAllowSpriteMode(m_resolution_scale, g_gpu_settings.gpu_texture_filter, + 
g_gpu_settings.gpu_sprite_texture_filter)); if (m_resolution_scale != resolution_scale) { Host::AddIconOSDMessage( "ResolutionScaleChanged", ICON_FA_PAINT_BRUSH, fmt::format(TRANSLATE_FS("GPU_HW", "Resolution scale set to {0}x (display {1}x{2}, VRAM {3}x{4})"), - resolution_scale, m_crtc_state.display_vram_width * resolution_scale, - resolution_scale * m_crtc_state.display_vram_height, VRAM_WIDTH * resolution_scale, - VRAM_HEIGHT * resolution_scale), + resolution_scale, m_display_width * resolution_scale, m_display_height * resolution_scale, + VRAM_WIDTH * resolution_scale, VRAM_HEIGHT * resolution_scale), Host::OSD_INFO_DURATION); } @@ -421,12 +421,12 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_resolution_scale = resolution_scale; m_multisamples = multisamples; - m_texture_filtering = g_settings.gpu_texture_filter; - m_sprite_texture_filtering = g_settings.gpu_sprite_texture_filter; - m_line_detect_mode = (m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; + m_texture_filtering = g_gpu_settings.gpu_texture_filter; + m_sprite_texture_filtering = g_gpu_settings.gpu_sprite_texture_filter; + m_line_detect_mode = (m_resolution_scale > 1) ? 
g_gpu_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; m_downsample_mode = GetDownsampleMode(resolution_scale); - m_wireframe_mode = g_settings.gpu_wireframe_mode; - m_true_color = g_settings.gpu_true_color; + m_wireframe_mode = g_gpu_settings.gpu_wireframe_mode; + m_true_color = g_gpu_settings.gpu_true_color; m_clamp_uvs = clamp_uvs; m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(resolution_scale, m_texture_filtering, m_sprite_texture_filtering); @@ -434,9 +434,9 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) CheckSettings(); - if (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()) + if (m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer()) { - m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); + m_pgxp_depth_buffer = g_gpu_settings.UsingPGXPDepthBuffer(); m_batch.use_depth_buffer = false; m_depth_was_copied = false; @@ -473,9 +473,8 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) UpdateDownsamplingLevels(); RestoreDeviceContext(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, {}); UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); } if (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || @@ -490,11 +489,11 @@ void GPU_HW::CheckSettings() { const GPUDevice::Features features = g_gpu_device->GetFeatures(); - if (m_multisamples != g_settings.gpu_multisamples) + if (m_multisamples != g_gpu_settings.gpu_multisamples) { Host::AddIconOSDMessage("MSAAUnsupported", ICON_FA_EXCLAMATION_TRIANGLE, fmt::format(TRANSLATE_FS("GPU_HW", "{}x MSAA is not supported, using {}x instead."), - g_settings.gpu_multisamples, m_multisamples), + g_gpu_settings.gpu_multisamples, m_multisamples), Host::OSD_CRITICAL_ERROR_DURATION); } else @@ -502,7 +501,7 @@ void GPU_HW::CheckSettings() Host::RemoveKeyedOSDMessage("MSAAUnsupported"); } - if (g_settings.gpu_per_sample_shading && !features.per_sample_shading) + if 
(g_gpu_settings.gpu_per_sample_shading && !features.per_sample_shading) { Host::AddIconOSDMessage("SSAAUnsupported", ICON_FA_EXCLAMATION_TRIANGLE, TRANSLATE_STR("GPU_HW", "SSAA is not supported, using MSAA instead."), @@ -537,13 +536,13 @@ void GPU_HW::CheckSettings() { const u32 resolution_scale = CalculateResolutionScale(); const u32 box_downscale = GetBoxDownsampleScale(resolution_scale); - if (box_downscale != g_settings.gpu_downsample_scale || box_downscale == resolution_scale) + if (box_downscale != g_gpu_settings.gpu_downsample_scale || box_downscale == resolution_scale) { Host::AddIconOSDMessage( "BoxDownsampleUnsupported", ICON_FA_PAINT_BRUSH, fmt::format(TRANSLATE_FS( "GPU_HW", "Resolution scale {0}x is not divisible by downsample scale {1}x, using {2}x instead."), - resolution_scale, g_settings.gpu_downsample_scale, box_downscale), + resolution_scale, g_gpu_settings.gpu_downsample_scale, box_downscale), Host::OSD_WARNING_DURATION); } else @@ -551,7 +550,7 @@ void GPU_HW::CheckSettings() Host::RemoveKeyedOSDMessage("BoxDownsampleUnsupported"); } - if (box_downscale == g_settings.gpu_resolution_scale) + if (box_downscale == g_gpu_settings.gpu_resolution_scale) m_downsample_mode = GPUDownsampleMode::Disabled; } } @@ -561,21 +560,21 @@ u32 GPU_HW::CalculateResolutionScale() const const u32 max_resolution_scale = GetMaxResolutionScale(); u32 scale; - if (g_settings.gpu_resolution_scale != 0) + if (g_gpu_settings.gpu_resolution_scale != 0) { - scale = std::clamp(g_settings.gpu_resolution_scale, 1, max_resolution_scale); + scale = std::clamp(g_gpu_settings.gpu_resolution_scale, 1, max_resolution_scale); } else { // Auto scaling. When the system is starting and all borders crop is enabled, the registers are zero, and // display_height therefore is also zero. Use the default size from the region in this case. - const s32 height = (m_crtc_state.display_height != 0) ? - static_cast(m_crtc_state.display_height) : - (m_console_is_pal ? 
(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START) : - (NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START)); + const s32 height = (m_display_height != 0) ? static_cast(m_display_height) : + ((System::GetRegion() == ConsoleRegion::PAL) ? + (GPU::PAL_VERTICAL_ACTIVE_END - GPU::PAL_VERTICAL_ACTIVE_START) : + (GPU::NTSC_VERTICAL_ACTIVE_END - GPU::NTSC_VERTICAL_ACTIVE_START)); float widescreen_multiplier = 1.0f; - if (g_settings.gpu_widescreen_hack) + if (g_gpu_settings.gpu_widescreen_hack) { // Multiply scale factor by aspect ratio relative to 4:3, so that widescreen resolution is as close as possible to // native screen resolution. Otherwise, anamorphic stretching would result in increasingly less horizontal @@ -592,12 +591,12 @@ u32 GPU_HW::CalculateResolutionScale() const scale = static_cast(std::clamp(preferred_scale, 1, max_resolution_scale)); } - if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale)) + if (g_gpu_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale)) { const u32 new_scale = Common::PreviousPow2(scale); WARNING_LOG("Resolution scale {}x not supported for adaptive downsampling, using {}x", scale, new_scale); - if (g_settings.gpu_resolution_scale != 0) + if (g_gpu_settings.gpu_resolution_scale != 0) { Host::AddIconOSDMessage( "ResolutionNotPow2", ICON_FA_PAINT_BRUSH, @@ -615,15 +614,13 @@ u32 GPU_HW::CalculateResolutionScale() const void GPU_HW::UpdateResolutionScale() { - GPU::UpdateResolutionScale(); - if (CalculateResolutionScale() != m_resolution_scale) UpdateSettings(g_settings); } GPUDownsampleMode GPU_HW::GetDownsampleMode(u32 resolution_scale) const { - return (resolution_scale == 1) ? GPUDownsampleMode::Disabled : g_settings.gpu_downsample_mode; + return (resolution_scale == 1) ? 
GPUDownsampleMode::Disabled : g_gpu_settings.gpu_downsample_mode; } bool GPU_HW::IsUsingMultisampling() const @@ -631,15 +628,15 @@ bool GPU_HW::IsUsingMultisampling() const return m_multisamples > 1; } -bool GPU_HW::IsUsingDownsampling() const +bool GPU_HW::IsUsingDownsampling(const GPUBackendUpdateDisplayCommand* cmd) const { - return (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24); + return (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit); } void GPU_HW::SetFullVRAMDirtyRectangle() { m_vram_dirty_draw_rect = VRAM_SIZE_RECT; - m_draw_mode.SetTexturePageChanged(); + m_draw_mode.bits = INVALID_DRAW_MODE_BITS; } void GPU_HW::ClearVRAMDirtyRectangle() @@ -672,25 +669,25 @@ void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) { // the vram area can include the texture page, but the game can leave it as-is. in this case, set it as dirty so the // shadow texture is updated - if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && + if (m_draw_mode.bits != INVALID_DRAW_MODE_BITS && m_batch.texture_mode != BatchTextureMode::Disabled && (m_draw_mode.mode_reg.GetTexturePageRectangle().rintersects(update_rect) || (m_draw_mode.mode_reg.IsUsingPalette() && m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) { - m_draw_mode.SetTexturePageChanged(); + m_draw_mode.bits = INVALID_DRAW_MODE_BITS; } } -std::tuple GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) +std::tuple GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) const { const u32 scale = scaled ? 
m_resolution_scale : 1u; - return std::make_tuple(m_crtc_state.display_vram_width * scale, m_crtc_state.display_vram_height * scale); + return std::make_tuple(m_display_vram_width * scale, m_display_vram_height * scale); } -std::tuple GPU_HW::GetFullDisplayResolution(bool scaled /* = true */) +std::tuple GPU_HW::GetFullDisplayResolution(bool scaled /* = true */) const { const u32 scale = scaled ? m_resolution_scale : 1u; - return std::make_tuple(m_crtc_state.display_width * scale, m_crtc_state.display_height * scale); + return std::make_tuple(m_display_width * scale, m_display_height * scale); } void GPU_HW::PrintSettingsToLog() @@ -698,15 +695,15 @@ void GPU_HW::PrintSettingsToLog() INFO_LOG("Resolution Scale: {} ({}x{}), maximum {}", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, VRAM_HEIGHT * m_resolution_scale, GetMaxResolutionScale()); INFO_LOG("Multisampling: {}x{}", m_multisamples, - (g_settings.gpu_per_sample_shading && g_gpu_device->GetFeatures().per_sample_shading) ? + (g_gpu_settings.gpu_per_sample_shading && g_gpu_device->GetFeatures().per_sample_shading) ? " (per sample shading)" : ""); INFO_LOG("Dithering: {}{}", m_true_color ? "Disabled" : "Enabled", - (!m_true_color && g_settings.gpu_scaled_dithering) ? + (!m_true_color && g_gpu_settings.gpu_scaled_dithering) ? " (Scaled)" : - ((m_true_color && g_settings.gpu_debanding) ? " (Debanding)" : "")); + ((m_true_color && g_gpu_settings.gpu_debanding) ? " (Debanding)" : "")); INFO_LOG("Force round texture coordinates: {}", - (m_resolution_scale > 1 && g_settings.gpu_force_round_texcoords) ? "Enabled" : "Disabled"); + (m_resolution_scale > 1 && g_gpu_settings.gpu_force_round_texcoords) ? "Enabled" : "Disabled"); INFO_LOG("Texture Filtering: {}/{}", Settings::GetTextureFilterDisplayName(m_texture_filtering), Settings::GetTextureFilterDisplayName(m_sprite_texture_filtering)); INFO_LOG("Dual-source blending: {}", m_supports_dual_source_blend ? 
"Supported" : "Not supported"); @@ -715,7 +712,7 @@ void GPU_HW::PrintSettingsToLog() INFO_LOG("Downsampling: {}", Settings::GetDownsampleModeDisplayName(m_downsample_mode)); INFO_LOG("Wireframe rendering: {}", Settings::GetGPUWireframeModeDisplayName(m_wireframe_mode)); INFO_LOG("Line detection: {}", Settings::GetLineDetectModeDisplayName(m_line_detect_mode)); - INFO_LOG("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO"); + // TODO: FIXME INFO_LOG("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO"); INFO_LOG("Separate sprite shaders: {}", m_allow_sprite_mode ? "YES" : "NO"); } @@ -814,6 +811,7 @@ void GPU_HW::ClearFramebuffer() g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); ClearVRAMDirtyRectangle(); m_last_depth_z = 1.0f; + m_current_depth = 1; } void GPU_HW::SetVRAMRenderTarget() @@ -1672,7 +1670,8 @@ ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full); } -ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) +ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(const GPUBackendDrawCommand* cmd, + BatchVertex* vertices) { // Taken from beetle-psx gpu_polygon.cpp // For X/Y flipped 2D sprites, PSX games rely on a very specific rasterization behavior. If U or V is decreasing in X @@ -1778,7 +1777,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVert // 2D polygons should have zero change in V on the X axis, and vice versa. 
if (m_allow_sprite_mode) - SetBatchSpriteMode(zero_dudy && zero_dvdx); + SetBatchSpriteMode(cmd, zero_dudy && zero_dvdx); } bool GPU_HW::IsPossibleSpritePolygon(const BatchVertex* vertices) const @@ -1958,7 +1957,7 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::ExpandLineTriangles(BatchVertex* vertices) return true; } -void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) +void GPU_HW::ComputePolygonUVLimits(const GPUBackendDrawCommand* cmd, BatchVertex* vertices, u32 num_vertices) { DebugAssert(num_vertices == 3 || num_vertices == 4); @@ -1986,10 +1985,10 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) vertices[i].SetUVLimits(min_u, max_u, min_v, max_v); if (m_texpage_dirty != 0) - CheckForTexPageOverlap(GSVector4i(min).upl32(GSVector4i(max)).u16to32()); + CheckForTexPageOverlap(cmd, GSVector4i(min).upl32(GSVector4i(max)).u16to32()); } -void GPU_HW::SetBatchDepthBuffer(bool enabled) +void GPU_HW::SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled) { if (m_batch.use_depth_buffer == enabled) return; @@ -1997,13 +1996,13 @@ void GPU_HW::SetBatchDepthBuffer(bool enabled) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } m_batch.use_depth_buffer = enabled; } -void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices) +void GPU_HW::CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices) { DebugAssert(num_vertices == 3 || num_vertices == 4); float average_z; @@ -2012,17 +2011,17 @@ void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices) else average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w + vertices[3].w) / 4.0f, 1.0f); - if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold) + if ((average_z - m_last_depth_z) >= g_gpu_settings.gpu_pgxp_depth_clear_threshold) { FlushRender(); 
CopyAndClearDepthBuffer(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } m_last_depth_z = average_z; } -void GPU_HW::SetBatchSpriteMode(bool enabled) +void GPU_HW::SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled) { if (m_batch.sprite_mode == enabled) return; @@ -2030,7 +2029,7 @@ void GPU_HW::SetBatchSpriteMode(bool enabled) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } GL_INS_FMT("Sprite mode is now {}", enabled ? "ON" : "OFF"); @@ -2038,6 +2037,43 @@ void GPU_HW::SetBatchSpriteMode(bool enabled) m_batch.sprite_mode = enabled; } +void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); + + const u32 num_vertices = cmd->num_vertices; + DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); + + const float depth = GetCurrentNormalizedVertexDepth(); + GSVector4i start_pos = GSVector4i::loadl(&cmd->vertices[0].x); + u32 start_color = cmd->vertices[0].color; + + for (u32 i = 1; i < num_vertices; i++) + { + const GSVector4i end_pos = GSVector4i::loadl(&cmd->vertices[i].x); + const u32 end_color = cmd->vertices[i].color; + + const GSVector4i bounds = start_pos.xyxy(end_pos); + const GSVector4i rect = + start_pos.min_i32(end_pos).xyxy(start_pos.max_i32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty()) + + AddDrawnRectangle(clamped_rect); + DrawLine(GSVector4(bounds), start_color, end_color, depth); + + start_pos = end_pos; + start_color = end_color; + } + +#if 0 + // TODO: FIXME + if (m_sw_renderer) + m_sw_renderer->PushCommand(cmd); +#endif +} + void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) { 
DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); @@ -2139,454 +2175,216 @@ void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) m_batch_index_space -= 6; } -void GPU_HW::LoadVertices() +void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) { - if (m_GPUSTAT.check_mask_before_draw) - m_current_depth++; + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); + SetBatchSpriteMode(cmd, m_allow_sprite_mode); + DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE); - const GPURenderCommand rc{m_render_command.bits}; - const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg.bits) << 16); + const s32 pos_x = cmd->x; + const s32 pos_y = cmd->y; + const u32 texpage = m_draw_mode.bits; + const u32 color = cmd->color; const float depth = GetCurrentNormalizedVertexDepth(); + const u32 orig_tex_left = ZeroExtend32(Truncate8(cmd->texcoord)); + const u32 orig_tex_top = ZeroExtend32(cmd->texcoord) >> 8; + const u32 rectangle_width = cmd->width; + const u32 rectangle_height = cmd->height; - switch (rc.primitive) + const GSVector4i rect = + GSVector4i(pos_x, pos_y, pos_x + static_cast(rectangle_width), pos_y + static_cast(rectangle_height)); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + DebugAssert(!clamped_rect.rempty()); + + // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. 
+ u32 tex_top = orig_tex_top; + for (u32 y_offset = 0; y_offset < rectangle_height;) { - case GPUPrimitive::Polygon: + const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); + const float quad_start_y = static_cast(pos_y + static_cast(y_offset)); + const float quad_end_y = quad_start_y + static_cast(quad_height); + const u32 tex_bottom = tex_top + quad_height; + + u32 tex_left = orig_tex_left; + for (u32 x_offset = 0; x_offset < rectangle_width;) { - const bool textured = rc.texture_enable; - const bool raw_texture = textured && rc.raw_texture_enable; - const bool shaded = rc.shading_enable; - const bool pgxp = g_settings.gpu_pgxp_enable; + const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); + const float quad_start_x = static_cast(pos_x + static_cast(x_offset)); + const float quad_end_x = quad_start_x + static_cast(quad_width); + const u32 tex_right = tex_left + quad_width; + const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - const u32 first_color = rc.color_for_first_vertex; - u32 num_vertices = rc.quad_polygon ? 4 : 3; - std::array vertices; - std::array native_vertex_positions; - std::array native_texcoords; - bool valid_w = g_settings.gpu_pgxp_texture_correction; - for (u32 i = 0; i < num_vertices; i++) + if (cmd->rc.texture_enable && m_texpage_dirty != 0) { - const u32 vert_color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - const u32 color = raw_texture ? UINT32_C(0x00808080) : vert_color; - const u64 maddr_and_pos = m_fifo.Pop(); - const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; - const u16 texcoord = textured ? 
Truncate16(FifoPop()) : 0; - const s32 native_x = native_vertex_positions[i].x = m_drawing_offset.x + vp.x; - const s32 native_y = native_vertex_positions[i].y = m_drawing_offset.y + vp.y; - native_texcoords[i] = texcoord; - vertices[i].Set(static_cast(native_x), static_cast(native_y), depth, 1.0f, color, texpage, - texcoord, 0xFFFF0000u); - - if (pgxp) - { - valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, - m_drawing_offset.x, m_drawing_offset.y, &vertices[i].x, &vertices[i].y, - &vertices[i].w); - } - } - if (pgxp) - { - if (!valid_w) - { - SetBatchDepthBuffer(false); - if (g_settings.gpu_pgxp_disable_2d) - { - // NOTE: This reads uninitialized data, but it's okay, it doesn't get used. - for (size_t i = 0; i < vertices.size(); i++) - { - BatchVertex& v = vertices[i]; - v.x = static_cast(native_vertex_positions[i].x); - v.y = static_cast(native_vertex_positions[i].y); - v.w = 1.0f; - } - } - else - { - for (BatchVertex& v : vertices) - v.w = 1.0f; - } - } - else if (m_pgxp_depth_buffer) - { - SetBatchDepthBuffer(true); - CheckForDepthClear(vertices.data(), num_vertices); - } + CheckForTexPageOverlap(cmd, GSVector4i(static_cast(tex_left), static_cast(tex_top), + static_cast(tex_right), static_cast(tex_bottom))); } - // Use PGXP to exclude primitives that are definitely 3D. 
- const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); - if (m_resolution_scale > 1 && !is_3d && rc.quad_polygon) - HandleFlippedQuadTextureCoordinates(vertices.data()); - else if (m_allow_sprite_mode) - SetBatchSpriteMode((pgxp && !is_3d) || IsPossibleSpritePolygon(vertices.data())); + const u32 base_vertex = m_batch_vertex_count; + (m_batch_vertex_ptr++) + ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_top), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_top), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_bottom), + uv_limits); + (m_batch_vertex_ptr++) + ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_bottom), + uv_limits); + m_batch_vertex_count += 4; + m_batch_vertex_space -= 4; - if (m_sw_renderer) - { - GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices); - FillDrawCommand(cmd, rc); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 0); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 3); + m_batch_index_count += 6; + m_batch_index_space -= 6; - const u32 sw_num_vertices = rc.quad_polygon ? 4 : 3; - for (u32 i = 0; i < sw_num_vertices; i++) - { - GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; - vert->x = native_vertex_positions[i].x; - vert->y = native_vertex_positions[i].y; - vert->texcoord = native_texcoords[i]; - vert->color = vertices[i].color; - } - - m_sw_renderer->PushCommand(cmd); - } - - // Cull polygons which are too large. 
- const GSVector2 v0f = GSVector2::load(&vertices[0].x); - const GSVector2 v1f = GSVector2::load(&vertices[1].x); - const GSVector2 v2f = GSVector2::load(&vertices[2].x); - const GSVector2 min_pos_12 = v1f.min(v2f); - const GSVector2 max_pos_12 = v1f.max(v2f); - const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); - const bool first_tri_culled = (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || - draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || clamped_draw_rect_012.rempty()); - if (first_tri_culled) - { - GL_INS_FMT("Culling off-screen/too-large polygon: {},{} {},{} {},{}", native_vertex_positions[0].x, - native_vertex_positions[0].y, native_vertex_positions[1].x, native_vertex_positions[1].y, - native_vertex_positions[2].x, native_vertex_positions[2].y); - - if (!rc.quad_polygon) - return; - } - else - { - if (textured && m_compute_uv_range) - ComputePolygonUVLimits(vertices.data(), num_vertices); - - AddDrawnRectangle(clamped_draw_rect_012); - AddDrawTriangleTicks(native_vertex_positions[0], native_vertex_positions[1], native_vertex_positions[2], - rc.shading_enable, rc.texture_enable, rc.transparency_enable); - - // Expand lines to triangles (Doom, Soul Blade, etc.) 
- if (!rc.quad_polygon && m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && - ExpandLineTriangles(vertices.data())) - { - return; - } - - const u32 start_index = m_batch_vertex_count; - DebugAssert(m_batch_index_space >= 3); - *(m_batch_index_ptr++) = Truncate16(start_index); - *(m_batch_index_ptr++) = Truncate16(start_index + 1); - *(m_batch_index_ptr++) = Truncate16(start_index + 2); - m_batch_index_count += 3; - m_batch_index_space -= 3; - } - - // quads - if (rc.quad_polygon) - { - const GSVector2 v3f = GSVector2::load(&vertices[3].x); - const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); - - // Cull polygons which are too large. - const bool second_tri_culled = - (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - clamped_draw_rect_123.rempty()); - if (second_tri_culled) - { - GL_INS_FMT("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", - native_vertex_positions[2].x, native_vertex_positions[2].y, native_vertex_positions[1].x, - native_vertex_positions[1].y, native_vertex_positions[0].x, native_vertex_positions[0].y); - - if (first_tri_culled) - return; - } - else - { - if (first_tri_culled && textured && m_compute_uv_range) - ComputePolygonUVLimits(vertices.data(), num_vertices); - - AddDrawnRectangle(clamped_draw_rect_123); - AddDrawTriangleTicks(native_vertex_positions[2], native_vertex_positions[1], native_vertex_positions[3], - rc.shading_enable, rc.texture_enable, rc.transparency_enable); - - const u32 start_index = m_batch_vertex_count; - DebugAssert(m_batch_index_space >= 3); - *(m_batch_index_ptr++) = Truncate16(start_index + 2); - *(m_batch_index_ptr++) = Truncate16(start_index + 1); - *(m_batch_index_ptr++) = Truncate16(start_index + 3); - m_batch_index_count += 3; - 
m_batch_index_space -= 3; - } - } - - if (num_vertices == 4) - { - DebugAssert(m_batch_vertex_space >= 4); - std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4); - m_batch_vertex_ptr += 4; - m_batch_vertex_count += 4; - m_batch_vertex_space -= 4; - } - else - { - DebugAssert(m_batch_vertex_space >= 3); - std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); - m_batch_vertex_ptr += 3; - m_batch_vertex_count += 3; - m_batch_vertex_space -= 3; - } + x_offset += quad_width; + tex_left = 0; } - break; - case GPUPrimitive::Rectangle: - { - const u32 color = (rc.texture_enable && rc.raw_texture_enable) ? UINT32_C(0x00808080) : rc.color_for_first_vertex; - const GPUVertexPosition vp{FifoPop()}; - const s32 pos_x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); - const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); + y_offset += quad_height; + tex_top = 0; + } - const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? 
Truncate16(FifoPop()) : 0); - u32 orig_tex_left = ZeroExtend16(texcoord_x); - u32 orig_tex_top = ZeroExtend16(texcoord_y); - u32 rectangle_width; - u32 rectangle_height; - switch (rc.rectangle_size) - { - case GPUDrawRectangleSize::R1x1: - rectangle_width = 1; - rectangle_height = 1; - break; - case GPUDrawRectangleSize::R8x8: - rectangle_width = 8; - rectangle_height = 8; - break; - case GPUDrawRectangleSize::R16x16: - rectangle_width = 16; - rectangle_height = 16; - break; - default: - { - const u32 width_and_height = FifoPop(); - rectangle_width = (width_and_height & VRAM_WIDTH_MASK); - rectangle_height = ((width_and_height >> 16) & VRAM_HEIGHT_MASK); - } - break; - } + AddDrawnRectangle(clamped_rect); - const GSVector4i rect = - GSVector4i(pos_x, pos_y, pos_x + static_cast(rectangle_width), pos_y + static_cast(rectangle_height)); - const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); - if (clamped_rect.rempty()) [[unlikely]] - { - GL_INS_FMT("Culling off-screen rectangle {}", rect); - return; - } +#if 0 + // TODO: FIXME + if (m_sw_renderer) + m_sw_renderer->PushCommand(cmd); +#endif +} - // we can split the rectangle up into potentially 8 quads - SetBatchDepthBuffer(false); - SetBatchSpriteMode(m_allow_sprite_mode); - DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && - m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE); +void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) +{ + PrepareDraw(cmd); + SetBatchDepthBuffer(cmd, false); - // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. 
- u32 tex_top = orig_tex_top; - for (u32 y_offset = 0; y_offset < rectangle_height;) - { - const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); - const float quad_start_y = static_cast(pos_y + static_cast(y_offset)); - const float quad_end_y = quad_start_y + static_cast(quad_height); - const u32 tex_bottom = tex_top + quad_height; + // TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below. + const float depth = GetCurrentNormalizedVertexDepth(); + const u32 num_vertices = cmd->num_vertices; + const u32 texpage = m_draw_mode.bits; + std::array vertices; + for (u32 i = 0; i < num_vertices; i++) + { + const GPUBackendDrawPolygonCommand::Vertex& vert = cmd->vertices[i]; + const GSVector2 vert_pos = GSVector2(GSVector2i::load(&vert.x)); + vertices[i].Set(vert_pos.x, vert_pos.y, depth, 1.0f, vert.color, texpage, vert.texcoord, 0xFFFF0000u); + } - u32 tex_left = orig_tex_left; - for (u32 x_offset = 0; x_offset < rectangle_width;) - { - const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); - const float quad_start_x = static_cast(pos_x + static_cast(x_offset)); - const float quad_end_x = quad_start_x + static_cast(quad_width); - const u32 tex_right = tex_left + quad_width; - const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); + FinishPolygonDraw(cmd, vertices, num_vertices, false); +} - if (rc.texture_enable && m_texpage_dirty != 0) - { - CheckForTexPageOverlap(GSVector4i(static_cast(tex_left), static_cast(tex_top), - static_cast(tex_right), static_cast(tex_bottom))); - } +void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) +{ + PrepareDraw(cmd); - const u32 base_vertex = m_batch_vertex_count; - (m_batch_vertex_ptr++) - ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_top), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_end_x, 
quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_top), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_bottom), - uv_limits); - (m_batch_vertex_ptr++) - ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_bottom), - uv_limits); - m_batch_vertex_count += 4; - m_batch_vertex_space -= 4; + // TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below. + const float depth = GetCurrentNormalizedVertexDepth(); + const u32 num_vertices = cmd->num_vertices; + const u32 texpage = m_draw_mode.bits; + std::array vertices; + for (u32 i = 0; i < num_vertices; i++) + { + const GPUBackendDrawPrecisePolygonCommand::Vertex& vert = cmd->vertices[i]; + vertices[i].Set(vert.x, vert.y, depth, vert.w, vert.color, texpage, vert.texcoord, 0xFFFF0000u); + } - *(m_batch_index_ptr++) = Truncate16(base_vertex + 0); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); - *(m_batch_index_ptr++) = Truncate16(base_vertex + 3); - m_batch_index_count += 6; - m_batch_index_space -= 6; + const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w; + SetBatchDepthBuffer(cmd, use_depth); + if (use_depth) + CheckForDepthClear(cmd, vertices.data(), num_vertices); - x_offset += quad_width; - tex_left = 0; - } + // Use PGXP to exclude primitives that are definitely 3D. 
+ const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); + FinishPolygonDraw(cmd, vertices, num_vertices, is_3d); +} - y_offset += quad_height; - tex_top = 0; - } +ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand* cmd, + std::array& vertices, u32 num_vertices, bool is_3d) +{ + if (m_resolution_scale > 1 && !is_3d && cmd->rc.quad_polygon) + HandleFlippedQuadTextureCoordinates(cmd, vertices.data()); + else if (m_allow_sprite_mode) + SetBatchSpriteMode(cmd, !is_3d || IsPossibleSpritePolygon(vertices.data())); - AddDrawnRectangle(clamped_rect); - AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); +#if 0 + // TODO: FIXME + if (m_sw_renderer) + m_sw_renderer->PushCommand(cmd); +#endif - if (m_sw_renderer) - { - GPUBackendDrawRectangleCommand* cmd = m_sw_renderer->NewDrawRectangleCommand(); - FillDrawCommand(cmd, rc); - cmd->color = color; - cmd->x = pos_x; - cmd->y = pos_y; - cmd->width = static_cast(rectangle_width); - cmd->height = static_cast(rectangle_height); - cmd->texcoord = (static_cast(texcoord_y) << 8) | static_cast(texcoord_x); - m_sw_renderer->PushCommand(cmd); - } - } - break; + // Cull polygons which are too large. 
+ const GSVector2 v0f = GSVector2::load(&vertices[0].x); + const GSVector2 v1f = GSVector2::load(&vertices[1].x); + const GSVector2 v2f = GSVector2::load(&vertices[2].x); + const GSVector2 min_pos_12 = v1f.min(v2f); + const GSVector2 max_pos_12 = v1f.max(v2f); + const GSVector4i draw_rect_012 = + GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); + DebugAssert(draw_rect_012.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_012.height() <= MAX_PRIMITIVE_HEIGHT && + !clamped_draw_rect_012.rempty()); - case GPUPrimitive::Line: - { - SetBatchDepthBuffer(false); + if (cmd->rc.texture_enable && m_compute_uv_range) + ComputePolygonUVLimits(cmd, vertices.data(), num_vertices); - if (!rc.polyline) - { - DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); + AddDrawnRectangle(clamped_draw_rect_012); - u32 start_color, end_color; - GPUVertexPosition start_pos, end_pos; - if (rc.shading_enable) - { - start_color = rc.color_for_first_vertex; - start_pos.bits = FifoPop(); - end_color = FifoPop() & UINT32_C(0x00FFFFFF); - end_pos.bits = FifoPop(); - } - else - { - start_color = end_color = rc.color_for_first_vertex; - start_pos.bits = FifoPop(); - end_pos.bits = FifoPop(); - } + // Expand lines to triangles (Doom, Soul Blade, etc.) 
+ if (!cmd->rc.quad_polygon && m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && + ExpandLineTriangles(vertices.data())) + { + return; + } - const GSVector4i vstart_pos = GSVector4i(start_pos.x + m_drawing_offset.x, start_pos.y + m_drawing_offset.y); - const GSVector4i vend_pos = GSVector4i(end_pos.x + m_drawing_offset.x, end_pos.y + m_drawing_offset.y); - const GSVector4i bounds = vstart_pos.xyxy(vend_pos); - const GSVector4i rect = - vstart_pos.min_i32(vend_pos).xyxy(vstart_pos.max_i32(vend_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + const u32 start_index = m_batch_vertex_count; + DebugAssert(m_batch_index_space >= 3); + *(m_batch_index_ptr++) = Truncate16(start_index); + *(m_batch_index_ptr++) = Truncate16(start_index + 1); + *(m_batch_index_ptr++) = Truncate16(start_index + 2); + m_batch_index_count += 3; + m_batch_index_space -= 3; - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - GL_INS_FMT("Culling too-large/off-screen line: {},{} - {},{}", bounds.x, bounds.y, bounds.z, bounds.w); - return; - } + // quads, use num_vertices here, because the first half might be culled + if (num_vertices == 4) + { + const GSVector2 v3f = GSVector2::load(&vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); + DebugAssert(draw_rect_123.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_123.height() <= MAX_PRIMITIVE_HEIGHT && + !clamped_draw_rect_123.rempty()); + AddDrawnRectangle(clamped_draw_rect_123); - AddDrawnRectangle(clamped_rect); - AddDrawLineTicks(clamped_rect, rc.shading_enable); + DebugAssert(m_batch_index_space >= 3); + *(m_batch_index_ptr++) = Truncate16(start_index + 2); + *(m_batch_index_ptr++) = 
Truncate16(start_index + 1); + *(m_batch_index_ptr++) = Truncate16(start_index + 3); + m_batch_index_count += 3; + m_batch_index_space -= 3; - // TODO: Should we do a PGXP lookup here? Most lines are 2D. - DrawLine(GSVector4(bounds), start_color, end_color, depth); - - if (m_sw_renderer) - { - GPUBackendDrawLineCommand* cmd = m_sw_renderer->NewDrawLineCommand(2); - FillDrawCommand(cmd, rc); - GSVector4i::storel(&cmd->vertices[0], bounds); - cmd->vertices[0].color = start_color; - GSVector4i::storeh(&cmd->vertices[1], bounds); - cmd->vertices[1].color = end_color; - m_sw_renderer->PushCommand(cmd); - } - } - else - { - // Multiply by two because we don't use line strips. - const u32 num_vertices = GetPolyLineVertexCount(); - DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); - - const bool shaded = rc.shading_enable; - - u32 buffer_pos = 0; - const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - GSVector4i start_pos = GSVector4i(start_vp.x + m_drawing_offset.x, start_vp.y + m_drawing_offset.y); - u32 start_color = rc.color_for_first_vertex; - - GPUBackendDrawLineCommand* cmd; - if (m_sw_renderer) - { - cmd = m_sw_renderer->NewDrawLineCommand(num_vertices); - FillDrawCommand(cmd, rc); - GSVector4i::storel(&cmd->vertices[0].x, start_pos); - cmd->vertices[0].color = start_color; - } - else - { - cmd = nullptr; - } - - for (u32 i = 1; i < num_vertices; i++) - { - const u32 end_color = shaded ? 
(m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color; - const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; - const GSVector4i end_pos = GSVector4i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y); - const GSVector4i bounds = start_pos.xyxy(end_pos); - const GSVector4i rect = - start_pos.min_i32(end_pos).xyxy(start_pos.max_i32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - GL_INS_FMT("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); - } - else - { - AddDrawnRectangle(clamped_rect); - AddDrawLineTicks(clamped_rect, rc.shading_enable); - - // TODO: Should we do a PGXP lookup here? Most lines are 2D. - DrawLine(GSVector4(bounds), start_color, end_color, depth); - } - - start_pos = end_pos; - start_color = end_color; - - if (cmd) - { - GSVector4i::storel(&cmd->vertices[i], end_pos); - cmd->vertices[i].color = end_color; - } - } - - if (cmd) - m_sw_renderer->PushCommand(cmd); - } - } - break; - - default: - UnreachableCode(); - break; + DebugAssert(m_batch_vertex_space >= 4); + std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4); + m_batch_vertex_ptr += 4; + m_batch_vertex_count += 4; + m_batch_vertex_space -= 4; + } + else + { + DebugAssert(m_batch_vertex_space >= 3); + std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); + m_batch_vertex_ptr += 3; + m_batch_vertex_count += 3; + m_batch_vertex_space -= 3; } } @@ -2631,7 +2429,7 @@ bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacements::ReplacementIm return true; } -ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) +ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(const GPUBackendDrawCommand* cmd, GSVector4i uv_rect) { DebugAssert(m_texpage_dirty != 0 && m_batch.texture_mode != 
BatchTextureMode::Disabled); @@ -2686,7 +2484,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) if (m_batch_index_count > 0) { FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } UpdateVRAMReadTexture(update_drawn, update_written); @@ -2732,26 +2530,27 @@ void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices MapGPUBuffer(required_vertices, required_indices); } -void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand() +void GPU_HW::EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd) { u32 required_vertices; u32 required_indices; - switch (m_render_command.primitive) + switch (cmd->type) { - case GPUPrimitive::Polygon: + case GPUBackendCommandType::DrawPolygon: + case GPUBackendCommandType::DrawPrecisePolygon: required_vertices = 4; // assume quad, in case of expansion required_indices = 6; break; - case GPUPrimitive::Rectangle: + case GPUBackendCommandType::DrawRectangle: required_vertices = MAX_VERTICES_FOR_RECTANGLE; // TODO: WRong required_indices = MAX_VERTICES_FOR_RECTANGLE; break; - case GPUPrimitive::Line: + case GPUBackendCommandType::DrawLine: { // assume expansion - const u32 vert_count = m_render_command.polyline ? 
GetPolyLineVertexCount() : 2; - required_vertices = vert_count * 4; - required_indices = vert_count * 6; + const GPUBackendDrawLineCommand* lcmd = static_cast(cmd); + required_vertices = lcmd->num_vertices * 4; + required_indices = lcmd->num_vertices * 6; } break; @@ -2788,11 +2587,12 @@ ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) { - const bool current_enabled = (m_sw_renderer != nullptr); - const bool new_enabled = g_settings.gpu_use_software_renderer_for_readbacks; + const bool current_enabled = false; // (m_sw_renderer != nullptr); + const bool new_enabled = g_gpu_settings.gpu_use_software_renderer_for_readbacks; if (current_enabled == new_enabled) return; +#if 0 if (!new_enabled) { if (m_sw_renderer) @@ -2827,41 +2627,28 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) } m_sw_renderer = std::move(sw_renderer); +#else + Panic("FIXME"); +#endif } -void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const +void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) { - cmd->params.bits = 0; - cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; - cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; - cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; - cmd->params.interlaced_rendering = m_GPUSTAT.SkipDrawingToActiveField(); -} + FlushRender(); -void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const -{ - FillBackendCommandParameters(cmd); - cmd->rc.bits = rc.bits; - cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; - cmd->palette.bits = m_draw_mode.palette_reg.bits; - cmd->window = m_draw_mode.texture_window; -} - -void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ GL_SCOPE_FMT("FillVRAM({},{} => {},{} ({}x{}) with 0x{:08X}", x, y, x + width, y + height, width, height, color); - if (m_sw_renderer) - { - 
GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - cmd->color = color; - m_sw_renderer->PushCommand(cmd); - } + // if (m_sw_renderer) + // { + // GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand(); + // FillBackendCommandParameters(cmd); + // cmd->x = static_cast(x); + // cmd->y = static_cast(y); + // cmd->width = static_cast(width); + // cmd->height = static_cast(height); + // cmd->color = color; + // m_sw_renderer->PushCommand(cmd); + // } GL_INS_FMT("Dirty draw area before: {}", m_vram_dirty_draw_rect); @@ -2872,7 +2659,7 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)); g_gpu_device->SetPipeline( - m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(IsInterlacedRenderingEnabled())].get()); + m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(params.interlaced_rendering)].get()); const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetViewportAndScissor(scaled_bounds); @@ -2894,7 +2681,7 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) // drop precision unless true colour is enabled uniforms.u_fill_color = GPUDevice::RGBA8ToFloat(m_true_color ? 
color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color))); - uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); + uniforms.u_interlaced_displayed_field = params.active_line_lsb; g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); @@ -2903,14 +2690,16 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { + FlushRender(); + GL_PUSH_FMT("ReadVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height); - if (m_sw_renderer) - { - m_sw_renderer->Sync(false); - GL_POP(); - return; - } + // if (m_sw_renderer) + // { + // m_sw_renderer->Sync(false); + // GL_POP(); + // return; + // } // Get bounds with wrap-around handled. GSVector4i copy_rect = GetVRAMTransferBounds(x, y, width, height); @@ -2959,30 +2748,32 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) RestoreDeviceContext(); } -void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) +void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) { + FlushRender(); + GL_SCOPE_FMT("UpdateVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height); - if (m_sw_renderer) - { - const u32 num_words = width * height; - GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); - FillBackendCommandParameters(cmd); - cmd->params.set_mask_while_drawing = set_mask; - cmd->params.check_mask_before_draw = check_mask; - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - std::memcpy(cmd->data, data, sizeof(u16) * num_words); - m_sw_renderer->PushCommand(cmd); - } + // if (m_sw_renderer) + // { + // const u32 num_words = width * height; + // GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); + // FillBackendCommandParameters(cmd); + 
// cmd->params.set_mask_while_drawing = set_mask; + // cmd->params.check_mask_before_draw = check_mask; + // cmd->x = static_cast(x); + // cmd->y = static_cast(y); + // cmd->width = static_cast(width); + // cmd->height = static_cast(height); + // std::memcpy(cmd->data, data, sizeof(u16) * num_words); + // m_sw_renderer->PushCommand(cmd); + // } const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); DebugAssert(bounds.right <= static_cast(VRAM_WIDTH) && bounds.bottom <= static_cast(VRAM_HEIGHT)); AddWrittenRectangle(bounds); - if (check_mask) + if (params.check_mask_before_draw) { // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; @@ -2997,7 +2788,8 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b } } - UpdateVRAMOnGPU(x, y, width, height, data, sizeof(u16) * width, set_mask, check_mask, bounds); + UpdateVRAMOnGPU(x, y, width, height, data, sizeof(u16) * width, params.set_mask_while_drawing, + params.check_mask_before_draw, bounds); } void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, @@ -3064,26 +2856,29 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da RestoreDeviceContext(); } -void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) { + FlushRender(); + GL_SCOPE_FMT("CopyVRAM({}x{} @ {},{} => {},{}", width, height, src_x, src_y, dst_x, dst_y); - if (m_sw_renderer) - { - GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->src_x = static_cast(src_x); - cmd->src_y = static_cast(src_y); - cmd->dst_x = static_cast(dst_x); - cmd->dst_y = static_cast(dst_y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - 
m_sw_renderer->PushCommand(cmd); - } + // if (m_sw_renderer) + // { + // GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); + // FillBackendCommandParameters(cmd); + // cmd->src_x = static_cast(src_x); + // cmd->src_y = static_cast(src_y); + // cmd->dst_x = static_cast(dst_x); + // cmd->dst_y = static_cast(dst_y); + // cmd->width = static_cast(width); + // cmd->height = static_cast(height); + // m_sw_renderer->PushCommand(cmd); + // } // masking enabled, oversized, or overlapping const bool use_shader = - (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + (params.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT); const GSVector4i src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); @@ -3118,20 +2913,20 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale, - m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, + params.set_mask_while_drawing ? 1u : 0u, GetCurrentNormalizedVertexDepth()}; // VRAM read texture should already be bound. 
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetViewportAndScissor(dst_bounds_scaled); g_gpu_device->SetPipeline( - m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer && NeedsDepthBuffer())] + m_vram_copy_pipelines[BoolToUInt8(params.check_mask_before_draw && !m_pgxp_depth_buffer && NeedsDepthBuffer())] .get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); - if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) + if (params.check_mask_before_draw && !m_pgxp_depth_buffer) m_current_depth++; return; @@ -3165,7 +2960,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 AddUnclampedDrawnRectangle(dst_bounds); } - if (m_GPUSTAT.check_mask_before_draw) + if (params.check_mask_before_draw) { // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; @@ -3178,17 +2973,26 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 m_vram_read_texture->MakeReadyForSampling(); } -void GPU_HW::DispatchRenderCommand() +void GPU_HW::ClearCache() { - const GPURenderCommand rc{m_render_command.bits}; + FlushRender(); + // Force the check below to fail. 
+ m_draw_mode.bits = INVALID_DRAW_MODE_BITS; +} + +void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd) +{ BatchTextureMode texture_mode = BatchTextureMode::Disabled; - if (rc.IsTexturingEnabled()) + if (cmd->rc.IsTexturingEnabled()) { // texture page changed - check that the new page doesn't intersect the drawing area - if (m_draw_mode.IsTexturePageChanged()) + if (((m_draw_mode.bits ^ cmd->draw_mode.bits) & GPUDrawModeReg::TEXTURE_PAGE_AND_MODE_MASK) != 0 || + (cmd->draw_mode.IsUsingPalette() && m_draw_mode.palette_reg.bits != cmd->palette.bits)) + { - m_draw_mode.ClearTexturePageChangedFlag(); + m_draw_mode.mode_reg.bits = cmd->draw_mode.bits; + m_draw_mode.palette_reg.bits = cmd->palette.bits; #if 0 if (!m_vram_dirty_draw_rect.eq(INVALID_RECT) || !m_vram_dirty_write_rect.eq(INVALID_RECT)) @@ -3200,7 +3004,7 @@ void GPU_HW::DispatchRenderCommand() } #endif - if (m_draw_mode.mode_reg.IsUsingPalette()) + if (cmd->draw_mode.IsUsingPalette()) { const GSVector4i palette_rect = m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode); const bool update_drawn = palette_rect.rintersects(m_vram_dirty_draw_rect); @@ -3245,22 +3049,24 @@ void GPU_HW::DispatchRenderCommand() // has any state changed which requires a new batch? // Reverse blending breaks with mixed transparent and opaque pixels, so we have to do one draw per polygon. // If we have fbfetch, we don't need to draw it in two passes. Test case: Suikoden 2 shadows. + // TODO: make this suck less.. somehow. probably arrange the relevant bits in a comparable pattern const GPUTransparencyMode transparency_mode = - rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; - const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; + cmd->rc.transparency_enable ? cmd->draw_mode.transparency_mode : GPUTransparencyMode::Disabled; + const bool dithering_enable = (!m_true_color && cmd->rc.IsDitheringEnabled()) ? 
cmd->draw_mode.dither_enable : false; if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || - dithering_enable != m_batch.dithering) + dithering_enable != m_batch.dithering || m_batch.set_mask_while_drawing != cmd->params.set_mask_while_drawing || + m_batch.check_mask_before_draw != cmd->params.check_mask_before_draw) { FlushRender(); } - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); if (m_batch_index_count == 0) { // transparency mode change - const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + const bool check_mask_before_draw = cmd->params.check_mask_before_draw; if (transparency_mode != GPUTransparencyMode::Disabled && (texture_mode == BatchTextureMode::Disabled || !NeedsShaderBlending(transparency_mode, check_mask_before_draw))) { @@ -3274,7 +3080,7 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_data.u_dst_alpha_factor = dst_alpha_factor; } - const bool set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + const bool set_mask_while_drawing = cmd->params.set_mask_while_drawing; if (m_batch.check_mask_before_draw != check_mask_before_draw || m_batch.set_mask_while_drawing != set_mask_while_drawing) { @@ -3284,10 +3090,10 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(set_mask_while_drawing); } - m_batch.interlacing = IsInterlacedRenderingEnabled(); + m_batch.interlacing = cmd->params.interlaced_rendering; if (m_batch.interlacing) { - const u32 displayed_field = GetActiveLineLSB(); + const u32 displayed_field = cmd->params.active_line_lsb; m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field); m_batch_ubo_data.u_interlaced_displayed_field = displayed_field; } @@ -3297,47 +3103,49 @@ void GPU_HW::DispatchRenderCommand() m_batch.transparency_mode = transparency_mode; 
m_batch.dithering = dithering_enable; - if (m_draw_mode.IsTextureWindowChanged()) + if (cmd->window.bits != m_last_texture_window_bits) { - m_draw_mode.ClearTextureWindowChangedFlag(); + m_last_texture_window_bits = cmd->window.bits; - m_batch_ubo_data.u_texture_window[0] = ZeroExtend32(m_draw_mode.texture_window.and_x); - m_batch_ubo_data.u_texture_window[1] = ZeroExtend32(m_draw_mode.texture_window.and_y); - m_batch_ubo_data.u_texture_window[2] = ZeroExtend32(m_draw_mode.texture_window.or_x); - m_batch_ubo_data.u_texture_window[3] = ZeroExtend32(m_draw_mode.texture_window.or_y); + m_batch_ubo_data.u_texture_window[0] = ZeroExtend32(cmd->window.and_x); + m_batch_ubo_data.u_texture_window[1] = ZeroExtend32(cmd->window.and_y); + m_batch_ubo_data.u_texture_window[2] = ZeroExtend32(cmd->window.or_x); + m_batch_ubo_data.u_texture_window[3] = ZeroExtend32(cmd->window.or_y); - m_texture_window_active = ((m_draw_mode.texture_window.and_x & m_draw_mode.texture_window.and_y) != 0xFF || - ((m_draw_mode.texture_window.or_x | m_draw_mode.texture_window.or_y) != 0)); + m_texture_window_active = + ((cmd->window.and_x & cmd->window.and_y) != 0xFF || ((cmd->window.or_x | cmd->window.or_y) != 0)); m_batch_ubo_dirty = true; } if (m_drawing_area_changed) { m_drawing_area_changed = false; - SetClampedDrawingArea(); SetScissor(); if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) { FlushRender(); CopyAndClearDepthBuffer(); - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpaceForCommand(cmd); } - if (m_sw_renderer) - { - GPUBackendSetDrawingAreaCommand* cmd = m_sw_renderer->NewSetDrawingAreaCommand(); - cmd->new_area = m_drawing_area; - m_sw_renderer->PushCommand(cmd); - } + // if (m_sw_renderer) + // { + // GPUBackendSetDrawingAreaCommand* cmd = m_sw_renderer->NewSetDrawingAreaCommand(); + // cmd->new_area = m_drawing_area; + // m_sw_renderer->PushCommand(cmd); + // } } } - LoadVertices(); + if (cmd->params.check_mask_before_draw) + m_current_depth++; } void 
GPU_HW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) { +#if 0 + // TODO: FIXME // Not done in HW, but need to forward through to SW if using that for readbacks if (m_sw_renderer) { @@ -3347,6 +3155,7 @@ void GPU_HW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) cmd->clut_is_8bit = clut_is_8bit; m_sw_renderer->PushCommand(cmd); } +#endif } void GPU_HW::FlushRender() @@ -3397,13 +3206,19 @@ void GPU_HW::FlushRender() } } -void GPU_HW::UpdateDisplay() +void GPU_HW::DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) +{ + m_clamped_drawing_area = clamped_drawing_area; + m_drawing_area_changed = true; +} + +void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { FlushRender(); GL_SCOPE("UpdateDisplay()"); - if (g_settings.debugging.show_vram) + if (g_gpu_settings.debugging.show_vram) { if (IsUsingMultisampling()) { @@ -3419,30 +3234,30 @@ void GPU_HW::UpdateDisplay() return; } - const bool interlaced = IsInterlacedDisplayEnabled(); - const u32 interlaced_field = GetInterlacedDisplayField(); - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; - const u32 scaled_vram_offset_x = m_crtc_state.display_vram_left * resolution_scale; - const u32 scaled_vram_offset_y = (m_crtc_state.display_vram_top * resolution_scale) + - ((interlaced && m_GPUSTAT.vertical_resolution) ? interlaced_field : 0); - const u32 scaled_display_width = m_crtc_state.display_vram_width * resolution_scale; - const u32 scaled_display_height = m_crtc_state.display_vram_height * resolution_scale; + const bool interlaced = cmd->interlaced_display_enabled; + const u32 interlaced_field = cmd->interlaced_display_field; + const u32 resolution_scale = cmd->display_24bit ? 
1 : m_resolution_scale; + const u32 scaled_vram_offset_x = cmd->display_vram_left * resolution_scale; + const u32 scaled_vram_offset_y = (cmd->display_vram_top * resolution_scale) + + ((interlaced && cmd->interlaced_display_interleaved) ? interlaced_field : 0); + const u32 scaled_display_width = cmd->display_vram_width * resolution_scale; + const u32 scaled_display_height = cmd->display_vram_height * resolution_scale; const u32 read_height = interlaced ? (scaled_display_height / 2u) : scaled_display_height; - const u32 line_skip = BoolToUInt32(interlaced && m_GPUSTAT.vertical_resolution); + const u32 line_skip = cmd->interlaced_display_interleaved; bool drew_anything = false; // Don't bother grabbing depth if postfx doesn't need it. - GPUTexture* depth_source = (!m_GPUSTAT.display_area_color_depth_24 && m_pgxp_depth_buffer && - PostProcessing::InternalChain.NeedsDepthBuffer()) ? - (m_depth_was_copied ? m_vram_depth_copy_texture.get() : m_vram_depth_texture.get()) : - nullptr; + GPUTexture* depth_source = + (!cmd->display_24bit && m_pgxp_depth_buffer && PostProcessing::InternalChain.NeedsDepthBuffer()) ? + (m_depth_was_copied ? 
m_vram_depth_copy_texture.get() : m_vram_depth_texture.get()) : + nullptr; - if (IsDisplayDisabled()) + if (cmd->display_disabled) { ClearDisplayTexture(); return; } - else if (!m_GPUSTAT.display_area_color_depth_24 && !IsUsingMultisampling() && + else if (!cmd->display_24bit && !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight() && !PostProcessing::InternalChain.IsActive()) @@ -3497,14 +3312,14 @@ void GPU_HW::UpdateDisplay() else { g_gpu_device->SetRenderTarget(m_vram_extract_texture.get()); - g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)].get()); + g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(cmd->display_24bit)].get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); } - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 skip_x = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; + const u32 reinterpret_start_x = cmd->X * resolution_scale; + const u32 skip_x = (cmd->display_vram_left - cmd->X) * resolution_scale; GL_INS_FMT("VRAM extract, depth = {}, 24bpp = {}, skip_x = {}, line_skip = {}", depth_source ? 
"yes" : "no", - m_GPUSTAT.display_area_color_depth_24.GetValue(), skip_x, line_skip); + cmd->display_24bit.GetValue(), skip_x, line_skip); GL_INS_FMT("Source: {},{} => {},{} ({}x{})", reinterpret_start_x, scaled_vram_offset_y, reinterpret_start_x + scaled_display_width, scaled_vram_offset_y + read_height, scaled_display_width, read_height); @@ -3542,7 +3357,7 @@ void GPU_HW::UpdateDisplay() } } - if (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24) + if (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit) { DebugAssert(m_display_texture); DownsampleFramebuffer(); @@ -3753,6 +3568,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, ds_width, ds_height); } +#if 0 void GPU_HW::DrawRendererStats() { if (ImGui::CollapsingHeader("Renderer Statistics", ImGuiTreeNodeFlags_DefaultOpen)) @@ -3800,27 +3616,24 @@ void GPU_HW::DrawRendererStats() ImGui::TextUnformatted("PGXP:"); ImGui::NextColumn(); - ImGui::TextColored(g_settings.gpu_pgxp_enable ? active_color : inactive_color, "Geom"); + ImGui::TextColored(g_gpu_settings.gpu_pgxp_enable ? active_color : inactive_color, "Geom"); ImGui::SameLine(); - ImGui::TextColored((g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling) ? active_color : inactive_color, + ImGui::TextColored((g_gpu_settings.gpu_pgxp_enable && g_gpu_settings.gpu_pgxp_culling) ? active_color : inactive_color, "Cull"); ImGui::SameLine(); ImGui::TextColored( - (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_texture_correction) ? active_color : inactive_color, "Tex"); + (g_gpu_settings.gpu_pgxp_enable && g_gpu_settings.gpu_pgxp_texture_correction) ? active_color : inactive_color, "Tex"); ImGui::SameLine(); - ImGui::TextColored((g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_vertex_cache) ? 
active_color : inactive_color, + ImGui::TextColored((g_gpu_settings.gpu_pgxp_enable && g_gpu_settings.gpu_pgxp_vertex_cache) ? active_color : inactive_color, "Cache"); ImGui::NextColumn(); ImGui::Columns(1); } } +#endif -std::unique_ptr GPU::CreateHardwareRenderer() +std::unique_ptr GPUBackend::CreateHardwareBackend() { - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize()) - return nullptr; - - return gpu; + return std::make_unique(); } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index d1bc5cb67..855622cf6 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -3,7 +3,7 @@ #pragma once -#include "gpu.h" +#include "gpu_backend.h" #include "texture_replacements.h" #include "util/gpu_device.h" @@ -21,7 +21,9 @@ class GPU_SW_Backend; struct GPUBackendCommand; struct GPUBackendDrawCommand; -class GPU_HW final : public GPU +// TODO: Move to cpp +// TODO: Rename to GPUHWBackend, preserved to avoid conflicts. +class GPU_HW final : public GPUBackend { public: enum class BatchRenderMode : u8 @@ -55,21 +57,37 @@ public: GPU_HW(); ~GPU_HW() override; - const Threading::Thread* GetSWThread() const override; - bool IsHardwareRenderer() const override; + bool Initialize(bool clear_vram, Error* error) override; - bool Initialize() override; - void Reset(bool clear_vram) override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; + void ClearVRAM() override; + bool DoState(GPUTexture** host_texture, bool is_reading, bool update_display) override; void RestoreDeviceContext() override; void UpdateSettings(const Settings& old_settings) override; - void UpdateResolutionScale() override final; - std::tuple GetEffectiveDisplayResolution(bool scaled = true) override; - std::tuple GetFullDisplayResolution(bool scaled = true) override; - void UpdateDisplay() override; + void UpdateResolutionScale() override; + std::tuple GetEffectiveDisplayResolution(bool scaled = true) const override; + std::tuple 
GetFullDisplayResolution(bool scaled = true) const override; + + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) override; + void ClearCache() override; + void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; + void OnBufferSwapped() override; + + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; + void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; + void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + + void FlushRender() override; + void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) override; + + void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) override; private: enum : u32 @@ -78,6 +96,7 @@ private: MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) * (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u), NUM_TEXTURE_MODES = static_cast(BatchTextureMode::MaxCount), + INVALID_DRAW_MODE_BITS = 0xFFFFFFFFu, }; enum : u8 { @@ -152,8 +171,6 @@ private: bool CompilePipelines(); void DestroyPipelines(); - void LoadVertices(); - void PrintSettingsToLog(); void CheckSettings(); @@ -171,7 +188,7 @@ private: GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; bool IsUsingMultisampling() const; - bool IsUsingDownsampling() const; + bool IsUsingDownsampling(const GPUBackendUpdateDisplayCommand* cmd) const; void SetFullVRAMDirtyRectangle(); void ClearVRAMDirtyRectangle(); @@ -181,11 +198,14 @@ private: void 
AddUnclampedDrawnRectangle(const GSVector4i rect); void SetTexPageChangedOnOverlap(const GSVector4i update_rect); - void CheckForTexPageOverlap(GSVector4i uv_rect); + void CheckForTexPageOverlap(const GPUBackendDrawCommand* cmd, GSVector4i uv_rect); bool IsFlushed() const; void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices); - void EnsureVertexBufferSpaceForCurrentCommand(); + void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd); + void PrepareDraw(const GPUBackendDrawCommand* cmd); + void FinishPolygonDraw(const GPUBackendDrawCommand* cmd, std::array& vertices, u32 num_vertices, + bool is_3d); void ResetBatchVertexDepth(); /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. @@ -197,20 +217,8 @@ private: /// Returns true if the draw is going to use shader blending/framebuffer fetch. bool NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const; - void FillBackendCommandParameters(GPUBackendCommand* cmd) const; - void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; void UpdateSoftwareRenderer(bool copy_vram_from_hw); - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void DispatchRenderCommand() override; - void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; - void FlushRender() override; - void DrawRendererStats() override; - void OnBufferSwapped() override; - void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, bool check_mask, const GSVector4i bounds); bool BlitVRAMReplacementTexture(const TextureReplacements::ReplacementImage* tex, u32 dst_x, u32 dst_y, u32 
width, @@ -220,17 +228,17 @@ private: void DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth); /// Handles quads with flipped texture coordinate directions. - void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); + void HandleFlippedQuadTextureCoordinates(const GPUBackendDrawCommand* cmd, BatchVertex* vertices); bool IsPossibleSpritePolygon(const BatchVertex* vertices) const; bool ExpandLineTriangles(BatchVertex* vertices); /// Computes polygon U/V boundaries, and for overlap with the current texture page. - void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices); + void ComputePolygonUVLimits(const GPUBackendDrawCommand* cmd, BatchVertex* vertices, u32 num_vertices); /// Sets the depth test flag for PGXP depth buffering. - void SetBatchDepthBuffer(bool enabled); - void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices); - void SetBatchSpriteMode(bool enabled); + void SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled); + void CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices); + void SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled); void UpdateDownsamplingLevels(); @@ -249,7 +257,7 @@ private: std::unique_ptr m_vram_upload_buffer; std::unique_ptr m_vram_write_texture; - std::unique_ptr m_sw_renderer; + // std::unique_ptr m_sw_renderer; BatchVertex* m_batch_vertex_ptr = nullptr; u16* m_batch_index_ptr = nullptr; @@ -287,15 +295,29 @@ private: BatchConfig m_batch; // Changed state + bool m_drawing_area_changed = true; bool m_batch_ubo_dirty = true; BatchUBOData m_batch_ubo_data = {}; // Bounding box of VRAM area that the GPU has drawn into. 
+ GSVector4i m_clamped_drawing_area = {}; GSVector4i m_vram_dirty_draw_rect = INVALID_RECT; GSVector4i m_vram_dirty_write_rect = INVALID_RECT; GSVector4i m_current_uv_rect = INVALID_RECT; s32 m_current_texture_page_offset[2] = {}; + union + { + struct + { + GPUDrawModeReg mode_reg; + GPUTexturePaletteReg palette_reg; + }; + + u32 bits = INVALID_DRAW_MODE_BITS; + } m_draw_mode = {}; + u32 m_last_texture_window_bits = 0; + std::unique_ptr m_wireframe_pipeline; // [wrapped][interlaced] diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index f456392ac..ae9a86220 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -2,7 +2,9 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + #include "gpu_hw.h" + #include "util/shadergen.h" class GPU_HW_ShaderGen : public ShaderGen diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index bf9517b78..d997a15ce 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -2,14 +2,15 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_sw.h" +#include "gpu.h" +#include "gpu_sw_rasterizer.h" #include "system.h" #include "util/gpu_device.h" #include "common/align.h" #include "common/assert.h" -#include "common/gsvector.h" -#include "common/gsvector_formatter.h" +#include "common/intrin.h" #include "common/log.h" #include @@ -18,27 +19,141 @@ Log_SetChannel(GPU_SW); GPU_SW::GPU_SW() = default; -GPU_SW::~GPU_SW() -{ - g_gpu_device->RecycleTexture(std::move(m_upload_texture)); - m_backend.Shutdown(); -} +GPU_SW::~GPU_SW() = default; -const Threading::Thread* GPU_SW::GetSWThread() const +bool GPU_SW::Initialize(bool clear_vram, Error* error) { - return m_backend.GetThread(); -} - -bool GPU_SW::IsHardwareRenderer() const -{ - return false; -} - -bool GPU_SW::Initialize() -{ - if (!GPU::Initialize() || !m_backend.Initialize(false)) + if (!GPUBackend::Initialize(clear_vram, error)) return false; + GPU_SW_Rasterizer::SelectImplementation(); + + // if we're 
using "new" vram, clear it out here + if (clear_vram) + std::memset(g_vram, 0, sizeof(g_vram)); + + SetDisplayTextureFormat(); + return true; +} + +bool GPU_SW::DoState(GPUTexture** host_texture, bool is_reading, bool update_display) +{ + // TODO: FIXME + // ignore the host texture for software mode, since we want to save vram here + return true; +} + +void GPU_SW::ClearVRAM() +{ + std::memset(g_vram, 0, sizeof(g_vram)); + std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); +} + +std::tuple GPU_SW::GetEffectiveDisplayResolution(bool scaled /* = true */) const +{ + return std::tie(m_display_vram_width, m_display_vram_height); +} + +std::tuple GPU_SW::GetFullDisplayResolution(bool scaled /* = true */) const +{ + return std::tie(m_display_width, m_display_height); +} + +void GPU_SW::UpdateResolutionScale() +{ +} + +void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ +} + +void GPU_SW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; + + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); + + DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); + if (rc.quad_polygon) + DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); +} + +void GPU_SW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; + + const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( + rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); + + // Need to cut out the irrelevant bits. 
+ // TODO: In _theory_ we could use the fixed-point parts here. + GPUBackendDrawPolygonCommand::Vertex vertices[4]; + for (u32 i = 0; i < cmd->num_vertices; i++) + { + const GPUBackendDrawPrecisePolygonCommand::Vertex& src = cmd->vertices[i]; + GPUBackendDrawPolygonCommand::Vertex& dst = vertices[i]; + + dst.x = src.native_x; + dst.y = src.native_y; + dst.color = src.color; + dst.texcoord = src.texcoord; + } + + DrawFunction(cmd, &vertices[0], &vertices[1], &vertices[2]); + if (rc.quad_polygon) + DrawFunction(cmd, &vertices[2], &vertices[1], &vertices[3]); +} + +void GPU_SW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + + const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); + + DrawFunction(cmd); +} + +void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = GPU_SW_Rasterizer::GetDrawLineFunction( + cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled()); + + for (u16 i = 1; i < cmd->num_vertices; i++) + DrawFunction(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); +} + +void GPU_SW::DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) +{ + GPU_SW_Rasterizer::g_drawing_area = new_drawing_area; +} + +void GPU_SW::ClearCache() +{ +} + +void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + GPU_SW_Rasterizer::UpdateCLUT(reg, clut_is_8bit); +} + +void GPU_SW::OnBufferSwapped() +{ +} + +void GPU_SW::FlushRender() +{ +} + +void GPU_SW::RestoreDeviceContext() +{ +} + +void GPU_SW::SetDisplayTextureFormat() +{ static constexpr const std::array formats_for_16bit = {GPUTexture::Format::RGB565, GPUTexture::Format::RGBA5551, GPUTexture::Format::RGBA8, GPUTexture::Format::BGRA8}; static constexpr const std::array formats_for_24bit = 
{GPUTexture::Format::RGBA8, GPUTexture::Format::BGRA8, @@ -59,32 +174,7 @@ bool GPU_SW::Initialize() break; } } - - return true; } - -bool GPU_SW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) -{ - // need to ensure the worker thread is done - m_backend.Sync(true); - - // ignore the host texture for software mode, since we want to save vram here - return GPU::DoState(sw, nullptr, update_display); -} - -void GPU_SW::Reset(bool clear_vram) -{ - GPU::Reset(clear_vram); - - m_backend.Reset(); -} - -void GPU_SW::UpdateSettings(const Settings& old_settings) -{ - GPU::UpdateSettings(old_settings); - m_backend.UpdateSettings(); -} - GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format) { if (!m_upload_texture || m_upload_texture->GetWidth() != width || m_upload_texture->GetHeight() != height || @@ -420,32 +510,28 @@ bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u3 } } -void GPU_SW::UpdateDisplay() +void GPU_SW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { - // fill display texture - m_backend.Sync(true); - if (!g_settings.debugging.show_vram) { - if (IsDisplayDisabled()) + if (cmd->display_disabled) { ClearDisplayTexture(); return; } - const bool is_24bit = m_GPUSTAT.display_area_color_depth_24; - const bool interlaced = IsInterlacedDisplayEnabled(); - const u32 field = GetInterlacedDisplayField(); - const u32 vram_offset_x = is_24bit ? m_crtc_state.regs.X : m_crtc_state.display_vram_left; - const u32 vram_offset_y = - m_crtc_state.display_vram_top + ((interlaced && m_GPUSTAT.vertical_resolution) ? field : 0); - const u32 skip_x = is_24bit ? (m_crtc_state.display_vram_left - m_crtc_state.regs.X) : 0; - const u32 read_width = m_crtc_state.display_vram_width; - const u32 read_height = interlaced ? 
(m_crtc_state.display_vram_height / 2) : m_crtc_state.display_vram_height; + const bool is_24bit = cmd->display_24bit; + const bool interlaced = cmd->interlaced_display_enabled; + const u32 field = cmd->interlaced_display_field; + const u32 vram_offset_x = is_24bit ? cmd->X : cmd->display_vram_left; + const u32 vram_offset_y = cmd->display_vram_top + ((interlaced && cmd->interlaced_display_interleaved) ? field : 0); + const u32 skip_x = is_24bit ? (cmd->display_vram_left - cmd->X) : 0; + const u32 read_width = cmd->display_vram_width; + const u32 read_height = interlaced ? (cmd->display_vram_height / 2) : cmd->display_vram_height; - if (IsInterlacedDisplayEnabled()) + if (cmd->interlaced_display_enabled) { - const u32 line_skip = m_GPUSTAT.vertical_resolution; + const u32 line_skip = cmd->interlaced_display_interleaved; if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, line_skip, is_24bit)) { SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height); @@ -477,336 +563,7 @@ void GPU_SW::UpdateDisplay() } } -void GPU_SW::FillBackendCommandParameters(GPUBackendCommand* cmd) const +std::unique_ptr GPUBackend::CreateSoftwareBackend() { - cmd->params.bits = 0; - cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; - cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; - cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; - cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled(); -} - -void GPU_SW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const -{ - FillBackendCommandParameters(cmd); - cmd->rc.bits = rc.bits; - cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; - cmd->palette.bits = m_draw_mode.palette_reg.bits; - cmd->window = m_draw_mode.texture_window; -} - -void GPU_SW::DispatchRenderCommand() -{ - if (m_drawing_area_changed) - { - GPUBackendSetDrawingAreaCommand* cmd = m_backend.NewSetDrawingAreaCommand(); - cmd->new_area = m_drawing_area; - 
GSVector4i::store(cmd->new_clamped_area, m_clamped_drawing_area); - m_backend.PushCommand(cmd); - m_drawing_area_changed = false; - } - - const GPURenderCommand rc{m_render_command.bits}; - - switch (rc.primitive) - { - case GPUPrimitive::Polygon: - { - const u32 num_vertices = rc.quad_polygon ? 4 : 3; - GPUBackendDrawPolygonCommand* cmd = m_backend.NewDrawPolygonCommand(num_vertices); - FillDrawCommand(cmd, rc); - - std::array positions; - const u32 first_color = rc.color_for_first_vertex; - const bool shaded = rc.shading_enable; - const bool textured = rc.texture_enable; - for (u32 i = 0; i < num_vertices; i++) - { - GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; - vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - const u64 maddr_and_pos = m_fifo.Pop(); - const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; - vert->x = m_drawing_offset.x + vp.x; - vert->y = m_drawing_offset.y + vp.y; - vert->texcoord = textured ? Truncate16(FifoPop()) : 0; - positions[i] = GSVector2i::load(&vert->x); - } - - // Cull polygons which are too large. 
- const GSVector2i min_pos_12 = positions[1].min_i32(positions[2]); - const GSVector2i max_pos_12 = positions[1].max_i32(positions[2]); - const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min_i32(positions[0])) - .upl64(GSVector4i(max_pos_12.max_i32(positions[0]))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const bool first_tri_culled = - (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || - !m_clamped_drawing_area.rintersects(draw_rect_012)); - if (first_tri_culled) - { - DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, - cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); - - if (!rc.quad_polygon) - return; - } - else - { - AddDrawTriangleTicks(positions[0], positions[1], positions[2], rc.shading_enable, rc.texture_enable, - rc.transparency_enable); - } - - // quads - if (rc.quad_polygon) - { - const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_i32(positions[3])) - .upl64(GSVector4i(max_pos_12.max_i32(positions[3]))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - - // Cull polygons which are too large. 
- const bool second_tri_culled = - (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - !m_clamped_drawing_area.rintersects(draw_rect_123)); - if (second_tri_culled) - { - DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, - cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y); - - if (first_tri_culled) - return; - } - else - { - AddDrawTriangleTicks(positions[2], positions[1], positions[3], rc.shading_enable, rc.texture_enable, - rc.transparency_enable); - } - } - - m_backend.PushCommand(cmd); - } - break; - - case GPUPrimitive::Rectangle: - { - GPUBackendDrawRectangleCommand* cmd = m_backend.NewDrawRectangleCommand(); - FillDrawCommand(cmd, rc); - cmd->color = rc.color_for_first_vertex; - - const GPUVertexPosition vp{FifoPop()}; - cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); - cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); - - if (rc.texture_enable) - { - const u32 texcoord_and_palette = FifoPop(); - cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); - cmd->texcoord = Truncate16(texcoord_and_palette); - } - else - { - cmd->palette.bits = 0; - cmd->texcoord = 0; - } - - switch (rc.rectangle_size) - { - case GPUDrawRectangleSize::R1x1: - cmd->width = 1; - cmd->height = 1; - break; - case GPUDrawRectangleSize::R8x8: - cmd->width = 8; - cmd->height = 8; - break; - case GPUDrawRectangleSize::R16x16: - cmd->width = 16; - cmd->height = 16; - break; - default: - { - const u32 width_and_height = FifoPop(); - cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); - cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); - } - break; - } - - const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); - const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); - if (clamped_rect.rempty()) [[unlikely]] - { - 
DEBUG_LOG("Culling off-screen rectangle {}", rect); - return; - } - - AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); - - m_backend.PushCommand(cmd); - } - break; - - case GPUPrimitive::Line: - { - if (!rc.polyline) - { - GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(2); - FillDrawCommand(cmd, rc); - cmd->palette.bits = 0; - - if (rc.shading_enable) - { - cmd->vertices[0].color = rc.color_for_first_vertex; - const GPUVertexPosition start_pos{FifoPop()}; - cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; - cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; - - cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); - const GPUVertexPosition end_pos{FifoPop()}; - cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; - cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; - } - else - { - cmd->vertices[0].color = rc.color_for_first_vertex; - cmd->vertices[1].color = rc.color_for_first_vertex; - - const GPUVertexPosition start_pos{FifoPop()}; - cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; - cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; - - const GPUVertexPosition end_pos{FifoPop()}; - cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; - cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; - } - - const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x); - const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x); - const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].y, cmd->vertices[0].y, - cmd->vertices[1].x, cmd->vertices[1].y); - return; - } - - AddDrawLineTicks(clamped_rect, rc.shading_enable); - - m_backend.PushCommand(cmd); - } - else - { - const u32 num_vertices = 
GetPolyLineVertexCount(); - - GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(num_vertices); - FillDrawCommand(cmd, m_render_command); - - u32 buffer_pos = 0; - const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - cmd->vertices[0].x = start_vp.x + m_drawing_offset.x; - cmd->vertices[0].y = start_vp.y + m_drawing_offset.y; - cmd->vertices[0].color = m_render_command.color_for_first_vertex; - - const bool shaded = m_render_command.shading_enable; - for (u32 i = 1; i < num_vertices; i++) - { - cmd->vertices[i].color = - shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; - const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; - cmd->vertices[i].x = m_drawing_offset.x + vp.x; - cmd->vertices[i].y = m_drawing_offset.y + vp.y; - - const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x); - const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x); - const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) - { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[i - 1].x, - cmd->vertices[i - 1].y, cmd->vertices[i].x, cmd->vertices[i].y); - return; - } - else - { - AddDrawLineTicks(clamped_rect, rc.shading_enable); - } - } - - m_backend.PushCommand(cmd); - } - } - break; - - default: - UnreachableCode(); - break; - } -} - -void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ - m_backend.Sync(false); -} - -void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - GPUBackendFillVRAMCommand* cmd = m_backend.NewFillVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - cmd->color = color; - 
m_backend.PushCommand(cmd); -} - -void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) -{ - const u32 num_words = width * height; - GPUBackendUpdateVRAMCommand* cmd = m_backend.NewUpdateVRAMCommand(num_words); - FillBackendCommandParameters(cmd); - cmd->params.set_mask_while_drawing = set_mask; - cmd->params.check_mask_before_draw = check_mask; - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - std::memcpy(cmd->data, data, sizeof(u16) * num_words); - m_backend.PushCommand(cmd); -} - -void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - GPUBackendCopyVRAMCommand* cmd = m_backend.NewCopyVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->src_x = static_cast(src_x); - cmd->src_y = static_cast(src_y); - cmd->dst_x = static_cast(dst_x); - cmd->dst_y = static_cast(dst_y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - m_backend.PushCommand(cmd); -} - -void GPU_SW::FlushRender() -{ -} - -void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) -{ - GPUBackendUpdateCLUTCommand* cmd = m_backend.NewUpdateCLUTCommand(); - FillBackendCommandParameters(cmd); - cmd->reg.bits = reg.bits; - cmd->clut_is_8bit = clut_is_8bit; - m_backend.PushCommand(cmd); -} - -std::unique_ptr GPU::CreateSoftwareRenderer() -{ - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize()) - return nullptr; - - return gpu; + return std::make_unique(); } diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 7e510ffaa..445fe228e 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -1,9 +1,10 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + #include "gpu.h" -#include "gpu_sw_backend.h" +#include "gpu_backend.h" #include 
"util/gpu_device.h" @@ -13,36 +14,42 @@ #include #include -namespace Threading { -class Thread; -} - -class GPUTexture; - -class GPU_SW final : public GPU +// TODO: Move to cpp +// TODO: Rename to GPUSWBackend, preserved to avoid conflicts. +class GPU_SW final : public GPUBackend { public: GPU_SW(); ~GPU_SW() override; - ALWAYS_INLINE const GPU_SW_Backend& GetBackend() const { return m_backend; } + bool Initialize(bool clear_vram, Error* error) override; - const Threading::Thread* GetSWThread() const override; - bool IsHardwareRenderer() const override; + std::tuple GetEffectiveDisplayResolution(bool scaled = true) const override; + std::tuple GetFullDisplayResolution(bool scaled = true) const override; - bool Initialize() override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; - void Reset(bool clear_vram) override; - void UpdateSettings(const Settings& old_settings) override; + void UpdateResolutionScale() override; -protected: - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void FlushRender() override; - void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; + void RestoreDeviceContext() override; + + bool DoState(GPUTexture** host_texture, bool is_reading, bool update_display) override; + void ClearVRAM() override; + + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; + void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + void DrawSprite(const GPUBackendDrawRectangleCommand * cmd) override; + void DrawingAreaChanged(const 
GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) override; + void ClearCache() override; + void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; + void OnBufferSwapped() override; + + void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) override; + +private: template bool CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip); @@ -51,19 +58,11 @@ protected: bool CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit); - void UpdateDisplay() override; - - void DispatchRenderCommand() override; - - void FillBackendCommandParameters(GPUBackendCommand* cmd) const; - void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; - + void SetDisplayTextureFormat(); GPUTexture* GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format); FixedHeapArray m_upload_buffer; GPUTexture::Format m_16bit_display_format = GPUTexture::Format::RGB565; GPUTexture::Format m_24bit_display_format = GPUTexture::Format::RGBA8; std::unique_ptr m_upload_texture; - - GPU_SW_Backend m_backend; }; diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp deleted file mode 100644 index a56d32c06..000000000 --- a/src/core/gpu_sw_backend.cpp +++ /dev/null @@ -1,227 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "gpu_sw_backend.h" -#include "gpu.h" -#include "gpu_sw_rasterizer.h" -#include "system.h" - -#include "util/gpu_device.h" - -#include - -GPU_SW_Backend::GPU_SW_Backend() = default; - -GPU_SW_Backend::~GPU_SW_Backend() = default; - -bool GPU_SW_Backend::Initialize(bool force_thread) -{ - GPU_SW_Rasterizer::SelectImplementation(); - - return GPUBackend::Initialize(force_thread); -} - -void GPU_SW_Backend::Reset() -{ - GPUBackend::Reset(); -} - -void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) -{ - const GPURenderCommand rc{cmd->rc.bits}; - 
const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; - - const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( - rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); - - DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); - if (rc.quad_polygon) - DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); -} - -void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) -{ - const GPURenderCommand rc{cmd->rc.bits}; - - const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = - GPU_SW_Rasterizer::GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); - - DrawFunction(cmd); -} - -void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd) -{ - const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = GPU_SW_Rasterizer::GetDrawLineFunction( - cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled()); - - for (u16 i = 1; i < cmd->num_vertices; i++) - DrawFunction(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); -} - -void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) -{ - const u16 color16 = VRAMRGBA8888ToRGBA5551(color); - if ((x + width) <= VRAM_WIDTH && !params.interlaced_rendering) - { - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16); - } - } - else if (params.interlaced_rendering) - { - // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. 
- const u32 active_field = params.active_line_lsb; - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - if ((row & u32(1)) == active_field) - continue; - - u16* row_ptr = &g_vram[row * VRAM_WIDTH]; - for (u32 xoffs = 0; xoffs < width; xoffs++) - { - const u32 col = (x + xoffs) % VRAM_WIDTH; - row_ptr[col] = color16; - } - } - } - else - { - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - u16* row_ptr = &g_vram[row * VRAM_WIDTH]; - for (u32 xoffs = 0; xoffs < width; xoffs++) - { - const u32 col = (x + xoffs) % VRAM_WIDTH; - row_ptr[col] = color16; - } - } - } -} - -void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, - GPUBackendCommandParameters params) -{ - // Fast path when the copy is not oversized. - if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled()) - { - const u16* src_ptr = static_cast(data); - u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x]; - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - std::copy_n(src_ptr, width, dst_ptr); - src_ptr += width; - dst_ptr += VRAM_WIDTH; - } - } - else - { - // Slow path when we need to handle wrap-around. - const u16* src_ptr = static_cast(data); - const u16 mask_and = params.GetMaskAND(); - const u16 mask_or = params.GetMaskOR(); - - for (u32 row = 0; row < height;) - { - u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; - for (u32 col = 0; col < width;) - { - // TODO: Handle unaligned reads... - u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; - if (((*pixel_ptr) & mask_and) == 0) - *pixel_ptr = *(src_ptr++) | mask_or; - } - } - } -} - -void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, - GPUBackendCommandParameters params) -{ - // Break up oversized copies. This behavior has not been verified on console. 
- if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) - { - u32 remaining_rows = height; - u32 current_src_y = src_y; - u32 current_dst_y = dst_y; - while (remaining_rows > 0) - { - const u32 rows_to_copy = - std::min(remaining_rows, std::min(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); - - u32 remaining_columns = width; - u32 current_src_x = src_x; - u32 current_dst_x = dst_x; - while (remaining_columns > 0) - { - const u32 columns_to_copy = - std::min(remaining_columns, std::min(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); - CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy, params); - current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; - current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; - remaining_columns -= columns_to_copy; - } - - current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; - current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; - remaining_rows -= rows_to_copy; - } - - return; - } - - // This doesn't have a fast path, but do we really need one? It's not common. - const u16 mask_and = params.GetMaskAND(); - const u16 mask_or = params.GetMaskOR(); - - // Copy in reverse when src_x < dst_x, this is verified on console. 
- if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) - { - for (u32 row = 0; row < height; row++) - { - const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - - for (s32 col = static_cast(width - 1); col >= 0; col--) - { - const u16 src_pixel = src_row_ptr[(src_x + static_cast(col)) % VRAM_WIDTH]; - u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast(col)) % VRAM_WIDTH]; - if ((*dst_pixel_ptr & mask_and) == 0) - *dst_pixel_ptr = src_pixel | mask_or; - } - } - } - else - { - for (u32 row = 0; row < height; row++) - { - const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - - for (u32 col = 0; col < width; col++) - { - const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; - u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; - if ((*dst_pixel_ptr & mask_and) == 0) - *dst_pixel_ptr = src_pixel | mask_or; - } - } - } -} - -void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) -{ - GPU::ReadCLUT(g_gpu_clut, reg, clut_is_8bit); -} - -void GPU_SW_Backend::DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) -{ - GPU_SW_Rasterizer::g_drawing_area = new_drawing_area; -} - -void GPU_SW_Backend::FlushRender() -{ -} diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h deleted file mode 100644 index 3bc3563bb..000000000 --- a/src/core/gpu_sw_backend.h +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once - -#include "gpu.h" -#include "gpu_backend.h" - -#include -#include -#include - -class GPU_SW_Backend final : public GPUBackend -{ -public: - GPU_SW_Backend(); - ~GPU_SW_Backend() override; - - bool Initialize(bool force_thread) 
override; - void Reset() override; - -protected: - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, - GPUBackendCommandParameters params) override; - - void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; - void DrawLine(const GPUBackendDrawLineCommand* cmd) override; - void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override; - void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) override; - void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; - void FlushRender() override; -}; diff --git a/src/core/gpu_sw_rasterizer.cpp b/src/core/gpu_sw_rasterizer.cpp index 47177c0da..e6607bf23 100644 --- a/src/core/gpu_sw_rasterizer.cpp +++ b/src/core/gpu_sw_rasterizer.cpp @@ -36,6 +36,32 @@ constinit const DitherLUT g_dither_lut = []() constexpr { GPUDrawingArea g_drawing_area = {}; } // namespace GPU_SW_Rasterizer +void GPU_SW_Rasterizer::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) +{ + u16* const dest = g_gpu_clut; + const u16* const src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH]; + const u32 start_x = reg.GetXBase(); + if (!clut_is_8bit) + { + // Wraparound can't happen in 4-bit mode. + std::memcpy(g_gpu_clut, &src_row[start_x], sizeof(u16) * 16); + } + else + { + if ((start_x + 256) > VRAM_WIDTH) [[unlikely]] + { + const u32 end = VRAM_WIDTH - start_x; + const u32 start = 256 - end; + std::memcpy(dest, &src_row[start_x], sizeof(u16) * end); + std::memcpy(dest + end, src_row, sizeof(u16) * start); + } + else + { + std::memcpy(dest, &src_row[start_x], sizeof(u16) * 256); + } + } +} + // Default implementation definitions. 
namespace GPU_SW_Rasterizer { #include "gpu_sw_rasterizer.inl" diff --git a/src/core/gpu_sw_rasterizer.h b/src/core/gpu_sw_rasterizer.h index f183e26d3..95ee9b85d 100644 --- a/src/core/gpu_sw_rasterizer.h +++ b/src/core/gpu_sw_rasterizer.h @@ -20,12 +20,15 @@ static constexpr u32 DITHER_LUT_SIZE = 512; using DitherLUT = std::array, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>; extern const DitherLUT g_dither_lut; +// TODO: Pack in struct extern GPUDrawingArea g_drawing_area; +extern void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit); + using DrawRectangleFunction = void (*)(const GPUBackendDrawRectangleCommand* cmd); typedef const DrawRectangleFunction DrawRectangleFunctionTable[2][2][2]; -using DrawTriangleFunction = void (*)(const GPUBackendDrawPolygonCommand* cmd, +using DrawTriangleFunction = void (*)(const GPUBackendDrawCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2); diff --git a/src/core/gpu_sw_rasterizer.inl b/src/core/gpu_sw_rasterizer.inl index acf6c5cfe..d3679e323 100644 --- a/src/core/gpu_sw_rasterizer.inl +++ b/src/core/gpu_sw_rasterizer.inl @@ -809,7 +809,7 @@ ALWAYS_INLINE_RELEASE static void AddIDeltas_DY(i_group& ig, const i_deltas& idl template -ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, +ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl) { if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast(y)) & 1u)) @@ -988,7 +988,7 @@ ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 template -static void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, +static void DrawTriangle(const GPUBackendDrawCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, 
const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2) { #if 0 diff --git a/src/core/gpu_thread.cpp b/src/core/gpu_thread.cpp new file mode 100644 index 000000000..8b7810f96 --- /dev/null +++ b/src/core/gpu_thread.cpp @@ -0,0 +1,862 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "gpu_thread.h" +#include "fullscreen_ui.h" +#include "gpu_backend.h" +#include "host.h" +#include "imgui_overlays.h" +#include "settings.h" +#include "shader_cache_version.h" +#include "system.h" + +#include "util/gpu_device.h" +#include "util/imgui_manager.h" +#include "util/postprocessing.h" +#include "util/state_wrapper.h" + +#include "common/align.h" +#include "common/error.h" +#include "common/log.h" +#include "common/threading.h" +#include "common/timer.h" + +#include "IconsFontAwesome5.h" +#include "imgui.h" + +#include + +Log_SetChannel(GPUThread); + +namespace GPUThread { +enum : u32 +{ + COMMAND_QUEUE_SIZE = 4 * 1024 * 1024, + THRESHOLD_TO_WAKE_GPU = 256 +}; + +/// Starts the thread, if it hasn't already been started. +/// TODO: Persist thread +static bool Start(std::optional api, Error* error); + +static void RunGPULoop(); +static u32 GetPendingCommandSize(); +static void WakeGPUThread(); + +static bool CreateDeviceOnThread(RenderAPI api, Error* error); +static void DestroyDeviceOnThread(); + +static void CreateGPUBackendOnThread(bool initialize_vram); +static void ChangeGPUBackendOnThread(); +static void DestroyGPUBackendOnThread(); + +static void UpdateSettingsOnThread(const Settings& old_settings); +static void UpdateVSyncOnThread(); +static void UpdatePerformanceCountersOnThread(); + +// TODO: Pack this crap together, don't trust LTO... 
+ +static RenderAPI s_render_api = RenderAPI::None; +static std::unique_ptr s_gpu_backend; +static std::optional s_requested_renderer; +static bool s_start_fullscreen_ui = false; +static GPUVSyncMode s_requested_vsync = GPUVSyncMode::Disabled; +static bool s_requested_allow_present_throttle = false; + +static bool s_last_frame_skipped = false; + +static Common::Timer::Value s_last_performance_counter_update_time = 0; +static u32 s_presents_since_last_update = 0; +static float s_accumulated_gpu_time = 0.0f; +static float s_average_gpu_time = 0.0f; +static float s_gpu_usage = 0.0f; + +static Threading::KernelSemaphore m_sync_semaphore; +static Threading::Thread m_gpu_thread; +static Error s_open_error; +static std::atomic_bool s_open_flag{false}; +static std::atomic_bool s_shutdown_flag{false}; +static std::atomic_bool s_run_idle_flag{false}; +static std::atomic_flag s_performance_counters_updated = ATOMIC_FLAG_INIT; + +static Threading::WorkSema s_work_sema; + +static FixedHeapArray m_command_fifo_data; +alignas(HOST_CACHE_LINE_SIZE) static std::atomic m_command_fifo_read_ptr{0}; +alignas(HOST_CACHE_LINE_SIZE) static std::atomic m_command_fifo_write_ptr{0}; +} // namespace GPUThread + +const Threading::ThreadHandle& GPUThread::GetThreadHandle() +{ + return m_gpu_thread; +} + +RenderAPI GPUThread::GetRenderAPI() +{ + std::atomic_thread_fence(std::memory_order_acquire); + return s_render_api; +} + +bool GPUThread::IsStarted() +{ + return m_gpu_thread.Joinable(); +} + +bool GPUThread::WasFullscreenUIRequested() +{ + return s_start_fullscreen_ui; +} + +bool GPUThread::Start(std::optional renderer, Error* error) +{ + Assert(!IsStarted()); + + INFO_LOG("Starting GPU thread..."); + + s_requested_renderer = renderer; + g_gpu_settings = g_settings; + + s_last_performance_counter_update_time = Common::Timer::GetCurrentValue(); + s_presents_since_last_update = 0; + s_average_gpu_time = 0.0f; + s_gpu_usage = 0.0f; + GPUBackend::ResetStatistics(); + + 
s_shutdown_flag.store(false, std::memory_order_release); + s_run_idle_flag.store(false, std::memory_order_release); + m_gpu_thread.Start(&GPUThread::RunGPULoop); + m_sync_semaphore.Wait(); + + if (!s_open_flag.load(std::memory_order_acquire)) + { + ERROR_LOG("Failed to create GPU thread."); + if (error) + *error = s_open_error; + + m_gpu_thread.Join(); + return false; + } + + VERBOSE_LOG("GPU thread started."); + return true; +} + +bool GPUThread::StartFullscreenUI(Error* error) +{ + // NOTE: Racey read. + if (FullscreenUI::IsInitialized()) + return true; + + if (IsStarted()) + { + RunOnThread([]() { + // TODO: Error handling. + if (!FullscreenUI::Initialize()) + Panic("Failed"); + }); + + return true; + } + + s_start_fullscreen_ui = true; + if (!Start(std::nullopt, error)) + { + s_start_fullscreen_ui = false; + return false; + } + + return true; +} + +std::optional GPUThread::GetRequestedRenderer() +{ + return s_requested_renderer; +} + +bool GPUThread::CreateGPUBackend(GPURenderer renderer, Error* error) +{ + if (IsStarted()) + { + s_requested_renderer = renderer; + std::atomic_thread_fence(std::memory_order_release); + PushCommandAndSync(AllocateCommand(GPUBackendCommandType::ChangeBackend, sizeof(GPUThreadCommand)), false); + return true; + } + else + { + return Start(renderer, error); + } +} + +bool GPUThread::SwitchGPUBackend(GPURenderer renderer, bool force_recreate_device, Error* error) +{ + if (!force_recreate_device) + { + s_requested_renderer = renderer; + std::atomic_thread_fence(std::memory_order_release); + PushCommandAndSync(AllocateCommand(GPUBackendCommandType::ChangeBackend, sizeof(GPUThreadCommand)), false); + return true; + } + + const bool was_running_fsui = s_start_fullscreen_ui; + Shutdown(); + s_requested_renderer = renderer; + s_start_fullscreen_ui = was_running_fsui; + if (!Start(renderer, error)) + { + s_requested_renderer.reset(); + s_start_fullscreen_ui = false; + return false; + } + + return true; +} + +void 
GPUThread::DestroyGPUBackend()
+{
+  if (!IsStarted())
+    return;
+
+  if (s_start_fullscreen_ui)
+  {
+    VERBOSE_LOG("Keeping GPU thread open for fullscreen UI");
+    s_requested_renderer.reset();
+    std::atomic_thread_fence(std::memory_order_release);
+    PushCommandAndSync(AllocateCommand(GPUBackendCommandType::ChangeBackend, sizeof(GPUThreadCommand)), false);
+    return;
+  }
+
+  Shutdown();
+}
+
+void GPUThread::Shutdown()
+{
+  if (!IsStarted())
+    return;
+
+  s_shutdown_flag.store(true, std::memory_order_release);
+  s_start_fullscreen_ui = false;
+  s_requested_renderer.reset();
+
+  WakeGPUThread();
+  m_gpu_thread.Join();
+  INFO_LOG("GPU thread stopped.");
+}
+
+/// Reserves space for a command in the FIFO and fills in its type and (4-byte aligned) size.
+/// The write pointer is not advanced here; the command only becomes visible to the GPU thread
+/// once it is passed to one of the PushCommand*() functions.
+/// Spins, waking the GPU thread, while there is not enough free space in the FIFO.
+GPUThreadCommand* GPUThread::AllocateCommand(GPUBackendCommandType command, u32 size)
+{
+  // Ensure size is a multiple of 4 so we don't end up with an unaligned command.
+  size = Common::AlignUpPow2(size, 4);
+
+  for (;;)
+  {
+    const u32 read_ptr = m_command_fifo_read_ptr.load(std::memory_order_acquire);
+    const u32 write_ptr = m_command_fifo_write_ptr.load(std::memory_order_relaxed);
+    if (read_ptr > write_ptr)
+    {
+      // The reader is ahead of us in the buffer. We must stay strictly behind it (hence the extra
+      // byte), otherwise read_ptr == write_ptr would make a full queue look empty.
+      const u32 available_size = read_ptr - write_ptr;
+      if (available_size < (size + sizeof(GPUBackendCommandType)))
+      {
+        // Not enough room yet. Kick the GPU thread and re-evaluate from scratch: once the reader wraps
+        // around behind us, the free space is the tail of the buffer, and that case has to go through
+        // the branch below so a wraparound marker is emitted (or room for one is kept) instead of
+        // waiting forever on a tail that is too small, or consuming the space reserved for the marker.
+        WakeGPUThread();
+        continue;
+      }
+    }
+    else
+    {
+      // The reader is behind us (or the queue is empty), so everything up to the end of the buffer is
+      // free. Always keep enough room after this command for a future wraparound marker.
+      const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr;
+      if ((size + sizeof(GPUBackendCommand)) > available_size)
+      {
+        // allocate a dummy command to wrap the buffer around
+        GPUBackendCommand* dummy_cmd = reinterpret_cast<GPUBackendCommand*>(&m_command_fifo_data[write_ptr]);
+        dummy_cmd->type = GPUBackendCommandType::Wraparound;
+        dummy_cmd->size = available_size;
+        dummy_cmd->params.bits = 0;
+        m_command_fifo_write_ptr.store(0, std::memory_order_release);
+        continue;
+      }
+    }
+
+    GPUThreadCommand* cmd = reinterpret_cast<GPUThreadCommand*>(&m_command_fifo_data[write_ptr]);
+    cmd->type = command;
+    cmd->size = size;
+    return cmd;
+  }
+}
+
+u32 GPUThread::GetPendingCommandSize()
+{
+  const u32 read_ptr = m_command_fifo_read_ptr.load();
+  const u32 write_ptr = m_command_fifo_write_ptr.load();
+  return (write_ptr >= read_ptr) ? (write_ptr - read_ptr) : (COMMAND_QUEUE_SIZE - read_ptr + write_ptr);
+}
+
+void GPUThread::PushCommand(GPUThreadCommand* cmd)
+{
+  const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size;
+  DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE);
+  UNREFERENCED_VARIABLE(new_write_ptr);
+  if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU)
+    WakeGPUThread();
+}
+
+void GPUThread::PushCommandAndWakeThread(GPUThreadCommand* cmd)
+{
+  const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size;
+  DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE);
+  UNREFERENCED_VARIABLE(new_write_ptr);
+  WakeGPUThread();
+}
+
+void GPUThread::PushCommandAndSync(GPUThreadCommand* cmd, bool spin)
+{
+  const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size, std::memory_order_release) + cmd->size;
+  DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE);
+  UNREFERENCED_VARIABLE(new_write_ptr);
+  WakeGPUThread();
+
+  if (spin)
+    s_work_sema.WaitForEmptyWithSpin();
+  else
+    s_work_sema.WaitForEmpty();
+}
+
+void
GPUThread::WakeGPUThread() +{ + s_work_sema.NotifyOfWork(); +} + +void GPUThread::RunGPULoop() +{ + Threading::SetNameOfCurrentThread("GPUThread"); + + if (!CreateDeviceOnThread( + Settings::GetRenderAPIForRenderer(s_requested_renderer.value_or(g_gpu_settings.gpu_renderer)), &s_open_error)) + { + Host::ReleaseRenderWindow(); + s_open_flag.store(false, std::memory_order_release); + m_sync_semaphore.Post(); + return; + } + + CreateGPUBackendOnThread(true); + + s_open_flag.store(true, std::memory_order_release); + m_sync_semaphore.Post(); + + for (;;) + { + u32 write_ptr = m_command_fifo_write_ptr.load(std::memory_order_acquire); + u32 read_ptr = m_command_fifo_read_ptr.load(std::memory_order_relaxed); + if (read_ptr == write_ptr) + { + if (s_shutdown_flag.load(std::memory_order_relaxed)) + { + break; + } + else if (s_run_idle_flag.load(std::memory_order_relaxed)) + { + if (!s_work_sema.CheckForWork()) + { + Internal::PresentFrame(false, 0); + if (!g_gpu_device->IsVSyncModeBlocking()) + g_gpu_device->ThrottlePresentation(); + + continue; + } + + // we should have something to do, since we got woken... + } + else + { + s_work_sema.WaitForWork(); + continue; + } + } + + write_ptr = (write_ptr < read_ptr) ? 
COMMAND_QUEUE_SIZE : write_ptr; + while (read_ptr < write_ptr) + { + GPUThreadCommand* cmd = reinterpret_cast(&m_command_fifo_data[read_ptr]); + DebugAssert((read_ptr + cmd->size) <= COMMAND_QUEUE_SIZE); + read_ptr += cmd->size; + + switch (cmd->type) + { + case GPUBackendCommandType::Wraparound: + { + DebugAssert(read_ptr == COMMAND_QUEUE_SIZE); + write_ptr = m_command_fifo_write_ptr.load(std::memory_order_acquire); + read_ptr = 0; + + // let the CPU thread know as early as possible that we're here + m_command_fifo_read_ptr.store(read_ptr, std::memory_order_release); + } + break; + + case GPUBackendCommandType::AsyncCall: + { + GPUThreadAsyncCallCommand* acmd = static_cast(cmd); + acmd->func(); + acmd->~GPUThreadAsyncCallCommand(); + } + break; + + case GPUBackendCommandType::ChangeBackend: + { + ChangeGPUBackendOnThread(); + } + break; + + case GPUBackendCommandType::UpdateVSync: + { + UpdateVSyncOnThread(); + } + break; + + default: + { + DebugAssert(s_gpu_backend); + s_gpu_backend->HandleCommand(cmd); + } + break; + } + } + + m_command_fifo_read_ptr.store(read_ptr, std::memory_order_release); + } + + DestroyGPUBackendOnThread(); + DestroyDeviceOnThread(); + Host::ReleaseRenderWindow(); +} + +bool GPUThread::CreateDeviceOnThread(RenderAPI api, Error* error) +{ + DebugAssert(!g_gpu_device); + + INFO_LOG("Trying to create a {} GPU device...", GPUDevice::RenderAPIToString(api)); + g_gpu_device = GPUDevice::CreateDeviceForAPI(api); + + std::optional exclusive_fullscreen_control; + if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) + { + exclusive_fullscreen_control = + (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); + } + + u32 disabled_features = 0; + if (g_settings.gpu_disable_dual_source_blend) + disabled_features |= GPUDevice::FEATURE_MASK_DUAL_SOURCE_BLEND; + if (g_settings.gpu_disable_framebuffer_fetch) + disabled_features |= 
GPUDevice::FEATURE_MASK_FRAMEBUFFER_FETCH; + if (g_settings.gpu_disable_texture_buffers) + disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_BUFFERS; + if (g_settings.gpu_disable_texture_copy_to_self) + disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_COPY_TO_SELF; + + Error create_error; + if (!g_gpu_device || + !g_gpu_device->Create( + g_gpu_settings.gpu_adapter, + g_gpu_settings.gpu_disable_shader_cache ? std::string_view() : std::string_view(EmuFolders::Cache), + SHADER_CACHE_VERSION, g_gpu_settings.gpu_use_debug_device, s_requested_vsync, + s_requested_allow_present_throttle, g_gpu_settings.gpu_threaded_presentation, exclusive_fullscreen_control, + static_cast(disabled_features), &create_error)) + { + ERROR_LOG("Failed to create GPU device: {}", create_error.GetDescription()); + if (g_gpu_device) + g_gpu_device->Destroy(); + g_gpu_device.reset(); + + Error::SetStringFmt( + error, + TRANSLATE_FS("System", "Failed to create render device:\n\n{0}\n\nThis may be due to your GPU not supporting the " + "chosen renderer ({1}), or because your graphics drivers need to be updated."), + create_error.GetDescription(), GPUDevice::RenderAPIToString(api)); + + s_render_api = RenderAPI::None; + std::atomic_thread_fence(std::memory_order_release); + return false; + } + + if (!ImGuiManager::Initialize(g_settings.display_osd_scale / 100.0f, g_settings.display_show_osd_messages, + &create_error) || + (s_start_fullscreen_ui && !FullscreenUI::Initialize())) + { + ERROR_LOG("Failed to initialize ImGuiManager: {}", create_error.GetDescription()); + Error::SetStringFmt(error, "Failed to initialize ImGuiManager: {}", create_error.GetDescription()); + FullscreenUI::Shutdown(); + ImGuiManager::Shutdown(); + g_gpu_device->Destroy(); + g_gpu_device.reset(); + s_render_api = RenderAPI::None; + std::atomic_thread_fence(std::memory_order_release); + return false; + } + + s_accumulated_gpu_time = 0.0f; + s_presents_since_last_update = 0; + s_render_api = g_gpu_device->GetRenderAPI(); + 
std::atomic_thread_fence(std::memory_order_release);
+
+  return true;
+}
+
+void GPUThread::DestroyDeviceOnThread()
+{
+  if (!g_gpu_device)
+    return;
+
+  ImGuiManager::DestroyOverlayTextures();
+  FullscreenUI::Shutdown();
+  ImGuiManager::Shutdown();
+
+  INFO_LOG("Destroying {} GPU device...", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()));
+  g_gpu_device->Destroy();
+  g_gpu_device.reset();
+}
+
+/// Creates and initializes the backend for s_requested_renderer. Does nothing when no renderer is requested.
+/// If a hardware backend fails to initialize, an OSD message is shown and the software backend is created and
+/// initialized in its place (s_requested_renderer is updated to match); failure of that fallback is fatal.
+void GPUThread::CreateGPUBackendOnThread(bool clear_vram)
+{
+  Assert(!s_gpu_backend);
+  if (!s_requested_renderer.has_value())
+    return;
+
+  const bool is_hardware = (s_requested_renderer.value() != GPURenderer::Software);
+
+  if (is_hardware)
+    s_gpu_backend = GPUBackend::CreateHardwareBackend();
+  else
+    s_gpu_backend = GPUBackend::CreateSoftwareBackend();
+
+  Error error;
+  DebugAssert(s_gpu_backend);
+  if (!s_gpu_backend->Initialize(clear_vram, &error))
+  {
+    ERROR_LOG("Failed to create {} renderer: {}", Settings::GetRendererName(s_requested_renderer.value()),
+              error.GetDescription());
+
+    if (is_hardware)
+    {
+      Host::AddIconOSDMessage(
+        "GPUBackendCreationFailed", ICON_FA_PAINT_ROLLER,
+        fmt::format(TRANSLATE_FS("OSDMessage", "Failed to initialize {} renderer, falling back to software renderer."),
+                    Settings::GetRendererName(s_requested_renderer.value())),
+        Host::OSD_CRITICAL_ERROR_DURATION);
+
+      // Drop the half-initialized hardware backend before building its replacement.
+      s_gpu_backend.reset();
+
+      // The fallback has to be initialized as well, otherwise we would keep running with a backend
+      // that was constructed but never set up.
+      s_requested_renderer = GPURenderer::Software;
+      s_gpu_backend = GPUBackend::CreateSoftwareBackend();
+      if (!s_gpu_backend || !s_gpu_backend->Initialize(clear_vram, &error))
+        Panic("Failed to initialize software backend.");
+    }
+  }
+}
+
+/// Handles a ChangeBackend command: switches to the backend for s_requested_renderer, recreating the GPU
+/// device when the render API differs, or just destroys the current backend when no renderer is requested.
+ALWAYS_INLINE_RELEASE void GPUThread::ChangeGPUBackendOnThread()
+{
+  std::atomic_thread_fence(std::memory_order_acquire);
+  if (!s_requested_renderer.has_value())
+  {
+    if (s_gpu_backend)
+      DestroyGPUBackendOnThread();
+
+    return;
+  }
+
+  // Readback old VRAM for hardware renderers. There is no old backend when the thread was started
+  // without a renderer (e.g. for the fullscreen UI only), so there is nothing to read back then.
+  const bool had_backend = static_cast<bool>(s_gpu_backend);
+  if (had_backend)
+    s_gpu_backend->ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
+
+  // Without a previous backend this is the first creation, so request a VRAM clear like the initial
+  // creation in RunGPULoop() does; otherwise keep the contents we just read back.
+  const bool clear_vram = !had_backend;
+
+  if (s_requested_renderer.value() == GPURenderer::Software)
+  {
+    // Just recreate the backend, software works with everything.
+    DestroyGPUBackendOnThread();
+    CreateGPUBackendOnThread(clear_vram);
+    return;
+  }
+
+  DestroyGPUBackendOnThread();
+
+  DebugAssert(g_gpu_device);
+  const RenderAPI current_api = s_render_api;
+  const RenderAPI expected_api = Settings::GetRenderAPIForRenderer(s_requested_renderer.value());
+  if (!GPUDevice::IsSameRenderAPI(current_api, expected_api))
+  {
+    WARNING_LOG("Recreating GPU device, expecting {} got {}", GPUDevice::RenderAPIToString(expected_api),
+                GPUDevice::RenderAPIToString(current_api));
+
+    DestroyDeviceOnThread();
+
+    // Things tend to break when you don't recreate the window, after switching APIs.
+    Host::ReleaseRenderWindow();
+
+    Error error;
+    if (!CreateDeviceOnThread(expected_api, &error))
+    {
+      Host::AddIconOSDMessage(
+        "DeviceSwitchFailed", ICON_FA_PAINT_ROLLER,
+        fmt::format(TRANSLATE_FS("OSDMessage", "Failed to create {} GPU device, reverting to {}.\n{}"),
+                    GPUDevice::RenderAPIToString(expected_api), GPUDevice::RenderAPIToString(current_api),
+                    error.GetDescription()),
+        Host::OSD_CRITICAL_ERROR_DURATION);
+
+      Host::ReleaseRenderWindow();
+      if (!CreateDeviceOnThread(current_api, &error))
+        Panic("Failed to switch back to old API after creation failure");
+    }
+  }
+
+  CreateGPUBackendOnThread(clear_vram);
+}
+
+void GPUThread::DestroyGPUBackendOnThread()
+{
+  if (!s_gpu_backend)
+    return;
+
+  VERBOSE_LOG("Shutting down GPU backend...");
+
+  PostProcessing::Shutdown();
+  s_gpu_backend.reset();
+}
+
+void GPUThread::UpdateSettingsOnThread(const Settings& old_settings)
+{
+  if (g_gpu_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage ||
+      g_gpu_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats)
+  {
+    s_performance_counters_updated.clear(std::memory_order_relaxed);
+    s_last_performance_counter_update_time = Common::Timer::GetCurrentValue();
+    s_presents_since_last_update = 0;
+  }
+
+  if (g_gpu_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage)
+  {
+    s_accumulated_gpu_time = 0.0f;
+
s_average_gpu_time = 0.0f;
+    s_gpu_usage = 0.0f;
+  }
+
+  if (s_gpu_backend)
+    s_gpu_backend->UpdateSettings(old_settings);
+}
+
+void GPUThread::UpdateVSyncOnThread()
+{
+  std::atomic_thread_fence(std::memory_order_acquire);
+
+  g_gpu_device->SetVSyncMode(s_requested_vsync, s_requested_allow_present_throttle);
+}
+
+/// Queues func as an AsyncCall command and wakes the GPU thread; the GPU loop invokes and then destroys it.
+/// This allocates from the command FIFO, so it must not be called from code that is itself executing on
+/// the GPU thread.
+void GPUThread::RunOnThread(AsyncCallType func)
+{
+  GPUThreadCommand* base_cmd = AllocateCommand(GPUBackendCommandType::AsyncCall, sizeof(GPUThreadAsyncCallCommand));
+
+  // AllocateCommand() already stored the type and the aligned size, but constructing the derived command
+  // in the same storage default-initializes those header fields, so their earlier values cannot be relied
+  // upon afterwards. Remember the size and write the header again once the object exists.
+  const u32 cmd_size = base_cmd->size;
+  GPUThreadAsyncCallCommand* cmd = new (base_cmd) GPUThreadAsyncCallCommand;
+  cmd->type = GPUBackendCommandType::AsyncCall;
+  cmd->size = cmd_size;
+  cmd->func = std::move(func);
+  PushCommandAndWakeThread(cmd);
+}
+
+/// Snapshots g_settings and applies the copy to g_gpu_settings on the GPU thread.
+void GPUThread::UpdateSettings()
+{
+  AssertMsg(IsStarted(), "GPU Thread is running");
+
+  // mutable, so the captured copy can actually be moved into g_gpu_settings instead of copied again.
+  RunOnThread([settings = g_settings]() mutable {
+    VERBOSE_LOG("Updating GPU settings on thread...");
+
+    Settings old_settings = std::move(g_gpu_settings);
+    g_gpu_settings = std::move(settings);
+
+    UpdateSettingsOnThread(old_settings);
+  });
+}
+
+/// Forwards a window resize to the GPU device on the GPU thread, then notifies the system.
+void GPUThread::ResizeDisplayWindow(s32 width, s32 height, float scale)
+{
+  AssertMsg(IsStarted(), "GPU Thread is running");
+  RunOnThread([width, height, scale]() {
+    if (!g_gpu_device)
+      return;
+
+    DEV_LOG("Display window resized to {}x{}", width, height);
+
+    g_gpu_device->ResizeWindow(width, height, scale);
+    ImGuiManager::WindowResized();
+
+    // If we're paused, re-present the current frame at the new window size.
+    if (System::IsValid())
+    {
+      // When running idle, the GPU loop re-presents by itself.
+      if (System::IsPaused() && !s_run_idle_flag.load(std::memory_order_relaxed))
+      {
+        // This callback already runs on the GPU thread, so present directly. Going through
+        // PresentCurrentFrame() would allocate FIFO commands from the consumer side, racing with the
+        // CPU thread's allocations and spinning forever if the queue happened to be full.
+
+        // Hackity hack, on some systems, presenting a single frame isn't enough to actually get it
+        // displayed. Two seems to be good enough. Maybe something to do with direct scanout.
+        Internal::PresentFrame(false, 0);
+        Internal::PresentFrame(false, 0);
+      }
+    }
+  });
+
+  // TODO: The window size for GTE and stuff isn't going to be correct here.
+  System::HostDisplayResized();
+}
+
+/// Asks the GPU device to update its window on the GPU thread (e.g. after a fullscreen transition).
+void GPUThread::UpdateDisplayWindow()
+{
+  AssertMsg(IsStarted(), "GPU Thread is running");
+  RunOnThread([]() {
+    if (!g_gpu_device)
+      return;
+
+    if (!g_gpu_device->UpdateWindow())
+    {
+      Host::ReportErrorAsync("Error", "Failed to change window after update. The log may contain more information.");
+      return;
+    }
+
+    ImGuiManager::WindowResized();
+
+    if (System::IsValid())
+    {
+      // Fix up vsync etc.
+      System::UpdateSpeedLimiterState();
+
+      // If we're paused, re-present the current frame at the new window size. We are already on the
+      // GPU thread here, so present directly rather than queueing another command from the consumer
+      // side of the FIFO. When running idle, the GPU loop re-presents by itself.
+      if (System::IsPaused() && !s_run_idle_flag.load(std::memory_order_relaxed))
+        Internal::PresentFrame(false, 0);
+    }
+  });
+}
+
+void GPUThread::SetVSync(GPUVSyncMode mode, bool allow_present_throttle)
+{
+  Assert(IsStarted());
+
+  if (s_requested_vsync == mode && s_requested_allow_present_throttle == allow_present_throttle)
+    return;
+
+  s_requested_vsync = mode;
+  s_requested_allow_present_throttle = allow_present_throttle;
+  std::atomic_thread_fence(std::memory_order_release);
+  PushCommandAndWakeThread(AllocateCommand(GPUBackendCommandType::UpdateVSync, sizeof(GPUThreadCommand)));
+}
+
+void GPUThread::PresentCurrentFrame()
+{
+  if (s_run_idle_flag.load(std::memory_order_relaxed))
+  {
+    // If we're running idle, we're going to re-present anyway.
+    return;
+  }
+
+  RunOnThread([]() { Internal::PresentFrame(false, 0); });
+}
+
+void GPUThread::Internal::PresentFrame(bool allow_skip_present, Common::Timer::Value present_time)
+{
+  // Make sure the GPU is flushed, otherwise the VB might still be mapped.
+  // TODO: Make this suck less...
+  if (s_gpu_backend)
+    s_gpu_backend->FlushRender();
+
+  s_presents_since_last_update++;
+  if (!s_performance_counters_updated.test_and_set(std::memory_order_acq_rel))
+    UpdatePerformanceCountersOnThread();
+
+  const bool skip_present = (allow_skip_present && g_gpu_device->ShouldSkipPresentingFrame());
+  const bool explicit_present = (present_time != 0 && g_gpu_device->GetFeatures().explicit_present);
+
+  // TODO: Maybe?
+ //(present_time != 0 && Common::Timer::GetCurrentValue() > present_time && !s_last_frame_skipped))); + + // acquire for IO.MousePos. + std::atomic_thread_fence(std::memory_order_acquire); + + if (!skip_present) + { + FullscreenUI::Render(); + ImGuiManager::RenderTextOverlays(); + ImGuiManager::RenderOSDMessages(); + + if (System::GetState() == System::State::Running) + ImGuiManager::RenderSoftwareCursors(); + } + + // Debug windows are always rendered, otherwise mouse input breaks on skip. + ImGuiManager::RenderOverlayWindows(); + ImGuiManager::RenderDebugWindows(); + + if (s_gpu_backend && !skip_present) + s_last_frame_skipped = !s_gpu_backend->PresentDisplay(); + else + s_last_frame_skipped = !g_gpu_device->BeginPresent(skip_present); + + if (!s_last_frame_skipped) + { + g_gpu_device->RenderImGui(); + g_gpu_device->EndPresent(explicit_present); + + if (g_gpu_device->IsGPUTimingEnabled()) + s_accumulated_gpu_time += g_gpu_device->GetAndResetAccumulatedGPUTime(); + + if (explicit_present) + { + // See note in System::Throttle(). +#if !defined(__linux__) && !defined(__ANDROID__) + Common::Timer::SleepUntil(present_time, false); +#else + Common::Timer::SleepUntil(present_time, true); +#endif + + g_gpu_device->SubmitPresent(); + } + } + else + { + // Still need to kick ImGui or it gets cranky. + ImGui::Render(); + } + + ImGuiManager::NewFrame(); + + if (s_gpu_backend) + s_gpu_backend->RestoreDeviceContext(); +} + +void GPUThread::SetRunIdle(bool enabled) +{ + s_run_idle_flag.store(enabled, std::memory_order_release); + DEV_LOG("GPU thread now {} idle", enabled ? 
"running" : "NOT running"); +} + +float GPUThread::GetGPUUsage() +{ + return s_gpu_usage; +} + +float GPUThread::GetGPUAverageTime() +{ + return s_average_gpu_time; +} + +void GPUThread::SetPerformanceCounterUpdatePending() +{ + s_performance_counters_updated.clear(std::memory_order_release); +} + +void GPUThread::UpdatePerformanceCountersOnThread() +{ + const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); + const u32 frames = std::exchange(s_presents_since_last_update, 0); + const float time = static_cast(Common::Timer::ConvertValueToSeconds( + current_time - std::exchange(s_last_performance_counter_update_time, current_time))); + + if (g_gpu_device->IsGPUTimingEnabled()) + { + s_average_gpu_time = s_accumulated_gpu_time / static_cast(std::max(frames, 1u)); + s_gpu_usage = static_cast(s_accumulated_gpu_time / (time * 10.0)); + s_accumulated_gpu_time = 0.0f; + } + + if (g_settings.display_show_gpu_stats) + GPUBackend::UpdateStatistics(frames); +} \ No newline at end of file diff --git a/src/core/gpu_thread.h b/src/core/gpu_thread.h new file mode 100644 index 000000000..9ac3f6977 --- /dev/null +++ b/src/core/gpu_thread.h @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_types.h" + +#include "common/threading.h" + +#include + +class Error; + +enum class RenderAPI : u32; +enum class GPUVSyncMode : u8; + +namespace GPUThread { +using AsyncCallType = std::function; + +const Threading::ThreadHandle& GetThreadHandle(); +RenderAPI GetRenderAPI(); +bool IsStarted(); +bool WasFullscreenUIRequested(); + +/// Starts Big Picture UI. +bool StartFullscreenUI(Error* error); + +/// Backend control. 
+std::optional GetRequestedRenderer(); +bool CreateGPUBackend(GPURenderer renderer, Error* error); +bool SwitchGPUBackend(GPURenderer renderer, bool force_recreate_device, Error* error); +void DestroyGPUBackend(); + +/// Fully stops the thread, closing in the process if needed. +void Shutdown(); + +/// Re-presents the current frame. Call when things like window resizes happen to re-display +/// the current frame with the correct proportions. Should only be called from the CPU thread. +void PresentCurrentFrame(); + +/// Handles fullscreen transitions and such. +void UpdateDisplayWindow(); + +/// Called when the window is resized. +void ResizeDisplayWindow(s32 width, s32 height, float scale); + +void UpdateSettings(); + +void RunOnThread(AsyncCallType func); +void SetVSync(GPUVSyncMode mode, bool allow_present_throttle); +void SetRunIdle(bool enabled); + +float GetGPUUsage(); +float GetGPUAverageTime(); +void SetPerformanceCounterUpdatePending(); + +GPUThreadCommand* AllocateCommand(GPUBackendCommandType command, u32 size); +void PushCommand(GPUThreadCommand* cmd); +void PushCommandAndWakeThread(GPUThreadCommand* cmd); +void PushCommandAndSync(GPUThreadCommand* cmd, bool spin); + +// NOTE: Only called by GPUBackend +namespace Internal { +void PresentFrame(bool allow_skip_present, Common::Timer::Value present_time); +} +} // namespace GPUThread diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index c31b376ed..7bfcb6493 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -2,11 +2,21 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + +#include "types.h" + +#include "util/gpu_texture.h" + #include "common/bitfield.h" #include "common/bitutils.h" #include "common/gsvector.h" -#include "types.h" +#include "common/timer.h" + #include +#include +#include + +class StateWrapper; enum : u32 { @@ -166,7 +176,7 @@ static constexpr s32 TruncateGPUVertexPosition(s32 x) union GPUDrawModeReg { static constexpr u16 MASK = 0b1111111111111; - 
static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111); + static constexpr u16 TEXTURE_PAGE_AND_MODE_MASK = UINT16_C(0b0000000110011111); // Polygon texpage commands only affect bits 0-8, 11 static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111; @@ -227,12 +237,17 @@ union GPUTexturePaletteReg } }; -struct GPUTextureWindow +union GPUTextureWindow { - u8 and_x; - u8 and_y; - u8 or_x; - u8 or_y; + struct + { + u8 and_x; + u8 and_y; + u8 or_x; + u8 or_y; + }; + + u32 bits; }; // 4x4 dither matrix. @@ -249,17 +264,100 @@ static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{- enum class GPUBackendCommandType : u8 { Wraparound, - Sync, + AsyncCall, + ChangeBackend, + UpdateVSync, + ClearVRAM, + DoState, + ClearDisplay, + UpdateDisplay, + ClearCache, + BufferSwapped, + FlushRender, + UpdateResolutionScale, + RenderScreenshotToBuffer, + ReadVRAM, FillVRAM, UpdateVRAM, CopyVRAM, SetDrawingArea, UpdateCLUT, DrawPolygon, + DrawPrecisePolygon, DrawRectangle, DrawLine, }; +struct GPUThreadCommand +{ + u32 size; + GPUBackendCommandType type; +}; + +struct GPUThreadAsyncCallCommand : public GPUThreadCommand +{ + std::function func; +}; + +struct GPUThreadRenderScreenshotToBufferCommand : public GPUThreadCommand +{ + u32 width; + u32 height; + s32 draw_rect[4]; + std::vector* out_pixels; + u32* out_stride; + GPUTexture::Format* out_format; + bool* out_result; + bool postfx; +}; + +struct GPUBackendDoStateCommand : public GPUThreadCommand +{ + GPUTexture** host_texture; + bool is_reading; + bool update_display; +}; + +struct GPUBackendUpdateDisplayCommand : public GPUThreadCommand +{ + u16 display_width; + u16 display_height; + u16 display_origin_left; + u16 display_origin_top; + u16 display_vram_left; + u16 display_vram_top; + u16 display_vram_width; + u16 display_vram_height; + + u16 X; // TODO: Can we get rid of this? 
+ + union + { + u8 bits; + + BitField interlaced_display_enabled; + BitField interlaced_display_field; + BitField interlaced_display_interleaved; + BitField display_24bit; + BitField display_disabled; + + BitField allow_present_skip; + BitField present_frame; + }; + + float display_aspect_ratio; + + Common::Timer::Value present_time; +}; + +struct GPUBackendReadVRAMCommand : public GPUThreadCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; +}; + union GPUBackendCommandParameters { u8 bits; @@ -287,18 +385,12 @@ union GPUBackendCommandParameters } }; -struct GPUBackendCommand +// TODO: Merge this into the other structs, saves padding bytes +struct GPUBackendCommand : public GPUThreadCommand { - u32 size; - GPUBackendCommandType type; GPUBackendCommandParameters params; }; -struct GPUBackendSyncCommand : public GPUBackendCommand -{ - bool allow_sleep; -}; - struct GPUBackendFillVRAMCommand : public GPUBackendCommand { u16 x; @@ -339,8 +431,10 @@ struct GPUBackendUpdateCLUTCommand : public GPUBackendCommand bool clut_is_8bit; }; +// TODO: Pack texpage struct GPUBackendDrawCommand : public GPUBackendCommand { + // TODO: Cut this down GPUDrawModeReg draw_mode; GPURenderCommand rc; GPUTexturePaletteReg palette; @@ -351,7 +445,7 @@ struct GPUBackendDrawCommand : public GPUBackendCommand struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand { - u16 num_vertices; + u8 num_vertices; struct Vertex { @@ -372,14 +466,22 @@ struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand }; u16 texcoord; }; + }; - ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u16 texcoord_) - { - x = x_; - y = y_; - color = color_; - texcoord = texcoord_; - } + Vertex vertices[0]; +}; + +struct GPUBackendDrawPrecisePolygonCommand : public GPUBackendDrawCommand +{ + u8 num_vertices; + bool valid_w; + + struct Vertex + { + float x, y, w; + s32 native_x, native_y; + u32 color; + u16 texcoord; }; Vertex vertices[0]; @@ -387,9 +489,9 @@ struct GPUBackendDrawPolygonCommand : 
public GPUBackendDrawCommand struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand { - s32 x, y; u16 width, height; u16 texcoord; + s32 x, y; u32 color; }; diff --git a/src/core/host.cpp b/src/core/host.cpp index 2ab5cd932..cdb921a0b 100644 --- a/src/core/host.cpp +++ b/src/core/host.cpp @@ -2,17 +2,11 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "host.h" -#include "fullscreen_ui.h" #include "gpu.h" -#include "imgui_overlays.h" -#include "shader_cache_version.h" #include "system.h" #include "scmversion/scmversion.h" -#include "util/gpu_device.h" -#include "util/imgui_manager.h" - #include "common/assert.h" #include "common/error.h" #include "common/layered_settings_interface.h" @@ -259,125 +253,3 @@ void Host::ReportFormattedDebuggerMessage(const char* format, ...) ReportDebuggerMessage(message); } - -bool Host::CreateGPUDevice(RenderAPI api, Error* error) -{ - DebugAssert(!g_gpu_device); - - INFO_LOG("Trying to create a {} GPU device...", GPUDevice::RenderAPIToString(api)); - g_gpu_device = GPUDevice::CreateDeviceForAPI(api); - - std::optional exclusive_fullscreen_control; - if (g_settings.display_exclusive_fullscreen_control != DisplayExclusiveFullscreenControl::Automatic) - { - exclusive_fullscreen_control = - (g_settings.display_exclusive_fullscreen_control == DisplayExclusiveFullscreenControl::Allowed); - } - - u32 disabled_features = 0; - if (g_settings.gpu_disable_dual_source_blend) - disabled_features |= GPUDevice::FEATURE_MASK_DUAL_SOURCE_BLEND; - if (g_settings.gpu_disable_framebuffer_fetch) - disabled_features |= GPUDevice::FEATURE_MASK_FRAMEBUFFER_FETCH; - if (g_settings.gpu_disable_texture_buffers) - disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_BUFFERS; - if (g_settings.gpu_disable_texture_copy_to_self) - disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_COPY_TO_SELF; - - Error create_error; - if (!g_gpu_device || !g_gpu_device->Create(g_settings.gpu_adapter, - g_settings.gpu_disable_shader_cache ? 
std::string_view() : - std::string_view(EmuFolders::Cache), - SHADER_CACHE_VERSION, g_settings.gpu_use_debug_device, - System::GetEffectiveVSyncMode(), System::ShouldAllowPresentThrottle(), - g_settings.gpu_threaded_presentation, exclusive_fullscreen_control, - static_cast(disabled_features), &create_error)) - { - ERROR_LOG("Failed to create GPU device: {}", create_error.GetDescription()); - if (g_gpu_device) - g_gpu_device->Destroy(); - g_gpu_device.reset(); - - Error::SetStringFmt( - error, - TRANSLATE_FS("System", "Failed to create render device:\n\n{0}\n\nThis may be due to your GPU not supporting the " - "chosen renderer ({1}), or because your graphics drivers need to be updated."), - create_error.GetDescription(), GPUDevice::RenderAPIToString(api)); - return false; - } - - if (!ImGuiManager::Initialize(g_settings.display_osd_scale / 100.0f, g_settings.display_show_osd_messages, - &create_error)) - { - ERROR_LOG("Failed to initialize ImGuiManager: {}", create_error.GetDescription()); - Error::SetStringFmt(error, "Failed to initialize ImGuiManager: {}", create_error.GetDescription()); - g_gpu_device->Destroy(); - g_gpu_device.reset(); - return false; - } - - return true; -} - -void Host::UpdateDisplayWindow() -{ - if (!g_gpu_device) - return; - - if (!g_gpu_device->UpdateWindow()) - { - Host::ReportErrorAsync("Error", "Failed to change window after update. The log may contain more information."); - return; - } - - ImGuiManager::WindowResized(); - - if (System::IsValid()) - { - // Fix up vsync etc. - System::UpdateSpeedLimiterState(); - - // If we're paused, re-present the current frame at the new window size. 
- if (System::IsPaused()) - System::InvalidateDisplay(); - } -} - -void Host::ResizeDisplayWindow(s32 width, s32 height, float scale) -{ - if (!g_gpu_device) - return; - - DEV_LOG("Display window resized to {}x{}", width, height); - - g_gpu_device->ResizeWindow(width, height, scale); - ImGuiManager::WindowResized(); - - // If we're paused, re-present the current frame at the new window size. - if (System::IsValid()) - { - if (System::IsPaused()) - { - // Hackity hack, on some systems, presenting a single frame isn't enough to actually get it - // displayed. Two seems to be good enough. Maybe something to do with direct scanout. - System::InvalidateDisplay(); - System::InvalidateDisplay(); - } - - System::HostDisplayResized(); - } -} - -void Host::ReleaseGPUDevice() -{ - if (!g_gpu_device) - return; - - ImGuiManager::DestroyOverlayTextures(); - FullscreenUI::Shutdown(); - ImGuiManager::Shutdown(); - - INFO_LOG("Destroying {} GPU device...", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); - g_gpu_device->Destroy(); - g_gpu_device.reset(); -} diff --git a/src/core/host.h b/src/core/host.h index 7519a753f..d7b86abd2 100644 --- a/src/core/host.h +++ b/src/core/host.h @@ -87,18 +87,6 @@ void DisplayLoadingScreen(const char* message, int progress_min = -1, int progre /// Safely executes a function on the VM thread. void RunOnCPUThread(std::function function, bool block = false); -/// Attempts to create the rendering device backend. -bool CreateGPUDevice(RenderAPI api, Error* error); - -/// Handles fullscreen transitions and such. -void UpdateDisplayWindow(); - -/// Called when the window is resized. -void ResizeDisplayWindow(s32 width, s32 height, float scale); - -/// Destroys any active rendering device. -void ReleaseGPUDevice(); - /// Called at the end of the frame, before presentation. 
void FrameDone(); diff --git a/src/core/hotkeys.cpp b/src/core/hotkeys.cpp index 910a76937..d390b8d3d 100644 --- a/src/core/hotkeys.cpp +++ b/src/core/hotkeys.cpp @@ -7,6 +7,7 @@ #include "cpu_pgxp.h" #include "fullscreen_ui.h" #include "gpu.h" +#include "gpu_thread.h" #include "host.h" #include "imgui_overlays.h" #include "settings.h" @@ -55,8 +56,7 @@ static void HotkeyModifyResolutionScale(s32 increment) if (System::IsValid()) { - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); + GPUThread::UpdateSettings(); System::ClearMemorySaveStates(); } } @@ -367,11 +367,10 @@ DEFINE_HOTKEY("TogglePGXP", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOO [](s32 pressed) { if (!pressed && System::IsValid()) { - Settings old_settings = g_settings; g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); System::ClearMemorySaveStates(); + GPUThread::UpdateSettings(); + Host::AddKeyedOSDMessage("TogglePGXP", g_settings.gpu_pgxp_enable ? TRANSLATE_STR("OSDMessage", "PGXP is now enabled.") : @@ -444,12 +443,11 @@ DEFINE_HOTKEY("TogglePGXPDepth", TRANSLATE_NOOP("Hotkeys", "Graphics"), if (!g_settings.gpu_pgxp_enable) return; - const Settings old_settings = g_settings; g_settings.gpu_pgxp_depth_buffer = !g_settings.gpu_pgxp_depth_buffer; - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); System::ClearMemorySaveStates(); + GPUThread::UpdateSettings(); + Host::AddKeyedOSDMessage("TogglePGXPDepth", g_settings.gpu_pgxp_depth_buffer ? 
TRANSLATE_STR("OSDMessage", "PGXP Depth Buffer is now enabled.") : @@ -465,12 +463,11 @@ DEFINE_HOTKEY("TogglePGXPCPU", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_ if (!g_settings.gpu_pgxp_enable) return; - const Settings old_settings = g_settings; g_settings.gpu_pgxp_cpu = !g_settings.gpu_pgxp_cpu; - g_gpu->RestoreDeviceContext(); - g_gpu->UpdateSettings(old_settings); + // GPU thread is unchanged System::ClearMemorySaveStates(); + Host::AddKeyedOSDMessage("TogglePGXPCPU", g_settings.gpu_pgxp_cpu ? TRANSLATE_STR("OSDMessage", "PGXP CPU mode is now enabled.") : @@ -558,29 +555,29 @@ DEFINE_HOTKEY("AudioVolumeDown", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_N DEFINE_HOTKEY("LoadSelectedSaveState", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Load From Selected Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread(SaveStateSelectorUI::LoadCurrentSlot); + GPUThread::RunOnThread(SaveStateSelectorUI::LoadCurrentSlot); }) DEFINE_HOTKEY("SaveSelectedSaveState", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Save To Selected Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread(SaveStateSelectorUI::SaveCurrentSlot); + GPUThread::RunOnThread(SaveStateSelectorUI::SaveCurrentSlot); }) DEFINE_HOTKEY("SelectPreviousSaveStateSlot", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Select Previous Save Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread([]() { SaveStateSelectorUI::SelectPreviousSlot(true); }); + GPUThread::RunOnThread([]() { SaveStateSelectorUI::SelectPreviousSlot(true); }); }) DEFINE_HOTKEY("SelectNextSaveStateSlot", TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Select Next Save Slot"), [](s32 pressed) { if (!pressed) - Host::RunOnCPUThread([]() { SaveStateSelectorUI::SelectNextSlot(true); }); + GPUThread::RunOnThread([]() { SaveStateSelectorUI::SelectNextSlot(true); }); }) DEFINE_HOTKEY("SaveStateAndSelectNextSlot", 
TRANSLATE_NOOP("Hotkeys", "Save States"), TRANSLATE_NOOP("Hotkeys", "Save State and Select Next Slot"), [](s32 pressed) { if (!pressed && System::IsValid()) { SaveStateSelectorUI::SaveCurrentSlot(); - SaveStateSelectorUI::SelectNextSlot(false); + GPUThread::RunOnThread([]() { SaveStateSelectorUI::SelectNextSlot(false); }); } }) diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index dcf1a5b9a..35513e53a 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -9,6 +9,8 @@ #include "dma.h" #include "fullscreen_ui.h" #include "gpu.h" +#include "gpu_backend.h" +#include "gpu_thread.h" #include "host.h" #include "mdec.h" #include "resources.h" @@ -176,17 +178,17 @@ void ImGuiManager::RenderDebugWindows() { if (System::IsValid()) { - if (g_settings.debugging.show_gpu_state) + if (g_gpu_settings.debugging.show_gpu_state) g_gpu->DrawDebugStateWindow(); - if (g_settings.debugging.show_cdrom_state) + if (g_gpu_settings.debugging.show_cdrom_state) CDROM::DrawDebugWindow(); - if (g_settings.debugging.show_timers_state) + if (g_gpu_settings.debugging.show_timers_state) Timers::DrawDebugStateWindow(); - if (g_settings.debugging.show_spu_state) + if (g_gpu_settings.debugging.show_spu_state) SPU::DrawDebugStateWindow(); - if (g_settings.debugging.show_mdec_state) + if (g_gpu_settings.debugging.show_mdec_state) MDEC::DrawDebugStateWindow(); - if (g_settings.debugging.show_dma_state) + if (g_gpu_settings.debugging.show_dma_state) DMA::DrawDebugStateWindow(); } } @@ -194,14 +196,14 @@ void ImGuiManager::RenderDebugWindows() void ImGuiManager::RenderTextOverlays() { const System::State state = System::GetState(); - if (state != System::State::Shutdown) + if (state == System::State::Paused || state == System::State::Running) { DrawPerformanceOverlay(); - if (g_settings.display_show_enhancements && state != System::State::Paused) + if (g_gpu_settings.display_show_enhancements && state != System::State::Paused) DrawEnhancementsOverlay(); - if 
(g_settings.display_show_inputs && state != System::State::Paused) + if (g_gpu_settings.display_show_inputs && state != System::State::Paused) DrawInputsOverlay(); } } @@ -219,9 +221,9 @@ void ImGuiManager::FormatProcessorStat(SmallStringBase& text, double usage, doub void ImGuiManager::DrawPerformanceOverlay() { - if (!(g_settings.display_show_fps || g_settings.display_show_speed || g_settings.display_show_gpu_stats || - g_settings.display_show_resolution || g_settings.display_show_cpu_usage || - (g_settings.display_show_status_indicators && + if (!(g_gpu_settings.display_show_fps || g_gpu_settings.display_show_speed || g_gpu_settings.display_show_gpu_stats || + g_gpu_settings.display_show_resolution || g_gpu_settings.display_show_cpu_usage || + (g_gpu_settings.display_show_status_indicators && (System::IsPaused() || System::IsFastForwardEnabled() || System::IsTurboEnabled())))) { return; @@ -258,12 +260,12 @@ void ImGuiManager::DrawPerformanceOverlay() if (state == System::State::Running) { const float speed = System::GetEmulationSpeed(); - if (g_settings.display_show_fps) + if (g_gpu_settings.display_show_fps) { text.append_format("G: {:.2f} | V: {:.2f}", System::GetFPS(), System::GetVPS()); first = false; } - if (g_settings.display_show_speed) + if (g_gpu_settings.display_show_speed) { text.append_format("{}{}%", first ? "" : " | ", static_cast(std::round(speed))); @@ -288,41 +290,41 @@ void ImGuiManager::DrawPerformanceOverlay() DRAW_LINE(fixed_font, text, color); } - if (g_settings.display_show_gpu_stats) + if (g_gpu_settings.display_show_gpu_stats) { - g_gpu->GetStatsString(text); + GPUBackend::GetStatsString(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); - g_gpu->GetMemoryStatsString(text); + GPUBackend::GetMemoryStatsString(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_resolution) + if (g_gpu_settings.display_show_resolution) { // TODO: this seems wrong? 
- const auto [effective_width, effective_height] = g_gpu->GetEffectiveDisplayResolution(); + const auto [src_width, src_height] = GPUBackend::GetLastDisplaySourceSize(); const bool interlaced = g_gpu->IsInterlacedDisplayEnabled(); const bool pal = g_gpu->IsInPALMode(); - text.format("{}x{} {} {}", effective_width, effective_height, pal ? "PAL" : "NTSC", + text.format("{}x{} {} {}", src_width, src_height, pal ? "PAL" : "NTSC", interlaced ? "Interlaced" : "Progressive"); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_latency_stats) + if (g_gpu_settings.display_show_latency_stats) { System::FormatLatencyStats(text); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_cpu_usage) + if (g_gpu_settings.display_show_cpu_usage) { text.format("{:.2f}ms | {:.2f}ms | {:.2f}ms", System::GetMinimumFrameTime(), System::GetAverageFrameTime(), - System::GetMaximumFrameTime()); + System::GetMaximumFrameTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); if (g_settings.cpu_overclock_active || - (g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler || g_settings.cpu_recompiler_icache || - g_settings.cpu_recompiler_memory_exceptions)) + (g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler || g_settings.cpu_recompiler_icache || + g_settings.cpu_recompiler_memory_exceptions)) { first = true; text.assign("CPU["); @@ -369,22 +371,19 @@ void ImGuiManager::DrawPerformanceOverlay() FormatProcessorStat(text, System::GetCPUThreadUsage(), System::GetCPUThreadAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); - if (g_gpu->GetSWThread()) - { - text.assign("SW: "); - FormatProcessorStat(text, System::GetSWThreadUsage(), System::GetSWThreadAverageTime()); - DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); - } - } - - if (g_settings.display_show_gpu_usage && g_gpu_device->IsGPUTimingEnabled()) - { - text.assign("GPU: "); - FormatProcessorStat(text, 
System::GetGPUUsage(), System::GetGPUAverageTime()); + text.assign("RNDR: "); + FormatProcessorStat(text, System::GetGPUThreadUsage(), System::GetGPUThreadAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_status_indicators) + if (g_gpu_settings.display_show_gpu_usage) + { + text.assign("GPU: "); + FormatProcessorStat(text, GPUThread::GetGPUUsage(), GPUThread::GetGPUAverageTime()); + DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); + } + + if (g_gpu_settings.display_show_status_indicators) { const bool rewinding = System::IsRewinding(); if (rewinding || System::IsFastForwardEnabled() || System::IsTurboEnabled()) @@ -394,7 +393,7 @@ void ImGuiManager::DrawPerformanceOverlay() } } - if (g_settings.display_show_frame_times) + if (g_gpu_settings.display_show_frame_times) { const ImVec2 history_size(200.0f * scale, 50.0f * scale); ImGui::SetNextWindowSize(ImVec2(history_size.x, history_size.y)); @@ -455,7 +454,7 @@ void ImGuiManager::DrawPerformanceOverlay() ImGui::PopStyleColor(3); } } - else if (g_settings.display_show_status_indicators && state == System::State::Paused && + else if (g_gpu_settings.display_show_status_indicators && state == System::State::Paused && !FullscreenUI::HasActiveWindow()) { text.assign(ICON_FA_PAUSE); @@ -470,7 +469,7 @@ void ImGuiManager::DrawEnhancementsOverlay() LargeString text; text.append_format("{} {}-{}", Settings::GetConsoleRegionName(System::GetRegion()), GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), - g_gpu->IsHardwareRenderer() ? "HW" : "SW"); + GPUBackend::IsUsingHardwareBackend() ? 
"HW" : "SW"); if (g_settings.rewind_enable) text.append_format(" RW={}/{}", g_settings.rewind_save_frequency, g_settings.rewind_save_slots); @@ -784,7 +783,11 @@ void SaveStateSelectorUI::ClearList() for (ListEntry& li : s_slots) { if (li.preview_texture) - g_gpu_device->RecycleTexture(std::move(li.preview_texture)); + { + GPUThread::RunOnThread([tex = li.preview_texture.release()]() { + g_gpu_device->RecycleTexture(std::unique_ptr(tex)); + }); + } } s_slots.clear(); } @@ -1102,7 +1105,7 @@ void SaveStateSelectorUI::LoadCurrentSlot() } } - Close(); + GPUThread::RunOnThread(&Close); } void SaveStateSelectorUI::SaveCurrentSlot() @@ -1119,7 +1122,7 @@ void SaveStateSelectorUI::SaveCurrentSlot() } } - Close(); + GPUThread::RunOnThread(&Close); } void SaveStateSelectorUI::ShowSlotOSDMessage() @@ -1143,7 +1146,7 @@ void SaveStateSelectorUI::ShowSlotOSDMessage() void ImGuiManager::RenderOverlayWindows() { const System::State state = System::GetState(); - if (state != System::State::Shutdown) + if (state == System::State::Paused || state == System::State::Running) { if (SaveStateSelectorUI::s_open) SaveStateSelectorUI::Draw(); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index f55ae230c..c218f19d9 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -25,6 +25,7 @@ Log_SetChannel(Settings); Settings g_settings; +Settings g_gpu_settings; const char* SettingInfo::StringDefaultValue() const { @@ -191,7 +192,7 @@ void Settings::Load(SettingsInterface& si) gpu_disable_texture_buffers = si.GetBoolValue("GPU", "DisableTextureBuffers", false); gpu_disable_texture_copy_to_self = si.GetBoolValue("GPU", "DisableTextureCopyToSelf", false); gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); - gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true); + gpu_max_queued_frames = static_cast(si.GetUIntValue("GPU", "MaxQueuedFrames", DEFAULT_GPU_MAX_QUEUED_FRAMES)); gpu_use_software_renderer_for_readbacks = si.GetBoolValue("GPU", 
"UseSoftwareRendererForReadbacks", false); gpu_threaded_presentation = si.GetBoolValue("GPU", "ThreadedPresentation", DEFAULT_THREADED_PRESENTATION); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); @@ -495,7 +496,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const } si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); - si.SetBoolValue("GPU", "UseThread", gpu_use_thread); + si.SetUIntValue("GPU", "MaxQueuedFrames", gpu_max_queued_frames); si.SetBoolValue("GPU", "ThreadedPresentation", gpu_threaded_presentation); si.SetBoolValue("GPU", "UseSoftwareRendererForReadbacks", gpu_use_software_renderer_for_readbacks); si.SetBoolValue("GPU", "TrueColor", gpu_true_color); diff --git a/src/core/settings.h b/src/core/settings.h index b2a4b4884..bc54351fa 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -104,7 +104,7 @@ struct Settings std::string gpu_adapter; u8 gpu_resolution_scale = 1; u8 gpu_multisamples = 1; - bool gpu_use_thread : 1 = true; + u8 gpu_max_queued_frames = 2; bool gpu_use_software_renderer_for_readbacks : 1 = false; bool gpu_threaded_presentation : 1 = DEFAULT_THREADED_PRESENTATION; bool gpu_use_debug_device : 1 = false; @@ -457,6 +457,8 @@ struct Settings static constexpr ConsoleRegion DEFAULT_CONSOLE_REGION = ConsoleRegion::Auto; static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f; static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f; + static constexpr u8 DEFAULT_GPU_MAX_QUEUED_FRAMES = 2; // TODO: Maybe lower? But that means fast CPU threads would + // always stall, could be a problem for power management. // Prefer oldrec over newrec for now. Except on RISC-V, where there is no oldrec. #if defined(CPU_ARCH_RISCV64) @@ -526,7 +528,9 @@ struct Settings static constexpr u16 DEFAULT_PINE_SLOT = 28011; }; -extern Settings g_settings; +// TODO: Use smaller copy for GPU thread copy. +extern Settings g_settings; // CPU thread copy. 
+extern Settings g_gpu_settings; // GPU thread copy. namespace EmuFolders { extern std::string AppRoot; diff --git a/src/core/system.cpp b/src/core/system.cpp index 0e140a95e..bde47cc83 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -17,6 +17,8 @@ #include "game_database.h" #include "game_list.h" #include "gpu.h" +#include "gpu_backend.h" +#include "gpu_thread.h" #include "gte.h" #include "host.h" #include "host_interface_progress_callback.h" @@ -122,13 +124,12 @@ static void ClearRunningGame(); static void DestroySystem(); static std::string GetMediaPathFromSaveState(const char* path); static bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state); -static bool CreateGPU(GPURenderer renderer, bool is_switching, Error* error); static bool SaveUndoLoadState(); static void WarnAboutUnsafeSettings(); static void LogUnsafeSettingsToConsole(const SmallStringBase& messages); /// Throttles the system, i.e. sleeps until it's time to execute the next frame. 
-static void Throttle(Common::Timer::Value current_time); +static void Throttle(Common::Timer::Value current_time, Common::Timer::Value sleep_until); static void UpdatePerformanceCounters(); static void AccumulatePreFrameSleepTime(); static void UpdatePreFrameSleepTime(); @@ -170,7 +171,6 @@ static std::string s_input_profile_name; static System::State s_state = System::State::Shutdown; static std::atomic_bool s_startup_cancelled{false}; -static bool s_keep_gpu_device_on_shutdown = false; static ConsoleRegion s_region = ConsoleRegion::NTSC_U; TickCount System::g_ticks_per_second = System::MASTER_CLOCK; @@ -226,11 +226,8 @@ static float s_maximum_frame_time = 0.0f; static float s_average_frame_time = 0.0f; static float s_cpu_thread_usage = 0.0f; static float s_cpu_thread_time = 0.0f; -static float s_sw_thread_usage = 0.0f; -static float s_sw_thread_time = 0.0f; -static float s_average_gpu_time = 0.0f; -static float s_accumulated_gpu_time = 0.0f; -static float s_gpu_usage = 0.0f; +static float s_gpu_thread_usage = 0.0f; +static float s_gpu_thread_time = 0.0f; static System::FrameTimeHistory s_frame_time_history; static u32 s_frame_time_history_pos = 0; static u32 s_last_frame_number = 0; @@ -238,7 +235,6 @@ static u32 s_last_internal_frame_number = 0; static u32 s_last_global_tick_counter = 0; static u64 s_last_cpu_time = 0; static u64 s_last_sw_time = 0; -static u32 s_presents_since_last_update = 0; static Common::Timer s_fps_timer; static Common::Timer s_frame_timer; static Threading::ThreadHandle s_cpu_thread_handle; @@ -648,21 +644,13 @@ float System::GetCPUThreadAverageTime() { return s_cpu_thread_time; } -float System::GetSWThreadUsage() +float System::GetGPUThreadUsage() { - return s_sw_thread_usage; + return s_gpu_thread_usage; } -float System::GetSWThreadAverageTime() +float System::GetGPUThreadAverageTime() { - return s_sw_thread_time; -} -float System::GetGPUUsage() -{ - return s_gpu_usage; -} -float System::GetGPUAverageTime() -{ - return 
s_average_gpu_time; + return s_gpu_thread_time; } const System::FrameTimeHistory& System::GetFrameTimeHistory() { @@ -1059,7 +1047,6 @@ std::string System::GetInputProfilePath(std::string_view name) bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_device, bool update_display /* = true*/) { ClearMemorySaveStates(); - g_gpu->RestoreDeviceContext(); // save current state std::unique_ptr state_stream = ByteStream::CreateGrowableMemoryStream(); @@ -1068,16 +1055,8 @@ bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_device, bool if (!state_valid) ERROR_LOG("Failed to save old GPU state when switching renderers"); - // create new renderer - g_gpu.reset(); - if (force_recreate_device) - { - PostProcessing::Shutdown(); - Host::ReleaseGPUDevice(); - } - Error error; - if (!CreateGPU(renderer, true, &error)) + if (!GPUThread::SwitchGPUBackend(renderer, force_recreate_device, &error)) { if (!IsStartupCancelled()) Host::ReportErrorAsync("Error", error.GetDescription()); @@ -1090,7 +1069,6 @@ bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_device, bool { state_stream->SeekAbsolute(0); sw.SetMode(StateWrapper::Mode::Read); - g_gpu->RestoreDeviceContext(); g_gpu->DoState(sw, nullptr, update_display); TimingEvents::DoState(sw); } @@ -1326,9 +1304,6 @@ void System::PauseSystem(bool paused) if (paused) { - // Make sure the GPU is flushed, otherwise the VB might still be mapped. 
- g_gpu->FlushRender(); - FullscreenUI::OnSystemPaused(); InputManager::PauseVibration(); @@ -1345,7 +1320,7 @@ void System::PauseSystem(bool paused) Host::OnSystemPaused(); Host::OnIdleStateChanged(); UpdateDisplayVSync(); - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); } else { @@ -1420,7 +1395,7 @@ bool System::LoadState(const char* filename, Error* error) ResetThrottler(); if (IsPaused()) - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); VERBOSE_LOG("Loading state took {:.2f} msec", load_timer.GetTimeMilliseconds()); return true; @@ -1511,7 +1486,6 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) Assert(s_state == State::Shutdown); s_state = State::Starting; s_startup_cancelled.store(false); - s_keep_gpu_device_on_shutdown = static_cast(g_gpu_device); s_region = g_settings.region; Host::OnSystemStarting(); @@ -1801,15 +1775,11 @@ bool System::Initialize(bool force_software_renderer, Error* error) s_average_frame_time = 0.0f; s_cpu_thread_usage = 0.0f; s_cpu_thread_time = 0.0f; - s_sw_thread_usage = 0.0f; - s_sw_thread_time = 0.0f; - s_average_gpu_time = 0.0f; - s_accumulated_gpu_time = 0.0f; - s_gpu_usage = 0.0f; + s_gpu_thread_usage = 0.0f; + s_gpu_thread_time = 0.0f; s_last_frame_number = 0; s_last_internal_frame_number = 0; s_last_global_tick_counter = 0; - s_presents_since_last_update = 0; s_last_cpu_time = 0; s_fps_timer.Reset(); s_frame_timer.Reset(); @@ -1828,34 +1798,33 @@ bool System::Initialize(bool force_software_renderer, Error* error) CPU::CodeCache::Initialize(); - if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer, false, error)) + // TODO: Drop pointer + g_gpu = std::make_unique(); + g_gpu->Initialize(); + + if (!GPUThread::CreateGPUBackend(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer, error)) { + g_gpu.reset(); Bus::Shutdown(); CPU::Shutdown(); return false; } + // Was startup cancelled? (e.g. 
shading compilers took too long and the user closed the application) + if (IsStartupCancelled()) + { + GPUThread::DestroyGPUBackend(); + g_gpu.reset(); + CPU::Shutdown(); + Bus::Shutdown(); + return false; + } + GTE::UpdateAspectRatio(); if (g_settings.gpu_pgxp_enable) CPU::PGXP::Initialize(); - // Was startup cancelled? (e.g. shading compilers took too long and the user closed the application) - if (IsStartupCancelled()) - { - g_gpu.reset(); - if (!s_keep_gpu_device_on_shutdown) - { - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - } - if (g_settings.gpu_pgxp_enable) - CPU::PGXP::Shutdown(); - CPU::Shutdown(); - Bus::Shutdown(); - return false; - } - DMA::Initialize(); CDROM::Initialize(); Pad::Initialize(); @@ -1919,16 +1888,9 @@ void System::DestroySystem() TimingEvents::Shutdown(); ClearRunningGame(); - // Restore present-all-frames behavior. - if (s_keep_gpu_device_on_shutdown && g_gpu_device) - { + if (GPUThread::WasFullscreenUIRequested()) UpdateDisplayVSync(); - } - else - { - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - } + GPUThread::DestroyGPUBackend(); s_bios_hash = {}; s_bios_image_info = nullptr; @@ -1967,9 +1929,6 @@ void System::Execute() case State::Running: { s_system_executing = true; - - // TODO: Purge reset/restore - g_gpu->RestoreDeviceContext(); TimingEvents::UpdateCPUDowncount(); if (s_rewind_load_counter >= 0) @@ -1998,9 +1957,6 @@ void System::FrameDone() { s_frame_number++; - // Vertex buffer is shared, need to flush what we have. - g_gpu->FlushRender(); - // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. // TODO: when running ahead, we can skip this (and the flush above) SPU::GeneratePendingSamples(); @@ -2051,7 +2007,6 @@ void System::FrameDone() // counter-acts that. 
Host::PumpMessagesOnCPUThread(); InputManager::PollSources(); - g_gpu->RestoreDeviceContext(); if (IsExecutionInterrupted()) { @@ -2078,48 +2033,6 @@ void System::FrameDone() if (s_pre_frame_sleep) AccumulatePreFrameSleepTime(); - // explicit present (frame pacing) - const bool is_unique_frame = (s_last_presented_internal_frame_number != s_internal_frame_number); - s_last_presented_internal_frame_number = s_internal_frame_number; - - const bool skip_this_frame = (((s_skip_presenting_duplicate_frames && !is_unique_frame && - s_skipped_frame_count < MAX_SKIPPED_DUPLICATE_FRAME_COUNT) || - (!s_optimal_frame_pacing && current_time > s_next_frame_time && - s_skipped_frame_count < MAX_SKIPPED_TIMEOUT_FRAME_COUNT) || - g_gpu_device->ShouldSkipPresentingFrame()) && - !s_syncing_to_host_with_vsync && !IsExecutionInterrupted()); - if (!skip_this_frame) - { - s_skipped_frame_count = 0; - - const bool throttle_before_present = (s_optimal_frame_pacing && s_throttler_enabled && !IsExecutionInterrupted()); - const bool explicit_present = (throttle_before_present && g_gpu_device->GetFeatures().explicit_present); - if (explicit_present) - { - const bool do_present = PresentDisplay(false, true); - Throttle(current_time); - if (do_present) - g_gpu_device->SubmitPresent(); - } - else - { - if (throttle_before_present) - Throttle(current_time); - - PresentDisplay(false, false); - - if (!throttle_before_present && s_throttler_enabled && !IsExecutionInterrupted()) - Throttle(current_time); - } - } - else - { - DEBUG_LOG("Skipping displaying frame"); - s_skipped_frame_count++; - if (s_throttler_enabled) - Throttle(current_time); - } - // pre-frame sleep (input lag reduction) current_time = Common::Timer::GetCurrentValue(); if (s_pre_frame_sleep) @@ -2128,10 +2041,15 @@ void System::FrameDone() if (pre_frame_sleep_until > current_time && Common::Timer::ConvertValueToMilliseconds(pre_frame_sleep_until - current_time) >= 1) { - Common::Timer::SleepUntil(pre_frame_sleep_until, true); + 
Throttle(current_time, pre_frame_sleep_until); current_time = Common::Timer::GetCurrentValue(); } } + else + { + if (s_throttler_enabled) + Throttle(current_time, s_next_frame_time); + } s_frame_start_time = current_time; @@ -2149,14 +2067,42 @@ void System::FrameDone() } } - g_gpu->RestoreDeviceContext(); - // Update perf counters *after* throttling, we want to measure from start-of-frame // to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different // amounts of computation happening in each frame). System::UpdatePerformanceCounters(); } +void System::GetFramePresentationDetails(bool* present_frame, bool* allow_present_skip, + Common::Timer::Value* present_time) +{ + const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); + + // explicit present (frame pacing) + const bool is_unique_frame = (s_last_presented_internal_frame_number != s_internal_frame_number); + s_last_presented_internal_frame_number = s_internal_frame_number; + + const bool skip_this_frame = (((s_skip_presenting_duplicate_frames && !is_unique_frame && + s_skipped_frame_count < MAX_SKIPPED_DUPLICATE_FRAME_COUNT) || + (!s_optimal_frame_pacing && current_time > s_next_frame_time && + s_skipped_frame_count < MAX_SKIPPED_TIMEOUT_FRAME_COUNT)) && + !s_syncing_to_host_with_vsync && !IsExecutionInterrupted()); + const bool should_allow_present_skip = !s_syncing_to_host_with_vsync && !s_optimal_frame_pacing; + *present_frame = !skip_this_frame; + *allow_present_skip = should_allow_present_skip; + *present_time = (should_allow_present_skip && !IsExecutionInterrupted()) ? 
s_next_frame_time : 0; + + if (!skip_this_frame) + { + s_skipped_frame_count = 0; + } + else + { + DEBUG_LOG("Skipping displaying frame"); + s_skipped_frame_count++; + } +} + void System::SetThrottleFrequency(float frequency) { if (s_throttle_frequency == frequency) @@ -2188,12 +2134,12 @@ void System::ResetThrottler() s_pre_frame_sleep_time = 0; } -void System::Throttle(Common::Timer::Value current_time) +void System::Throttle(Common::Timer::Value current_time, Common::Timer::Value sleep_until) { // If we're running too slow, advance the next frame time based on the time we lost. Effectively skips // running those frames at the intended time, because otherwise if we pause in the debugger, we'll run // hundreds of frames when we resume. - if (current_time > s_next_frame_time) + if (current_time > sleep_until) { const Common::Timer::Value diff = static_cast(current_time) - static_cast(s_next_frame_time); s_next_frame_time += (diff / s_frame_period) * s_frame_period + s_frame_period; @@ -2208,11 +2154,10 @@ void System::Throttle(Common::Timer::Value current_time) Common::Timer::Value poll_start_time = current_time; for (;;) { - const u32 sleep_ms = - static_cast(Common::Timer::ConvertValueToMilliseconds(s_next_frame_time - poll_start_time)); + const u32 sleep_ms = static_cast(Common::Timer::ConvertValueToMilliseconds(sleep_until - poll_start_time)); s_socket_multiplexer->PollEventsWithTimeout(sleep_ms); poll_start_time = Common::Timer::GetCurrentValue(); - if (poll_start_time >= s_next_frame_time || (!g_settings.display_optimal_frame_pacing && sleep_ms == 0)) + if (poll_start_time >= sleep_until || (!g_settings.display_optimal_frame_pacing && sleep_ms == 0)) break; } } @@ -2221,21 +2166,21 @@ void System::Throttle(Common::Timer::Value current_time) // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. // Linux also seems to do a much better job of waking up at the requested time. 
#if !defined(__linux__) - Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_optimal_frame_pacing); + Common::Timer::SleepUntil(sleep_until, g_settings.display_optimal_frame_pacing); #else - Common::Timer::SleepUntil(s_next_frame_time, false); + Common::Timer::SleepUntil(sleep_until, false); #endif } #else // No spinwait on Android, see above. - Common::Timer::SleepUntil(s_next_frame_time, false); + Common::Timer::SleepUntil(sleep_until, false); #endif #if 0 DEV_LOG("Asked for {:.2f} ms, slept for {:.2f} ms, {:.2f} ms late", - Common::Timer::ConvertValueToMilliseconds(s_next_frame_time - current_time), + Common::Timer::ConvertValueToMilliseconds(sleep_until - current_time), Common::Timer::ConvertValueToMilliseconds(Common::Timer::GetCurrentValue() - current_time), - Common::Timer::ConvertValueToMilliseconds(Common::Timer::GetCurrentValue() - s_next_frame_time)); + Common::Timer::ConvertValueToMilliseconds(Common::Timer::GetCurrentValue() - sleep_until)); #endif s_next_frame_time += s_frame_period; @@ -2293,62 +2238,6 @@ void System::RecreateSystem() PauseSystem(true); } -bool System::CreateGPU(GPURenderer renderer, bool is_switching, Error* error) -{ - const RenderAPI api = Settings::GetRenderAPIForRenderer(renderer); - - if (!g_gpu_device || - (renderer != GPURenderer::Software && !GPUDevice::IsSameRenderAPI(g_gpu_device->GetRenderAPI(), api))) - { - if (g_gpu_device) - { - WARNING_LOG("Recreating GPU device, expecting {} got {}", GPUDevice::RenderAPIToString(api), - GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); - PostProcessing::Shutdown(); - } - - Host::ReleaseGPUDevice(); - if (!Host::CreateGPUDevice(api, error)) - { - Host::ReleaseRenderWindow(); - return false; - } - - if (is_switching) - PostProcessing::Initialize(); - } - - if (renderer == GPURenderer::Software) - g_gpu = GPU::CreateSoftwareRenderer(); - else - g_gpu = GPU::CreateHardwareRenderer(); - - if (!g_gpu) - { - ERROR_LOG("Failed to initialize {} renderer, falling back 
to software renderer", - Settings::GetRendererName(renderer)); - Host::AddOSDMessage( - fmt::format(TRANSLATE_FS("System", "Failed to initialize {} renderer, falling back to software renderer."), - Settings::GetRendererName(renderer)), - Host::OSD_CRITICAL_ERROR_DURATION); - g_gpu.reset(); - g_gpu = GPU::CreateSoftwareRenderer(); - if (!g_gpu) - { - ERROR_LOG("Failed to create fallback software renderer."); - if (!s_keep_gpu_device_on_shutdown) - { - PostProcessing::Shutdown(); - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); - } - return false; - } - } - - return true; -} - bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state) { if (!sw.DoMarker("System")) @@ -2398,7 +2287,6 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di if (!sw.DoMarker("InterruptController") || !InterruptController::DoState(sw)) return false; - g_gpu->RestoreDeviceContext(); if (!sw.DoMarker("GPU") || !g_gpu->DoState(sw, host_texture, update_display)) return false; @@ -2745,9 +2633,9 @@ bool System::SaveStateToStream(ByteStream* state, Error* error, u32 screenshot_s std::vector screenshot_buffer; u32 screenshot_stride; GPUTexture::Format screenshot_format; - if (g_gpu->RenderScreenshotToBuffer(screenshot_width, screenshot_height, - GSVector4i(0, 0, screenshot_width, screenshot_height), false, - &screenshot_buffer, &screenshot_stride, &screenshot_format) && + if (GPUBackend::RenderScreenshotToBuffer(screenshot_width, screenshot_height, + GSVector4i(0, 0, screenshot_width, screenshot_height), false, + &screenshot_buffer, &screenshot_stride, &screenshot_format) && GPUTexture::ConvertTextureDataToRGBA8(screenshot_width, screenshot_height, screenshot_buffer, screenshot_stride, screenshot_format)) { @@ -2782,9 +2670,6 @@ bool System::SaveStateToStream(ByteStream* state, Error* error, u32 screenshot_s // write data { header.offset_to_data = static_cast(state->GetPosition()); - - 
g_gpu->RestoreDeviceContext(); - header.data_compression_type = compression_method; bool result = false; @@ -2869,9 +2754,9 @@ void System::UpdatePerformanceCounters() 100.0f; s_last_global_tick_counter = global_tick_counter; - const Threading::Thread* sw_thread = g_gpu->GetSWThread(); + const Threading::ThreadHandle& gpu_thread = GPUThread::GetThreadHandle(); const u64 cpu_time = s_cpu_thread_handle ? s_cpu_thread_handle.GetCPUTime() : 0; - const u64 sw_time = sw_thread ? sw_thread->GetCPUTime() : 0; + const u64 sw_time = gpu_thread ? gpu_thread.GetCPUTime() : 0; const u64 cpu_delta = cpu_time - s_last_cpu_time; const u64 sw_delta = sw_time - s_last_sw_time; s_last_cpu_time = cpu_time; @@ -2879,27 +2764,19 @@ void System::UpdatePerformanceCounters() s_cpu_thread_usage = static_cast(static_cast(cpu_delta) * pct_divider); s_cpu_thread_time = static_cast(static_cast(cpu_delta) * time_divider); - s_sw_thread_usage = static_cast(static_cast(sw_delta) * pct_divider); - s_sw_thread_time = static_cast(static_cast(sw_delta) * time_divider); + s_gpu_thread_usage = static_cast(static_cast(sw_delta) * pct_divider); + s_gpu_thread_time = static_cast(static_cast(sw_delta) * time_divider); s_fps_timer.ResetTo(now_ticks); - if (g_gpu_device->IsGPUTimingEnabled()) - { - s_average_gpu_time = s_accumulated_gpu_time / static_cast(std::max(s_presents_since_last_update, 1u)); - s_gpu_usage = s_accumulated_gpu_time / (time * 10.0f); - } - s_accumulated_gpu_time = 0.0f; - s_presents_since_last_update = 0; - - if (g_settings.display_show_gpu_stats) - g_gpu->UpdateStatistics(frames_run); + if (g_settings.display_show_gpu_stats || g_settings.display_show_gpu_stats) + GPUThread::SetPerformanceCounterUpdatePending(); if (s_pre_frame_sleep) UpdatePreFrameSleepTime(); - VERBOSE_LOG("FPS: {:.2f} VPS: {:.2f} CPU: {:.2f} GPU: {:.2f} Average: {:.2f}ms Min: {:.2f}ms Max: {:.2f}ms", s_fps, - s_vps, s_cpu_thread_usage, s_gpu_usage, s_average_frame_time, s_minimum_frame_time, s_maximum_frame_time); + 
VERBOSE_LOG("FPS: {:.2f} VPS: {:.2f} CPU: {:.2f} Average: {:.2f}ms Min: {:.2f}ms Max: {:.2f}ms", s_fps, s_vps, + s_cpu_thread_usage, s_average_frame_time, s_minimum_frame_time, s_maximum_frame_time); Host::OnPerformanceCountersUpdated(); } @@ -2910,8 +2787,8 @@ void System::ResetPerformanceCounters() s_last_internal_frame_number = s_internal_frame_number; s_last_global_tick_counter = GetGlobalTickCounter(); s_last_cpu_time = s_cpu_thread_handle ? s_cpu_thread_handle.GetCPUTime() : 0; - if (const Threading::Thread* sw_thread = g_gpu->GetSWThread(); sw_thread) - s_last_sw_time = sw_thread->GetCPUTime(); + if (const Threading::ThreadHandle& sw_thread = GPUThread::GetThreadHandle(); sw_thread) + s_last_sw_time = sw_thread.GetCPUTime(); else s_last_sw_time = 0; @@ -3045,7 +2922,7 @@ void System::UpdateDisplayVSync() s_syncing_to_host_with_vsync ? " (for throttling)" : "", allow_present_throttle ? " (present throttle allowed)" : ""); - g_gpu_device->SetVSyncMode(vsync_mode, allow_present_throttle); + GPUThread::SetVSync(vsync_mode, allow_present_throttle); } GPUVSyncMode System::GetEffectiveVSyncMode() @@ -3653,7 +3530,6 @@ bool System::DumpVRAM(const char* filename) if (!IsValid()) return false; - g_gpu->RestoreDeviceContext(); return g_gpu->DumpVRAMToFile(filename); } @@ -3805,10 +3681,11 @@ void System::UpdateRunningGame(const char* path, CDImage* image, bool booting) if (s_running_game_serial != prev_serial) UpdateSessionTime(prev_serial); + // TODO GPU-THREAD: Racey... 
if (SaveStateSelectorUI::IsOpen()) - SaveStateSelectorUI::RefreshList(s_running_game_serial); - else - SaveStateSelectorUI::ClearList(); + { + GPUThread::RunOnThread([serial = s_running_game_serial]() { SaveStateSelectorUI::RefreshList(serial); }); + } UpdateRichPresence(booting); @@ -4082,7 +3959,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale || g_settings.gpu_multisamples != old_settings.gpu_multisamples || g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading || - g_settings.gpu_use_thread != old_settings.gpu_use_thread || + g_settings.gpu_max_queued_frames != old_settings.gpu_max_queued_frames || g_settings.gpu_use_software_renderer_for_readbacks != old_settings.gpu_use_software_renderer_for_readbacks || g_settings.gpu_fifo_size != old_settings.gpu_fifo_size || g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead || @@ -4116,9 +3993,9 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.rewind_enable != old_settings.rewind_enable || g_settings.runahead_frames != old_settings.runahead_frames) { - g_gpu->UpdateSettings(old_settings); + GPUThread::UpdateSettings(); if (IsPaused()) - InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); } if (g_settings.gpu_widescreen_hack != old_settings.gpu_widescreen_hack || @@ -4144,9 +4021,6 @@ void System::CheckForSettingsChanges(const Settings& old_settings) CPU::CodeCache::Reset(); } - if (g_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) - g_gpu->ResetStatistics(); - if (g_settings.cdrom_readahead_sectors != old_settings.cdrom_readahead_sectors) CDROM::SetReadaheadSectors(g_settings.cdrom_readahead_sectors); @@ -4392,6 +4266,9 @@ void System::CalculateRewindMemoryUsage(u32 num_saves, u32 resolution_scale, u64 void System::ClearMemorySaveStates() { + if (!s_rewind_states.empty() || !s_runahead_states.empty()) + Panic("FIXME TEXTURE CLEAR"); + 
s_rewind_states.clear(); s_runahead_states.clear(); } @@ -4563,11 +4440,12 @@ void System::DoRewind() s_rewind_load_counter--; } - InvalidateDisplay(); + // TODO FIXME InvalidateDisplay(); + Host::PumpMessagesOnCPUThread(); Internal::IdlePollUpdate(); - Throttle(Common::Timer::GetCurrentValue()); + Throttle(Common::Timer::GetCurrentValue(), s_next_frame_time); } void System::SaveRunaheadState() @@ -4858,7 +4736,9 @@ bool System::SaveScreenshot(const char* filename, DisplayScreenshotMode mode, Di filename = auto_filename.c_str(); } - return g_gpu->RenderScreenshotToFile(filename, mode, quality, compress_on_thread, true); + Panic("TODO FIXME"); + // return g_gpu->RenderScreenshotToFile(filename, mode, quality, compress_on_thread, true); + return false; } std::string System::GetGameSaveStateFileName(std::string_view serial, s32 slot) @@ -5322,7 +5202,8 @@ void System::ToggleSoftwareRendering() if (IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software) return; - const GPURenderer new_renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer; + const GPURenderer new_renderer = + GPUBackend::IsUsingHardwareBackend() ? GPURenderer::Software : g_settings.gpu_renderer; Host::AddIconOSDMessage("SoftwareRendering", ICON_FA_PAINT_ROLLER, fmt::format(TRANSLATE_FS("OSDMessage", "Switching to {} renderer..."), @@ -5338,7 +5219,7 @@ void System::RequestDisplaySize(float scale /*= 0.0f*/) return; if (scale == 0.0f) - scale = g_gpu->IsHardwareRenderer() ? static_cast(g_settings.gpu_resolution_scale) : 1.0f; + scale = GPUBackend::IsUsingHardwareBackend() ? 
static_cast(g_settings.gpu_resolution_scale) : 1.0f; const float y_scale = (static_cast(g_gpu->GetCRTCDisplayWidth()) / static_cast(g_gpu->GetCRTCDisplayHeight())) / @@ -5360,62 +5241,7 @@ void System::HostDisplayResized() if (g_settings.gpu_widescreen_hack && g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow) GTE::UpdateAspectRatio(); - g_gpu->UpdateResolutionScale(); -} - -bool System::PresentDisplay(bool skip_present, bool explicit_present) -{ - // acquire for IO.MousePos. - std::atomic_thread_fence(std::memory_order_acquire); - - if (!skip_present) - { - FullscreenUI::Render(); - ImGuiManager::RenderTextOverlays(); - ImGuiManager::RenderOSDMessages(); - - if (s_state == State::Running) - ImGuiManager::RenderSoftwareCursors(); - } - - // Debug windows are always rendered, otherwise mouse input breaks on skip. - ImGuiManager::RenderOverlayWindows(); - ImGuiManager::RenderDebugWindows(); - - bool do_present; - if (g_gpu && !skip_present) - do_present = g_gpu->PresentDisplay(); - else - do_present = g_gpu_device->BeginPresent(skip_present); - - if (do_present) - { - g_gpu_device->RenderImGui(); - g_gpu_device->EndPresent(explicit_present); - - if (g_gpu_device->IsGPUTimingEnabled()) - { - s_accumulated_gpu_time += g_gpu_device->GetAndResetAccumulatedGPUTime(); - s_presents_since_last_update++; - } - } - else - { - // Still need to kick ImGui or it gets cranky. 
- ImGui::Render(); - } - - ImGuiManager::NewFrame(); - - return do_present; -} - -void System::InvalidateDisplay() -{ - PresentDisplay(false, false); - - if (g_gpu) - g_gpu->RestoreDeviceContext(); + GPUBackend::PushCommand(GPUBackend::NewUpdateResolutionScaleCommand()); } void System::SetTimerResolutionIncreased(bool enabled) diff --git a/src/core/system.h b/src/core/system.h index 695260571..0b9ca5dc6 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -232,10 +232,8 @@ float GetMaximumFrameTime(); float GetThrottleFrequency(); float GetCPUThreadUsage(); float GetCPUThreadAverageTime(); -float GetSWThreadUsage(); -float GetSWThreadAverageTime(); -float GetGPUUsage(); -float GetGPUAverageTime(); +float GetGPUThreadUsage(); +float GetGPUThreadAverageTime(); const FrameTimeHistory& GetFrameTimeHistory(); u32 GetFrameTimeHistoryPos(); void FormatLatencyStats(SmallStringBase& str); @@ -299,6 +297,7 @@ void SetThrottleFrequency(float frequency); void UpdateThrottlePeriod(); void ResetThrottler(); void ResetPerformanceCounters(); +void GetFramePresentationDetails(bool* present_frame, bool* allow_present_skip, Common::Timer::Value* present_time); // Access controllers for simulating input. Controller* GetController(u32 slot); @@ -479,10 +478,6 @@ void RequestDisplaySize(float scale = 0.0f); /// Call when host display size changes, use with "match display" aspect ratio setting. void HostDisplayResized(); -/// Renders the display. 
-bool PresentDisplay(bool skip_present, bool explicit_present); -void InvalidateDisplay(); - ////////////////////////////////////////////////////////////////////////// // Memory Save States (Rewind and Runahead) ////////////////////////////////////////////////////////////////////////// diff --git a/src/duckstation-qt/qthost.cpp b/src/duckstation-qt/qthost.cpp index b00449c60..ead3a79e5 100644 --- a/src/duckstation-qt/qthost.cpp +++ b/src/duckstation-qt/qthost.cpp @@ -18,6 +18,8 @@ #include "core/game_list.h" #include "core/gdb_server.h" #include "core/gpu.h" +#include "core/gpu_backend.h" +#include "core/gpu_thread.h" #include "core/host.h" #include "core/imgui_overlays.h" #include "core/memory_card.h" @@ -762,12 +764,9 @@ void EmuThread::startFullscreenUI() m_run_fullscreen_ui = true; Error error; - if (!Host::CreateGPUDevice(Settings::GetRenderAPIForRenderer(g_settings.gpu_renderer), &error) || - !FullscreenUI::Initialize()) + if (!GPUThread::StartFullscreenUI(&error)) { Host::ReportErrorAsync("Error", error.GetDescription()); - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); m_run_fullscreen_ui = false; return; } @@ -803,8 +802,7 @@ void EmuThread::stopFullscreenUI() if (!g_gpu_device) return; - Host::ReleaseGPUDevice(); - Host::ReleaseRenderWindow(); + GPUThread::Shutdown(); } void EmuThread::bootSystem(std::shared_ptr params) @@ -911,7 +909,10 @@ void EmuThread::onDisplayWindowMouseWheelEvent(const QPoint& delta_angle) void EmuThread::onDisplayWindowResized(int width, int height, float scale) { - Host::ResizeDisplayWindow(width, height, scale); + if (!GPUThread::IsStarted()) + return; + + GPUThread::ResizeDisplayWindow(width, height, scale); } void EmuThread::redrawDisplayWindow() @@ -925,7 +926,7 @@ void EmuThread::redrawDisplayWindow() if (!g_gpu_device || System::IsShutdown()) return; - System::InvalidateDisplay(); + GPUThread::PresentCurrentFrame(); } void EmuThread::toggleFullscreen() @@ -953,7 +954,7 @@ void EmuThread::setFullscreen(bool 
fullscreen, bool allow_render_to_main) m_is_fullscreen = fullscreen; m_is_rendering_to_main = allow_render_to_main && shouldRenderToMain(); - Host::UpdateDisplayWindow(); + GPUThread::UpdateDisplayWindow(); } bool Host::IsFullscreen() @@ -978,7 +979,7 @@ void EmuThread::setSurfaceless(bool surfaceless) return; m_is_surfaceless = surfaceless; - Host::UpdateDisplayWindow(); + GPUThread::UpdateDisplayWindow(); } void EmuThread::requestDisplaySize(float scale) @@ -1757,33 +1758,16 @@ void EmuThread::run() // main loop while (!m_shutdown_flag) { + // TODO: Maybe make this better? if (System::IsRunning()) - { System::Execute(); - } else - { - // we want to keep rendering the UI when paused and fullscreen UI is enabled - if (!FullscreenUI::HasActiveWindow() && !System::IsRunning()) - { - // wait until we have a system before running - m_event_loop->exec(); - continue; - } - - m_event_loop->processEvents(QEventLoop::AllEvents); - System::Internal::IdlePollUpdate(); - if (g_gpu_device) - { - System::PresentDisplay(false, false); - if (!g_gpu_device->IsVSyncModeBlocking()) - g_gpu_device->ThrottlePresentation(); - } - } + m_event_loop->exec(); } if (System::IsValid()) System::ShutdownSystem(false); + GPUThread::Shutdown(); destroyBackgroundControllerPollTimer(); System::Internal::CPUThreadShutdown(); @@ -1979,13 +1963,13 @@ void Host::ReleaseRenderWindow() void EmuThread::updatePerformanceCounters() { - const RenderAPI render_api = g_gpu_device ? 
g_gpu_device->GetRenderAPI() : RenderAPI::None; - const bool hardware_renderer = g_gpu && g_gpu->IsHardwareRenderer(); + const RenderAPI render_api = GPUThread::GetRenderAPI(); + const bool hardware_renderer = GPUBackend::IsUsingHardwareBackend(); u32 render_width = 0; u32 render_height = 0; - if (g_gpu) - std::tie(render_width, render_height) = g_gpu->GetEffectiveDisplayResolution(); + if (System::IsValid()) + std::tie(render_width, render_height) = GPUBackend::GetLastDisplaySourceSize(); if (render_api != m_last_render_api || hardware_renderer != m_last_hardware_renderer) { diff --git a/src/duckstation-qt/qthost.h b/src/duckstation-qt/qthost.h index 22afadf49..e7a728d1c 100644 --- a/src/duckstation-qt/qthost.h +++ b/src/duckstation-qt/qthost.h @@ -96,6 +96,8 @@ public: ALWAYS_INLINE bool isFullscreen() const { return m_is_fullscreen; } ALWAYS_INLINE bool isRenderingToMain() const { return m_is_rendering_to_main; } ALWAYS_INLINE bool isSurfaceless() const { return m_is_surfaceless; } + + // TODO: Maybe remove this... ALWAYS_INLINE bool isRunningFullscreenUI() const { return m_run_fullscreen_ui; } std::optional acquireRenderWindow(bool recreate_window);