From c970740d1260e40e8c41965a99c59d602df2e63f Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 21:54:43 +1000 Subject: [PATCH 01/35] InputManager: Workaround macro chord trigger issue --- src/util/input_manager.cpp | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/util/input_manager.cpp b/src/util/input_manager.cpp index 4231cdade..e088a899a 100644 --- a/src/util/input_manager.cpp +++ b/src/util/input_manager.cpp @@ -904,13 +904,32 @@ void InputManager::AddPadBindings(const SettingsInterface& si, const std::string { const float deadzone = si.GetFloatValue(section.c_str(), fmt::format("Macro{}Deadzone", macro_button_index + 1).c_str(), 0.0f); - AddBindings(bindings, InputAxisEventHandler{[pad_index, macro_button_index, deadzone](float value) { - if (!System::IsValid()) - return; + for (const std::string& binding : bindings) + { + // We currently can't use chords with a deadzone. + if (binding.find('&') != std::string::npos || deadzone == 0.0f) + { + if (deadzone != 0.0f) + WARNING_LOG("Chord binding {} not supported with trigger deadzone {}.", binding, deadzone); - const bool state = (value > deadzone); - SetMacroButtonState(pad_index, macro_button_index, state); - }}); + AddBinding(binding, InputButtonEventHandler{[pad_index, macro_button_index](bool state) { + if (!System::IsValid()) + return; + + SetMacroButtonState(pad_index, macro_button_index, state); + }}); + } + else + { + AddBinding(binding, InputAxisEventHandler{[pad_index, macro_button_index, deadzone](float value) { + if (!System::IsValid()) + return; + + const bool state = (value > deadzone); + SetMacroButtonState(pad_index, macro_button_index, state); + }}); + } + } } } From ff010686f856cdba268008bd7e2dac988c32acbc Mon Sep 17 00:00:00 2001 From: Daniel Nylander Date: Sun, 24 Nov 2024 12:55:06 +0100 Subject: [PATCH 02/35] Updated Swedish translation (#3333) --- .../translations/duckstation-qt_sv.ts | 215 +++++++++--------- 1 file 
changed, 108 insertions(+), 107 deletions(-) diff --git a/src/duckstation-qt/translations/duckstation-qt_sv.ts b/src/duckstation-qt/translations/duckstation-qt_sv.ts index 776b80bf2..490b6c305 100644 --- a/src/duckstation-qt/translations/duckstation-qt_sv.ts +++ b/src/duckstation-qt/translations/duckstation-qt_sv.ts @@ -6107,7 +6107,7 @@ Vill du läsa in detta tillstånd och fortsätta? Adjusts the emulation speed so the console's refresh rate matches the host when VSync is enabled. - + Justerar emuleringshastigheten så att konsolens uppdateringsfrekvens matchar värdens när VSync är aktiverat. @@ -6122,7 +6122,7 @@ Vill du läsa in detta tillstånd och fortsätta? Allows loading protected games without subchannel information. - + Tillåt att läsa in skyddade spel utan underkanalsinformation. @@ -6146,7 +6146,7 @@ Vill du läsa in detta tillstånd och fortsätta? Are you sure you want to clear the current post-processing chain? All configuration will be lost. - + Är du säker på att du vill tömma aktuell efterbehandlingskedja? All konfiguration kommer att förloras. @@ -6156,7 +6156,7 @@ Vill du läsa in detta tillstånd och fortsätta? Attempts to map the selected port to a chosen controller. - + Försöker att mappa vald port till en vald handkontroller. @@ -6206,17 +6206,17 @@ Vill du läsa in detta tillstånd och fortsätta? Automatically applies patches to disc images when they are present, currently only PPF is supported. - + Tillämpar automatiskt patchar till skivavbilder när de finns tillgängliga. Endast PPF stöds för närvarande. Automatically loads and applies cheats on game start. Cheats can break games and saves. - + Läser automatiskt in och tillämpar fusk på startat spel. Fusk kan göra sönder spel och sparningar. Automatically resizes the window to match the internal resolution. - + Storleksändrar automatiskt fönstret för att matcha interna upplösningen. @@ -6226,12 +6226,12 @@ Vill du läsa in detta tillstånd och fortsätta? 
Automatically switches to fullscreen mode when the program is started. - + Växlar automatiskt till helskärmsläge när programmet startas. Avoids calls to C++ code, significantly speeding up the recompiler. - + Undviker anrop till C++-kod, snabbar upp omkompileraren avsevärt. @@ -6301,12 +6301,12 @@ Vill du läsa in detta tillstånd och fortsätta? Changes the aspect ratio used to display the console's output to the screen. - + Ändrar bildförhållandet som används för att visa konsolens utmatning på skärmen. Chooses the backend to use for rendering the console/game visuals. - + Väljer bakänden att använda för rendering av konsolen/spelets visuella del. @@ -6341,12 +6341,12 @@ Vill du läsa in detta tillstånd och fortsätta? Compatibility Rating - + Kompatibilitetsbetyg Completely exits the application, returning you to your desktop. - + Avslutar programmet helt och återvänder till ditt skrivbord. @@ -6366,17 +6366,17 @@ Vill du läsa in detta tillstånd och fortsätta? Controller Port {} - + Kontrollerport {} Controller Port {} Macros - + Makron för kontrollerport {} Controller Port {} Settings - + Inställningar för kontrollerport {} @@ -6391,7 +6391,7 @@ Vill du läsa in detta tillstånd och fortsätta? Controller settings reset to default. - + Kontrollerinställningar nollställda till standardvärden. @@ -6401,12 +6401,12 @@ Vill du läsa in detta tillstånd och fortsätta? Controls the volume of the audio played on the host when fast forwarding. - + Styr volymen för ljudet som spelas på värden vid snabbspolning. Controls the volume of the audio played on the host. - + Styr volymen för ljudet som spelas på värden. @@ -6416,7 +6416,7 @@ Vill du läsa in detta tillstånd och fortsätta? Copies the global controller configuration to this game. - + Kopierar global kontrollerkonfiguration till detta spel. @@ -6431,7 +6431,7 @@ Vill du läsa in detta tillstånd och fortsätta? Could not find any CD/DVD-ROM devices. 
Please ensure you have a drive connected and sufficient permissions to access it. - + Kunde inte hitta några cd/dvd-rom-enheter. Försäkra dig om att du har en enhet ansluten och tillräcklig behörighet att komma åt den. @@ -6516,12 +6516,12 @@ Vill du läsa in detta tillstånd och fortsätta? Determines how large the on-screen messages and monitor are. - + Bestämmer hur stora OSD-meddelanden och skärmen är. Determines how much button pressure is ignored before activating the macro. - + Bestämmer hur hårt knapptryck som ignoreras innan makrot aktiveras. @@ -6536,7 +6536,7 @@ Vill du läsa in detta tillstånd och fortsätta? Determines how much pressure is simulated when macro is active. - + Bestämmer hur hårt tryck som simuleras när makrot är aktivt. @@ -6556,7 +6556,7 @@ Vill du läsa in detta tillstånd och fortsätta? Determines that field that the game list will be sorted by. - + Bestämmer fältet som spellistan ska sorteras efter. @@ -6566,7 +6566,7 @@ Vill du läsa in detta tillstånd och fortsätta? Determines the emulated hardware type. - + Bestämmer emulerad hårdvarutyp. @@ -6581,12 +6581,12 @@ Vill du läsa in detta tillstånd och fortsätta? Determines the margin between the edge of the screen and on-screen messages. - + Bestämmer marginalen mellan kanten av skärmen och OSD-meddelanden. Determines the position on the screen when black borders must be added. - + Bestämmer positionen på skärmen när svarta sorgkanter måste läggas till. @@ -6596,12 +6596,12 @@ Vill du läsa in detta tillstånd och fortsätta? Determines the size of screenshots created by DuckStation. - + Bestämmer storleken för skärmbilder som skapas av DuckStation. Determines whether a prompt will be displayed to confirm shutting down the emulator/game when the hotkey is pressed. - + Bestämmer huruvida en prompt ska visas för att bekräfta avstängning av emulatorn/spelet när snabbtangenten trycks ner. @@ -6621,7 +6621,7 @@ Vill du läsa in detta tillstånd och fortsätta? 
Disable Subdirectory Scanning - + Inaktivera inläsning av underkataloger @@ -6666,7 +6666,7 @@ Vill du läsa in detta tillstånd och fortsätta? Downloads covers from a user-specified URL template. - + Hämtar omslagsbilder från en användarangiven URL-mall. @@ -6696,7 +6696,7 @@ Vill du läsa in detta tillstånd och fortsätta? Dump Replaced Textures - + Dumpa ersatta texturer @@ -6776,7 +6776,7 @@ Vill du läsa in detta tillstånd och fortsätta? Enable Subdirectory Scanning - + Aktivera sökning i underkataloger @@ -6811,7 +6811,7 @@ Vill du läsa in detta tillstånd och fortsätta? Enable XInput Input Source - + Aktivera inmatningskälla för XInput @@ -6841,7 +6841,7 @@ Vill du läsa in detta tillstånd och fortsätta? Enables caching of guest textures, required for texture replacement. - + Aktiverar cachning av gästtexturer, krävs för texturersättning. @@ -6851,22 +6851,22 @@ Vill du läsa in detta tillstånd och fortsätta? Enables loading of cheats for this game from DuckStation's database. - + Aktiverar inläsning av fusk för detta spel från Duckstations databas. Enables loading of replacement textures. Not compatible with all games. - + Aktiverar inläsning av ersättningstexturer. Inte kompatibelt med alla spel. Enables smooth scrolling of menus in Big Picture UI. - + Aktiverar mjuk rullning i menyer i storbildsläget. Enables the cheats that are selected below. - + Aktiverar fusken som väljs nedan. @@ -6876,7 +6876,7 @@ Vill du läsa in detta tillstånd och fortsätta? Enables the replacement of background textures in supported games. - + Aktiverar ersättning av bakgrundstexturer i spel som stöds. @@ -6891,7 +6891,7 @@ Vill du läsa in detta tillstånd och fortsätta? Enter the name of the input profile you wish to create. - + Ange namnet för inmatningsprofilen som du vill skapa. @@ -6916,7 +6916,7 @@ Vill du läsa in detta tillstånd och fortsätta? Exits Big Picture mode, returning to the desktop interface. 
- + Avslutar storbildsläget och återvänder till skrivbordsgränssnittet. @@ -6947,12 +6947,13 @@ Vill du läsa in detta tillstånd och fortsätta? Failed to load shader {}. It may be invalid. Error was: - + Misslyckades med att läsa in shadern {}. Den kan vara ogiltig. +Felet var: Failed to save input profile '{}'. - + Misslyckades med att spara inmatningsprofilen '{}'. @@ -7052,7 +7053,7 @@ Error was: GPU adapter will be applied after restarting. - + GPU-adaptern kommer att tillämpas efter omstart. @@ -7077,12 +7078,12 @@ Error was: Game Slot {0}##game_slot_{0} - + Spelplats {0}##game_slot_{0} Game compatibility rating copied to clipboard. - + Spelets kompatibilitetsbetyg kopierat till urklipp. @@ -7097,22 +7098,22 @@ Error was: Game region copied to clipboard. - + Spelets region kopierat till urklipp. Game serial copied to clipboard. - + Spelets serienummer kopierat till urklipp. Game settings have been cleared for '{}'. - + Spelinställningar har tömts för '{}'. Game settings initialized with global settings for '{}'. - + Spelinställningar initierades med globala inställningar för '{}'. @@ -7122,7 +7123,7 @@ Error was: Game type copied to clipboard. - + Speltypen kopierad till urklipp. @@ -7142,12 +7143,12 @@ Error was: Global Slot {0} - {1}##global_slot_{0} - + Global plats {0} - {1}##global_slot_{0} Global Slot {0}##global_slot_{0} - + Global plats {0}##global_slot_{0} @@ -7172,7 +7173,7 @@ Error was: Hides the mouse pointer/cursor when the emulator is in fullscreen mode. - + Döljer muspekaren/markören när emulatorn är i helskärmsläge. @@ -7192,7 +7193,7 @@ Error was: Identifies any new files added to the game directories. - + Identifierar alla nya filer som lagts till i spelkatalogerna. @@ -7287,7 +7288,7 @@ Error was: Load Global State - + Läs in globalt tillstånd @@ -7362,7 +7363,7 @@ Error was: Logs messages to the debug console where supported. - + Loggar meddelanden till felsökningskonsollen där det stöds. 
@@ -7407,7 +7408,7 @@ Error was: Merges multi-disc games into one item in the game list. - + Slår samman flerskivsspel till ett objekt i spellistan. @@ -7632,7 +7633,7 @@ Error was: Pauses the emulator when you minimize the window or switch to another application, and unpauses when you switch back. - + Pausar emulatorn när du minimerar fönstret eller byter till ett annat program samt avpausar när du byter tillbaka. @@ -7677,7 +7678,7 @@ Error was: Post-processing chain cleared. - + Efterbehandlingskedja tömd. @@ -7687,7 +7688,7 @@ Error was: Preload Images to RAM - + Förinläs avbilder till RAM @@ -7712,12 +7713,12 @@ Error was: Prevents the emulator from producing any audible sound. - + Förhindrar att emulatorn skapar någon form av ljud. Prevents the screen saver from activating and the host from sleeping while emulation is running. - + Förhindrar att skärmsläckaren aktiveras och värden från att gå i viloläge när emuleringen körs. @@ -7737,7 +7738,7 @@ Error was: RAIntegration is being used instead of the built-in achievements implementation. - + RAIntegration används istället för den inbyggda prestationsimplementationen. @@ -7817,17 +7818,17 @@ Error was: Removed stage {} ({}). - + Tog bort steg {} ({}). Removes this shader from the chain. - + Tar bort denna shader från kedjan. Renames existing save states when saving to a backup file. - + Byter namn på befintliga sparade tillstånd vid sparning till en säkerhetskopiefil. @@ -7837,17 +7838,17 @@ Error was: Replaces these settings with a previously saved input profile. - + Ersätter dessa inställningar med en tidigare sparad inmatningsprofil. Reset Memory Card Directory - + Nollställ minneskortskatalog Resets all configuration to defaults (including bindings). - + Nollställer all konfiguration till standardvärden (inklusive bindningar). @@ -7857,7 +7858,7 @@ Error was: Resolution change will be applied after restarting. - + Upplösningsändringar kommer att verkställas efter omstart. 
@@ -8042,7 +8043,7 @@ Error was: Selects the quality at which screenshots will be compressed. - + Väljer kvaliteten för vilken skärmbilder ska komprimeras med. @@ -8052,12 +8053,12 @@ Error was: Selects the resolution to use in fullscreen modes. - + Väljer upplösningen att använda i helskärmslägen. Selects the view that the game list will open to. - + Väljer vyn som spellistan ska öppnas med. @@ -8097,17 +8098,17 @@ Error was: Sets the verbosity of messages logged. Higher levels will log more messages. - + Ställer in verbositeten för loggade meddelanden. Högre nivåer kommer att logga fler meddelanden. Sets which sort of memory card image will be used for slot {}. - + Anger vilken typ av minneskortsavbild som ska användas för plats {}. Setting {} binding {}. - + Ställer in {} bindning {}. @@ -8117,7 +8118,7 @@ Error was: Shader {} added as stage {}. - + Shader {} lades till som steg {}. @@ -8127,17 +8128,17 @@ Error was: Show Controller Input - + Visa kontrollerinmatning Show Enhancement Settings - + Visa förbättringsinställningar Show Frame Times - + Visa bildrutetider @@ -8157,12 +8158,12 @@ Error was: Shows a visual history of frame times in the upper-left corner of the display. - + Visar en visuell historik över bildrutetider i övre vänstra hörnet på skärmen. Shows enhancement settings in the bottom-right corner of the screen. - + Visar förbättringsinställningar i nedre högra hörnet av skärmen. @@ -8172,57 +8173,57 @@ Error was: Shows information about input and audio latency in the top-right corner of the display. - + Visar information om inmatnings- och ljudlatens i övre högra hörnet av skärmen. Shows information about the emulated GPU in the top-right corner of the display. - + Visar information om emulerad GPU i övre högra hörnet av skärmen. Shows on-screen-display messages when events occur. - + Visar OSD-meddelanden när händelser sker. Shows persistent icons when turbo is active or when paused. 
- + Visar bestående ikoner när turbo är aktivt eller när pausat. Shows the current controller state of the system in the bottom-left corner of the display. - + Visar aktuellt tillstånd för handkontroller för systemet i nedre vänstra hörnet av skärmen. Shows the current emulation speed of the system in the top-right corner of the display as a percentage. - + Visar aktuell emuleringshastighet för systemet i övre högra hörnet av skärmen som ett procenttal. Shows the current rendering resolution of the system in the top-right corner of the display. - + Visar aktuell renderingsupplösning för systemet i övre högra hörnet av skärmen. Shows the game you are currently playing as part of your profile in Discord. - + Visar spelet som du för närvarande spelar som en del av din profil i Discord. Shows the host's CPU usage of each system thread in the top-right corner of the display. - + Visar värdens CPU-användning för varje systemtråd i övre högra hörnet av skärmen. Shows the host's GPU usage in the top-right corner of the display. - + Visar värdens GPU-användning i övre högra hörnet av skärmen. Shows the number of frames (or v-syncs) displayed per second by the system in the top-right corner of the display. - + Visar antalet bildrutor (eller v-syncs) som visas per sekund av systemet i övre högra hörnet av skärmen. @@ -8257,7 +8258,7 @@ Error was: Smooth Scrolling - + Mjuk rullning @@ -8277,7 +8278,7 @@ Error was: Sort Reversed - + Omvänd sortering @@ -8287,7 +8288,7 @@ Error was: Spectator Mode - + Åskådarläge @@ -8312,7 +8313,7 @@ Error was: Stores the current settings to an input profile. - + Lagrar aktuella inställningar till en inmatningsprofil. @@ -8337,12 +8338,12 @@ Error was: Switches between full screen and windowed when the window is double-clicked. - + Växlar mellan helskärm och fönsterläge när fönstret dubbelklickas. 
Sync To Host Refresh Rate - + Synka till värdens uppdateringsfrekvens @@ -8407,7 +8408,7 @@ Error was: Threaded Rendering - + Trådad rendering @@ -8437,7 +8438,7 @@ Error was: Toggle every %d frames - + Växla var %d bildruta @@ -8452,7 +8453,7 @@ Error was: True Color Rendering - + True Color-rendering @@ -8592,12 +8593,12 @@ Error was: WARNING: Activating cheats can cause unpredictable behavior, crashing, soft-locks, or broken saved games. - + VARNING: Aktivering av fusk kan orsaka oförutsett beteende, krascher, mjuklåsningar eller trasiga sparade spel. WARNING: Activating game patches can cause unpredictable behavior, crashing, soft-locks, or broken saved games. - + VARNING: Aktivering av spelpatchar kan orsaka oförutsett beteende, krascher, mjuklåsningar eller trasiga sparade spel. From affbdfc350028cce60b476d563eb34bb92da6a1d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 22:37:42 +1000 Subject: [PATCH 03/35] Timer: Fix >1 second sleeps on MacOS --- src/common/timer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/timer.cpp b/src/common/timer.cpp index bdcb20cde..6b74097d8 100644 --- a/src/common/timer.cpp +++ b/src/common/timer.cpp @@ -395,7 +395,7 @@ void Timer::NanoSleep(std::uint64_t ns) // Round down to the next millisecond. 
usleep(static_cast((ns / 1000000) * 1000)); #else - const struct timespec ts = {0, static_cast(ns)}; + const struct timespec ts = {static_cast(ns / 1000000000ULL), static_cast(ns % 1000000000ULL)}; nanosleep(&ts, nullptr); #endif } From e647192437e5d69234721ce5a30de5c51ae41906 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 20 Nov 2024 20:44:37 +1000 Subject: [PATCH 04/35] GPUDevice: Add compute shader support --- src/util/d3d11_device.cpp | 84 +++++++++---- src/util/d3d11_device.h | 6 + src/util/d3d11_pipeline.cpp | 170 ++++++++++++++++++------- src/util/d3d11_pipeline.h | 12 +- src/util/d3d12_builders.h | 2 + src/util/d3d12_device.cpp | 128 ++++++++++++++++++- src/util/d3d12_device.h | 5 + src/util/d3d12_pipeline.cpp | 55 ++++++++ src/util/d3d12_pipeline.h | 1 + src/util/gpu_device.cpp | 68 ++++++++-- src/util/gpu_device.h | 34 +++-- src/util/metal_device.h | 32 +++-- src/util/metal_device.mm | 236 +++++++++++++++++++++++++---------- src/util/opengl_device.cpp | 13 ++ src/util/opengl_device.h | 3 + src/util/vulkan_builders.cpp | 5 +- src/util/vulkan_builders.h | 2 +- src/util/vulkan_device.cpp | 113 ++++++++++++++--- src/util/vulkan_device.h | 6 +- src/util/vulkan_pipeline.cpp | 13 ++ 20 files changed, 791 insertions(+), 197 deletions(-) diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index a8bcfc6a8..c6d44392c 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features) m_features.texture_buffers_emulated_with_ssbo = false; m_features.feedback_loops = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); + m_features.compute_shaders = + (!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0); m_features.partial_msaa_resolve = false; m_features.memory_import = false; m_features.explicit_present = false; @@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, 
u32 data_size) m_uniform_buffer.Unmap(m_context.Get(), req_size); s_stats.buffer_streamed += data_size; - if (m_uniform_buffer.IsUsingMapNoOverwrite()) - { - const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u; - const UINT num_constants = req_size / 16u; - m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); - m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); - } - else - { - DebugAssert(res.index_aligned == 0); - m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); - } + BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size); } void* D3D11Device::MapUniformBuffer(u32 size) @@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size) m_uniform_buffer.Unmap(m_context.Get(), req_size); s_stats.buffer_streamed += size; + BindUniformBuffer(pos, req_size); +} + +void D3D11Device::BindUniformBuffer(u32 offset, u32 size) +{ if (m_uniform_buffer.IsUsingMapNoOverwrite()) { - const UINT first_constant = pos / 16u; - const UINT num_constants = req_size / 16u; - m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); - m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + const UINT first_constant = offset / 16u; + const UINT num_constants = size / 16u; + if (m_current_compute_shader) + { + m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + } + else + { + m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + } } else { - DebugAssert(pos == 0); - 
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + DebugAssert(offset == 0); + if (m_current_compute_shader) + { + m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + } + else + { + m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + } } } @@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu for (u32 i = 0; i < m_num_current_render_targets; i++) uavs[i] = m_current_render_targets[i]->GetD3DUAV(); - m_context->OMSetRenderTargetsAndUnorderedAccessViews( - 0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0, - m_num_current_render_targets, uavs.data(), nullptr); + if (!m_current_compute_shader) + { + m_context->OMSetRenderTargetsAndUnorderedAccessViews( + 0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0, + m_num_current_render_targets, uavs.data(), nullptr); + } + else + { + m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr); + } } else { @@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s { m_current_textures[slot] = T; m_context->PSSetShaderResources(slot, 1, &T); + if (m_current_compute_shader) + m_context->CSSetShaderResources(slot, 1, &T); } if (m_current_samplers[slot] != S) { m_current_samplers[slot] = S; m_context->PSSetSamplers(slot, 1, &S); + if (m_current_compute_shader) + m_context->CSSetSamplers(slot, 1, &S); } } @@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) if (m_current_textures[slot] != B) { m_current_textures[slot] = B; + + // Compute doesn't support texture buffers, yet... 
m_context->PSSetShaderResources(slot, 1, &B); } } @@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc) void D3D11Device::Draw(u32 vertex_count, u32 base_vertex) { - DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped()); + DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader); s_stats.num_draws++; m_context->Draw(vertex_count, base_vertex); } void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { - DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped()); + DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader); s_stats.num_draws++; m_context->DrawIndexed(index_count, base_index, base_vertex); } @@ -1129,3 +1151,15 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba { Panic("Barriers are not supported"); } + +void D3D11Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) +{ + DebugAssert(m_current_compute_shader); + s_stats.num_draws++; + + const u32 groups_x = threads_x / group_size_x; + const u32 groups_y = threads_y / group_size_y; + const u32 groups_z = threads_z / group_size_z; + m_context->Dispatch(groups_x, groups_y, groups_z); +} diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index 2fbb2dfa4..e1b327250 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -75,6 +75,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -98,6 +99,8 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, 
u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; @@ -140,6 +143,8 @@ private: bool CreateBuffers(); void DestroyBuffers(); + void BindUniformBuffer(u32 offset, u32 size); + void UnbindComputePipeline(); bool IsRenderTargetBound(const D3D11Texture* tex) const; @@ -180,6 +185,7 @@ private: ID3D11VertexShader* m_current_vertex_shader = nullptr; ID3D11GeometryShader* m_current_geometry_shader = nullptr; ID3D11PixelShader* m_current_pixel_shader = nullptr; + ID3D11ComputeShader* m_current_compute_shader = nullptr; ID3D11RasterizerState* m_current_rasterizer_state = nullptr; ID3D11DepthStencilState* m_current_depth_state = nullptr; ID3D11BlendState* m_current_blend_state = nullptr; diff --git a/src/util/d3d11_pipeline.cpp b/src/util/d3d11_pipeline.cpp index 0c2301cec..b0d4dd681 100644 --- a/src/util/d3d11_pipeline.cpp +++ b/src/util/d3d11_pipeline.cpp @@ -3,6 +3,7 @@ #include "d3d11_pipeline.h" #include "d3d11_device.h" +#include "d3d11_texture.h" #include "d3d_common.h" #include "common/assert.h" @@ -121,10 +122,10 @@ std::unique_ptr D3D11Device::CreateShaderFromSource(GPUShaderStage st D3D11Pipeline::D3D11Pipeline(ComPtr rs, ComPtr ds, ComPtr bs, ComPtr il, ComPtr vs, - ComPtr gs, ComPtr ps, + ComPtr gs, ComPtr ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor) : m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)), - m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride), + m_gs(std::move(gs)), m_ps_or_cs(std::move(ps_or_cs)), m_topology(topology), m_vertex_stride(vertex_stride), m_blend_factor(blend_factor), 
m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor)) { } @@ -215,7 +216,8 @@ size_t D3D11Device::BlendStateMapHash::operator()(const BlendStateMapKey& key) c return h; } -D3D11Device::ComPtr D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, Error* error) +D3D11Device::ComPtr D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, + Error* error) { ComPtr dbs; @@ -365,69 +367,124 @@ std::unique_ptr D3D11Device::CreatePipeline(const GPUPipeline::Grap primitives[static_cast(config.primitive)], vertex_stride, config.blend.constant)); } +std::unique_ptr D3D11Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + if (!config.compute_shader) [[unlikely]] + { + Error::SetStringView(error, "Missing compute shader."); + return {}; + } + + return std::unique_ptr( + new D3D11Pipeline(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + static_cast(config.compute_shader)->GetComputeShader(), + D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0)); +} + void D3D11Device::SetPipeline(GPUPipeline* pipeline) { if (m_current_pipeline == pipeline) return; + const bool was_compute = m_current_pipeline && m_current_pipeline->IsComputePipeline(); D3D11Pipeline* const PL = static_cast(pipeline); m_current_pipeline = PL; - if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il) + if (!PL->IsComputePipeline()) { - m_current_input_layout = il; - m_context->IASetInputLayout(il); - } + if (was_compute) + UnbindComputePipeline(); - if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride) - { - const UINT offset = 0; - m_current_vertex_stride = PL->GetVertexStride(); - m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset); - } + if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il) + { + m_current_input_layout = il; + m_context->IASetInputLayout(il); + } - if (D3D_PRIMITIVE_TOPOLOGY 
topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology) - { - m_current_primitive_topology = topology; - m_context->IASetPrimitiveTopology(topology); - } + if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride) + { + const UINT offset = 0; + m_current_vertex_stride = PL->GetVertexStride(); + m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset); + } - if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs) - { - m_current_vertex_shader = vs; - m_context->VSSetShader(vs, nullptr, 0); - } + if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology) + { + m_current_primitive_topology = topology; + m_context->IASetPrimitiveTopology(topology); + } - if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs) - { - m_current_geometry_shader = gs; - m_context->GSSetShader(gs, nullptr, 0); - } + if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs) + { + m_current_vertex_shader = vs; + m_context->VSSetShader(vs, nullptr, 0); + } - if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps) - { - m_current_pixel_shader = ps; - m_context->PSSetShader(ps, nullptr, 0); - } + if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs) + { + m_current_geometry_shader = gs; + m_context->GSSetShader(gs, nullptr, 0); + } - if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs) - { - m_current_rasterizer_state = rs; - m_context->RSSetState(rs); - } + if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps) + { + m_current_pixel_shader = ps; + m_context->PSSetShader(ps, nullptr, 0); + } - if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds) - { - m_current_depth_state = ds; - 
m_context->OMSetDepthStencilState(ds, 0); - } + if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs) + { + m_current_rasterizer_state = rs; + m_context->RSSetState(rs); + } - if (ID3D11BlendState* bs = PL->GetBlendState(); - m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor()) + if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds) + { + m_current_depth_state = ds; + m_context->OMSetDepthStencilState(ds, 0); + } + + if (ID3D11BlendState* bs = PL->GetBlendState(); + m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor()) + { + m_current_blend_state = bs; + m_current_blend_factor = PL->GetBlendFactor(); + m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu); + } + } + else { - m_current_blend_state = bs; - m_current_blend_factor = PL->GetBlendFactor(); - m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu); + if (ID3D11ComputeShader* cs = m_current_pipeline->GetComputeShader(); cs != m_current_compute_shader) + { + m_current_compute_shader = cs; + m_context->CSSetShader(cs, nullptr, 0); + } + + if (!was_compute) + { + // need to bind all SRVs/samplers + u32 count; + for (count = 0; count < MAX_TEXTURE_SAMPLERS; count++) + { + if (!m_current_textures[count]) + break; + } + if (count > 0) + { + m_context->CSSetShaderResources(0, count, m_current_textures.data()); + m_context->CSSetSamplers(0, count, m_current_samplers.data()); + } + + if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) + { + ID3D11UnorderedAccessView* uavs[MAX_TEXTURE_SAMPLERS]; + for (u32 i = 0; i < m_num_current_render_targets; i++) + uavs[i] = m_current_render_targets[i]->GetD3DUAV(); + + m_context->OMSetRenderTargets(0, nullptr, nullptr); + m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs, nullptr); + } + } } } @@ -436,6 +493,23 @@ void 
D3D11Device::UnbindPipeline(D3D11Pipeline* pl) if (m_current_pipeline != pl) return; + if (pl->IsComputePipeline()) + UnbindComputePipeline(); + // Let the runtime deal with the dead objects... m_current_pipeline = nullptr; } + +void D3D11Device::UnbindComputePipeline() +{ + m_current_compute_shader = nullptr; + + ID3D11ShaderResourceView* null_srvs[MAX_TEXTURE_SAMPLERS] = {}; + ID3D11SamplerState* null_samplers[MAX_TEXTURE_SAMPLERS] = {}; + ID3D11UnorderedAccessView* null_uavs[MAX_RENDER_TARGETS] = {}; + m_context->CSSetShader(nullptr, nullptr, 0); + m_context->CSSetShaderResources(0, MAX_TEXTURE_SAMPLERS, null_srvs); + m_context->CSSetSamplers(0, MAX_TEXTURE_SAMPLERS, null_samplers); + if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) + m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, null_uavs, nullptr); +} diff --git a/src/util/d3d11_pipeline.h b/src/util/d3d11_pipeline.h index 88e825750..c3d58d3b2 100644 --- a/src/util/d3d11_pipeline.h +++ b/src/util/d3d11_pipeline.h @@ -51,13 +51,18 @@ public: void SetDebugName(std::string_view name) override; + ALWAYS_INLINE bool IsComputePipeline() const { return !m_vs; } ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); } ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); } ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); } ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); } ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); } ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); } - ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); } + ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return static_cast(m_ps_or_cs.Get()); } + ALWAYS_INLINE ID3D11ComputeShader* GetComputeShader() const + { + return static_cast(m_ps_or_cs.Get()); + } ALWAYS_INLINE 
D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; } ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; } ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; } @@ -66,7 +71,8 @@ public: private: D3D11Pipeline(ComPtr rs, ComPtr ds, ComPtr bs, ComPtr il, ComPtr vs, ComPtr gs, - ComPtr ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor); + ComPtr ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, + u32 blend_factor); ComPtr m_rs; ComPtr m_ds; @@ -74,7 +80,7 @@ private: ComPtr m_il; ComPtr m_vs; ComPtr m_gs; - ComPtr m_ps; + ComPtr m_ps_or_cs; D3D11_PRIMITIVE_TOPOLOGY m_topology; u32 m_vertex_stride; u32 m_blend_factor; diff --git a/src/util/d3d12_builders.h b/src/util/d3d12_builders.h index ae2970716..9dbffcf77 100644 --- a/src/util/d3d12_builders.h +++ b/src/util/d3d12_builders.h @@ -115,6 +115,8 @@ public: ComputePipelineBuilder(); ~ComputePipelineBuilder() = default; + ALWAYS_INLINE const D3D12_COMPUTE_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; } + void Clear(); Microsoft::WRL::ComPtr Create(ID3D12Device* device, Error* error, bool clear); diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index cd17a22e4..be58487c9 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1298,6 +1298,7 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab m_features.texture_buffers_emulated_with_ssbo = false; m_features.feedback_loops = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); + m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS); m_features.partial_msaa_resolve = true; m_features.memory_import = false; m_features.explicit_present = true; @@ -1552,6 +1553,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) 1, // SingleTextureBufferAndPushConstants 0, // MultiTextureAndUBO 2, // MultiTextureAndPushConstants + 2, // 
ComputeSingleTextureAndPushConstants }; DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE); @@ -1565,7 +1567,11 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) const u32 push_param = push_parameters[static_cast(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature()); - GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0); + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + if (!IsUsingComputeRootSignature()) + cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0); + else + cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0); } void* D3D12Device::MapUniformBuffer(u32 size) @@ -1687,6 +1693,18 @@ bool D3D12Device::CreateRootSignatures(Error* error) } } + { + auto& rs = m_root_signatures[0][static_cast(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)]; + + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL); + rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create(error, true))) + return false; + D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout"); + } + return true; } @@ -1810,6 +1828,7 @@ void D3D12Device::BeginRenderPass() rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); rt->SetUseFenceValue(GetCurrentFenceValue()); rt->CommitClear(cmdlist); + rt->SetState(GPUTexture::State::Dirty); } } if (m_current_depth_target) @@ -2174,15 +2193,88 @@ void D3D12Device::PreDrawCheck() BeginRenderPass(); } +void D3D12Device::PreDispatchCheck() +{ + if (InRenderPass()) + EndRenderPass(); + + // Transition images. 
+ ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + + // All textures should be in shader read only optimal already, but just in case.. + const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); + for (u32 i = 0; i < num_textures; i++) + { + if (m_current_textures[i]) + m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + + if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) + { + // Still need to clear the RTs. + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + D3D12Texture* const rt = m_current_render_targets[i]; + rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + rt->SetUseFenceValue(GetCurrentFenceValue()); + rt->CommitClear(cmdlist); + rt->SetState(GPUTexture::State::Dirty); + } + } + + // If this is a new command buffer, bind the pipeline and such. + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + SetInitialPipelineState(); + + // TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants. 
+ DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); + const u32 dirty = std::exchange(m_dirty_flags, 0); + if (dirty != 0) + { + if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT) + { + UpdateRootSignature(); + if (!UpdateRootParameters(dirty)) + { + SubmitCommandList(false, "out of descriptors"); + PreDispatchCheck(); + return; + } + } + else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS)) + { + if (!UpdateRootParameters(dirty)) + { + SubmitCommandList(false, "out of descriptors"); + PreDispatchCheck(); + return; + } + } + } +} + bool D3D12Device::IsUsingROVRootSignature() const { return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0); } +bool D3D12Device::IsUsingComputeRootSignature() const +{ + return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants); +} + void D3D12Device::UpdateRootSignature() { - GetCommandList()->SetGraphicsRootSignature( - m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast(m_current_pipeline_layout)].Get()); + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + if (!IsUsingComputeRootSignature()) + { + cmdlist->SetGraphicsRootSignature( + m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast(m_current_pipeline_layout)].Get()); + } + else + { + cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast(m_current_pipeline_layout)].Get()); + } } template @@ -2223,7 +2315,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); + if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); + else + cmdlist->SetComputeRootDescriptorTable(0, gpu_handle); } if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0) @@ -2241,7 +2336,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) return false; } - 
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle); + if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle); + else + cmdlist->SetComputeRootDescriptorTable(1, gpu_handle); } if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) @@ -2283,7 +2381,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) 1 : ((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 : 2); - cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle); + if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle); + else + cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle); } return true; @@ -2308,6 +2409,9 @@ bool D3D12Device::UpdateRootParameters(u32 dirty) case GPUPipeline::Layout::MultiTextureAndPushConstants: return UpdateParametersForLayout(dirty); + case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants: + return UpdateParametersForLayout(dirty); + default: UnreachableCode(); } @@ -2331,3 +2435,15 @@ void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba { Panic("Barriers are not supported"); } + +void D3D12Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) +{ + PreDispatchCheck(); + s_stats.num_draws++; + + const u32 groups_x = threads_x / group_size_x; + const u32 groups_y = threads_y / group_size_y; + const u32 groups_z = threads_z / group_size_z; + GetCommandList()->Dispatch(groups_x, groups_y, groups_z); +} diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index e20bd525f..ba065cfc5 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -96,6 +96,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) 
override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -119,6 +120,8 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; @@ -275,8 +278,10 @@ private: ID3D12RootSignature* GetCurrentRootSignature() const; void SetInitialPipelineState(); void PreDrawCheck(); + void PreDispatchCheck(); bool IsUsingROVRootSignature() const; + bool IsUsingComputeRootSignature() const; void UpdateRootSignature(); template bool UpdateParametersForLayout(u32 dirty); diff --git a/src/util/d3d12_pipeline.cpp b/src/util/d3d12_pipeline.cpp index c25a67c5c..4b78c2aff 100644 --- a/src/util/d3d12_pipeline.cpp +++ b/src/util/d3d12_pipeline.cpp @@ -107,6 +107,18 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config) return SHA1Digest::DigestToString(digest); } +std::string D3D12Pipeline::GetPipelineName(const ComputeConfig& config) +{ + SHA1Digest hash; + hash.Update(&config.layout, sizeof(config.layout)); + if (const D3D12Shader* shader = static_cast(config.compute_shader)) + hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize()); + + u8 digest[SHA1Digest::DIGEST_SIZE]; + hash.Final(digest); + return SHA1Digest::DigestToString(digest); +} + std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) { static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = @@ 
-274,3 +286,46 @@ std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::Grap pipeline, config.layout, primitives[static_cast(config.primitive)], config.input_layout.vertex_attributes.empty() ? 0 : config.input_layout.vertex_stride, config.blend.constant)); } + +std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + D3D12::ComputePipelineBuilder cpb; + cpb.SetRootSignature(m_root_signatures[0][static_cast(config.layout)].Get()); + cpb.SetShader(static_cast(config.compute_shader)->GetBytecodeData(), + static_cast(config.compute_shader)->GetBytecodeSize()); + + ComPtr pipeline; + if (m_pipeline_library) + { + const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config)); + HRESULT hr = + m_pipeline_library->LoadComputePipeline(name.c_str(), cpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf())); + if (FAILED(hr)) + { + // E_INVALIDARG = not found. + if (hr != E_INVALIDARG) + ERROR_LOG("LoadComputePipeline() failed with HRESULT {:08X}", static_cast(hr)); + + // Need to create it normally. + pipeline = cpb.Create(m_device.Get(), error, false); + + // Store if it wasn't an OOM or something else. 
+ if (pipeline && hr == E_INVALIDARG) + { + hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get()); + if (FAILED(hr)) + ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast(hr)); + } + } + } + else + { + pipeline = cpb.Create(m_device.Get(), error, false); + } + + if (!pipeline) + return {}; + + return std::unique_ptr( + new D3D12Pipeline(pipeline, config.layout, D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0)); +} diff --git a/src/util/d3d12_pipeline.h b/src/util/d3d12_pipeline.h index bca9494fa..e2f83d14f 100644 --- a/src/util/d3d12_pipeline.h +++ b/src/util/d3d12_pipeline.h @@ -51,6 +51,7 @@ public: void SetDebugName(std::string_view name) override; static std::string GetPipelineName(const GraphicsConfig& config); + static std::string GetPipelineName(const ComputeConfig& config); private: D3D12Pipeline(Microsoft::WRL::ComPtr pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology, diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index d9554289c..6a4c3a6dd 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -1579,11 +1579,13 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP // Need to know if there's UBOs for mapping. 
const spvc_reflected_resource *ubos, *textures; - size_t ubos_count, textures_count; + size_t ubos_count, textures_count, images_count; if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos, &ubos_count)) != SPVC_SUCCESS || (sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE, - &textures, &textures_count)) != SPVC_SUCCESS) + &textures, &textures_count)) != SPVC_SUCCESS || + (sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE, + &textures, &images_count)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast(sres)); return {}; @@ -1592,6 +1594,7 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP [[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler); [[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0; [[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1; + [[maybe_unused]] static constexpr u32 IMAGE_DESCRIPTOR_SET = 2; switch (target_language) { @@ -1659,6 +1662,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP } } } + + if (stage == GPUShaderStage::Compute) + { + for (u32 i = 0; i < images_count; i++) + { + const spvc_hlsl_resource_binding rb = {.stage = execmodel, + .desc_set = IMAGE_DESCRIPTOR_SET, + .binding = i, + .cbv = {}, + .uav = {.register_space = 0, .register_binding = i}, + .srv = {}, + .sampler = {}}; + if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) + { + Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast(sres)); + return {}; + } + } + } } break; #endif @@ -1727,12 +1749,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP return {}; } - if (stage == GPUShaderStage::Fragment) + const 
spvc_msl_resource_binding pc_rb = {.stage = execmodel, + .desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET, + .binding = SPVC_MSL_PUSH_CONSTANT_BINDING, + .msl_buffer = 0, + .msl_texture = 0, + .msl_sampler = 0}; + if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS) + { + Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}", + static_cast(sres)); + return {}; + } + + if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute) { for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) { - const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment, - .desc_set = 1, + const spvc_msl_resource_binding rb = {.stage = execmodel, + .desc_set = TEXTURE_DESCRIPTOR_SET, .binding = i, .msl_buffer = i, .msl_texture = i, @@ -1744,16 +1779,31 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP return {}; } } + } - if (!m_features.framebuffer_fetch) + if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch) + { + const spvc_msl_resource_binding rb = { + .stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS}; + + if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) + { + Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}", + static_cast(sres)); + return {}; + } + } + + if (stage == GPUShaderStage::Compute) + { + for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) { const spvc_msl_resource_binding rb = { - .stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS}; + .stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i}; if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) { - Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}", - static_cast(sres)); + 
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast(sres)); return {}; } } diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 309b4db39..6c1060c60 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -160,6 +160,9 @@ public: // Multiple textures, 128 byte UBO via push constants. MultiTextureAndPushConstants, + // 128 byte UBO via push constants, 1 texture, compute shader. + ComputeSingleTextureAndPushConstants, + MaxCount }; @@ -416,6 +419,12 @@ public: u32 GetRenderTargetCount() const; }; + struct ComputeConfig + { + Layout layout; + GPUShader* compute_shader; + }; + GPUPipeline(); virtual ~GPUPipeline(); @@ -501,9 +510,10 @@ public: FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2), FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3), FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4), - FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5), - FEATURE_MASK_MEMORY_IMPORT = (1 << 6), - FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7), + FEATURE_MASK_COMPUTE_SHADERS = (1 << 5), + FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6), + FEATURE_MASK_MEMORY_IMPORT = (1 << 7), + FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8), }; enum class DrawBarrier : u32 @@ -532,6 +542,7 @@ public: bool texture_buffers_emulated_with_ssbo : 1; bool feedback_loops : 1; bool geometry_shaders : 1; + bool compute_shaders : 1; bool partial_msaa_resolve : 1; bool memory_import : 1; bool explicit_present : 1; @@ -625,11 +636,20 @@ public: 0, // SingleTextureBufferAndPushConstants MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants + 1, // ComputeSingleTextureAndPushConstants }; return counts[static_cast(layout)]; } + /// Returns the number of thread groups to dispatch for a given total count and local size. 
+ static constexpr std::tuple GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x, + u32 local_size_y, u32 local_size_z) + { + return std::make_tuple((count_x + (local_size_x - 1)) / local_size_x, (count_y + (local_size_y - 1)) / local_size_y, + (count_z + (local_size_z - 1)) / local_size_z); + } + ALWAYS_INLINE const Features& GetFeatures() const { return m_features; } ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; } ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; } @@ -638,10 +658,6 @@ public: ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); } ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast(m_main_swap_chain); } - // ALWAYS_INLINE u32 GetMainSwapChainWidth() const { return m_main_swap_chain->GetWidth(); } - // ALWAYS_INLINE u32 GetMainSwapChainHeight() const { return m_main_swap_chain->GetHeight(); } - // ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; } - // ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; } ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); } ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); } @@ -712,6 +728,8 @@ public: Error* error = nullptr, const char* entry_point = "main"); virtual std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error = nullptr) = 0; + virtual std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, + Error* error = nullptr) = 0; /// Debug messaging. 
virtual void PushDebugGroup(const char* name) = 0; @@ -753,6 +771,8 @@ public: virtual void Draw(u32 vertex_count, u32 base_vertex) = 0; virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0; virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0; + virtual void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) = 0; /// Returns false if the window was completely occluded. virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0; diff --git a/src/util/metal_device.h b/src/util/metal_device.h index 418ab174f..b1a502755 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -78,7 +78,16 @@ class MetalPipeline final : public GPUPipeline public: ~MetalPipeline() override; - ALWAYS_INLINE id GetPipelineState() const { return m_pipeline; } + ALWAYS_INLINE bool IsRenderPipeline() const { return (m_depth != nil); } + ALWAYS_INLINE bool IsComputePipeline() const { return (m_depth == nil); } + ALWAYS_INLINE id GetRenderPipelineState() const + { + return (id)m_pipeline; + } + ALWAYS_INLINE id GetComputePipelineState() const + { + return (id)m_pipeline; + } ALWAYS_INLINE id GetDepthState() const { return m_depth; } ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; } ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; } @@ -86,10 +95,9 @@ public: void SetDebugName(std::string_view name) override; private: - MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, - MTLPrimitiveType primitive); + MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, MTLPrimitiveType primitive); - id m_pipeline; + id m_pipeline; id m_depth; MTLCullMode m_cull_mode; MTLPrimitiveType m_primitive; @@ -251,6 +259,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) override; std::unique_ptr 
CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -265,7 +274,7 @@ public: void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - GPUPipeline::RenderPassFlag feedback_loop) override; + GPUPipeline::RenderPassFlag flags) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -274,6 +283,8 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; @@ -338,7 +349,6 @@ private: std::unique_ptr CreateShaderFromMSL(GPUShaderStage stage, std::string_view source, std::string_view entry_point, Error* error); id GetFunctionFromLibrary(id library, NSString* name); - id CreateComputePipeline(id function, NSString* name); ClearPipelineConfig GetCurrentClearPipelineConfig() const; id GetClearDepthPipeline(const ClearPipelineConfig& config); id GetDepthState(const GPUPipeline::DepthState& ds); @@ -349,9 +359,12 @@ private: void CleanupObjects(); ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); } + ALWAYS_INLINE bool InComputePass() const { return (m_compute_encoder != nil); } ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); } void 
BeginRenderPass(); void EndRenderPass(); + void BeginComputePass(); + void EndComputePass(); void EndInlineUploading(); void EndAnyEncoding(); @@ -359,6 +372,8 @@ private: void SetInitialEncoderState(); void SetViewportInRenderEncoder(); void SetScissorInRenderEncoder(); + void CommitRenderTargetClears(); + void BindRenderTargetsAsComputeImages(); void RenderBlankFrame(MetalSwapChain* swap_chain); @@ -384,7 +399,7 @@ private: id m_shaders = nil; id m_pipeline_archive = nil; - std::vector, id>> + std::vector, std::unique_ptr>> m_resolve_pipelines; std::vector>> m_clear_pipelines; @@ -394,9 +409,10 @@ private: id m_render_cmdbuf = nil; id m_render_encoder = nil; + id m_compute_encoder = nil; u8 m_num_current_render_targets = 0; - GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags; + GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; std::array m_current_render_targets = {}; MetalTexture* m_current_depth_target = nullptr; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 870ca1da2..ba3cd045b 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -77,7 +77,8 @@ static void LogNSError(NSError* error, std::string_view message) { Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, message); Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Code: {}", static_cast(error.code)); - Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Description: {}", [error.description UTF8String]); + Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Description: {}", + [error.description UTF8String]); } static GPUTexture::Format GetTextureFormatForMTLFormat(MTLPixelFormat fmt) @@ -503,28 +504,6 @@ id MetalDevice::GetFunctionFromLibrary(id library, NSSt return function; } -id MetalDevice::CreateComputePipeline(id function, NSString* name) -{ - MTLComputePipelineDescriptor* desc = [MTLComputePipelineDescriptor new]; - if 
(name != nil) - [desc setLabel:name]; - [desc setComputeFunction:function]; - - NSError* err = nil; - id pipeline = [m_device newComputePipelineStateWithDescriptor:desc - options:MTLPipelineOptionNone - reflection:nil - error:&err]; - [desc release]; - if (pipeline == nil) - { - LogNSError(err, "Create compute pipeline failed:"); - return nil; - } - - return pipeline; -} - void MetalDevice::DestroyDevice() { WaitForPreviousCommandBuffers(); @@ -564,11 +543,6 @@ void MetalDevice::DestroyDevice() [it.second release]; } m_depth_states.clear(); - for (auto& it : m_resolve_pipelines) - { - if (it.second != nil) - [it.second release]; - } m_resolve_pipelines.clear(); for (auto& it : m_clear_pipelines) { @@ -755,7 +729,7 @@ std::unique_ptr MetalDevice::CreateShaderFromSource(GPUShaderStage st return CreateShaderFromMSL(stage, source, entry_point, error); } -MetalPipeline::MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, +MetalPipeline::MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, MTLPrimitiveType primitive) : m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive) { @@ -982,6 +956,29 @@ std::unique_ptr MetalDevice::CreatePipeline(const GPUPipeline::Grap } } +std::unique_ptr MetalDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + @autoreleasepool + { + MTLComputePipelineDescriptor* desc = [[MTLComputePipelineDescriptor new] autorelease]; + [desc setComputeFunction:static_cast(config.compute_shader)->GetFunction()]; + + NSError* nserror = nil; + id pipeline = [m_device newComputePipelineStateWithDescriptor:desc + options:MTLPipelineOptionNone + reflection:nil + error:&nserror]; + if (pipeline == nil) + { + LogNSError(nserror, "Failed to create compute pipeline state"); + CocoaTools::NSErrorToErrorObject(error, "newComputePipelineStateWithDescriptor failed: ", nserror); + return {}; + } + + return std::unique_ptr(new MetalPipeline(pipeline, nil, MTLCullModeNone, MTLPrimitiveTypePoint)); + 
} +} + MetalTexture::MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format) : GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture) @@ -1559,14 +1556,14 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 const GPUTexture::Format src_format = dst->GetFormat(); const GPUTexture::Format dst_format = dst->GetFormat(); - id resolve_pipeline = nil; + GPUPipeline* resolve_pipeline; if (auto iter = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(), [src_format, dst_format](const auto& it) { return it.first.first == src_format && it.first.second == dst_format; }); iter != m_resolve_pipelines.end()) { - resolve_pipeline = iter->second; + resolve_pipeline = iter->second.get(); } else { @@ -1579,32 +1576,41 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 if (function == nil) Panic("Failed to get resolve kernel"); - resolve_pipeline = [CreateComputePipeline(function, is_depth ? @"Depth Resolve" : @"Color Resolve") autorelease]; - if (resolve_pipeline != nil) - [resolve_pipeline retain]; - m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), resolve_pipeline); + MetalShader temp_shader(GPUShaderStage::Compute, m_shaders, function); + GPUPipeline::ComputeConfig config; + config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants; + config.compute_shader = &temp_shader; + + std::unique_ptr pipeline = CreatePipeline(config, nullptr); + if (!pipeline) + Panic("Failed to create resolve pipeline"); + + GL_OBJECT_NAME(pipeline, is_depth ? 
"Depth Resolve" : "Color Resolve"); + resolve_pipeline = + m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), std::move(pipeline)).second.get(); } } - if (resolve_pipeline == nil) - Panic("Failed to get resolve pipeline"); if (InRenderPass()) EndRenderPass(); s_stats.num_copies++; - const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth; - const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1); + const id mtl_pipeline = + static_cast(resolve_pipeline)->GetComputePipelineState(); + const u32 threadgroupHeight = mtl_pipeline.maxTotalThreadsPerThreadgroup / mtl_pipeline.threadExecutionWidth; + const MTLSize intrinsicThreadgroupSize = MTLSizeMake(mtl_pipeline.threadExecutionWidth, threadgroupHeight, 1); const MTLSize threadgroupsInGrid = MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width, (src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1); - id computeEncoder = [m_render_cmdbuf computeCommandEncoder]; - [computeEncoder setComputePipelineState:resolve_pipeline]; - [computeEncoder setTexture:static_cast(src)->GetMTLTexture() atIndex:0]; - [computeEncoder setTexture:static_cast(dst)->GetMTLTexture() atIndex:1]; - [computeEncoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize]; - [computeEncoder endEncoding]; + // Set up manually to not disturb state. 
+ BeginComputePass(); + [m_compute_encoder setComputePipelineState:mtl_pipeline]; + [m_compute_encoder setTexture:static_cast(src)->GetMTLTexture() atIndex:0]; + [m_compute_encoder setTexture:static_cast(dst)->GetMTLTexture() atIndex:1]; + [m_compute_encoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize]; + EndComputePass(); } void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c) @@ -1645,7 +1651,7 @@ void MetalDevice::ClearDepth(GPUTexture* t, float d) [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; if (m_current_pipeline) - [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; + [m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()]; if (m_current_cull_mode != MTLCullModeNone) [m_render_encoder setCullMode:m_current_cull_mode]; if (depth != m_current_depth_state) @@ -1674,6 +1680,8 @@ void MetalDevice::CommitClear(MetalTexture* tex) // TODO: We could combine it with the current render pass. 
if (InRenderPass()) EndRenderPass(); + else if (InComputePass()) + EndComputePass(); @autoreleasepool { @@ -1896,11 +1904,13 @@ void MetalDevice::UnmapUniformBuffer(u32 size) } void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - GPUPipeline::RenderPassFlag feedback_loop) + GPUPipeline::RenderPassFlag flags) { bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds || - (!m_features.framebuffer_fetch && ((feedback_loop & GPUPipeline::ColorFeedbackLoop) != - (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop)))); + ((flags & GPUPipeline::BindRenderTargetsAsImages) != + (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) || + (!m_features.framebuffer_fetch && ((flags & GPUPipeline::ColorFeedbackLoop) != + (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop)))); bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); bool needs_rt_clear = false; @@ -1915,12 +1925,19 @@ void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu for (u32 i = num_rts; i < m_num_current_render_targets; i++) m_current_render_targets[i] = nullptr; m_num_current_render_targets = static_cast(num_rts); - m_current_feedback_loop = feedback_loop; + m_current_render_pass_flags = flags; if (changed || needs_rt_clear || needs_ds_clear) { if (InRenderPass()) + { EndRenderPass(); + } + else if (InComputePass() && (flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags) + { + CommitRenderTargetClears(); + BindRenderTargetsAsComputeImages(); + } } } @@ -1931,26 +1948,34 @@ void MetalDevice::SetPipeline(GPUPipeline* pipeline) return; m_current_pipeline = static_cast(pipeline); - if (InRenderPass()) + if (!m_current_pipeline->IsComputePipeline()) { - [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; + if (InRenderPass()) + { + [m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()]; - 
if (m_current_depth_state != m_current_pipeline->GetDepthState()) - { - m_current_depth_state = m_current_pipeline->GetDepthState(); - [m_render_encoder setDepthStencilState:m_current_depth_state]; + if (m_current_depth_state != m_current_pipeline->GetDepthState()) + { + m_current_depth_state = m_current_pipeline->GetDepthState(); + [m_render_encoder setDepthStencilState:m_current_depth_state]; + } + if (m_current_cull_mode != m_current_pipeline->GetCullMode()) + { + m_current_cull_mode = m_current_pipeline->GetCullMode(); + [m_render_encoder setCullMode:m_current_cull_mode]; + } } - if (m_current_cull_mode != m_current_pipeline->GetCullMode()) + else { + // Still need to set depth state before the draw begins. + m_current_depth_state = m_current_pipeline->GetDepthState(); m_current_cull_mode = m_current_pipeline->GetCullMode(); - [m_render_encoder setCullMode:m_current_cull_mode]; } } else { - // Still need to set depth state before the draw begins. - m_current_depth_state = m_current_pipeline->GetDepthState(); - m_current_cull_mode = m_current_pipeline->GetCullMode(); + if (InComputePass()) + [m_compute_encoder setComputePipelineState:m_current_pipeline->GetComputePipelineState()]; } } @@ -1979,6 +2004,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s m_current_textures[slot] = T; if (InRenderPass()) [m_render_encoder setFragmentTexture:T atIndex:slot]; + else if (InComputePass()) + [m_compute_encoder setTexture:T atIndex:slot]; } id S = sampler ? 
static_cast(sampler)->GetSamplerState() : nil; @@ -1987,6 +2014,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s m_current_samplers[slot] = S; if (InRenderPass()) [m_render_encoder setFragmentSamplerState:S atIndex:slot]; + else if (InComputePass()) + [m_compute_encoder setTexture:T atIndex:slot]; } } @@ -2011,6 +2040,8 @@ void MetalDevice::UnbindTexture(MetalTexture* tex) m_current_textures[i] = nil; if (InRenderPass()) [m_render_encoder setFragmentTexture:nil atIndex:i]; + else if (InComputePass()) + [m_compute_encoder setTexture:nil atIndex:0]; } } @@ -2070,7 +2101,7 @@ void MetalDevice::SetScissor(const GSVector4i rc) void MetalDevice::BeginRenderPass() { - DebugAssert(m_render_encoder == nil); + DebugAssert(m_render_encoder == nil && !InComputePass()); // Inline writes :( if (m_inline_upload_encoder != nil) @@ -2180,12 +2211,57 @@ void MetalDevice::BeginRenderPass() void MetalDevice::EndRenderPass() { - DebugAssert(InRenderPass() && !IsInlineUploading()); + DebugAssert(InRenderPass() && !IsInlineUploading() && !InComputePass()); [m_render_encoder endEncoding]; [m_render_encoder release]; m_render_encoder = nil; } +void MetalDevice::BeginComputePass() +{ + DebugAssert(!InRenderPass() && !IsInlineUploading() && !InComputePass()); + + if ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags) + CommitRenderTargetClears(); + + m_compute_encoder = [[m_render_cmdbuf computeCommandEncoder] retain]; + [m_compute_encoder setTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; + [m_compute_encoder setSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; + + if ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags) + BindRenderTargetsAsComputeImages(); + + if (m_current_pipeline && m_current_pipeline->IsComputePipeline()) + [m_compute_encoder 
setComputePipelineState:m_current_pipeline->GetComputePipelineState()]; +} + +void MetalDevice::CommitRenderTargetClears() +{ + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + MetalTexture* rt = m_current_render_targets[i]; + if (rt->GetState() == GPUTexture::State::Invalidated) + rt->SetState(GPUTexture::State::Dirty); + else if (rt->GetState() == GPUTexture::State::Cleared) + CommitClear(rt); + } +} + +void MetalDevice::BindRenderTargetsAsComputeImages() +{ + for (u32 i = 0; i < m_num_current_render_targets; i++) + [m_compute_encoder setTexture:m_current_render_targets[i]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS + i]; +} + +void MetalDevice::EndComputePass() +{ + DebugAssert(InComputePass()); + + [m_compute_encoder endEncoding]; + [m_compute_encoder release]; + m_compute_encoder = nil; +} + void MetalDevice::EndInlineUploading() { DebugAssert(IsInlineUploading() && !InRenderPass()); @@ -2198,6 +2274,8 @@ void MetalDevice::EndAnyEncoding() { if (InRenderPass()) EndRenderPass(); + else if (InComputePass()) + EndComputePass(); else if (IsInlineUploading()) EndInlineUploading(); } @@ -2213,14 +2291,14 @@ void MetalDevice::SetInitialEncoderState() [m_render_encoder setCullMode:m_current_cull_mode]; if (m_current_depth_state != nil) [m_render_encoder setDepthStencilState:m_current_depth_state]; - if (m_current_pipeline != nil) - [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; + if (m_current_pipeline && m_current_pipeline->IsRenderPipeline()) + [m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()]; [m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; [m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; if (m_current_ssbo) [m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1]; - if (!m_features.framebuffer_fetch && (m_current_feedback_loop & 
GPUPipeline::ColorFeedbackLoop)) + if (!m_features.framebuffer_fetch && (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop)) { DebugAssert(m_current_render_targets[0]); [m_render_encoder setFragmentTexture:m_current_render_targets[0]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS]; @@ -2249,7 +2327,12 @@ void MetalDevice::SetScissorInRenderEncoder() void MetalDevice::PreDrawCheck() { if (!InRenderPass()) + { + if (InComputePass()) + EndComputePass(); + BeginRenderPass(); + } } void MetalDevice::Draw(u32 vertex_count, u32 base_vertex) @@ -2392,6 +2475,25 @@ void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba } } +void MetalDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) +{ + if (!InComputePass()) + { + if (InRenderPass()) + EndRenderPass(); + + BeginComputePass(); + } + + DebugAssert(m_current_pipeline && m_current_pipeline->IsComputePipeline()); + id pipeline = m_current_pipeline->GetComputePipelineState(); + + // TODO: We could remap to the optimal group size.. 
+ [m_compute_encoder dispatchThreads:MTLSizeMake(threads_x, threads_y, threads_z) + threadsPerThreadgroup:MTLSizeMake(group_size_x, group_size_y, group_size_z)]; +} + id MetalDevice::GetBlitEncoder(bool is_inline) { @autoreleasepool @@ -2450,7 +2552,7 @@ GPUDevice::PresentResult MetalDevice::BeginPresent(GPUSwapChain* swap_chain, u32 s_stats.num_render_passes++; std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_num_current_render_targets = 0; - m_current_feedback_loop = GPUPipeline::NoRenderPassFlags; + m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; m_current_depth_target = nullptr; m_current_pipeline = nullptr; m_current_depth_state = nil; diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index e929da63d..29a43df2e 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -207,6 +207,12 @@ void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t) } } +std::unique_ptr OpenGLDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + ERROR_LOG("Compute shaders are not yet supported."); + return {}; +} + void OpenGLDevice::PushDebugGroup(const char* name) { #ifdef _DEBUG @@ -488,6 +494,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features) m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2); + m_features.compute_shaders = false; m_features.gpu_timing = !(m_gl_context->IsGLES() && (!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT)); @@ -1078,6 +1085,12 @@ void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b Panic("Barriers are not supported"); } +void OpenGLDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) +{ + Panic("Compute shaders are not supported"); +} + void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, 
void** map_ptr, u32* map_space, u32* map_base_vertex) { diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index e499c5931..d16c3b6d9 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -77,6 +77,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -100,6 +101,8 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) override; PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override; void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override; diff --git a/src/util/vulkan_builders.cpp b/src/util/vulkan_builders.cpp index d9a64a356..6c6251acb 100644 --- a/src/util/vulkan_builders.cpp +++ b/src/util/vulkan_builders.cpp @@ -627,14 +627,15 @@ void Vulkan::ComputePipelineBuilder::Clear() m_smap_constants = {}; } -VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/, - bool clear /*= true*/) +VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, + Error* error) { VkPipeline pipeline; VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: "); + SetErrorObject(error, "vkCreateComputePipelines() failed: ", 
res); return VK_NULL_HANDLE; } diff --git a/src/util/vulkan_builders.h b/src/util/vulkan_builders.h index f65f2e1aa..760caecee 100644 --- a/src/util/vulkan_builders.h +++ b/src/util/vulkan_builders.h @@ -197,7 +197,7 @@ public: void Clear(); - VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true); + VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, Error* error); void SetShader(VkShaderModule module, const char* entry_point); diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 924ffde3f..4a41a6818 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -2447,6 +2447,7 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe WARNING_LOG("Emulating texture buffers with SSBOs."); m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader; + m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS); m_features.partial_msaa_resolve = true; m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host; @@ -2802,7 +2803,8 @@ bool VulkanDevice::CreatePipelineLayouts() } { - dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, + VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT); if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout"); @@ -2822,7 +2824,8 @@ bool VulkanDevice::CreatePipelineLayouts() if (m_optional_extensions.vk_khr_push_descriptor) dslb.SetPushFlag(); for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) - dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, + 
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT); if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout"); @@ -2837,14 +2840,13 @@ bool VulkanDevice::CreatePipelineLayouts() Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout"); } - if (m_features.raster_order_views) + for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) { - for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) - dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) - return false; - Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout"); + dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT); } + if ((m_image_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, m_image_ds_layout, "ROV Descriptor Set Layout"); for (u32 type = 0; type < 3; type++) { @@ -2860,7 +2862,7 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout"); @@ -2873,7 +2875,7 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; @@ -2887,7 +2889,7 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) 
plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; @@ -2901,7 +2903,7 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout"); @@ -2915,13 +2917,24 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout"); } } + { + VkPipelineLayout& pl = + m_pipeline_layouts[0][static_cast(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)]; + plb.AddDescriptorSet(m_single_texture_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); + plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout"); + } + return true; } @@ -2942,7 +2955,7 @@ void VulkanDevice::DestroyPipelineLayouts() l = VK_NULL_HANDLE; } }; - destroy_dsl(m_rov_ds_layout); + destroy_dsl(m_image_ds_layout); destroy_dsl(m_feedback_loop_ds_layout); destroy_dsl(m_multi_texture_ds_layout); destroy_dsl(m_single_texture_buffer_ds_layout); @@ -3674,12 +3687,56 @@ void VulkanDevice::PreDrawCheck() } } +void VulkanDevice::PreDispatchCheck() +{ + // All textures should be in shader read only optimal already, but just in case.. 
+ const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); + for (u32 i = 0; i < num_textures; i++) + { + if (m_current_textures[i]) + m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + } + + // Binding as image, but we still need to clear it. + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + VulkanTexture* rt = m_current_render_targets[i]; + if (rt->GetState() == GPUTexture::State::Cleared) + rt->CommitClear(m_current_command_buffer); + rt->SetState(GPUTexture::State::Dirty); + rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage); + rt->SetUseFenceCounter(GetCurrentFenceCounter()); + } + + // If this is a new command buffer, bind the pipeline and such. + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + SetInitialPipelineState(); + + DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); + const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT); + const u32 dirty = m_dirty_flags & update_mask; + m_dirty_flags = m_dirty_flags & ~update_mask; + + if (dirty != 0) + { + if (!UpdateDescriptorSets(dirty)) + { + SubmitCommandBuffer(false, "out of descriptor sets"); + PreDispatchCheck(); + return; + } + } +} + template bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) { [[maybe_unused]] bool new_dynamic_offsets = false; - VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout(); + constexpr VkPipelineBindPoint vk_bind_point = + ((layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) ? 
VK_PIPELINE_BIND_POINT_GRAPHICS : + VK_PIPELINE_BIND_POINT_COMPUTE); + const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout(); std::array ds; u32 first_ds = 0; u32 num_ds = 0; @@ -3700,7 +3757,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) } if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || - layout == GPUPipeline::Layout::SingleTextureAndPushConstants) + layout == GPUPipeline::Layout::SingleTextureAndPushConstants || + layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) { VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get(); DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE); @@ -3727,7 +3785,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) } const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0; - dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set); + dsub.PushUpdate(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, set); if (num_ds == 0) return true; } @@ -3757,7 +3815,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) { if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) { - VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout); + VkDescriptorSet ids = AllocateDescriptorSet(m_image_ds_layout); if (ids == VK_NULL_HANDLE) return false; @@ -3792,8 +3850,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) } DebugAssert(num_ds > 0); - vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds, - num_ds, ds.data(), static_cast(new_dynamic_offsets), + vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, first_ds, num_ds, ds.data(), + static_cast(new_dynamic_offsets), new_dynamic_offsets ? 
&m_uniform_buffer_position : nullptr); return true; @@ -3818,6 +3876,9 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty) case GPUPipeline::Layout::MultiTextureAndPushConstants: return UpdateDescriptorSetsForLayout(dirty); + case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants: + return UpdateDescriptorSetsForLayout(dirty); + default: UnreachableCode(); } @@ -3911,3 +3972,15 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b DefaultCaseIsUnreachable(); } } + +void VulkanDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) +{ + PreDispatchCheck(); + s_stats.num_draws++; + + const u32 groups_x = threads_x / group_size_x; + const u32 groups_y = threads_y / group_size_y; + const u32 groups_z = threads_z / group_size_z; + vkCmdDispatch(GetCurrentCommandBuffer(), groups_x, groups_y, groups_z); +} diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index f2e870f93..43a982325 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -113,6 +113,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -136,6 +137,8 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, + u32 group_size_z) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; @@ -373,6 +376,7 @@ 
private: VkPipelineLayout GetCurrentVkPipelineLayout() const; void SetInitialPipelineState(); void PreDrawCheck(); + void PreDispatchCheck(); template bool UpdateDescriptorSetsForLayout(u32 dirty); @@ -435,7 +439,7 @@ private: VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE; - VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_image_ds_layout = VK_NULL_HANDLE; DimensionalArray(GPUPipeline::Layout::MaxCount), static_cast(PipelineLayoutType::MaxCount)> m_pipeline_layouts = {}; diff --git a/src/util/vulkan_pipeline.cpp b/src/util/vulkan_pipeline.cpp index 52db0d766..a6d801c77 100644 --- a/src/util/vulkan_pipeline.cpp +++ b/src/util/vulkan_pipeline.cpp @@ -275,3 +275,16 @@ std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::Gra return std::unique_ptr( new VulkanPipeline(pipeline, config.layout, static_cast(vertices_per_primitive), config.render_pass_flags)); } + +std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + Vulkan::ComputePipelineBuilder cpb; + cpb.SetShader(static_cast(config.compute_shader)->GetModule(), "main"); + cpb.SetPipelineLayout(m_pipeline_layouts[0][static_cast(config.layout)]); + + const VkPipeline pipeline = cpb.Create(m_device, m_pipeline_cache, false, error); + if (!pipeline) + return {}; + + return std::unique_ptr(new VulkanPipeline(pipeline, config.layout, 0, GPUPipeline::NoRenderPassFlags)); +} From 3ff1b04576dad862148313d51aadd1a409f3272e Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 21 Nov 2024 00:56:07 +1000 Subject: [PATCH 05/35] GPUDevice: Support generating mipmaps --- src/common/log_channels.h | 1 - src/core/gpu.cpp | 16 +- src/core/gpu_hw.cpp | 104 ++++++------ src/core/gpu_hw.h | 2 +- src/core/gpu_hw_texture_cache.cpp | 16 +- src/core/gpu_sw.cpp | 4 +- src/core/imgui_overlays.cpp | 
6 +- src/util/d3d11_device.cpp | 19 +-- src/util/d3d11_device.h | 19 ++- src/util/d3d11_stream_buffer.cpp | 5 +- src/util/d3d11_stream_buffer.h | 4 +- src/util/d3d11_texture.cpp | 117 +++++++------ src/util/d3d11_texture.h | 11 +- src/util/d3d12_builders.cpp | 22 --- src/util/d3d12_builders.h | 7 - src/util/d3d12_descriptor_heap_manager.cpp | 1 - src/util/d3d12_device.cpp | 187 ++++++++++++++++++++- src/util/d3d12_device.h | 36 ++-- src/util/d3d12_texture.cpp | 160 ++++++++++-------- src/util/d3d12_texture.h | 7 +- src/util/gpu_device.cpp | 48 ++++-- src/util/gpu_device.h | 29 ++-- src/util/gpu_framebuffer_manager.h | 2 +- src/util/gpu_texture.cpp | 60 +++++-- src/util/gpu_texture.h | 47 ++++-- src/util/imgui_fullscreen.cpp | 8 +- src/util/imgui_manager.cpp | 12 +- src/util/media_capture.cpp | 2 +- src/util/metal_device.h | 28 +-- src/util/metal_device.mm | 85 ++++++---- src/util/metal_stream_buffer.h | 4 +- src/util/metal_stream_buffer.mm | 5 +- src/util/opengl_device.cpp | 5 +- src/util/opengl_device.h | 17 +- src/util/opengl_stream_buffer.cpp | 33 ++-- src/util/opengl_stream_buffer.h | 4 +- src/util/opengl_texture.cpp | 64 ++++--- src/util/opengl_texture.h | 9 +- src/util/postprocessing.cpp | 12 +- src/util/postprocessing_shader_fx.cpp | 12 +- src/util/vulkan_device.cpp | 44 ++--- src/util/vulkan_device.h | 23 +-- src/util/vulkan_texture.cpp | 119 ++++++++----- src/util/vulkan_texture.h | 9 +- 44 files changed, 898 insertions(+), 527 deletions(-) diff --git a/src/common/log_channels.h b/src/common/log_channels.h index 305f06490..b075a8764 100644 --- a/src/common/log_channels.h +++ b/src/common/log_channels.h @@ -31,7 +31,6 @@ X(GPUDevice) \ X(GPUDump) \ X(GPUShaderCache) \ - X(GPUTexture) \ X(GPUTextureCache) \ X(GPU_HW) \ X(GPU_SW) \ diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 9ec22af4a..22946bd1b 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1470,7 +1470,8 @@ void GPU::WriteGP1(u32 value) } break; - [[unlikely]] default : 
ERROR_LOG("Unimplemented GP1 command 0x{:02X}", command); + [[unlikely]] default: + ERROR_LOG("Unimplemented GP1 command 0x{:02X}", command); break; } } @@ -1518,7 +1519,8 @@ void GPU::HandleGetGPUInfoCommand(u32 value) } break; - [[unlikely]] default : WARNING_LOG("Unhandled GetGPUInfo(0x{:02X})", subcommand); + [[unlikely]] default: + WARNING_LOG("Unhandled GetGPUInfo(0x{:02X})", subcommand); break; } } @@ -2213,7 +2215,7 @@ bool GPU::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) { if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, false)) [[unlikely]] + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false)) [[unlikely]] { return false; } @@ -2258,7 +2260,7 @@ bool GPU::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) m_deinterlace_texture->GetHeight() != height) { if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, preserve)) [[unlikely]] + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, preserve)) [[unlikely]] { return false; } @@ -2279,7 +2281,7 @@ bool GPU::ApplyChromaSmoothing() m_chroma_smoothing_texture->GetHeight() != height) { if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, - GPUTexture::Format::RGBA8, false)) + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false)) { ClearDisplayTexture(); return false; @@ -2540,8 +2542,8 @@ bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i displ const GPUTexture::Format hdformat = g_gpu_device->HasMainSwapChain() ? 
g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; - auto render_texture = - g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); + auto render_texture = g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, + hdformat, GPUTexture::Flags::None); if (!render_texture) return false; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 3b93b2f3d..4e4d6bf9e 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -275,15 +275,9 @@ bool GPU_HW::Initialize(Error* error) PrintSettingsToLog(); - if (!CompileCommonShaders(error) || !CompilePipelines(error)) + if (!CompileCommonShaders(error) || !CompilePipelines(error) || !CreateBuffers(error)) return false; - if (!CreateBuffers()) - { - Error::SetStringView(error, "Failed to create framebuffer"); - return false; - } - if (m_use_texture_cache) { if (!GPUTextureCache::Initialize()) @@ -366,7 +360,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di ->FetchTexture( m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(), m_vram_texture->IsMultisampled() ? 
GPUTexture::Type::RenderTarget : GPUTexture::Type::Texture, - GPUTexture::Format::RGBA8, nullptr, 0) + GPUTexture::Format::RGBA8, GPUTexture::Flags::None) .release(); *host_texture = tex; if (!tex) @@ -538,8 +532,12 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) g_gpu_device->PurgeTexturePool(); g_gpu_device->WaitForGPUIdle(); - if (!CreateBuffers()) + Error error; + if (!CreateBuffers(&error)) + { + ERROR_LOG("Failed to recreate buffers: {}", error.GetDescription()); Panic("Failed to recreate buffers."); + } UpdateDownsamplingLevels(); RestoreDeviceContext(); @@ -849,7 +847,7 @@ GPUTexture::Format GPU_HW::GetDepthBufferFormat() const VRAM_DS_FORMAT; } -bool GPU_HW::CreateBuffers() +bool GPU_HW::CreateBuffers(Error* error) { DestroyBuffers(); @@ -859,28 +857,30 @@ bool GPU_HW::CreateBuffers() const u8 samples = static_cast(m_multisamples); const bool needs_depth_buffer = m_write_mask_as_depth || m_pgxp_depth_buffer; - // Needed for Metal resolve. - const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ? - GPUTexture::Type::RWTexture : - GPUTexture::Type::Texture; - const GPUTexture::Type vram_texture_type = - m_use_rov_for_shader_blend ? GPUTexture::Type::RWTexture : GPUTexture::Type::RenderTarget; + const GPUTexture::Flags read_texture_flags = + (m_multisamples > 1) ? GPUTexture::Flags::AllowMSAAResolveTarget : GPUTexture::Flags::None; + const GPUTexture::Flags vram_texture_flags = + m_use_rov_for_shader_blend ? GPUTexture::Flags::AllowBindAsImage : GPUTexture::Flags::None; const GPUTexture::Type depth_texture_type = - m_use_rov_for_shader_blend ? GPUTexture::Type::RWTexture : GPUTexture::Type::DepthStencil; + m_use_rov_for_shader_blend ? 
GPUTexture::Type::Texture : GPUTexture::Type::DepthStencil; - if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, vram_texture_type, - VRAM_RT_FORMAT)) || - (needs_depth_buffer && - !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, - depth_texture_type, GetDepthBufferFormat()))) || - (m_pgxp_depth_buffer && !(m_vram_depth_copy_texture = - g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, - GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT))) || + if (!(m_vram_texture = + g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, + VRAM_RT_FORMAT, vram_texture_flags, nullptr, 0, error)) || + (needs_depth_buffer && !(m_vram_depth_texture = g_gpu_device->FetchTexture( + texture_width, texture_height, 1, 1, samples, depth_texture_type, + GetDepthBufferFormat(), vram_texture_flags, nullptr, 0, error))) || + (m_pgxp_depth_buffer && !(m_vram_depth_copy_texture = g_gpu_device->FetchTexture( + texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, + VRAM_DS_COLOR_FORMAT, GPUTexture::Flags::None, nullptr, 0, error))) || !(m_vram_read_texture = - g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) || - !(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, - GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT))) + g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, GPUTexture::Type::Texture, VRAM_RT_FORMAT, + read_texture_flags, nullptr, 0, error)) || + !(m_vram_readback_texture = + g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Type::RenderTarget, + VRAM_RT_FORMAT, GPUTexture::Flags::None, nullptr, 0, error))) { + Error::AddPrefix(error, "Failed to create VRAM textures: "); return false; } @@ -895,26 +895,28 @@ bool GPU_HW::CreateBuffers() DEV_LOG("Trying to import 
guest VRAM buffer for downloads..."); m_vram_readback_download_texture = g_gpu_device->CreateDownloadTexture( m_vram_readback_texture->GetWidth(), m_vram_readback_texture->GetHeight(), m_vram_readback_texture->GetFormat(), - g_vram, sizeof(g_vram), VRAM_WIDTH * sizeof(u16)); + g_vram, sizeof(g_vram), VRAM_WIDTH * sizeof(u16), error); if (!m_vram_readback_download_texture) ERROR_LOG("Failed to create imported readback buffer"); } if (!m_vram_readback_download_texture) { - m_vram_readback_download_texture = g_gpu_device->CreateDownloadTexture( - m_vram_readback_texture->GetWidth(), m_vram_readback_texture->GetHeight(), m_vram_readback_texture->GetFormat()); + m_vram_readback_download_texture = + g_gpu_device->CreateDownloadTexture(m_vram_readback_texture->GetWidth(), m_vram_readback_texture->GetHeight(), + m_vram_readback_texture->GetFormat(), error); if (!m_vram_readback_download_texture) { - ERROR_LOG("Failed to create readback download texture"); + Error::AddPrefix(error, "Failed to create readback download texture: "); return false; } } if (g_gpu_device->GetFeatures().supports_texture_buffers) { - if (!(m_vram_upload_buffer = - g_gpu_device->CreateTextureBuffer(GPUTextureBuffer::Format::R16UI, GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS))) + if (!(m_vram_upload_buffer = g_gpu_device->CreateTextureBuffer(GPUTextureBuffer::Format::R16UI, + GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS, error))) { + Error::AddPrefix(error, "Failed to create texture buffer: "); return false; } @@ -2930,9 +2932,9 @@ bool GPU_HW::BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacemen { g_gpu_device->RecycleTexture(std::move(m_vram_replacement_texture)); - if (!(m_vram_replacement_texture = - g_gpu_device->FetchTexture(tex->GetWidth(), tex->GetHeight(), 1, 1, 1, GPUTexture::Type::DynamicTexture, - GPUTexture::Format::RGBA8, tex->GetPixels(), tex->GetPitch()))) + if (!(m_vram_replacement_texture = g_gpu_device->FetchTexture( + tex->GetWidth(), tex->GetHeight(), 1, 1, 1, 
GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, + GPUTexture::Flags::None, tex->GetPixels(), tex->GetPitch()))) { return false; } @@ -3402,7 +3404,7 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da { map_index = 0; upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, - GPUTexture::Format::R16U, data, data_pitch); + GPUTexture::Format::R16U, GPUTexture::Flags::None, data, data_pitch); if (!upload_texture) { ERROR_LOG("Failed to get {}x{} upload texture. Things are gonna break.", width, height); @@ -3938,7 +3940,8 @@ void GPU_HW::UpdateDisplay() m_vram_extract_texture->GetHeight() != read_height) { if (!g_gpu_device->ResizeTexture(&m_vram_extract_texture, scaled_display_width, read_height, - GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8)) [[unlikely]] + GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8, + GPUTexture::Flags::None)) [[unlikely]] { ClearDisplayTexture(); return; @@ -3952,7 +3955,7 @@ void GPU_HW::UpdateDisplay() ((m_vram_extract_depth_texture && m_vram_extract_depth_texture->GetWidth() == scaled_display_width && m_vram_extract_depth_texture->GetHeight() == scaled_display_height) || !g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height, - GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT))) + GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT, GPUTexture::Flags::None))) { depth_source->MakeReadyForSampling(); g_gpu_device->InvalidateRenderTarget(m_vram_extract_depth_texture.get()); @@ -4090,15 +4093,16 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top if (!m_downsample_texture || m_downsample_texture->GetWidth() != width || m_downsample_texture->GetHeight() != height) { g_gpu_device->RecycleTexture(std::move(m_downsample_texture)); - m_downsample_texture = - g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT); + 
m_downsample_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, + VRAM_RT_FORMAT, GPUTexture::Flags::None); } - std::unique_ptr level_texture = g_gpu_device->FetchAutoRecycleTexture( - width, height, 1, m_downsample_scale_or_levels, 1, GPUTexture::Type::Texture, VRAM_RT_FORMAT); - std::unique_ptr weight_texture = - g_gpu_device->FetchAutoRecycleTexture(std::max(width >> (m_downsample_scale_or_levels - 1), 1u), - std::max(height >> (m_downsample_scale_or_levels - 1), 1u), 1, 1, 1, - GPUTexture::Type::RenderTarget, GPUTexture::Format::R8); + std::unique_ptr level_texture = + g_gpu_device->FetchAutoRecycleTexture(width, height, 1, m_downsample_scale_or_levels, 1, GPUTexture::Type::Texture, + VRAM_RT_FORMAT, GPUTexture::Flags::None); + std::unique_ptr weight_texture = g_gpu_device->FetchAutoRecycleTexture( + std::max(width >> (m_downsample_scale_or_levels - 1), 1u), + std::max(height >> (m_downsample_scale_or_levels - 1), 1u), 1, 1, 1, GPUTexture::Type::RenderTarget, + GPUTexture::Format::R8, GPUTexture::Flags::None); if (!m_downsample_texture || !level_texture || !weight_texture) { ERROR_LOG("Failed to create {}x{} RTs for adaptive downsampling", width, height); @@ -4205,8 +4209,8 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to m_downsample_texture->GetHeight() != ds_height) { g_gpu_device->RecycleTexture(std::move(m_downsample_texture)); - m_downsample_texture = - g_gpu_device->FetchTexture(ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT); + m_downsample_texture = g_gpu_device->FetchTexture(ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget, + VRAM_RT_FORMAT, GPUTexture::Flags::None); } if (!m_downsample_texture) { diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 46bdc78fa..53fb6da01 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -155,7 +155,7 @@ private: /// Returns true if a depth buffer should be created. 
GPUTexture::Format GetDepthBufferFormat() const; - bool CreateBuffers(); + bool CreateBuffers(Error* error); void ClearFramebuffer(); void DestroyBuffers(); diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index f7f825887..0f6ecc124 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -2048,8 +2048,9 @@ GPUTextureCache::HashCacheEntry* GPUTextureCache::LookupHashCache(SourceKey key, entry.ref_count = 0; entry.last_used_frame = 0; entry.sources = {}; - entry.texture = g_gpu_device->FetchTexture(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, 1, 1, 1, - GPUTexture::Type::Texture, GPUTexture::Format::RGBA8); + entry.texture = + g_gpu_device->FetchTexture(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, GPUTexture::Flags::None); if (!entry.texture) { ERROR_LOG("Failed to create texture."); @@ -3285,8 +3286,9 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, { // NOTE: Not recycled, it's unlikely to be reused. s_state.replacement_texture_render_target.reset(); - if (!(s_state.replacement_texture_render_target = g_gpu_device->CreateTexture( - new_width, new_height, 1, 1, 1, GPUTexture::Type::RenderTarget, REPLACEMENT_TEXTURE_FORMAT))) + if (!(s_state.replacement_texture_render_target = + g_gpu_device->CreateTexture(new_width, new_height, 1, 1, 1, GPUTexture::Type::RenderTarget, + REPLACEMENT_TEXTURE_FORMAT, GPUTexture::Flags::None))) { ERROR_LOG("Failed to create {}x{} render target.", new_width, new_height); return; @@ -3294,8 +3296,8 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, } // Grab the actual texture beforehand, in case we OOM. 
- std::unique_ptr replacement_tex = - g_gpu_device->FetchTexture(new_width, new_height, 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT); + std::unique_ptr replacement_tex = g_gpu_device->FetchTexture( + new_width, new_height, 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT, GPUTexture::Flags::None); if (!replacement_tex) { ERROR_LOG("Failed to create {}x{} texture.", new_width, new_height); @@ -3319,7 +3321,7 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, { const auto temp_texture = g_gpu_device->FetchAutoRecycleTexture( si.image.GetWidth(), si.image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT, - si.image.GetPixels(), si.image.GetPitch()); + GPUTexture::Flags::None, si.image.GetPixels(), si.image.GetPitch()); if (!temp_texture) continue; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 068db4164..a29d99860 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -99,8 +99,8 @@ GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format { ClearDisplayTexture(); g_gpu_device->RecycleTexture(std::move(m_upload_texture)); - m_upload_texture = - g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::DynamicTexture, format, nullptr, 0); + m_upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, format, + GPUTexture::Flags::AllowMap, nullptr, 0); if (!m_upload_texture) [[unlikely]] ERROR_LOG("Failed to create {}x{} {} texture", width, height, static_cast(format)); } diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index 3c8239506..e21f1cf94 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -1049,9 +1049,9 @@ void SaveStateSelectorUI::InitializeListEntry(ListEntry* li, ExtendedSaveStateIn if (ssi->screenshot.IsValid()) { - li->preview_texture = g_gpu_device->FetchTexture(ssi->screenshot.GetWidth(), ssi->screenshot.GetHeight(), 1, 1, 1, 
- GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, - ssi->screenshot.GetPixels(), ssi->screenshot.GetPitch()); + li->preview_texture = g_gpu_device->FetchTexture( + ssi->screenshot.GetWidth(), ssi->screenshot.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, ssi->screenshot.GetPixels(), ssi->screenshot.GetPitch()); if (!li->preview_texture) [[unlikely]] ERROR_LOG("Failed to upload save state image to GPU"); } diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index c6d44392c..c04a1e417 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -139,11 +139,8 @@ bool D3D11Device::CreateDeviceAndMainSwapChain(std::string_view adapter, Feature return false; } - if (!CreateBuffers()) - { - Error::SetStringView(error, "Failed to create buffers"); + if (!CreateBuffers(error)) return false; - } return true; } @@ -514,11 +511,11 @@ void D3D11Device::WaitForGPUIdle() TrimTexturePool(); } -bool D3D11Device::CreateBuffers() +bool D3D11Device::CreateBuffers(Error* error) { - if (!m_vertex_buffer.Create(D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE) || - !m_index_buffer.Create(D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE, INDEX_BUFFER_SIZE) || - !m_uniform_buffer.Create(D3D11_BIND_CONSTANT_BUFFER, MIN_UNIFORM_BUFFER_SIZE, MAX_UNIFORM_BUFFER_SIZE)) + if (!m_vertex_buffer.Create(D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE, error) || + !m_index_buffer.Create(D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE, INDEX_BUFFER_SIZE, error) || + !m_uniform_buffer.Create(D3D11_BIND_CONSTANT_BUFFER, MIN_UNIFORM_BUFFER_SIZE, MAX_UNIFORM_BUFFER_SIZE, error)) { ERROR_LOG("Failed to create vertex/index/uniform buffers."); return false; @@ -612,7 +609,7 @@ void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 bool D3D11Device::IsRenderTargetBound(const D3D11Texture* tex) const { - if (tex->IsRenderTarget() || tex->IsRWTexture()) + if 
(tex->IsRenderTarget() || tex->HasFlag(GPUTexture::Flags::AllowBindAsImage)) { for (u32 i = 0; i < m_num_current_render_targets; i++) { @@ -1053,7 +1050,7 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s // Runtime will null these if we don't... DebugAssert(!texture || - !((texture->IsRenderTarget() || texture->IsRWTexture()) && + !((texture->IsRenderTarget() || texture->HasFlag(GPUTexture::Flags::AllowBindAsImage)) && IsRenderTargetBound(static_cast(texture))) || !(texture->IsDepthStencil() && (!m_current_depth_target || m_current_depth_target != static_cast(texture)))); @@ -1100,7 +1097,7 @@ void D3D11Device::UnbindTexture(D3D11Texture* tex) } } - if (tex->IsRenderTarget() || tex->IsRWTexture()) + if (tex->IsRenderTarget() || tex->HasFlag(GPUTexture::Flags::AllowBindAsImage)) { for (u32 i = 0; i < m_num_current_render_targets; i++) { diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index e1b327250..d06a75872 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -50,15 +50,18 @@ public: std::optional exclusive_fullscreen_control, Error* error) override; std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Type type, GPUTexture::Format format, - const void* data = nullptr, u32 data_stride = 0) override; - std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; - std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags, + const void* data = nullptr, u32 data_stride = 0, + Error* error = nullptr) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config, Error* error = nullptr) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements, + Error* error = nullptr) override; - std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format 
format) override; std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, - void* memory, size_t memory_size, - u32 memory_stride) override; + Error* error = nullptr) override; + std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, u32 memory_stride, + Error* error = nullptr) override; bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, @@ -141,7 +144,7 @@ private: void SetFeatures(FeatureMask disabled_features); - bool CreateBuffers(); + bool CreateBuffers(Error* error); void DestroyBuffers(); void BindUniformBuffer(u32 offset, u32 size); void UnbindComputePipeline(); diff --git a/src/util/d3d11_stream_buffer.cpp b/src/util/d3d11_stream_buffer.cpp index 8a0179e36..adb59f180 100644 --- a/src/util/d3d11_stream_buffer.cpp +++ b/src/util/d3d11_stream_buffer.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/error.h" #include "common/log.h" +#include "common/small_string.h" LOG_CHANNEL(GPUDevice); @@ -27,7 +28,7 @@ D3D11StreamBuffer::~D3D11StreamBuffer() Destroy(); } -bool D3D11StreamBuffer::Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max_size) +bool D3D11StreamBuffer::Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max_size, Error* error) { D3D11_FEATURE_DATA_D3D11_OPTIONS options = {}; HRESULT hr = D3D11Device::GetD3DDevice()->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options)); @@ -72,7 +73,7 @@ bool D3D11StreamBuffer::Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max hr = D3D11Device::GetD3DDevice()->CreateBuffer(&desc, nullptr, &buffer); if (FAILED(hr)) [[unlikely]] { - ERROR_LOG("Creating buffer failed: {}", Error::CreateHResult(hr).GetDescription()); + Error::SetHResult(error, TinyString::from_format("CreateBuffer({}) failed: ", create_size), hr); return false; } diff --git 
a/src/util/d3d11_stream_buffer.h b/src/util/d3d11_stream_buffer.h index c09a08d40..5c9aced3c 100644 --- a/src/util/d3d11_stream_buffer.h +++ b/src/util/d3d11_stream_buffer.h @@ -9,6 +9,8 @@ #include #include +class Error; + class D3D11StreamBuffer { public: @@ -26,7 +28,7 @@ public: ALWAYS_INLINE bool IsMapped() const { return m_mapped; } ALWAYS_INLINE bool IsUsingMapNoOverwrite() const { return m_use_map_no_overwrite; } - bool Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max_size); + bool Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max_size, Error* error); void Destroy(); struct MappingResult diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp index 9615d95cb..78b441069 100644 --- a/src/util/d3d11_texture.cpp +++ b/src/util/d3d11_texture.cpp @@ -6,6 +6,7 @@ #include "d3d_common.h" #include "common/assert.h" +#include "common/error.h" #include "common/log.h" #include "common/string_util.h" @@ -17,9 +18,11 @@ LOG_CHANNEL(GPUDevice); std::unique_ptr D3D11Device::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, - const void* data, u32 data_stride) + GPUTexture::Flags flags, const void* data /* = nullptr */, + u32 data_stride /* = 0 */, Error* error /* = nullptr */) { - return D3D11Texture::Create(m_device.Get(), width, height, layers, levels, samples, type, format, data, data_stride); + return D3D11Texture::Create(m_device.Get(), width, height, layers, levels, samples, type, format, flags, data, + data_stride, error); } bool D3D11Device::SupportsTextureFormat(GPUTexture::Format format) const @@ -44,7 +47,7 @@ void D3D11Sampler::SetDebugName(std::string_view name) SetD3DDebugObjectName(m_ss.Get(), name); } -std::unique_ptr D3D11Device::CreateSampler(const GPUSampler::Config& config) +std::unique_ptr D3D11Device::CreateSampler(const GPUSampler::Config& config, Error* error) { static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ 
D3D11_TEXTURE_ADDRESS_WRAP, // Repeat @@ -87,7 +90,7 @@ std::unique_ptr D3D11Device::CreateSampler(const GPUSampler::Config& const HRESULT hr = m_device->CreateSamplerState(&desc, ss.GetAddressOf()); if (FAILED(hr)) [[unlikely]] { - ERROR_LOG("CreateSamplerState() failed: {:08X}", static_cast(hr)); + Error::SetHResult(error, "CreateSamplerState() failed: ", hr); return {}; } @@ -95,10 +98,10 @@ std::unique_ptr D3D11Device::CreateSampler(const GPUSampler::Config& } D3D11Texture::D3D11Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, - ComPtr texture, ComPtr srv, + Flags flags, ComPtr texture, ComPtr srv, ComPtr rtv_dsv, ComPtr uav) : GPUTexture(static_cast(width), static_cast(height), static_cast(layers), static_cast(levels), - static_cast(samples), type, format), + static_cast(samples), type, format, flags), m_texture(std::move(texture)), m_srv(std::move(srv)), m_rtv_dsv(std::move(rtv_dsv)), m_uav(std::move(uav)) { } @@ -127,7 +130,7 @@ void D3D11Texture::CommitClear(ID3D11DeviceContext1* context) else context->ClearDepthStencilView(GetD3DDSV(), D3D11_CLEAR_DEPTH, GetClearDepth(), 0); } - else if (IsRenderTarget() || IsRWTexture()) + else if (IsRenderTarget()) { if (m_state == GPUTexture::State::Invalidated) context->DiscardView(GetD3DRTV()); @@ -141,7 +144,7 @@ void D3D11Texture::CommitClear(ID3D11DeviceContext1* context) bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, u32 level /*= 0*/) { - if (m_type == Type::DynamicTexture) + if (HasFlag(Flags::AllowMap)) { void* map; u32 map_stride; @@ -171,7 +174,7 @@ bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/, u32 level /*= 0*/) { - if (m_type != Type::DynamicTexture || (x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || + if (!HasFlag(Flags::AllowMap) || 
(x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) { return false; @@ -207,6 +210,12 @@ void D3D11Texture::Unmap() m_mapped_subresource = 0; } +void D3D11Texture::GenerateMipmaps() +{ + DebugAssert(HasFlag(Flags::AllowGenerateMipmaps)); + D3D11Device::GetD3DContext()->GenerateMips(m_srv.Get()); +} + void D3D11Texture::SetDebugName(std::string_view name) { SetD3DDebugObjectName(m_texture.Get(), name); @@ -218,43 +227,57 @@ DXGI_FORMAT D3D11Texture::GetDXGIFormat() const } std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 width, u32 height, u32 layers, u32 levels, - u32 samples, Type type, Format format, - const void* initial_data /* = nullptr */, - u32 initial_data_stride /* = 0 */) + u32 samples, Type type, Format format, Flags flags, + const void* initial_data, u32 initial_data_stride, Error* error) { - if (!ValidateConfig(width, height, layers, layers, samples, type, format)) + if (!ValidateConfig(width, height, layers, levels, samples, type, format, flags, error)) return nullptr; u32 bind_flags = 0; D3D11_USAGE usage = D3D11_USAGE_DEFAULT; u32 cpu_access = 0; + u32 misc = 0; switch (type) { - case Type::RenderTarget: - bind_flags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; - break; - case Type::DepthStencil: - bind_flags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE; - break; case Type::Texture: bind_flags = D3D11_BIND_SHADER_RESOURCE; break; - case Type::DynamicTexture: - bind_flags = D3D11_BIND_SHADER_RESOURCE; - usage = D3D11_USAGE_DYNAMIC; - cpu_access = D3D11_CPU_ACCESS_WRITE; + + case Type::RenderTarget: + bind_flags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; break; - case Type::RWTexture: - bind_flags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; - break; - default: + + case Type::DepthStencil: + bind_flags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE; break; + + DefaultCaseIsUnreachable(); + } 
+ + if ((flags & Flags::AllowBindAsImage) != Flags::None) + { + DebugAssert(levels == 1); + bind_flags |= D3D11_BIND_UNORDERED_ACCESS; + } + + if ((flags & Flags::AllowGenerateMipmaps) != Flags::None) + { + // Needs RT annoyingly. + bind_flags |= D3D11_BIND_RENDER_TARGET; + misc = D3D11_RESOURCE_MISC_GENERATE_MIPS; + } + + if ((flags & Flags::AllowMap) != Flags::None) + { + DebugAssert(type == Type::Texture); + usage = D3D11_USAGE_DYNAMIC; + cpu_access = D3D11_CPU_ACCESS_WRITE; } const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(format); CD3D11_TEXTURE2D_DESC desc(fm.resource_format, width, height, layers, levels, bind_flags, usage, cpu_access, samples, - 0, 0); + 0, misc); D3D11_SUBRESOURCE_DATA srd; srd.pSysMem = initial_data; @@ -265,9 +288,7 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid const HRESULT tex_hr = device->CreateTexture2D(&desc, initial_data ? &srd : nullptr, texture.GetAddressOf()); if (FAILED(tex_hr)) { - ERROR_LOG("Create texture failed: 0x{:08X} ({}x{} levels:{} samples:{} format:{} bind_flags:{:X} initial_data:{})", - static_cast(tex_hr), width, height, levels, samples, static_cast(format), bind_flags, - initial_data); + Error::SetHResult(error, "CreateTexture2D() failed: ", tex_hr); return nullptr; } @@ -288,7 +309,7 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid const HRESULT hr = device->CreateShaderResourceView(texture.Get(), &srv_desc, srv.GetAddressOf()); if (FAILED(hr)) [[unlikely]] { - ERROR_LOG("Create SRV for texture failed: 0x{:08X}", static_cast(hr)); + Error::SetHResult(error, "CreateShaderResourceView() failed: ", hr); return nullptr; } } @@ -303,7 +324,7 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.GetAddressOf()); if (FAILED(hr)) [[unlikely]] { - ERROR_LOG("Create RTV for texture failed: 0x{:08X}", static_cast(hr)); + Error::SetHResult(error, 
"CreateRenderTargetView() failed: ", hr); return nullptr; } @@ -318,7 +339,7 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid const HRESULT hr = device->CreateDepthStencilView(texture.Get(), &dsv_desc, dsv.GetAddressOf()); if (FAILED(hr)) [[unlikely]] { - ERROR_LOG("Create DSV for texture failed: 0x{:08X}", static_cast(hr)); + Error::SetHResult(error, "CreateDepthStencilView() failed: ", hr); return nullptr; } @@ -334,12 +355,12 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid const HRESULT hr = device->CreateUnorderedAccessView(texture.Get(), &uav_desc, uav.GetAddressOf()); if (FAILED(hr)) [[unlikely]] { - ERROR_LOG("Create UAV for texture failed: 0x{:08X}", static_cast(hr)); + Error::SetHResult(error, "CreateUnorderedAccessView() failed: ", hr); return nullptr; } } - return std::unique_ptr(new D3D11Texture(width, height, layers, levels, samples, type, format, + return std::unique_ptr(new D3D11Texture(width, height, layers, levels, samples, type, format, flags, std::move(texture), std::move(srv), std::move(rtv_dsv), std::move(uav))); } @@ -350,10 +371,10 @@ D3D11TextureBuffer::D3D11TextureBuffer(Format format, u32 size_in_elements) : GP D3D11TextureBuffer::~D3D11TextureBuffer() = default; -bool D3D11TextureBuffer::CreateBuffer() +bool D3D11TextureBuffer::CreateBuffer(Error* error) { const u32 size_in_bytes = GetSizeInBytes(); - if (!m_buffer.Create(D3D11_BIND_SHADER_RESOURCE, size_in_bytes, size_in_bytes)) + if (!m_buffer.Create(D3D11_BIND_SHADER_RESOURCE, size_in_bytes, size_in_bytes, error)) return false; static constexpr std::array(Format::MaxCount)> dxgi_formats = {{ @@ -366,7 +387,7 @@ bool D3D11TextureBuffer::CreateBuffer() D3D11Device::GetD3DDevice()->CreateShaderResourceView(m_buffer.GetD3DBuffer(), &srv_desc, m_srv.GetAddressOf()); if (FAILED(hr)) [[unlikely]] { - ERROR_LOG("CreateShaderResourceView() failed: {:08X}", static_cast(hr)); + Error::SetHResult(error, "CreateShaderResourceView() failed: ", hr); return 
false; } @@ -395,10 +416,10 @@ void D3D11TextureBuffer::SetDebugName(std::string_view name) } std::unique_ptr D3D11Device::CreateTextureBuffer(GPUTextureBuffer::Format format, - u32 size_in_elements) + u32 size_in_elements, Error* error /* = nullptr */) { std::unique_ptr tb = std::make_unique(format, size_in_elements); - if (!tb->CreateBuffer()) + if (!tb->CreateBuffer(error)) tb.reset(); return tb; @@ -416,7 +437,8 @@ D3D11DownloadTexture::~D3D11DownloadTexture() D3D11DownloadTexture::Unmap(); } -std::unique_ptr D3D11DownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format) +std::unique_ptr D3D11DownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, + Error* error) { D3D11_TEXTURE2D_DESC desc = {}; desc.Width = width; @@ -433,7 +455,7 @@ std::unique_ptr D3D11DownloadTexture::Create(u32 width, u3 HRESULT hr = D3D11Device::GetD3DDevice()->CreateTexture2D(&desc, nullptr, tex.GetAddressOf()); if (FAILED(hr)) { - ERROR_LOG("CreateTexture2D() failed: {:08X}", hr); + Error::SetHResult(error, "CreateTexture2D() failed: ", hr); return {}; } @@ -520,15 +542,16 @@ void D3D11DownloadTexture::SetDebugName(std::string_view name) SetD3DDebugObjectName(m_texture.Get(), name); } -std::unique_ptr D3D11Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) +std::unique_ptr D3D11Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + Error* error /* = nullptr */) { - return D3D11DownloadTexture::Create(width, height, format); + return D3D11DownloadTexture::Create(width, height, format, error); } std::unique_ptr D3D11Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, void* memory, size_t memory_size, - u32 memory_stride) + u32 memory_stride, Error* error /* = nullptr */) { - ERROR_LOG("D3D11 cannot import memory for download textures"); + Error::SetStringView(error, "D3D11 cannot import memory for download textures"); return {}; } diff --git a/src/util/d3d11_texture.h 
b/src/util/d3d11_texture.h index 03840a070..25f636442 100644 --- a/src/util/d3d11_texture.h +++ b/src/util/d3d11_texture.h @@ -77,8 +77,8 @@ public: ALWAYS_INLINE operator bool() const { return static_cast(m_texture); } static std::unique_ptr Create(ID3D11Device* device, u32 width, u32 height, u32 layers, u32 levels, - u32 samples, Type type, Format format, const void* initial_data = nullptr, - u32 initial_data_stride = 0); + u32 samples, Type type, Format format, Flags flags, + const void* initial_data, u32 initial_data_stride, Error* error); D3D11_TEXTURE2D_DESC GetDesc() const; void CommitClear(ID3D11DeviceContext1* context); @@ -86,11 +86,12 @@ public: bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; void Unmap() override; + void GenerateMipmaps() override; void SetDebugName(std::string_view name) override; private: - D3D11Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + D3D11Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, Flags flags, ComPtr texture, ComPtr srv, ComPtr rtv_dsv, ComPtr uav); @@ -111,7 +112,7 @@ public: ALWAYS_INLINE ID3D11ShaderResourceView* GetSRV() const { return m_srv.Get(); } ALWAYS_INLINE ID3D11ShaderResourceView* const* GetSRVArray() const { return m_srv.GetAddressOf(); } - bool CreateBuffer(); + bool CreateBuffer(Error* error); // Inherited via GPUTextureBuffer void* Map(u32 required_elements) override; @@ -129,7 +130,7 @@ class D3D11DownloadTexture final : public GPUDownloadTexture public: ~D3D11DownloadTexture() override; - static std::unique_ptr Create(u32 width, u32 height, GPUTexture::Format format); + static std::unique_ptr Create(u32 width, u32 height, GPUTexture::Format format, Error* error); void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 
src_x, u32 src_y, u32 width, u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) override; diff --git a/src/util/d3d12_builders.cpp b/src/util/d3d12_builders.cpp index ed297cc9e..230641c3f 100644 --- a/src/util/d3d12_builders.cpp +++ b/src/util/d3d12_builders.cpp @@ -118,11 +118,6 @@ void D3D12::GraphicsPipelineBuilder::SetMultisamples(u32 multisamples) m_desc.SampleDesc.Count = multisamples; } -void D3D12::GraphicsPipelineBuilder::SetNoCullRasterizationState() -{ - SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false); -} - void D3D12::GraphicsPipelineBuilder::SetDepthState(bool depth_test, bool depth_write, D3D12_COMPARISON_FUNC compare_op) { m_desc.DepthStencilState.DepthEnable = depth_test; @@ -141,11 +136,6 @@ void D3D12::GraphicsPipelineBuilder::SetStencilState(bool stencil_test, u8 read_ m_desc.DepthStencilState.BackFace = back; } -void D3D12::GraphicsPipelineBuilder::SetNoDepthTestState() -{ - SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS); -} - void D3D12::GraphicsPipelineBuilder::SetNoStencilState() { D3D12_DEPTH_STENCILOP_DESC empty = {}; @@ -170,18 +160,6 @@ void D3D12::GraphicsPipelineBuilder::SetBlendState(u32 rt, bool blend_enable, D3 m_desc.BlendState.IndependentBlendEnable = TRUE; } -void D3D12::GraphicsPipelineBuilder::SetColorWriteMask(u32 rt, u8 write_mask /* = D3D12_COLOR_WRITE_ENABLE_ALL */) -{ - m_desc.BlendState.RenderTarget[rt].RenderTargetWriteMask = write_mask; -} - -void D3D12::GraphicsPipelineBuilder::SetNoBlendingState() -{ - SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, - D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL); - m_desc.BlendState.IndependentBlendEnable = FALSE; -} - void D3D12::GraphicsPipelineBuilder::ClearRenderTargets() { m_desc.NumRenderTargets = 0; diff --git a/src/util/d3d12_builders.h b/src/util/d3d12_builders.h index 9dbffcf77..bf5e3531f 100644 --- a/src/util/d3d12_builders.h +++ 
b/src/util/d3d12_builders.h @@ -80,21 +80,14 @@ public: void SetMultisamples(u32 multisamples); - void SetNoCullRasterizationState(); - void SetDepthState(bool depth_test, bool depth_write, D3D12_COMPARISON_FUNC compare_op); void SetStencilState(bool stencil_test, u8 read_mask, u8 write_mask, const D3D12_DEPTH_STENCILOP_DESC& front, const D3D12_DEPTH_STENCILOP_DESC& back); - - void SetNoDepthTestState(); void SetNoStencilState(); void SetBlendState(u32 rt, bool blend_enable, D3D12_BLEND src_factor, D3D12_BLEND dst_factor, D3D12_BLEND_OP op, D3D12_BLEND alpha_src_factor, D3D12_BLEND alpha_dst_factor, D3D12_BLEND_OP alpha_op, u8 write_mask = D3D12_COLOR_WRITE_ENABLE_ALL); - void SetColorWriteMask(u32 rt, u8 write_mask = D3D12_COLOR_WRITE_ENABLE_ALL); - - void SetNoBlendingState(); void ClearRenderTargets(); diff --git a/src/util/d3d12_descriptor_heap_manager.cpp b/src/util/d3d12_descriptor_heap_manager.cpp index 7da9d7101..6ea24ece5 100644 --- a/src/util/d3d12_descriptor_heap_manager.cpp +++ b/src/util/d3d12_descriptor_heap_manager.cpp @@ -83,7 +83,6 @@ bool D3D12DescriptorHeapManager::Allocate(D3D12DescriptorHandle* handle) return true; } - Panic("Out of fixed descriptors"); return false; } diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index be58487c9..964d067cc 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -46,7 +46,6 @@ enum : u32 FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024, - // UNIFORM_PUSH_CONSTANTS_STAGES = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, UNIFORM_PUSH_CONSTANTS_SIZE = 128, MAX_UNIFORM_BUFFER_SIZE = 1024, @@ -65,6 +64,55 @@ static DynamicHeapArray s_pipeline_cache_data; static u32 s_debug_scope_depth = 0; #endif +static constexpr const u32 s_mipmap_blit_vs[] = { + 0x43425844, 0xe0f571cf, 0x51234ef3, 0x3a6beab4, 0x141cd2ef, 0x00000001, 0x000003ac, 0x00000005, 0x00000034, + 0x00000144, 0x00000178, 0x000001d0, 0x00000310, 0x46454452, 0x00000108, 
0x00000001, 0x00000068, 0x00000001, + 0x0000003c, 0xfffe0500, 0x00008100, 0x000000e0, 0x31314452, 0x0000003c, 0x00000018, 0x00000020, 0x00000028, + 0x00000024, 0x0000000c, 0x00000000, 0x0000005c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000001, 0x00000001, 0x424f4255, 0x6b636f6c, 0xababab00, 0x0000005c, 0x00000001, 0x00000080, 0x00000010, + 0x00000000, 0x00000000, 0x000000a8, 0x00000000, 0x00000010, 0x00000002, 0x000000bc, 0x00000000, 0xffffffff, + 0x00000000, 0xffffffff, 0x00000000, 0x72735f75, 0x65725f63, 0x66007463, 0x74616f6c, 0xabab0034, 0x00030001, + 0x00040001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000b3, 0x7263694d, + 0x666f736f, 0x52282074, 0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265, 0x00312e30, + 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000006, 0x00000001, 0x00000000, + 0x00000101, 0x565f5653, 0x65747265, 0x00444978, 0x4e47534f, 0x00000050, 0x00000002, 0x00000008, 0x00000038, + 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000c03, 0x00000041, 0x00000000, 0x00000001, 0x00000003, + 0x00000001, 0x0000000f, 0x43584554, 0x44524f4f, 0x5f565300, 0x69736f50, 0x6e6f6974, 0xababab00, 0x58454853, + 0x00000138, 0x00010050, 0x0000004e, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x04000060, + 0x00101012, 0x00000000, 0x00000006, 0x03000065, 0x00102032, 0x00000000, 0x04000067, 0x001020f2, 0x00000001, + 0x00000001, 0x02000068, 0x00000001, 0x0b00008c, 0x00100012, 0x00000000, 0x00004001, 0x00000001, 0x00004001, + 0x00000001, 0x0010100a, 0x00000000, 0x00004001, 0x00000000, 0x07000001, 0x00100042, 0x00000000, 0x0010100a, + 0x00000000, 0x00004001, 0x00000002, 0x05000056, 0x00100032, 0x00000000, 0x00100086, 0x00000000, 0x0b000032, + 0x00102032, 0x00000000, 0x00100046, 0x00000000, 0x00208ae6, 0x00000000, 0x00000000, 0x00208046, 0x00000000, + 0x00000000, 0x0f000032, 0x00102032, 0x00000001, 0x00100046, 0x00000000, 0x00004002, 
0x40000000, 0xc0000000, + 0x00000000, 0x00000000, 0x00004002, 0xbf800000, 0x3f800000, 0x00000000, 0x00000000, 0x08000036, 0x001020c2, + 0x00000001, 0x00004002, 0x00000000, 0x00000000, 0x00000000, 0x3f800000, 0x0100003e, 0x54415453, 0x00000094, + 0x00000007, 0x00000001, 0x00000000, 0x00000003, 0x00000002, 0x00000000, 0x00000001, 0x00000001, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000001, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000}; + +static constexpr const u32 s_mipmap_blit_ps[] = { + 0x43425844, 0x25500f77, 0x71f24271, 0x5f83f8b8, 0x3f405943, 0x00000001, 0x0000026c, 0x00000005, 0x00000034, + 0x000000f0, 0x00000124, 0x00000158, 0x000001d0, 0x46454452, 0x000000b4, 0x00000000, 0x00000000, 0x00000002, + 0x0000003c, 0xffff0500, 0x00008100, 0x0000008b, 0x31314452, 0x0000003c, 0x00000018, 0x00000020, 0x00000028, + 0x00000024, 0x0000000c, 0x00000000, 0x0000007c, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000001, 0x00000001, 0x00000085, 0x00000002, 0x00000005, 0x00000004, 0xffffffff, 0x00000000, 0x00000001, + 0x0000000d, 0x706d6173, 0x73735f30, 0x6d617300, 0x4d003070, 0x6f726369, 0x74666f73, 0x29522820, 0x534c4820, + 0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x30312072, 0xab00312e, 0x4e475349, 0x0000002c, 0x00000001, + 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000303, 0x43584554, 0x44524f4f, + 0xababab00, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, + 0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x00000070, 0x00000050, 0x0000001c, + 0x0100086a, 0x0300005a, 0x00106000, 0x00000000, 0x04001858, 0x00107000, 0x00000000, 0x00005555, 0x03001062, + 0x00101032, 0x00000000, 0x03000065, 
0x001020f2, 0x00000000, 0x8b000045, 0x800000c2, 0x00155543, 0x001020f2, + 0x00000000, 0x00101046, 0x00000000, 0x00107e46, 0x00000000, 0x00106000, 0x00000000, 0x0100003e, 0x54415453, + 0x00000094, 0x00000002, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000001, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000}; + D3D12Device::D3D12Device() { m_render_api = RenderAPI::D3D12; @@ -523,7 +571,9 @@ bool D3D12Device::CreateDescriptorHeaps(Error* error) m_device->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc, m_null_uav_descriptor.cpu_handle); // Same for samplers. - m_point_sampler = GetSampler(GPUSampler::GetNearestConfig()); + m_point_sampler = GetSampler(GPUSampler::GetNearestConfig(), error); + if (!m_point_sampler) [[unlikely]] + return false; for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) m_current_samplers[i] = m_point_sampler; return true; @@ -2100,7 +2150,7 @@ void D3D12Device::UnbindTexture(D3D12Texture* tex) } } - if (tex->IsRenderTarget() || tex->IsRWTexture()) + if (tex->IsRenderTarget() || tex->HasFlag(GPUTexture::Flags::AllowBindAsImage)) { for (u32 i = 0; i < m_num_current_render_targets; i++) { @@ -2134,6 +2184,137 @@ void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf) m_dirty_flags |= DIRTY_FLAG_TEXTURES; } +void D3D12Device::RenderTextureMipmap(D3D12Texture* texture, u32 dst_level, u32 dst_width, u32 dst_height, + u32 src_level, u32 src_width, u32 src_height) +{ + ID3D12RootSignature* rootsig = + m_root_signatures[0][static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)].Get(); + ComPtr& pipeline = m_mipmap_render_pipelines[static_cast(texture->GetFormat())]; + if (!pipeline) + { + 
D3D12::GraphicsPipelineBuilder gpb; + gpb.SetRootSignature(rootsig); + gpb.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); + gpb.SetRenderTarget(0, texture->GetDXGIFormat()); + gpb.SetVertexShader(s_mipmap_blit_vs, std::size(s_mipmap_blit_vs)); + gpb.SetPixelShader(s_mipmap_blit_ps, std::size(s_mipmap_blit_ps)); + gpb.SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false); + gpb.SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS); + gpb.SetBlendState(0, false, D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_BLEND_ZERO, + D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL); + + const std::wstring name = StringUtil::UTF8StringToWideString( + TinyString::from_format("MipmapRender-{}", GPUTexture::GetFormatName(texture->GetFormat()))); + Error error; + if (m_pipeline_library) + { + HRESULT hr = + m_pipeline_library->LoadGraphicsPipeline(name.c_str(), gpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf())); + if (FAILED(hr)) + { + // E_INVALIDARG = not found. + if (hr != E_INVALIDARG) + ERROR_LOG("LoadGraphicsPipeline() failed with HRESULT {:08X}", static_cast(hr)); + + // Need to create it normally. + pipeline = gpb.Create(m_device.Get(), &error, false); + + // Store if it wasn't an OOM or something else. + if (pipeline && hr == E_INVALIDARG) + { + hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get()); + if (FAILED(hr)) + ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast(hr)); + } + } + } + else + { + pipeline = gpb.Create(m_device.Get(), &error, false); + } + if (!pipeline) + { + ERROR_LOG("Failed to compile mipmap render pipeline for {}: {}", GPUTexture::GetFormatName(texture->GetFormat()), + error.GetDescription()); + return; + } + } + + EndRenderPass(); + + // we need a temporary SRV and RTV for each mip level + // Safe to use the init buffer after exec, because everything will be done with the texture. 
+ D3D12DescriptorHandle rtv_handle; + while (!GetRTVHeapManager().Allocate(&rtv_handle)) + SubmitCommandList(false, "Allocate RTV for RenderTextureMipmap()"); + + D3D12DescriptorHandle srv_handle; + while (!GetDescriptorHeapManager().Allocate(&srv_handle)) + SubmitCommandList(false, "Allocate SRV for RenderTextureMipmap()"); + + // Setup views. This will be a partial view for the SRV. + D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {texture->GetDXGIFormat(), D3D12_RTV_DIMENSION_TEXTURE2D}; + rtv_desc.Texture2D = {dst_level, 0u}; + m_device->CreateRenderTargetView(texture->GetResource(), &rtv_desc, rtv_handle); + + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {texture->GetDXGIFormat(), D3D12_SRV_DIMENSION_TEXTURE2D, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; + srv_desc.Texture2D = {src_level, 1u, 0u, 0.0f}; + m_device->CreateShaderResourceView(texture->GetResource(), &srv_desc, srv_handle); + + // *now* we don't have to worry about running out of anything. + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + { + texture->TransitionSubresourceToState(cmdlist, src_level, texture->GetResourceState(), + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET) + { + texture->TransitionSubresourceToState(cmdlist, dst_level, texture->GetResourceState(), + D3D12_RESOURCE_STATE_RENDER_TARGET); + } + + const D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {.cpuDescriptor = rtv_handle, + .BeginningAccess = + D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, + .EndingAccess = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE}; + cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE); + + const D3D12_VIEWPORT vp = {0.0f, 0.0f, static_cast(dst_width), static_cast(dst_height), 0.0f, 1.0f}; + cmdlist->RSSetViewports(1, &vp); + + const D3D12_RECT scissor = {0, 0, static_cast(dst_width), static_cast(dst_height)}; + 
cmdlist->RSSetScissorRects(1, &scissor); + + cmdlist->SetPipelineState(pipeline.Get()); + cmdlist->SetGraphicsRootDescriptorTable(0, srv_handle); + cmdlist->SetGraphicsRootDescriptorTable(1, static_cast(m_linear_sampler.get())->GetDescriptor()); + cmdlist->DrawInstanced(3, 1, 0, 0); + + cmdlist->EndRenderPass(); + + if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + { + texture->TransitionSubresourceToState(cmdlist, src_level, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + texture->GetResourceState()); + } + if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET) + { + texture->TransitionSubresourceToState(cmdlist, dst_level, D3D12_RESOURCE_STATE_RENDER_TARGET, + texture->GetResourceState()); + } + + // Must destroy after current cmdlist. + DeferDescriptorDestruction(m_descriptor_heap_manager, &srv_handle); + DeferDescriptorDestruction(m_rtv_heap_manager, &rtv_handle); + + // Restore for next normal draw. + SetViewport(GetCommandList()); + SetScissor(GetCommandList()); + m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE; +} + void D3D12Device::SetViewport(const GSVector4i rc) { if (m_current_viewport.eq(rc)) diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index ba065cfc5..6306a1376 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -71,15 +71,18 @@ public: std::optional exclusive_fullscreen_control, Error* error) override; std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Type type, GPUTexture::Format format, - const void* data = nullptr, u32 data_stride = 0) override; - std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; - std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags, + const void* data = nullptr, u32 data_stride = 0, + Error* error = nullptr) override; + std::unique_ptr 
CreateSampler(const GPUSampler::Config& config, Error* error = nullptr) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements, + Error* error = nullptr) override; - std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, - void* memory, size_t memory_size, - u32 memory_stride) override; + Error* error = nullptr) override; + std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, u32 memory_stride, + Error* error = nullptr) override; bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, @@ -191,6 +194,9 @@ public: void UnbindTexture(D3D12Texture* tex); void UnbindTextureBuffer(D3D12TextureBuffer* buf); + void RenderTextureMipmap(D3D12Texture* texture, u32 dst_level, u32 dst_width, u32 dst_height, u32 src_level, + u32 src_width, u32 src_height); + protected: bool CreateDeviceAndMainSwapChain(std::string_view adapter, FeatureMask disabled_features, const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle, @@ -253,7 +259,7 @@ private: void DestroyDescriptorHeaps(); bool CreateTimestampQuery(); void DestroyTimestampQuery(); - D3D12DescriptorHandle GetSampler(const GPUSampler::Config& config); + D3D12DescriptorHandle GetSampler(const GPUSampler::Config& config, Error* error); void DestroySamplers(); void DestroyDeferredObjects(u64 fence_value); @@ -261,10 +267,13 @@ private: void MoveToNextCommandList(); bool CreateSRVDescriptor(ID3D12Resource* resource, u32 layers, u32 levels, u32 samples, DXGI_FORMAT format, - D3D12DescriptorHandle* dh); - bool CreateRTVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh); - bool 
CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh); - bool CreateUAVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh); + D3D12DescriptorHandle* dh, Error* error); + bool CreateRTVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh, + Error* error); + bool CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh, + Error* error); + bool CreateUAVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh, + Error* error); bool IsRenderTargetBound(const GPUTexture* tex) const; @@ -354,6 +363,9 @@ private: GSVector4i m_current_scissor = {}; D3D12SwapChain* m_current_swap_chain = nullptr; + + std::array, static_cast(GPUTexture::Format::MaxCount)> m_mipmap_render_pipelines = + {}; }; class D3D12SwapChain : public GPUSwapChain diff --git a/src/util/d3d12_texture.cpp b/src/util/d3d12_texture.cpp index 789761935..4305f7afd 100644 --- a/src/util/d3d12_texture.cpp +++ b/src/util/d3d12_texture.cpp @@ -18,12 +18,12 @@ LOG_CHANNEL(GPUDevice); D3D12Texture::D3D12Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, - DXGI_FORMAT dxgi_format, ComPtr resource, + Flags flags, DXGI_FORMAT dxgi_format, ComPtr resource, ComPtr allocation, const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& uav_descriptor, WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state) : GPUTexture(static_cast(width), static_cast(height), static_cast(layers), static_cast(levels), - static_cast(samples), type, format), + static_cast(samples), type, format, flags), m_resource(std::move(resource)), m_allocation(std::move(allocation)), m_srv_descriptor(srv_descriptor), m_write_descriptor(write_descriptor), m_uav_descriptor(uav_descriptor), m_dxgi_format(dxgi_format), 
m_resource_state(resource_state), m_write_descriptor_type(wdtype) @@ -37,9 +37,10 @@ D3D12Texture::~D3D12Texture() std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, - const void* data /* = nullptr */, u32 data_stride /* = 0 */) + GPUTexture::Flags flags, const void* data /* = nullptr */, + u32 data_stride /* = 0 */, Error* error /* = nullptr */) { - if (!GPUTexture::ValidateConfig(width, height, layers, levels, samples, type, format)) + if (!GPUTexture::ValidateConfig(width, height, layers, levels, samples, type, format, flags, error)) return {}; const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(format); @@ -64,7 +65,6 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 switch (type) { case GPUTexture::Type::Texture: - case GPUTexture::Type::DynamicTexture: { desc.Flags = D3D12_RESOURCE_FLAG_NONE; state = D3D12_RESOURCE_STATE_COPY_DEST; @@ -92,18 +92,20 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 } break; - case GPUTexture::Type::RWTexture: - { - DebugAssert(levels == 1); - allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; - desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - optimized_clear_value.Format = fm.rtv_format; - state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - } - break; + DefaultCaseIsUnreachable(); + } - default: - return {}; + if ((flags & GPUTexture::Flags::AllowBindAsImage) != GPUTexture::Flags::None) + { + DebugAssert(levels == 1); + allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; + desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + + if ((flags & GPUTexture::Flags::AllowGenerateMipmaps) != GPUTexture::Flags::None) + { + // requires RTs since we need to draw the mips + desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; } ComPtr resource; @@ -115,10 +117,7 @@ std::unique_ptr 
D3D12Device::CreateTexture(u32 width, u32 height, u3 allocation.GetAddressOf(), IID_PPV_ARGS(resource.GetAddressOf())); if (FAILED(hr)) [[unlikely]] { - // OOM isn't fatal. - if (hr != E_OUTOFMEMORY) - ERROR_LOG("Create texture failed: 0x{:08X}", static_cast(hr)); - + Error::SetHResult(error, "CreateResource() failed: ", hr); return {}; } @@ -126,16 +125,19 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 D3D12Texture::WriteDescriptorType write_descriptor_type = D3D12Texture::WriteDescriptorType::None; if (fm.srv_format != DXGI_FORMAT_UNKNOWN) { - if (!CreateSRVDescriptor(resource.Get(), layers, levels, samples, fm.srv_format, &srv_descriptor)) + if (!CreateSRVDescriptor(resource.Get(), layers, levels, samples, fm.srv_format, &srv_descriptor, error)) return {}; } switch (type) { + case GPUTexture::Type::Texture: + break; + case GPUTexture::Type::RenderTarget: { write_descriptor_type = D3D12Texture::WriteDescriptorType::RTV; - if (!CreateRTVDescriptor(resource.Get(), samples, fm.rtv_format, &write_descriptor)) + if (!CreateRTVDescriptor(resource.Get(), samples, fm.rtv_format, &write_descriptor, error)) { m_descriptor_heap_manager.Free(&srv_descriptor); return {}; @@ -146,7 +148,7 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 case GPUTexture::Type::DepthStencil: { write_descriptor_type = D3D12Texture::WriteDescriptorType::DSV; - if (!CreateDSVDescriptor(resource.Get(), samples, fm.dsv_format, &write_descriptor)) + if (!CreateDSVDescriptor(resource.Get(), samples, fm.dsv_format, &write_descriptor, error)) { m_descriptor_heap_manager.Free(&srv_descriptor); return {}; @@ -154,30 +156,23 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 } break; - case GPUTexture::Type::RWTexture: + DefaultCaseIsUnreachable(); + } + + if ((flags & GPUTexture::Flags::AllowBindAsImage) != GPUTexture::Flags::None) + { + if (!CreateUAVDescriptor(resource.Get(), samples, fm.srv_format, &uav_descriptor, error)) { - 
write_descriptor_type = D3D12Texture::WriteDescriptorType::RTV; - if (!CreateRTVDescriptor(resource.Get(), samples, fm.rtv_format, &write_descriptor)) - { - m_descriptor_heap_manager.Free(&srv_descriptor); - return {}; - } - - if (!CreateUAVDescriptor(resource.Get(), samples, fm.srv_format, &uav_descriptor)) - { + if (write_descriptor_type != D3D12Texture::WriteDescriptorType::None) m_descriptor_heap_manager.Free(&write_descriptor); - m_descriptor_heap_manager.Free(&srv_descriptor); - return {}; - } - } - break; - default: - break; + m_descriptor_heap_manager.Free(&srv_descriptor); + return {}; + } } std::unique_ptr tex(new D3D12Texture( - width, height, layers, levels, samples, type, format, fm.resource_format, std::move(resource), + width, height, layers, levels, samples, type, format, flags, fm.resource_format, std::move(resource), std::move(allocation), srv_descriptor, write_descriptor, uav_descriptor, write_descriptor_type, state)); if (data) @@ -190,11 +185,11 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 } bool D3D12Device::CreateSRVDescriptor(ID3D12Resource* resource, u32 layers, u32 levels, u32 samples, DXGI_FORMAT format, - D3D12DescriptorHandle* dh) + D3D12DescriptorHandle* dh, Error* error) { if (!m_descriptor_heap_manager.Allocate(dh)) { - ERROR_LOG("Failed to allocate SRV descriptor"); + Error::SetStringView(error, "Failed to allocate SRV descriptor"); return false; } @@ -233,11 +228,11 @@ bool D3D12Device::CreateSRVDescriptor(ID3D12Resource* resource, u32 layers, u32 } bool D3D12Device::CreateRTVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, - D3D12DescriptorHandle* dh) + D3D12DescriptorHandle* dh, Error* error) { if (!m_rtv_heap_manager.Allocate(dh)) { - ERROR_LOG("Failed to allocate SRV descriptor"); + Error::SetStringView(error, "Failed to allocate RTV descriptor"); return false; } @@ -248,11 +243,11 @@ bool D3D12Device::CreateRTVDescriptor(ID3D12Resource* resource, u32 samples, DXG } bool 
D3D12Device::CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, - D3D12DescriptorHandle* dh) + D3D12DescriptorHandle* dh, Error* error) { if (!m_dsv_heap_manager.Allocate(dh)) { - ERROR_LOG("Failed to allocate SRV descriptor"); + Error::SetStringView(error, "Failed to allocate DSV descriptor"); return false; } @@ -263,11 +258,11 @@ bool D3D12Device::CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXG } bool D3D12Device::CreateUAVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, - D3D12DescriptorHandle* dh) + D3D12DescriptorHandle* dh, Error* error) { if (!m_descriptor_heap_manager.Allocate(dh)) { - ERROR_LOG("Failed to allocate UAV descriptor"); + Error::SetStringView(error, "Failed to allocate UAV descriptor"); return false; } @@ -334,9 +329,9 @@ void D3D12Texture::Destroy(bool defer) ID3D12GraphicsCommandList4* D3D12Texture::GetCommandBufferForUpdate() { D3D12Device& dev = D3D12Device::GetInstance(); - if ((m_type != Type::Texture && m_type != Type::DynamicTexture) || m_use_fence_counter == dev.GetCurrentFenceValue()) + if (m_type != Type::Texture || m_use_fence_counter == dev.GetCurrentFenceValue()) { - // Console.WriteLn("Texture update within frame, can't use do beforehand"); + // DEV_LOG("Texture update within frame, can't use do beforehand"); if (dev.InRenderPass()) dev.EndRenderPass(); return dev.GetCommandList(); @@ -562,6 +557,28 @@ void D3D12Texture::Unmap() m_map_level = 0; } +void D3D12Texture::GenerateMipmaps() +{ + Panic("Not implemented"); + + for (u32 layer = 0; layer < m_layers; layer++) + { + for (u32 dst_level = 1; dst_level < m_levels; dst_level++) + { + const u32 src_level = dst_level - 1; + const u32 src_width = std::max(m_width >> src_level, 1u); + const u32 src_height = std::max(m_height >> src_level, 1u); + const u32 dst_width = std::max(m_width >> dst_level, 1u); + const u32 dst_height = std::max(m_height >> dst_level, 1u); + + D3D12Device::GetInstance().RenderTextureMipmap(this, 
dst_level, dst_width, dst_height, src_level, src_width, + src_height); + } + } + + SetUseFenceValue(D3D12Device::GetInstance().GetCurrentFenceValue()); +} + void D3D12Texture::CommitClear() { if (m_state != GPUTexture::State::Cleared) @@ -685,7 +702,7 @@ void D3D12Sampler::SetDebugName(std::string_view name) { } -D3D12DescriptorHandle D3D12Device::GetSampler(const GPUSampler::Config& config) +D3D12DescriptorHandle D3D12Device::GetSampler(const GPUSampler::Config& config, Error* error) { const auto it = m_sampler_map.find(config.key); if (it != m_sampler_map.end()) @@ -730,8 +747,10 @@ D3D12DescriptorHandle D3D12Device::GetSampler(const GPUSampler::Config& config) } D3D12DescriptorHandle handle; - if (m_sampler_heap_manager.Allocate(&handle)) + if (m_sampler_heap_manager.Allocate(&handle)) [[likely]] m_device->CreateSampler(&desc, handle); + else + Error::SetStringView(error, "Failed to allocate sampler handle."); m_sampler_map.emplace(config.key, handle); return handle; @@ -747,9 +766,9 @@ void D3D12Device::DestroySamplers() m_sampler_map.clear(); } -std::unique_ptr D3D12Device::CreateSampler(const GPUSampler::Config& config) +std::unique_ptr D3D12Device::CreateSampler(const GPUSampler::Config& config, Error* error /* = nullptr */) { - const D3D12DescriptorHandle handle = GetSampler(config); + const D3D12DescriptorHandle handle = GetSampler(config, error); if (!handle) return {}; @@ -765,21 +784,20 @@ D3D12TextureBuffer::~D3D12TextureBuffer() Destroy(true); } -bool D3D12TextureBuffer::Create(D3D12Device& dev) +bool D3D12TextureBuffer::Create(D3D12Device& dev, Error* error) { static constexpr std::array(GPUTextureBuffer::Format::MaxCount)> format_mapping = {{ DXGI_FORMAT_R16_UINT, // R16UI }}; - Error error; - if (!m_buffer.Create(GetSizeInBytes(), &error)) [[unlikely]] - { - ERROR_LOG("Failed to create stream buffer: {}", error.GetDescription()); + if (!m_buffer.Create(GetSizeInBytes(), error)) [[unlikely]] return false; - } if 
(!dev.GetDescriptorHeapManager().Allocate(&m_descriptor)) [[unlikely]] + { + Error::SetStringView(error, "Failed to allocate descriptor."); return {}; + } D3D12_SHADER_RESOURCE_VIEW_DESC desc = {format_mapping[static_cast(m_format)], D3D12_SRV_DIMENSION_BUFFER, @@ -831,11 +849,11 @@ void D3D12TextureBuffer::SetDebugName(std::string_view name) } std::unique_ptr D3D12Device::CreateTextureBuffer(GPUTextureBuffer::Format format, - u32 size_in_elements) + u32 size_in_elements, Error* error /* = nullptr */) { std::unique_ptr tb = std::make_unique(format, size_in_elements); - if (!tb->Create(*this)) + if (!tb->Create(*this, error)) tb.reset(); return tb; @@ -858,7 +876,8 @@ D3D12DownloadTexture::~D3D12DownloadTexture() D3D12Device::GetInstance().DeferResourceDestruction(m_allocation.Get(), m_buffer.Get()); } -std::unique_ptr D3D12DownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format) +std::unique_ptr D3D12DownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, + Error* error) { const u32 buffer_size = GetBufferSize(width, height, format, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); @@ -879,12 +898,12 @@ std::unique_ptr D3D12DownloadTexture::Create(u32 width, u3 ComPtr allocation; ComPtr buffer; - HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource( + const HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource( &allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, allocation.GetAddressOf(), IID_PPV_ARGS(buffer.GetAddressOf())); if (FAILED(hr)) { - ERROR_LOG("CreateResource() failed with HRESULT {:08X}", hr); + Error::SetHResult(error, "CreateResource() failed: ", hr); return {}; } @@ -1015,15 +1034,16 @@ void D3D12DownloadTexture::SetDebugName(std::string_view name) D3D12::SetObjectName(m_buffer.Get(), name); } -std::unique_ptr D3D12Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) +std::unique_ptr D3D12Device::CreateDownloadTexture(u32 width, u32 height, 
GPUTexture::Format format, + Error* error /* = nullptr */) { - return D3D12DownloadTexture::Create(width, height, format); + return D3D12DownloadTexture::Create(width, height, format, error); } std::unique_ptr D3D12Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, void* memory, size_t memory_size, - u32 memory_stride) + u32 memory_stride, Error* error /* = nullptr */) { - ERROR_LOG("D3D12 cannot import memory for download textures"); + Error::SetStringView(error, "D3D12 cannot import memory for download textures"); return {}; } diff --git a/src/util/d3d12_texture.h b/src/util/d3d12_texture.h index c8f31a022..051d1ca0d 100644 --- a/src/util/d3d12_texture.h +++ b/src/util/d3d12_texture.h @@ -40,6 +40,7 @@ public: bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; void Unmap() override; + void GenerateMipmaps() override; void MakeReadyForSampling() override; void SetDebugName(std::string_view name) override; @@ -71,7 +72,7 @@ private: DSV }; - D3D12Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + D3D12Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, Flags flags, DXGI_FORMAT dxgi_format, ComPtr resource, ComPtr allocation, const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& uav_descriptor, WriteDescriptorType wdtype, @@ -133,7 +134,7 @@ public: ALWAYS_INLINE const D3D12DescriptorHandle& GetDescriptor() const { return m_descriptor; } - bool Create(D3D12Device& dev); + bool Create(D3D12Device& dev, Error* error); void Destroy(bool defer); // Inherited via GPUTextureBuffer @@ -155,7 +156,7 @@ public: ~D3D12DownloadTexture() override; - static std::unique_ptr Create(u32 width, u32 height, GPUTexture::Format 
format); + static std::unique_ptr Create(u32 width, u32 height, GPUTexture::Format format, Error* error); void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) override; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 6a4c3a6dd..3c6c2fb25 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -590,10 +590,10 @@ bool GPUDevice::GetPipelineCacheData(DynamicHeapArray* data, Error* error) bool GPUDevice::CreateResources(Error* error) { - if (!(m_nearest_sampler = CreateSampler(GPUSampler::GetNearestConfig())) || - !(m_linear_sampler = CreateSampler(GPUSampler::GetLinearConfig()))) + if (!(m_nearest_sampler = CreateSampler(GPUSampler::GetNearestConfig(), error)) || + !(m_linear_sampler = CreateSampler(GPUSampler::GetLinearConfig(), error))) { - Error::SetStringView(error, "Failed to create samplers"); + Error::AddPrefix(error, "Failed to create samplers: "); return false; } @@ -922,10 +922,15 @@ bool GPUDevice::UpdateImGuiFontTexture() return true; } + Error error; std::unique_ptr new_font = - FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, pixels, pitch); - if (!new_font) + FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, GPUTexture::Flags::None, + pixels, pitch, &error); + if (!new_font) [[unlikely]] + { + ERROR_LOG("Failed to create new ImGui font texture: {}", error.GetDescription()); return false; + } RecycleTexture(std::move(m_imgui_font_texture)); m_imgui_font_texture = std::move(new_font); @@ -950,12 +955,13 @@ GSVector4i GPUDevice::FlipToLowerLeft(GSVector4i rc, s32 target_height) bool GPUDevice::IsTexturePoolType(GPUTexture::Type type) { - return (type == GPUTexture::Type::Texture || type == GPUTexture::Type::DynamicTexture); + return (type == GPUTexture::Type::Texture); } std::unique_ptr GPUDevice::FetchTexture(u32 width, u32 height, 
u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, - const void* data /*= nullptr*/, u32 data_stride /*= 0*/) + GPUTexture::Flags flags, const void* data /* = nullptr */, + u32 data_stride /* = 0 */, Error* error /* = nullptr */) { std::unique_ptr ret; @@ -966,7 +972,7 @@ std::unique_ptr GPUDevice::FetchTexture(u32 width, u32 height, u32 l static_cast(samples), type, format, - 0u}; + flags}; const bool is_texture = IsTexturePoolType(type); TexturePool& pool = is_texture ? m_texture_pool : m_target_pool; @@ -1018,17 +1024,29 @@ std::unique_ptr GPUDevice::FetchTexture(u32 width, u32 height, u32 l } } - ret = CreateTexture(width, height, layers, levels, samples, type, format, data, data_stride); + Error create_error; + ret = CreateTexture(width, height, layers, levels, samples, type, format, flags, data, data_stride, &create_error); + if (!ret) [[unlikely]] + { + Error::SetStringFmt( + error ? error : &create_error, "Failed to create {}x{} {} {}: {}", width, height, + GPUTexture::GetFormatName(format), + ((type == GPUTexture::Type::RenderTarget) ? "RT" : (type == GPUTexture::Type::DepthStencil ? 
"DS" : "Texture")), + create_error.TakeDescription()); + if (!error) + ERROR_LOG(create_error.GetDescription()); + } + return ret; } std::unique_ptr GPUDevice::FetchAutoRecycleTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, - GPUTexture::Format format, const void* data /*= nullptr*/, u32 data_stride /*= 0*/, - bool dynamic /*= false*/) + GPUTexture::Format format, GPUTexture::Flags flags, const void* data /* = nullptr */, + u32 data_stride /* = 0 */, Error* error /* = nullptr */) { std::unique_ptr ret = - FetchTexture(width, height, layers, levels, samples, type, format, data, data_stride); + FetchTexture(width, height, layers, levels, samples, type, format, flags, data, data_stride, error); return std::unique_ptr(ret.release()); } @@ -1044,7 +1062,7 @@ void GPUDevice::RecycleTexture(std::unique_ptr texture) static_cast(texture->GetSamples()), texture->GetType(), texture->GetFormat(), - 0u}; + texture->GetFlags()}; const bool is_texture = IsTexturePoolType(texture->GetType()); TexturePool& pool = is_texture ? 
m_texture_pool : m_target_pool; @@ -1118,11 +1136,11 @@ void GPUDevice::TrimTexturePool() } bool GPUDevice::ResizeTexture(std::unique_ptr* tex, u32 new_width, u32 new_height, GPUTexture::Type type, - GPUTexture::Format format, bool preserve /* = true */) + GPUTexture::Format format, GPUTexture::Flags flags, bool preserve /* = true */) { GPUTexture* old_tex = tex->get(); DebugAssert(!old_tex || (old_tex->GetLayers() == 1 && old_tex->GetLevels() == 1 && old_tex->GetSamples() == 1)); - std::unique_ptr new_tex = FetchTexture(new_width, new_height, 1, 1, 1, type, format); + std::unique_ptr new_tex = FetchTexture(new_width, new_height, 1, 1, 1, type, format, flags); if (!new_tex) [[unlikely]] { ERROR_LOG("Failed to create new {}x{} texture", new_width, new_height); diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 6c1060c60..04efc1fc6 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -693,27 +693,28 @@ public: virtual std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, - const void* data = nullptr, u32 data_stride = 0) = 0; - virtual std::unique_ptr CreateSampler(const GPUSampler::Config& config) = 0; - virtual std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, - u32 size_in_elements) = 0; + GPUTexture::Flags flags, const void* data = nullptr, + u32 data_stride = 0, Error* error = nullptr) = 0; + virtual std::unique_ptr CreateSampler(const GPUSampler::Config& config, Error* error = nullptr) = 0; + virtual std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements, + Error* error = nullptr) = 0; // Texture pooling. 
std::unique_ptr FetchTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Type type, GPUTexture::Format format, const void* data = nullptr, - u32 data_stride = 0); + GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags, + const void* data = nullptr, u32 data_stride = 0, Error* error = nullptr); std::unique_ptr FetchAutoRecycleTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, - GPUTexture::Format format, const void* data = nullptr, u32 data_stride = 0, - bool dynamic = false); + GPUTexture::Format format, GPUTexture::Flags flags, const void* data = nullptr, + u32 data_stride = 0, Error* error = nullptr); void RecycleTexture(std::unique_ptr texture); void PurgeTexturePool(); - virtual std::unique_ptr CreateDownloadTexture(u32 width, u32 height, - GPUTexture::Format format) = 0; virtual std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, - void* memory, size_t memory_size, - u32 memory_stride) = 0; + Error* error = nullptr) = 0; + virtual std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, u32 memory_stride, + Error* error = nullptr) = 0; virtual void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) = 0; @@ -789,7 +790,7 @@ public: bool UsesLowerLeftOrigin() const; static GSVector4i FlipToLowerLeft(GSVector4i rc, s32 target_height); bool ResizeTexture(std::unique_ptr* tex, u32 new_width, u32 new_height, GPUTexture::Type type, - GPUTexture::Format format, bool preserve = true); + GPUTexture::Format format, GPUTexture::Flags flags, bool preserve = true); virtual bool SupportsTextureFormat(GPUTexture::Format format) const = 0; @@ -863,7 +864,7 @@ private: u8 samples; GPUTexture::Type type; GPUTexture::Format format; - u8 pad; + GPUTexture::Flags 
flags; ALWAYS_INLINE bool operator==(const TexturePoolKey& rhs) const { diff --git a/src/util/gpu_framebuffer_manager.h b/src/util/gpu_framebuffer_manager.h index e5e24c7e9..072cdd701 100644 --- a/src/util/gpu_framebuffer_manager.h +++ b/src/util/gpu_framebuffer_manager.h @@ -92,7 +92,7 @@ template void GPUFramebufferManager::RemoveRTReferences(const GPUTexture* tex) { - DebugAssert(tex->IsRenderTarget() || tex->IsRWTexture()); + DebugAssert(tex->IsRenderTarget()); for (auto it = m_map.begin(); it != m_map.end();) { if (!it->first.ContainsRT(tex)) diff --git a/src/util/gpu_texture.cpp b/src/util/gpu_texture.cpp index aff26f256..c1bad139b 100644 --- a/src/util/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -7,14 +7,12 @@ #include "common/align.h" #include "common/assert.h" #include "common/bitutils.h" -#include "common/log.h" +#include "common/error.h" #include "common/string_util.h" -LOG_CHANNEL(GPUTexture); - -GPUTexture::GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format) +GPUTexture::GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format, Flags flags) : m_width(width), m_height(height), m_layers(layers), m_levels(levels), m_samples(samples), m_type(type), - m_format(format) + m_format(format), m_flags(flags) { GPUDevice::s_total_vram_usage += GetVRAMUsage(); } @@ -119,6 +117,12 @@ u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch) return pitch * ((static_cast(height) + (block_size - 1)) / block_size); } +u32 GPUTexture::GetFullMipmapCount(u32 width, u32 height) +{ + const u32 max_dim = Common::PreviousPow2(std::max(width, height)); + return (std::countr_zero(max_dim) + 1); +} + std::array GPUTexture::GetUNormClearColor() const { return GPUDevice::RGBA8ToFloat(m_clear_value.color); @@ -192,25 +196,28 @@ bool GPUTexture::IsCompressedFormat(Format format) return false; } -bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, 
Type type, Format format) +bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + Flags flags, Error* error) { - if (width > MAX_WIDTH || height > MAX_HEIGHT || layers > MAX_LAYERS || levels > MAX_LEVELS || samples > MAX_SAMPLES) + if (width == 0 || width > MAX_WIDTH || height == 0 || height > MAX_HEIGHT || layers == 0 || layers > MAX_LAYERS || + levels == 0 || levels > MAX_LEVELS || samples == 0 || samples > MAX_SAMPLES) { - ERROR_LOG("Invalid dimensions: {}x{}x{} {} {}.", width, height, layers, levels, samples); + Error::SetStringFmt(error, "Invalid dimensions: {}x{}x{} {} {}.", width, height, layers, levels, samples); return false; } const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); if (width > max_texture_size || height > max_texture_size) { - ERROR_LOG("Texture width ({}) or height ({}) exceeds max texture size ({}).", width, height, max_texture_size); + Error::SetStringFmt(error, "Texture width ({}) or height ({}) exceeds max texture size ({}).", width, height, + max_texture_size); return false; } const u32 max_samples = g_gpu_device->GetMaxMultisamples(); if (samples > max_samples) { - ERROR_LOG("Texture samples ({}) exceeds max samples ({}).", samples, max_samples); + Error::SetStringFmt(error, "Texture samples ({}) exceeds max samples ({}).", samples, max_samples); return false; } @@ -218,25 +225,45 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u { if (levels > 1) { - ERROR_LOG("Multisampled textures can't have mip levels."); + Error::SetStringView(error, "Multisampled textures can't have mip levels."); return false; } else if (type != Type::RenderTarget && type != Type::DepthStencil) { - ERROR_LOG("Multisampled textures must be render targets or depth stencil targets."); + Error::SetStringView(error, "Multisampled textures must be render targets or depth stencil targets."); return false; } } - if (layers > 1 && type != Type::Texture && type 
!= Type::DynamicTexture) + if (layers > 1 && type != Type::Texture) { - ERROR_LOG("Texture arrays are not supported on targets."); + Error::SetStringView(error, "Texture arrays are not supported on targets."); return false; } - if (levels > 1 && type != Type::Texture && type != Type::DynamicTexture) + if (levels > 1 && type != Type::Texture) { - ERROR_LOG("Mipmaps are not supported on targets."); + Error::SetStringView(error, "Mipmaps are not supported on targets."); + return false; + } + + if ((flags & Flags::AllowGenerateMipmaps) != Flags::None && levels <= 1) + { + Error::SetStringView(error, "Allow generate mipmaps requires >1 level."); + return false; + } + + if ((flags & Flags::AllowBindAsImage) != Flags::None && + ((type != Type::Texture && type != Type::RenderTarget) || levels > 1)) + { + Error::SetStringView(error, "Bind as image is not allowed on depth or mipmapped targets."); + return false; + } + + if ((flags & Flags::AllowMap) != Flags::None && + (type != Type::Texture || (flags & Flags::AllowGenerateMipmaps) != Flags::None)) + { + Error::SetStringView(error, "Allow map is not supported on targets."); return false; } @@ -331,7 +358,6 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector(format)); return false; } } diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index 0528090f6..5139c74b5 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -11,6 +11,8 @@ #include #include +class Error; + class GPUTexture { public: @@ -25,12 +27,9 @@ public: enum class Type : u8 { - Unknown, + Texture, RenderTarget, DepthStencil, - Texture, - DynamicTexture, - RWTexture, }; enum class Format : u8 @@ -70,6 +69,15 @@ public: Invalidated }; + enum class Flags : u8 + { + None = 0, + AllowMap = (1 << 0), + AllowBindAsImage = (1 << 2), + AllowGenerateMipmaps = (1 << 3), + AllowMSAAResolveTarget = (1 << 4), + }; + union ClearValue { u32 color; @@ -81,20 +89,22 @@ public: virtual ~GPUTexture(); static const char* 
GetFormatName(Format format); - static u32 GetPixelSize(GPUTexture::Format format); - static bool IsDepthFormat(GPUTexture::Format format); - static bool IsDepthStencilFormat(GPUTexture::Format format); + static u32 GetPixelSize(Format format); + static bool IsDepthFormat(Format format); + static bool IsDepthStencilFormat(Format format); static bool IsCompressedFormat(Format format); static u32 GetCompressedBytesPerBlock(Format format); static u32 GetCompressedBlockSize(Format format); static u32 CalcUploadPitch(Format format, u32 width); static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch); static u32 CalcUploadSize(Format format, u32 height, u32 pitch); + static u32 GetFullMipmapCount(u32 width, u32 height); - static bool ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format); + static bool ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + Flags flags, Error* error); static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector& texture_data, u32& texture_data_stride, - GPUTexture::Format format); + Format format); static void FlipTextureDataRGBA8(u32 width, u32 height, u8* texture_data, u32 texture_data_stride); ALWAYS_INLINE u32 GetWidth() const { return m_width; } @@ -104,6 +114,8 @@ public: ALWAYS_INLINE u32 GetSamples() const { return m_samples; } ALWAYS_INLINE Type GetType() const { return m_type; } ALWAYS_INLINE Format GetFormat() const { return m_format; } + ALWAYS_INLINE Flags GetFlags() const { return m_flags; } + ALWAYS_INLINE bool HasFlag(Flags flag) const { return ((static_cast(m_flags) & static_cast(flag)) != 0); } ALWAYS_INLINE GSVector4i GetRect() const { return GSVector4i(0, 0, static_cast(m_width), static_cast(m_height)); @@ -121,15 +133,13 @@ public: ALWAYS_INLINE bool IsDirty() const { return (m_state == State::Dirty); } ALWAYS_INLINE bool IsClearedOrInvalidated() const { return (m_state != State::Dirty); } + ALWAYS_INLINE 
bool IsTexture() const { return (m_type == Type::Texture); } + ALWAYS_INLINE bool IsRenderTarget() const { return (m_type == Type::RenderTarget); } + ALWAYS_INLINE bool IsDepthStencil() const { return (m_type == Type::DepthStencil); } ALWAYS_INLINE bool IsRenderTargetOrDepthStencil() const { return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil); } - ALWAYS_INLINE bool IsRenderTarget() const { return (m_type == Type::RenderTarget); } - ALWAYS_INLINE bool IsDepthStencil() const { return (m_type == Type::DepthStencil); } - ALWAYS_INLINE bool IsTexture() const { return (m_type == Type::Texture || m_type == Type::DynamicTexture); } - ALWAYS_INLINE bool IsDynamicTexture() const { return (m_type == Type::DynamicTexture); } - ALWAYS_INLINE bool IsRWTexture() const { return (m_type == Type::RWTexture); } ALWAYS_INLINE const ClearValue& GetClearValue() const { return m_clear_value; } ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; } @@ -162,27 +172,32 @@ public: virtual bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) = 0; virtual void Unmap() = 0; + virtual void GenerateMipmaps() = 0; + // Instructs the backend that we're finished rendering to this texture. It may transition it to a new layout. 
virtual void MakeReadyForSampling(); virtual void SetDebugName(std::string_view name) = 0; protected: - GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format); + GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format, Flags flags); u16 m_width = 0; u16 m_height = 0; u8 m_layers = 0; u8 m_levels = 0; u8 m_samples = 0; - Type m_type = Type::Unknown; + Type m_type = Type::Texture; Format m_format = Format::Unknown; + Flags m_flags = Flags::None; State m_state = State::Dirty; ClearValue m_clear_value = {}; }; +IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTexture::Flags); + class GPUDownloadTexture { public: diff --git a/src/util/imgui_fullscreen.cpp b/src/util/imgui_fullscreen.cpp index 1a5da2f86..b7a6b41ce 100644 --- a/src/util/imgui_fullscreen.cpp +++ b/src/util/imgui_fullscreen.cpp @@ -290,9 +290,9 @@ const std::shared_ptr& ImGuiFullscreen::GetPlaceholderTexture() std::unique_ptr ImGuiFullscreen::CreateTextureFromImage(const RGBA8Image& image) { - std::unique_ptr ret = - g_gpu_device->CreateTexture(image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, - GPUTexture::Format::RGBA8, image.GetPixels(), image.GetPitch()); + std::unique_ptr ret = g_gpu_device->CreateTexture( + image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, + GPUTexture::Flags::None, image.GetPixels(), image.GetPitch()); if (!ret) [[unlikely]] ERROR_LOG("Failed to upload {}x{} RGBA8Image to GPU", image.GetWidth(), image.GetHeight()); return ret; @@ -368,7 +368,7 @@ std::shared_ptr ImGuiFullscreen::UploadTexture(std::string_view path { std::unique_ptr texture = g_gpu_device->FetchTexture(image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, - GPUTexture::Format::RGBA8, image.GetPixels(), image.GetPitch()); + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, image.GetPixels(), image.GetPitch()); if (!texture) { ERROR_LOG("Failed to create {}x{} 
texture for resource", image.GetWidth(), image.GetHeight()); diff --git a/src/util/imgui_manager.cpp b/src/util/imgui_manager.cpp index f1153d413..3e6a6cfd6 100644 --- a/src/util/imgui_manager.cpp +++ b/src/util/imgui_manager.cpp @@ -1241,19 +1241,21 @@ void ImGuiManager::UpdateSoftwareCursorTexture(u32 index) return; } + Error error; RGBA8Image image; - if (!image.LoadFromFile(sc.image_path.c_str())) + if (!image.LoadFromFile(sc.image_path.c_str(), &error)) { - ERROR_LOG("Failed to load software cursor {} image '{}'", index, sc.image_path); + ERROR_LOG("Failed to load software cursor {} image '{}': {}", index, sc.image_path, error.GetDescription()); return; } g_gpu_device->RecycleTexture(std::move(sc.texture)); sc.texture = g_gpu_device->FetchTexture(image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, - GPUTexture::Format::RGBA8, image.GetPixels(), image.GetPitch()); + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, image.GetPixels(), + image.GetPitch(), &error); if (!sc.texture) { - ERROR_LOG("Failed to upload {}x{} software cursor {} image '{}'", image.GetWidth(), image.GetHeight(), index, - sc.image_path); + ERROR_LOG("Failed to upload {}x{} software cursor {} image '{}': {}", image.GetWidth(), image.GetHeight(), index, + sc.image_path, error.GetDescription()); return; } diff --git a/src/util/media_capture.cpp b/src/util/media_capture.cpp index c757f3c37..01541401e 100644 --- a/src/util/media_capture.cpp +++ b/src/util/media_capture.cpp @@ -244,7 +244,7 @@ GPUTexture* MediaCaptureBase::GetRenderTexture() return m_render_texture.get(); m_render_texture = g_gpu_device->CreateTexture(m_video_width, m_video_height, 1, 1, 1, GPUTexture::Type::RenderTarget, - m_video_render_texture_format); + m_video_render_texture_format, GPUTexture::Flags::None); if (!m_render_texture) [[unlikely]] { ERROR_LOG("Failed to create {}x{} render texture.", m_video_width, m_video_height); diff --git a/src/util/metal_device.h b/src/util/metal_device.h index 
b1a502755..b83880418 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -120,6 +120,7 @@ public: void Unmap() override; void MakeReadyForSampling() override; + void GenerateMipmaps() override; void SetDebugName(std::string_view name) override; @@ -128,7 +129,7 @@ public: private: MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, - Format format); + Format format, Flags flags); id m_texture; @@ -150,7 +151,7 @@ public: ~MetalDownloadTexture() override; static std::unique_ptr Create(u32 width, u32 height, GPUTexture::Format format, void* memory, - size_t memory_size, u32 memory_stride); + size_t memory_size, u32 memory_stride, Error* error); void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) override; @@ -180,7 +181,7 @@ public: ALWAYS_INLINE id GetMTLBuffer() const { return m_buffer.GetBuffer(); } - bool CreateBuffer(id device); + bool CreateBuffer(id device, Error* error); // Inherited via GPUTextureBuffer void* Map(u32 required_elements) override; @@ -234,15 +235,18 @@ public: std::optional exclusive_fullscreen_control, Error* error) override; std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Type type, GPUTexture::Format format, - const void* data = nullptr, u32 data_stride = 0) override; - std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; - std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags, + const void* data = nullptr, u32 data_stride = 0, + Error* error = nullptr) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config, Error* error = nullptr) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements, + Error* error = nullptr) 
override; - std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, - void* memory, size_t memory_size, - u32 memory_stride) override; + Error* error = nullptr) override; + std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, u32 memory_stride, + Error* error = nullptr) override; bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, @@ -325,7 +329,7 @@ private: static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256; - static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32 /*16*/ * 1024 * 1024; // TODO reduce after separate allocations + static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 64 * 1024 * 1024; // TODO reduce after separate allocations static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; using DepthStateMap = std::unordered_map>; @@ -377,7 +381,7 @@ private: void RenderBlankFrame(MetalSwapChain* swap_chain); - bool CreateBuffers(); + bool CreateBuffers(Error* error); void DestroyBuffers(); bool IsRenderTargetBound(const GPUTexture* tex) const; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index ba3cd045b..251f49904 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -347,11 +347,8 @@ bool MetalDevice::CreateDeviceAndMainSwapChain(std::string_view adapter, Feature return false; } - if (!CreateBuffers()) - { - Error::SetStringView(error, "Failed to create buffers."); + if (!CreateBuffers(error)) return false; - } return true; } @@ -575,13 +572,14 @@ std::string MetalDevice::GetDriverInfo() const } } -bool MetalDevice::CreateBuffers() +bool MetalDevice::CreateBuffers(Error* error) { - if 
(!m_vertex_buffer.Create(m_device, VERTEX_BUFFER_SIZE) || !m_index_buffer.Create(m_device, INDEX_BUFFER_SIZE) || - !m_uniform_buffer.Create(m_device, UNIFORM_BUFFER_SIZE) || - !m_texture_upload_buffer.Create(m_device, TEXTURE_STREAM_BUFFER_SIZE)) + if (!m_vertex_buffer.Create(m_device, VERTEX_BUFFER_SIZE, error) || + !m_index_buffer.Create(m_device, INDEX_BUFFER_SIZE, error) || + !m_uniform_buffer.Create(m_device, UNIFORM_BUFFER_SIZE, error) || + !m_texture_upload_buffer.Create(m_device, TEXTURE_STREAM_BUFFER_SIZE, error)) { - ERROR_LOG("Failed to create vertex/index/uniform buffers."); + Error::AddPrefix(error, "Failed to create vertex/index/uniform buffers: "); return false; } @@ -980,8 +978,8 @@ std::unique_ptr MetalDevice::CreatePipeline(const GPUPipeline::Comp } MetalTexture::MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, - Format format) - : GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture) + Format format, Flags flags) + : GPUTexture(width, height, layers, levels, samples, type, format, flags), m_texture(texture) { } @@ -1141,6 +1139,15 @@ void MetalTexture::MakeReadyForSampling() dev.EndRenderPass(); } +void MetalTexture::GenerateMipmaps() +{ + DebugAssert(HasFlag(Flags::AllowGenerateMipmaps)); + MetalDevice& dev = MetalDevice::GetInstance(); + const bool is_inline = (m_use_fence_counter == dev.GetCurrentFenceCounter()); + id encoder = dev.GetBlitEncoder(is_inline); + [encoder generateMipmapsForTexture:m_texture]; +} + void MetalTexture::SetDebugName(std::string_view name) { @autoreleasepool @@ -1151,14 +1158,18 @@ void MetalTexture::SetDebugName(std::string_view name) std::unique_ptr MetalDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, - const void* data, u32 data_stride) + GPUTexture::Flags flags, const void* data, u32 data_stride, + Error* error) { - if (!GPUTexture::ValidateConfig(width, 
height, layers, layers, samples, type, format)) + if (!GPUTexture::ValidateConfig(width, height, layers, layers, samples, type, format, flags, error)) return {}; const MTLPixelFormat pixel_format = s_pixel_format_mapping[static_cast(format)]; if (pixel_format == MTLPixelFormatInvalid) + { + Error::SetStringFmt(error, "Pixel format {} is not supported.", GPUTexture::GetFormatName(format)); return {}; + } @autoreleasepool { @@ -1183,7 +1194,6 @@ std::unique_ptr MetalDevice::CreateTexture(u32 width, u32 height, u3 switch (type) { case GPUTexture::Type::Texture: - case GPUTexture::Type::DynamicTexture: desc.usage = MTLTextureUsageShaderRead; break; @@ -1192,25 +1202,25 @@ std::unique_ptr MetalDevice::CreateTexture(u32 width, u32 height, u3 desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget; break; - case GPUTexture::Type::RWTexture: - desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite; - break; + DefaultCaseIsUnreachable(); + } - default: - UnreachableCode(); - break; + if ((flags & (GPUTexture::Flags::AllowBindAsImage | GPUTexture::Flags::AllowMSAAResolveTarget)) != + GPUTexture::Flags::None) + { + desc.usage |= MTLTextureUsageShaderWrite; } id tex = [m_device newTextureWithDescriptor:desc]; if (tex == nil) { - ERROR_LOG("Failed to create {}x{} texture.", width, height); + Error::SetStringView(error, "newTextureWithDescriptor() failed"); return {}; } // This one can *definitely* go on the upload buffer. std::unique_ptr gtex( - new MetalTexture([tex retain], width, height, layers, levels, samples, type, format)); + new MetalTexture([tex retain], width, height, layers, levels, samples, type, format, flags)); if (data) { // TODO: handle multi-level uploads... 
@@ -1236,7 +1246,8 @@ MetalDownloadTexture::~MetalDownloadTexture() } std::unique_ptr MetalDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, - void* memory, size_t memory_size, u32 memory_stride) + void* memory, size_t memory_size, u32 memory_stride, + Error* error) { @autoreleasepool { @@ -1257,7 +1268,7 @@ std::unique_ptr MetalDownloadTexture::Create(u32 width, u3 buffer = [[dev.m_device newBufferWithLength:buffer_size options:options] retain]; if (buffer == nil) { - ERROR_LOG("Failed to create {} byte buffer", buffer_size); + Error::SetStringFmt(error, "Failed to create {} byte buffer", buffer_size); return {}; } @@ -1282,7 +1293,7 @@ std::unique_ptr MetalDownloadTexture::Create(u32 width, u3 deallocator:nil] retain]; if (buffer == nil) { - ERROR_LOG("Failed to import {} byte buffer", page_aligned_size); + Error::SetStringFmt(error, "Failed to import {} byte buffer", page_aligned_size); return {}; } @@ -1369,16 +1380,17 @@ void MetalDownloadTexture::SetDebugName(std::string_view name) } } -std::unique_ptr MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) +std::unique_ptr MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + Error* error) { - return MetalDownloadTexture::Create(width, height, format, nullptr, 0, 0); + return MetalDownloadTexture::Create(width, height, format, nullptr, 0, 0, error); } std::unique_ptr MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, void* memory, size_t memory_size, - u32 memory_stride) + u32 memory_stride, Error* error) { - return MetalDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride); + return MetalDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride, error); } MetalSampler::MetalSampler(id ss) : m_ss(ss) @@ -1392,7 +1404,7 @@ void MetalSampler::SetDebugName(std::string_view name) // lame.. 
have to put it on the descriptor :/ } -std::unique_ptr MetalDevice::CreateSampler(const GPUSampler::Config& config) +std::unique_ptr MetalDevice::CreateSampler(const GPUSampler::Config& config, Error* error) { @autoreleasepool { @@ -1448,7 +1460,7 @@ std::unique_ptr MetalDevice::CreateSampler(const GPUSampler::Config& } if (i == std::size(border_color_mapping)) { - ERROR_LOG("Unsupported border color: {:08X}", config.border_color.GetValue()); + Error::SetStringFmt(error, "Unsupported border color: {:08X}", config.border_color.GetValue()); return {}; } @@ -1459,7 +1471,7 @@ std::unique_ptr MetalDevice::CreateSampler(const GPUSampler::Config& id ss = [m_device newSamplerStateWithDescriptor:desc]; if (ss == nil) { - ERROR_LOG("Failed to create sampler state."); + Error::SetStringView(error, "newSamplerStateWithDescriptor failed"); return {}; } @@ -1550,6 +1562,7 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); + DebugAssert(dst->HasFlag(GPUTexture::Flags::AllowMSAAResolveTarget)); // Only does first level for now.. 
DebugAssert(dst_level == 0 && dst_layer == 0); @@ -1767,9 +1780,9 @@ MetalTextureBuffer::~MetalTextureBuffer() m_buffer.Destroy(); } -bool MetalTextureBuffer::CreateBuffer(id device) +bool MetalTextureBuffer::CreateBuffer(id device, Error* error) { - return m_buffer.Create(device, GetSizeInBytes()); + return m_buffer.Create(device, GetSizeInBytes(), error); } void* MetalTextureBuffer::Map(u32 required_elements) @@ -1804,10 +1817,10 @@ void MetalTextureBuffer::SetDebugName(std::string_view name) } std::unique_ptr MetalDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, - u32 size_in_elements) + u32 size_in_elements, Error* error) { std::unique_ptr tb = std::make_unique(format, size_in_elements); - if (!tb->CreateBuffer(m_device)) + if (!tb->CreateBuffer(m_device, error)) tb.reset(); return tb; diff --git a/src/util/metal_stream_buffer.h b/src/util/metal_stream_buffer.h index 6db4da91c..1399c7d42 100644 --- a/src/util/metal_stream_buffer.h +++ b/src/util/metal_stream_buffer.h @@ -19,6 +19,8 @@ #include #include +class Error; + class MetalStreamBuffer { public: @@ -38,7 +40,7 @@ public: ALWAYS_INLINE u32 GetCurrentSpace() const { return m_current_space; } ALWAYS_INLINE u32 GetCurrentOffset() const { return m_current_offset; } - bool Create(id device, u32 size); + bool Create(id device, u32 size, Error* error); void Destroy(); bool ReserveMemory(u32 num_bytes, u32 alignment); diff --git a/src/util/metal_stream_buffer.mm b/src/util/metal_stream_buffer.mm index 1bb958638..66a072f7d 100644 --- a/src/util/metal_stream_buffer.mm +++ b/src/util/metal_stream_buffer.mm @@ -6,6 +6,7 @@ #include "common/align.h" #include "common/assert.h" +#include "common/error.h" #include "common/log.h" LOG_CHANNEL(GPUDevice); @@ -18,7 +19,7 @@ MetalStreamBuffer::~MetalStreamBuffer() Destroy(); } -bool MetalStreamBuffer::Create(id device, u32 size) +bool MetalStreamBuffer::Create(id device, u32 size, Error* error) { @autoreleasepool { @@ -27,7 +28,7 @@ bool 
MetalStreamBuffer::Create(id device, u32 size) id new_buffer = [device newBufferWithLength:size options:options]; if (new_buffer == nil) { - ERROR_LOG("Failed to create buffer."); + Error::SetStringView(error, "newBufferWithLength failed"); return false; } diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index 29a43df2e..97d169b15 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -60,9 +60,10 @@ void OpenGLDevice::SetErrorObject(Error* errptr, std::string_view prefix, GLenum std::unique_ptr OpenGLDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, - const void* data, u32 data_stride) + GPUTexture::Flags flags, const void* data /* = nullptr */, + u32 data_stride /* = 0 */, Error* error /* = nullptr */) { - return OpenGLTexture::Create(width, height, layers, levels, samples, type, format, data, data_stride); + return OpenGLTexture::Create(width, height, layers, levels, samples, type, format, flags, data, data_stride, error); } bool OpenGLDevice::SupportsTextureFormat(GPUTexture::Format format) const diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index d16c3b6d9..c3c1e4ee0 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -52,15 +52,18 @@ public: std::optional exclusive_fullscreen_control, Error* error) override; std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Type type, GPUTexture::Format format, - const void* data = nullptr, u32 data_stride = 0) override; - std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; - std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags, + const void* data = nullptr, u32 data_stride = 0, + Error* error = nullptr) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& 
config, Error* error = nullptr) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements, + Error* error = nullptr) override; - std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, - void* memory, size_t memory_size, - u32 memory_stride) override; + Error* error = nullptr) override; + std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, u32 memory_stride, + Error* error = nullptr) override; bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, diff --git a/src/util/opengl_stream_buffer.cpp b/src/util/opengl_stream_buffer.cpp index f31ebf8e7..212a17187 100644 --- a/src/util/opengl_stream_buffer.cpp +++ b/src/util/opengl_stream_buffer.cpp @@ -5,9 +5,9 @@ #include "common/align.h" #include "common/assert.h" +#include "common/error.h" #include -#include OpenGLStreamBuffer::OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : m_target(target), m_buffer_id(buffer_id), m_size(size) @@ -65,7 +65,7 @@ public: u32 GetChunkSize() const override { return m_size; } - static std::unique_ptr Create(GLenum target, u32 size) + static std::unique_ptr Create(GLenum target, u32 size, Error* error) { glGetError(); @@ -74,9 +74,10 @@ public: glBindBuffer(target, buffer_id); glBufferData(target, size, nullptr, GL_STREAM_DRAW); - GLenum err = glGetError(); - if (err != GL_NO_ERROR) + const GLenum err = glGetError(); + if (err != GL_NO_ERROR) [[unlikely]] { + Error::SetStringFmt(error, "Failed to create buffer: 0x{:X}", err); glBindBuffer(target, 0); glDeleteBuffers(1, &buffer_id); return {}; @@ -119,7 +120,7 @@ public: u32 GetChunkSize() const override { return m_size; } - static std::unique_ptr Create(GLenum 
target, u32 size) + static std::unique_ptr Create(GLenum target, u32 size, Error* error) { glGetError(); @@ -128,9 +129,10 @@ public: glBindBuffer(target, buffer_id); glBufferData(target, size, nullptr, GL_STREAM_DRAW); - GLenum err = glGetError(); - if (err != GL_NO_ERROR) + const GLenum err = glGetError(); + if (err != GL_NO_ERROR) [[unlikely]] { + Error::SetStringFmt(error, "Failed to create buffer: 0x{:X}", err); glBindBuffer(target, 0); glDeleteBuffers(1, &buffer_id); return {}; @@ -283,7 +285,7 @@ public: return prev_position; } - static std::unique_ptr Create(GLenum target, u32 size, bool coherent = true) + static std::unique_ptr Create(GLenum target, u32 size, Error* error, bool coherent = true) { glGetError(); @@ -298,9 +300,10 @@ public: else if (GLAD_GL_EXT_buffer_storage) glBufferStorageEXT(target, size, nullptr, flags); - GLenum err = glGetError(); - if (err != GL_NO_ERROR) + const GLenum err = glGetError(); + if (err != GL_NO_ERROR) [[unlikely]] { + Error::SetStringFmt(error, "Failed to create buffer: 0x{:X}", err); glBindBuffer(target, 0); glDeleteBuffers(1, &buffer_id); return {}; @@ -325,12 +328,12 @@ private: } // namespace -std::unique_ptr OpenGLStreamBuffer::Create(GLenum target, u32 size) +std::unique_ptr OpenGLStreamBuffer::Create(GLenum target, u32 size, Error* error /* = nullptr */) { std::unique_ptr buf; if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) { - buf = BufferStorageStreamBuffer::Create(target, size); + buf = BufferStorageStreamBuffer::Create(target, size, error); if (buf) return buf; } @@ -341,11 +344,11 @@ std::unique_ptr OpenGLStreamBuffer::Create(GLenum target, u3 if (std::strcmp(vendor, "ARM") == 0 || std::strcmp(vendor, "Qualcomm") == 0) { // Mali and Adreno drivers can't do sub-buffer tracking... 
- return BufferDataStreamBuffer::Create(target, size); + return BufferDataStreamBuffer::Create(target, size, error); } - return BufferSubDataStreamBuffer::Create(target, size); + return BufferSubDataStreamBuffer::Create(target, size, error); #else - return BufferDataStreamBuffer::Create(target, size); + return BufferDataStreamBuffer::Create(target, size, error); #endif } diff --git a/src/util/opengl_stream_buffer.h b/src/util/opengl_stream_buffer.h index 7ae779eec..fa0bca26d 100644 --- a/src/util/opengl_stream_buffer.h +++ b/src/util/opengl_stream_buffer.h @@ -12,6 +12,8 @@ #include #include +class Error; + class OpenGLStreamBuffer { public: @@ -42,7 +44,7 @@ public: /// Returns the minimum granularity of blocks which sync objects will be created around. virtual u32 GetChunkSize() const = 0; - static std::unique_ptr Create(GLenum target, u32 size); + static std::unique_ptr Create(GLenum target, u32 size, Error* error = nullptr); protected: OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size); diff --git a/src/util/opengl_texture.cpp b/src/util/opengl_texture.cpp index 00793421b..65e1d3ba9 100644 --- a/src/util/opengl_texture.cpp +++ b/src/util/opengl_texture.cpp @@ -7,6 +7,7 @@ #include "common/align.h" #include "common/assert.h" +#include "common/error.h" #include "common/intrin.h" #include "common/log.h" #include "common/string_util.h" @@ -98,9 +99,9 @@ ALWAYS_INLINE static u32 GetUploadAlignment(u32 pitch) } OpenGLTexture::OpenGLTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, - GLuint id) + Flags flags, GLuint id) : GPUTexture(static_cast(width), static_cast(height), static_cast(layers), static_cast(levels), - static_cast(samples), type, format), + static_cast(samples), type, format, flags), m_id(id) { } @@ -126,14 +127,15 @@ bool OpenGLTexture::UseTextureStorage() const } std::unique_ptr OpenGLTexture::Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - Type type, Format format, const void* 
data, u32 data_pitch) + Type type, Format format, Flags flags, const void* data, + u32 data_pitch, Error* error) { - if (!ValidateConfig(width, height, layers, levels, samples, type, format)) + if (!ValidateConfig(width, height, layers, levels, samples, type, format, flags, error)) return nullptr; if (layers > 1 && data) { - ERROR_LOG("Loading texture array data not currently supported"); + Error::SetStringView(error, "Loading texture array data not currently supported"); return nullptr; } @@ -235,15 +237,16 @@ std::unique_ptr OpenGLTexture::Create(u32 width, u32 height, u32 } } - GLenum error = glGetError(); - if (error != GL_NO_ERROR) + const GLenum gl_error = glGetError(); + if (gl_error != GL_NO_ERROR) { - ERROR_LOG("Failed to create texture: 0x{:X}", error); + Error::SetStringFmt(error, "Failed to create texture: 0x{:X}", gl_error); glDeleteTextures(1, &id); return nullptr; } - return std::unique_ptr(new OpenGLTexture(width, height, layers, levels, samples, type, format, id)); + return std::unique_ptr( + new OpenGLTexture(width, height, layers, levels, samples, type, format, flags, id)); } void OpenGLTexture::CommitClear() @@ -372,6 +375,16 @@ void OpenGLTexture::Unmap() sb->Unbind(); } +void OpenGLTexture::GenerateMipmaps() +{ + DebugAssert(HasFlag(Flags::AllowGenerateMipmaps)); + OpenGLDevice::BindUpdateTextureUnit(); + const GLenum target = GetGLTarget(); + glBindTexture(target, m_id); + glGenerateMipmap(target); + glBindTexture(target, 0); +} + void OpenGLTexture::SetDebugName(std::string_view name) { #ifdef _DEBUG @@ -405,7 +418,7 @@ void OpenGLSampler::SetDebugName(std::string_view name) #endif } -std::unique_ptr OpenGLDevice::CreateSampler(const GPUSampler::Config& config) +std::unique_ptr OpenGLDevice::CreateSampler(const GPUSampler::Config& config, Error* error /* = nullptr */) { static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ GL_REPEAT, // Repeat @@ -433,7 +446,7 @@ std::unique_ptr OpenGLDevice::CreateSampler(const 
GPUSampler::Config glGenSamplers(1, &sampler); if (glGetError() != GL_NO_ERROR) { - ERROR_LOG("Failed to create sampler: {:X}", sampler); + Error::SetStringFmt(error, "Failed to create sampler: {:X}", sampler); return {}; } @@ -697,7 +710,7 @@ void OpenGLTextureBuffer::SetDebugName(std::string_view name) } std::unique_ptr OpenGLDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, - u32 size_in_elements) + u32 size_in_elements, Error* error) { const bool use_ssbo = OpenGLDevice::GetInstance().GetFeatures().texture_buffers_emulated_with_ssbo; const u32 buffer_size = GPUTextureBuffer::GetElementSize(format) * size_in_elements; @@ -708,13 +721,13 @@ std::unique_ptr OpenGLDevice::CreateTextureBuffer(GPUTextureBu glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size); if (static_cast(buffer_size) > max_ssbo_size) { - ERROR_LOG("Buffer size of {} not supported, max is {}", buffer_size, max_ssbo_size); + Error::SetStringFmt(error, "Buffer size of {} not supported, max is {}", buffer_size, max_ssbo_size); return {}; } } const GLenum target = (use_ssbo ? GL_SHADER_STORAGE_BUFFER : GL_TEXTURE_BUFFER); - std::unique_ptr buffer = OpenGLStreamBuffer::Create(target, buffer_size); + std::unique_ptr buffer = OpenGLStreamBuffer::Create(target, buffer_size, error); if (!buffer) return {}; buffer->Unbind(); @@ -726,7 +739,7 @@ std::unique_ptr OpenGLDevice::CreateTextureBuffer(GPUTextureBu glGenTextures(1, &texture_id); if (const GLenum err = glGetError(); err != GL_NO_ERROR) { - ERROR_LOG("Failed to create texture for buffer: 0x{:X}", err); + Error::SetStringFmt(error, "Failed to create texture for buffer: 0x{:X}", err); return {}; } @@ -772,7 +785,8 @@ OpenGLDownloadTexture::~OpenGLDownloadTexture() } std::unique_ptr OpenGLDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, - void* memory, size_t memory_size, u32 memory_pitch) + void* memory, size_t memory_size, u32 memory_pitch, + Error* error) { const u32 buffer_pitch = memory ? 
memory_pitch : @@ -801,7 +815,7 @@ std::unique_ptr OpenGLDownloadTexture::Create(u32 width, if (!buffer_map) { - ERROR_LOG("Failed to map persistent download buffer"); + Error::SetStringView(error, "Failed to map persistent download buffer"); glDeleteBuffers(1, &buffer_id); return {}; } @@ -814,8 +828,11 @@ std::unique_ptr OpenGLDownloadTexture::Create(u32 width, const bool imported = (memory != nullptr); u8* cpu_buffer = imported ? static_cast(memory) : static_cast(Common::AlignedMalloc(buffer_size, VECTOR_ALIGNMENT)); - if (!cpu_buffer) + if (!cpu_buffer) [[unlikely]] + { + Error::SetStringView(error, "Failed to get client-side memory pointer."); return {}; + } return std::unique_ptr( new OpenGLDownloadTexture(width, height, format, imported, 0, cpu_buffer, buffer_size, cpu_buffer, buffer_pitch)); @@ -929,16 +946,17 @@ void OpenGLDownloadTexture::SetDebugName(std::string_view name) glObjectLabel(GL_BUFFER, m_buffer_id, static_cast(name.length()), name.data()); } -std::unique_ptr OpenGLDevice::CreateDownloadTexture(u32 width, u32 height, - GPUTexture::Format format) +std::unique_ptr +OpenGLDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, Error* error /* = nullptr */) { - return OpenGLDownloadTexture::Create(width, height, format, nullptr, 0, 0); + return OpenGLDownloadTexture::Create(width, height, format, nullptr, 0, 0, error); } std::unique_ptr OpenGLDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, void* memory, - size_t memory_size, u32 memory_stride) + size_t memory_size, u32 memory_stride, + Error* error /* = nullptr */) { // not _really_ memory importing, but PBOs are broken on Intel.... 
- return OpenGLDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride); + return OpenGLDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride, error); } diff --git a/src/util/opengl_texture.h b/src/util/opengl_texture.h index 4a4dd03e8..5bc290c2b 100644 --- a/src/util/opengl_texture.h +++ b/src/util/opengl_texture.h @@ -28,11 +28,13 @@ public: bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; void Unmap() override; + void GenerateMipmaps() override; void SetDebugName(std::string_view name) override; static std::unique_ptr Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, - Format format, const void* data = nullptr, u32 data_pitch = 0); + Format format, Flags flags, const void* data, u32 data_pitch, + Error* error); bool UseTextureStorage() const; @@ -46,7 +48,8 @@ public: OpenGLTexture& operator=(const OpenGLTexture&) = delete; private: - OpenGLTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, GLuint id); + OpenGLTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, Flags flags, + GLuint id); GLuint m_id = 0; @@ -108,7 +111,7 @@ public: ~OpenGLDownloadTexture() override; static std::unique_ptr Create(u32 width, u32 height, GPUTexture::Format format, void* memory, - size_t memory_size, u32 memory_pitch); + size_t memory_size, u32 memory_pitch, Error* error); void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) override; diff --git a/src/util/postprocessing.cpp b/src/util/postprocessing.cpp index 29125e84a..02f208fc4 100644 --- a/src/util/postprocessing.cpp +++ b/src/util/postprocessing.cpp @@ -564,10 +564,12 @@ bool 
PostProcessing::Chain::CheckTargets(GPUTexture::Format target_format, u32 t // In case any allocs fail. DestroyTextures(); - if (!(m_input_texture = g_gpu_device->FetchTexture(target_width, target_height, 1, 1, 1, - GPUTexture::Type::RenderTarget, target_format)) || - !(m_output_texture = g_gpu_device->FetchTexture(target_width, target_height, 1, 1, 1, - GPUTexture::Type::RenderTarget, target_format))) + if (!(m_input_texture = + g_gpu_device->FetchTexture(target_width, target_height, 1, 1, 1, GPUTexture::Type::RenderTarget, + target_format, GPUTexture::Flags::None)) || + !(m_output_texture = + g_gpu_device->FetchTexture(target_width, target_height, 1, 1, 1, GPUTexture::Type::RenderTarget, + target_format, GPUTexture::Flags::None))) { DestroyTextures(); return false; @@ -806,7 +808,7 @@ GPUTexture* PostProcessing::GetDummyTexture() const u32 zero = 0; s_dummy_texture = g_gpu_device->FetchTexture(1, 1, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, - &zero, sizeof(zero)); + GPUTexture::Flags::None, &zero, sizeof(zero)); if (!s_dummy_texture) ERROR_LOG("Failed to create dummy texture."); diff --git a/src/util/postprocessing_shader_fx.cpp b/src/util/postprocessing_shader_fx.cpp index dc4234048..2f3f945e6 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -1138,12 +1138,10 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer tex.rt_scale = 0.0f; tex.texture = g_gpu_device->FetchTexture(image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, - GPUTexture::Format::RGBA8, image.GetPixels(), image.GetPitch()); + GPUTexture::Format::RGBA8, GPUTexture::Flags::None, image.GetPixels(), + image.GetPitch(), error); if (!tex.texture) - { - Error::SetStringFmt(error, "Failed to create {}x{} texture ({})", image.GetWidth(), image.GetHeight(), source); return false; - } DEV_LOG("Loaded {}x{} texture ({})", image.GetWidth(), image.GetHeight(), source); } @@ -1457,12 +1455,10 
@@ bool PostProcessing::ReShadeFXShader::ResizeOutput(GPUTexture::Format format, u3 const u32 t_width = std::max(static_cast(static_cast(width) * tex.rt_scale), 1u); const u32 t_height = std::max(static_cast(static_cast(height) * tex.rt_scale), 1u); - tex.texture = g_gpu_device->FetchTexture(t_width, t_height, 1, 1, 1, GPUTexture::Type::RenderTarget, tex.format); + tex.texture = g_gpu_device->FetchTexture(t_width, t_height, 1, 1, 1, GPUTexture::Type::RenderTarget, tex.format, + GPUTexture::Flags::None); if (!tex.texture) - { - ERROR_LOG("Failed to create {}x{} texture", t_width, t_height); return {}; - } } m_valid = true; diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 4a41a6818..11039075d 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -2012,11 +2012,8 @@ bool VulkanDevice::CreateDeviceAndMainSwapChain(std::string_view adapter, Featur m_main_swap_chain = std::move(swap_chain); } - if (!CreateNullTexture()) - { - Error::SetStringView(error, "Failed to create dummy texture"); + if (!CreateNullTexture(error)) return false; - } if (!CreateBuffers() || !CreatePersistentDescriptorSets()) { @@ -2762,12 +2759,15 @@ void VulkanDevice::UnmapUniformBuffer(u32 size) m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; } -bool VulkanDevice::CreateNullTexture() +bool VulkanDevice::CreateNullTexture(Error* error) { - m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::RWTexture, GPUTexture::Format::RGBA8, - VK_FORMAT_R8G8B8A8_UNORM); + m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, + GPUTexture::Flags::AllowBindAsImage, VK_FORMAT_R8G8B8A8_UNORM, error); if (!m_null_texture) + { + Error::AddPrefix(error, "Failed to create null texture: "); return false; + } const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); const VkImageSubresourceRange srr{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; @@ -2779,9 +2779,12 @@ bool VulkanDevice::CreateNullTexture() 
Vulkan::SetObjectName(m_device, m_null_texture->GetView(), "Null texture view"); // Bind null texture and point sampler state to all. - const VkSampler point_sampler = GetSampler(GPUSampler::GetNearestConfig()); + const VkSampler point_sampler = GetSampler(GPUSampler::GetNearestConfig(), error); if (point_sampler == VK_NULL_HANDLE) + { + Error::AddPrefix(error, "Failed to get nearest sampler for init bind: "); return false; + } for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) m_current_samplers[i] = point_sampler; @@ -3010,10 +3013,14 @@ void VulkanDevice::RenderBlankFrame(VulkanSwapChain* swap_chain) } bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsageFlags buffer_usage, - VkDeviceMemory* out_memory, VkBuffer* out_buffer, VkDeviceSize* out_offset) + VkDeviceMemory* out_memory, VkBuffer* out_buffer, VkDeviceSize* out_offset, + Error* error) { if (!m_optional_extensions.vk_ext_external_memory_host) + { + Error::SetStringView(error, "VK_EXT_external_memory_host is not supported."); return false; + } // Align to the nearest page void* data_aligned = @@ -3031,7 +3038,7 @@ bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsa data_aligned, &pointer_properties); if (res != VK_SUCCESS || pointer_properties.memoryTypeBits == 0) { - LOG_VULKAN_ERROR(res, "vkGetMemoryHostPointerPropertiesEXT() failed: "); + Vulkan::SetErrorObject(error, "vkGetMemoryHostPointerPropertiesEXT() failed: ", res); return false; } @@ -3044,7 +3051,7 @@ bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsa res = vmaFindMemoryTypeIndex(m_allocator, pointer_properties.memoryTypeBits, &vma_alloc_info, &memory_index); if (res != VK_SUCCESS) { - LOG_VULKAN_ERROR(res, "vmaFindMemoryTypeIndex() failed: "); + Vulkan::SetErrorObject(error, "vmaFindMemoryTypeIndex() failed: ", res); return false; } @@ -3060,7 +3067,7 @@ bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsa res = 
vkAllocateMemory(m_device, &alloc_info, nullptr, &imported_memory); if (res != VK_SUCCESS) { - LOG_VULKAN_ERROR(res, "vkAllocateMemory() failed: "); + Vulkan::SetErrorObject(error, "vkAllocateMemory() failed: ", res); return false; } @@ -3080,7 +3087,7 @@ bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsa res = vkCreateBuffer(m_device, &buffer_info, nullptr, &imported_buffer); if (res != VK_SUCCESS) { - LOG_VULKAN_ERROR(res, "vkCreateBuffer() failed: "); + Vulkan::SetErrorObject(error, "vkCreateBuffer() failed: ", res); if (imported_memory != VK_NULL_HANDLE) vkFreeMemory(m_device, imported_memory, nullptr); @@ -3125,14 +3132,13 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText if (InRenderPass()) EndRenderPass(); + m_current_framebuffer = VK_NULL_HANDLE; if (m_num_current_render_targets == 0 && !m_current_depth_target) - { - m_current_framebuffer = VK_NULL_HANDLE; return; - } - if (!m_optional_extensions.vk_khr_dynamic_rendering || - ((flags & GPUPipeline::ColorFeedbackLoop) && !m_optional_extensions.vk_khr_dynamic_rendering_local_read)) + if (!(flags & GPUPipeline::BindRenderTargetsAsImages) && + (!m_optional_extensions.vk_khr_dynamic_rendering || + ((flags & GPUPipeline::ColorFeedbackLoop) && !m_optional_extensions.vk_khr_dynamic_rendering_local_read))) { m_current_framebuffer = m_framebuffer_manager.Lookup( (m_num_current_render_targets > 0) ? 
reinterpret_cast(m_current_render_targets.data()) : nullptr, @@ -3594,7 +3600,7 @@ void VulkanDevice::UnbindTexture(VulkanTexture* tex) } } - if (tex->IsRenderTarget() || tex->IsRWTexture()) + if (tex->IsRenderTarget()) { for (u32 i = 0; i < m_num_current_render_targets; i++) { diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index 43a982325..8ad11229a 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -88,15 +88,18 @@ public: std::optional exclusive_fullscreen_control, Error* error) override; std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Type type, GPUTexture::Format format, - const void* data = nullptr, u32 data_stride = 0) override; - std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; - std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags, + const void* data = nullptr, u32 data_stride = 0, + Error* error = nullptr) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config, Error* error = nullptr) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements, + Error* error = nullptr) override; - std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, - void* memory, size_t memory_size, - u32 memory_stride) override; + Error* error = nullptr) override; + std::unique_ptr CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, u32 memory_stride, + Error* error = nullptr) override; bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, @@ -351,20 +354,20 @@ private: void 
DestroyCommandBuffers(); bool CreatePersistentDescriptorPool(); void DestroyPersistentDescriptorPool(); - bool CreateNullTexture(); + bool CreateNullTexture(Error* error); bool CreateBuffers(); void DestroyBuffers(); bool CreatePipelineLayouts(); void DestroyPipelineLayouts(); bool CreatePersistentDescriptorSets(); void DestroyPersistentDescriptorSets(); - VkSampler GetSampler(const GPUSampler::Config& config); + VkSampler GetSampler(const GPUSampler::Config& config, Error* error = nullptr); void DestroySamplers(); void RenderBlankFrame(VulkanSwapChain* swap_chain); bool TryImportHostMemory(void* data, size_t data_size, VkBufferUsageFlags buffer_usage, VkDeviceMemory* out_memory, - VkBuffer* out_buffer, VkDeviceSize* out_offset); + VkBuffer* out_buffer, VkDeviceSize* out_offset, Error* error); /// Set dirty flags on everything to force re-bind at next draw time. void InvalidateCachedState(); diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp index cb121ca44..12c833635 100644 --- a/src/util/vulkan_texture.cpp +++ b/src/util/vulkan_texture.cpp @@ -8,6 +8,7 @@ #include "common/align.h" #include "common/assert.h" #include "common/bitutils.h" +#include "common/error.h" #include "common/log.h" LOG_CHANNEL(GPUDevice); @@ -42,9 +43,9 @@ static VkImageLayout GetVkImageLayout(VulkanTexture::Layout layout) } VulkanTexture::VulkanTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, - VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format) + Flags flags, VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format) : GPUTexture(static_cast(width), static_cast(height), static_cast(layers), static_cast(levels), - static_cast(samples), type, format), + static_cast(samples), type, format, flags), m_image(image), m_allocation(allocation), m_view(view), m_vk_format(vk_format) { } @@ -55,9 +56,10 @@ VulkanTexture::~VulkanTexture() } std::unique_ptr VulkanTexture::Create(u32 width, u32 
height, u32 layers, u32 levels, u32 samples, - Type type, Format format, VkFormat vk_format) + Type type, Format format, Flags flags, VkFormat vk_format, + Error* error) { - if (!ValidateConfig(width, height, layers, levels, samples, type, format)) + if (!ValidateConfig(width, height, layers, levels, samples, type, format, flags, error)) return {}; VulkanDevice& dev = VulkanDevice::GetInstance(); @@ -92,11 +94,9 @@ std::unique_ptr VulkanTexture::Create(u32 width, u32 height, u32 s_identity_swizzle, {VK_IMAGE_ASPECT_COLOR_BIT, 0, static_cast(levels), 0, 1}}; - // TODO: Don't need the feedback loop stuff yet. switch (type) { case Type::Texture: - case Type::DynamicTexture: { ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; } @@ -120,17 +120,13 @@ std::unique_ptr VulkanTexture::Create(u32 width, u32 height, u32 } break; - case Type::RWTexture: - { - DebugAssert(levels == 1); - ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; - } - break; + DefaultCaseIsUnreachable(); + } - default: - return {}; + if ((flags & Flags::AllowBindAsImage) != Flags::None) + { + DebugAssert(levels == 1); + ici.usage |= VK_IMAGE_USAGE_STORAGE_BIT; } // Use dedicated allocations for typical RT size @@ -146,14 +142,9 @@ std::unique_ptr VulkanTexture::Create(u32 width, u32 height, u32 aci.flags &= ~VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; res = vmaCreateImage(dev.GetAllocator(), &ici, &aci, &image, &allocation, nullptr); } - if (res == VK_ERROR_OUT_OF_DEVICE_MEMORY) + if (res != VK_SUCCESS) { - ERROR_LOG("Failed to allocate device memory for {}x{} texture", width, height); - return {}; - } - else if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vmaCreateImage failed: "); + Vulkan::SetErrorObject(error, "vmaCreateImage failed: ", res); return {}; } @@ -162,13 +153,13 
@@ std::unique_ptr VulkanTexture::Create(u32 width, u32 height, u32 res = vkCreateImageView(dev.GetVulkanDevice(), &vci, nullptr, &view); if (res != VK_SUCCESS) { - LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); + Vulkan::SetErrorObject(error, "vkCreateImageView failed: ", res); vmaDestroyImage(dev.GetAllocator(), image, allocation); return {}; } return std::unique_ptr( - new VulkanTexture(width, height, layers, levels, samples, type, format, image, allocation, view, vk_format)); + new VulkanTexture(width, height, layers, levels, samples, type, format, flags, image, allocation, view, vk_format)); } void VulkanTexture::Destroy(bool defer) @@ -228,10 +219,9 @@ VkClearDepthStencilValue VulkanTexture::GetClearDepthValue() const VkCommandBuffer VulkanTexture::GetCommandBufferForUpdate() { VulkanDevice& dev = VulkanDevice::GetInstance(); - if ((m_type != Type::Texture && m_type != Type::DynamicTexture) || - m_use_fence_counter == dev.GetCurrentFenceCounter()) + if (m_type != Type::Texture || m_use_fence_counter == dev.GetCurrentFenceCounter()) { - // Console.WriteLn("Texture update within frame, can't use do beforehand"); + // DEV_LOG("Texture update within frame, can't use do beforehand"); if (dev.InRenderPass()) dev.EndRenderPass(); return dev.GetCurrentCommandBuffer(); @@ -730,13 +720,52 @@ void VulkanTexture::MakeReadyForSampling() TransitionToLayout(Layout::ShaderReadOnly); } +void VulkanTexture::GenerateMipmaps() +{ + DebugAssert(HasFlag(Flags::AllowGenerateMipmaps)); + + const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate(); + + if (m_layout == Layout::Undefined) + TransitionToLayout(cmdbuf, Layout::TransferSrc); + + for (u32 layer = 0; layer < m_layers; layer++) + { + for (u32 dst_level = 1; dst_level < m_levels; dst_level++) + { + const u32 src_level = dst_level - 1; + const u32 src_width = std::max(m_width >> src_level, 1u); + const u32 src_height = std::max(m_height >> src_level, 1u); + const u32 dst_width = std::max(m_width >> dst_level, 1u); + 
const u32 dst_height = std::max(m_height >> dst_level, 1u); + + TransitionSubresourcesToLayout(cmdbuf, layer, 1, src_level, 1, m_layout, Layout::TransferSrc); + TransitionSubresourcesToLayout(cmdbuf, layer, 1, dst_level, 1, m_layout, Layout::TransferDst); + + const VkImageBlit blit = { + {VK_IMAGE_ASPECT_COLOR_BIT, src_level, 0u, 1u}, // srcSubresource + {{0, 0, 0}, {static_cast(src_width), static_cast(src_height), 1}}, // srcOffsets + {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, 0u, 1u}, // dstSubresource + {{0, 0, 0}, {static_cast(dst_width), static_cast(dst_height), 1}} // dstOffsets + }; + + vkCmdBlitImage(cmdbuf, m_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_LINEAR); + + TransitionSubresourcesToLayout(cmdbuf, layer, 1, src_level, 1, Layout::TransferSrc, m_layout); + TransitionSubresourcesToLayout(cmdbuf, layer, 1, dst_level, 1, Layout::TransferDst, m_layout); + } + } +} + std::unique_ptr VulkanDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, - const void* data /* = nullptr */, u32 data_stride /* = 0 */) + GPUTexture::Flags flags, const void* data /* = nullptr */, + u32 data_stride /* = 0 */, Error* error /* = nullptr */) { const VkFormat vk_format = VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast(format)]; std::unique_ptr tex = - VulkanTexture::Create(width, height, layers, levels, samples, type, format, vk_format); + VulkanTexture::Create(width, height, layers, levels, samples, type, format, flags, vk_format, error); if (tex && data) tex->Update(0, 0, width, height, data, data_stride); @@ -757,7 +786,7 @@ void VulkanSampler::SetDebugName(std::string_view name) Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_sampler, name); } -VkSampler VulkanDevice::GetSampler(const GPUSampler::Config& config) +VkSampler VulkanDevice::GetSampler(const GPUSampler::Config& config, Error* error) { const auto it 
= m_sampler_map.find(config.key); if (it != m_sampler_map.end()) @@ -833,7 +862,10 @@ VkSampler VulkanDevice::GetSampler(const GPUSampler::Config& config) VkSampler sampler = VK_NULL_HANDLE; VkResult res = vkCreateSampler(m_device, &ci, nullptr, &sampler); if (res != VK_SUCCESS) + { LOG_VULKAN_ERROR(res, "vkCreateSampler() failed: "); + Vulkan::SetErrorObject(error, "vkCreateSampler() failed: ", res); + } m_sampler_map.emplace(config.key, sampler); return sampler; @@ -849,9 +881,9 @@ void VulkanDevice::DestroySamplers() m_sampler_map.clear(); } -std::unique_ptr VulkanDevice::CreateSampler(const GPUSampler::Config& config) +std::unique_ptr VulkanDevice::CreateSampler(const GPUSampler::Config& config, Error* error /* = nullptr */) { - const VkSampler vsampler = GetSampler(config); + const VkSampler vsampler = GetSampler(config, error); if (vsampler == VK_NULL_HANDLE) return {}; @@ -925,7 +957,7 @@ void VulkanTextureBuffer::SetDebugName(std::string_view name) } std::unique_ptr VulkanDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, - u32 size_in_elements) + u32 size_in_elements, Error* error) { static constexpr std::array(GPUTextureBuffer::Format::MaxCount)> format_mapping = {{ VK_FORMAT_R16_UINT, // R16UI @@ -939,7 +971,7 @@ std::unique_ptr VulkanDevice::CreateTextureBuffer(GPUTextureBu tb->m_descriptor_set = AllocatePersistentDescriptorSet(m_single_texture_buffer_ds_layout); if (tb->m_descriptor_set == VK_NULL_HANDLE) { - ERROR_LOG("Failed to allocate persistent descriptor set for texture buffer."); + Error::SetStringView(error, "Failed to allocate persistent descriptor set for texture buffer."); tb->Destroy(false); return {}; } @@ -996,7 +1028,7 @@ VulkanDownloadTexture::~VulkanDownloadTexture() std::unique_ptr VulkanDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, void* memory, size_t memory_size, - u32 memory_stride) + u32 memory_stride, Error* error) { VulkanDevice& dev = VulkanDevice::GetInstance(); VmaAllocation allocation = 
VK_NULL_HANDLE; @@ -1031,7 +1063,7 @@ std::unique_ptr VulkanDownloadTexture::Create(u32 width, VkResult res = vmaCreateBuffer(VulkanDevice::GetInstance().GetAllocator(), &bci, &aci, &buffer, &allocation, &ai); if (res != VK_SUCCESS) { - LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: "); + Vulkan::SetErrorObject(error, "vmaCreateBuffer() failed: ", res); return {}; } @@ -1045,7 +1077,7 @@ std::unique_ptr VulkanDownloadTexture::Create(u32 width, Assert(buffer_size <= memory_size); if (!dev.TryImportHostMemory(memory, memory_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, &dev_memory, &buffer, - &memory_offset)) + &memory_offset, error)) { return {}; } @@ -1177,15 +1209,16 @@ void VulkanDownloadTexture::SetDebugName(std::string_view name) Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_buffer, name); } -std::unique_ptr VulkanDevice::CreateDownloadTexture(u32 width, u32 height, - GPUTexture::Format format) +std::unique_ptr +VulkanDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, Error* error /* = nullptr */) { - return VulkanDownloadTexture::Create(width, height, format, nullptr, 0, 0); + return VulkanDownloadTexture::Create(width, height, format, nullptr, 0, 0, error); } std::unique_ptr VulkanDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, void* memory, - size_t memory_size, u32 memory_stride) + size_t memory_size, u32 memory_stride, + Error* error /* = nullptr */) { - return VulkanDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride); + return VulkanDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride, error); } diff --git a/src/util/vulkan_texture.h b/src/util/vulkan_texture.h index 3d7fcfa2f..f7dd8b601 100644 --- a/src/util/vulkan_texture.h +++ b/src/util/vulkan_texture.h @@ -38,7 +38,7 @@ public: ~VulkanTexture() override; static std::unique_ptr Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, - 
Format format, VkFormat vk_format); + Format format, Flags flags, VkFormat vk_format, Error* error); void Destroy(bool defer); ALWAYS_INLINE VkImage GetImage() const { return m_image; } @@ -54,6 +54,7 @@ public: bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; void Unmap() override; void MakeReadyForSampling() override; + void GenerateMipmaps() override; void SetDebugName(std::string_view name) override; @@ -80,8 +81,8 @@ public: VkDescriptorSet GetDescriptorSetWithSampler(VkSampler sampler); private: - VulkanTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, VkImage image, - VmaAllocation allocation, VkImageView view, VkFormat vk_format); + VulkanTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, Flags flags, + VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format); VkCommandBuffer GetCommandBufferForUpdate(); void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const; @@ -159,7 +160,7 @@ public: ~VulkanDownloadTexture() override; static std::unique_ptr Create(u32 width, u32 height, GPUTexture::Format format, void* memory, - size_t memory_size, u32 memory_stride); + size_t memory_size, u32 memory_stride, Error* error); void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) override; From 24dfd30839377f29d01e5034fc0bf3378df6e222 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 16:36:28 +1000 Subject: [PATCH 06/35] Image: Refactor to a more generic class --- src/core/fullscreen_ui.cpp | 5 +- src/core/game_list.cpp | 2 +- src/core/gpu.cpp | 131 +++---- src/core/gpu.h | 4 +- src/core/gpu_hw_texture_cache.cpp | 59 ++- src/core/gpu_hw_texture_cache.h | 4 +- src/core/system.cpp | 51 ++- src/core/system.h | 2 +- 
src/util/gpu_device.cpp | 22 ++ src/util/gpu_device.h | 4 + src/util/gpu_texture.cpp | 156 +++----- src/util/gpu_texture.h | 9 +- src/util/image.cpp | 517 ++++++++++++++++++++------ src/util/image.h | 204 ++++------ src/util/imgui_fullscreen.cpp | 41 +- src/util/imgui_fullscreen.h | 3 +- src/util/imgui_manager.cpp | 2 +- src/util/postprocessing_shader_fx.cpp | 2 +- 18 files changed, 698 insertions(+), 520 deletions(-) diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index 16e189aac..cdc45ab2d 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -112,7 +112,6 @@ using ImGuiFullscreen::BeginNavBar; using ImGuiFullscreen::CenterImage; using ImGuiFullscreen::CloseChoiceDialog; using ImGuiFullscreen::CloseFileSelector; -using ImGuiFullscreen::CreateTextureFromImage; using ImGuiFullscreen::DefaultActiveButton; using ImGuiFullscreen::DrawShadowedText; using ImGuiFullscreen::EndFullscreenColumns; @@ -5966,7 +5965,7 @@ bool FullscreenUI::InitializeSaveStateListEntryFromPath(SaveStateListEntry* li, li->path = std::move(path); li->global = global; if (ssi->screenshot.IsValid()) - li->preview_texture = CreateTextureFromImage(ssi->screenshot); + li->preview_texture = g_gpu_device->FetchAndUploadTextureImage(ssi->screenshot); return true; } @@ -5994,7 +5993,7 @@ u32 FullscreenUI::PopulateSaveStateListEntries(const std::string& title, const s li.title = FSUI_STR("Undo Load State"); li.summary = FSUI_STR("Restores the state of the system prior to the last state loaded."); if (ssi->screenshot.IsValid()) - li.preview_texture = CreateTextureFromImage(ssi->screenshot); + li.preview_texture = g_gpu_device->FetchAndUploadTextureImage(ssi->screenshot); s_save_state_selector_slots.push_back(std::move(li)); } } diff --git a/src/core/game_list.cpp b/src/core/game_list.cpp index 3db8ec8ea..0ed468509 100644 --- a/src/core/game_list.cpp +++ b/src/core/game_list.cpp @@ -1778,7 +1778,7 @@ std::string GameList::GetGameIconPath(std::string_view serial, 
std::string_view INFO_LOG("Extracting memory card icon from {} ({}) to {}", fi.filename, Path::GetFileTitle(memcard_path), Path::GetFileTitle(ret)); - RGBA8Image image(MemoryCardImage::ICON_WIDTH, MemoryCardImage::ICON_HEIGHT); + Image image(MemoryCardImage::ICON_WIDTH, MemoryCardImage::ICON_HEIGHT, ImageFormat::RGBA8); std::memcpy(image.GetPixels(), &fi.icon_frames.front().pixels, MemoryCardImage::ICON_WIDTH * MemoryCardImage::ICON_HEIGHT * sizeof(u32)); serial_entry->icon_was_extracted = image.SaveToFile(ret.c_str()); diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 22946bd1b..284218a45 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -74,10 +74,8 @@ static u32 s_active_gpu_cycles_frames = 0; static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; -static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, - u32 texture_data_stride, GPUTexture::Format texture_format, - std::string osd_key); +static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string path, FileSystem::ManagedCFilePtr fp, + u8 quality, bool clear_alpha, bool flip_y, Image image, std::string osd_key); GPU::GPU() { @@ -2423,48 +2421,38 @@ void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rota GSVector4(left_padding, top_padding, left_padding + display_width * scale, top_padding + display_height * scale)); } -bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, - u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, - u32 texture_data_stride, GPUTexture::Format texture_format, std::string osd_key) +bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string path, FileSystem::ManagedCFilePtr fp, u8 quality, + bool clear_alpha, bool flip_y, Image image, std::string osd_key) { - bool result; + 
Error error; - const char* extension = std::strrchr(filename.c_str(), '.'); - if (extension) + if (flip_y) + image.FlipY(); + + if (image.GetFormat() != ImageFormat::RGBA8) { - if (GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) + std::optional convert_image = image.ConvertToRGBA8(&error); + if (!convert_image.has_value()) { - if (clear_alpha) - { - for (u32& pixel : texture_data) - pixel |= 0xFF000000u; - } - - if (flip_y) - GPUTexture::FlipTextureDataRGBA8(width, height, reinterpret_cast(texture_data.data()), - texture_data_stride); - - Assert(texture_data_stride == sizeof(u32) * width); - RGBA8Image image(width, height, std::move(texture_data)); - if (image.SaveToFile(filename.c_str(), fp.get(), quality)) - { - result = true; - } - else - { - ERROR_LOG("Unknown extension in filename '{}' or save error: '{}'", filename, extension); - result = false; - } + ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", Image::GetFormatName(image.GetFormat()), + error.GetDescription()); + image.Invalidate(); } else { - result = false; + image = std::move(convert_image.value()); } } - else + + bool result = false; + if (image.IsValid()) { - ERROR_LOG("Unable to determine file extension for '{}'", filename); - result = false; + if (clear_alpha) + image.SetAllPixelsOpaque(); + + result = image.SaveToFile(path.c_str(), fp.get(), quality, &error); + if (!result) + ERROR_LOG("Failed to save screenshot to '{}': '{}'", Path::GetFileName(path), error.GetDescription()); } if (!osd_key.empty()) @@ -2472,7 +2460,7 @@ bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), - Path::GetFileName(filename), + Path::GetFileName(path), result ? 
Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); } @@ -2488,17 +2476,16 @@ bool GPU::WriteDisplayTextureToFile(std::string filename) const u32 read_y = static_cast(m_display_texture_view_y); const u32 read_width = static_cast(m_display_texture_view_width); const u32 read_height = static_cast(m_display_texture_view_height); + const ImageFormat read_format = GPUTexture::GetImageFormatForTextureFormat(m_display_texture->GetFormat()); + if (read_format == ImageFormat::None) + return false; - const u32 texture_data_stride = - Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4); - std::vector texture_data((texture_data_stride * read_height) / sizeof(u32)); - + Image image(read_width, read_height, read_format); std::unique_ptr dltex; if (g_gpu_device->GetFeatures().memory_import) { - dltex = - g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), texture_data.data(), - texture_data.size() * sizeof(u32), texture_data_stride); + dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), + image.GetPixels(), image.GetStorageSize(), image.GetPitch()); } if (!dltex) { @@ -2511,7 +2498,7 @@ bool GPU::WriteDisplayTextureToFile(std::string filename) } dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); - if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride)) + if (!dltex->ReadTexels(0, 0, read_width, read_height, image.GetPixels(), image.GetPitch())) { RestoreDeviceContext(); return false; @@ -2530,17 +2517,19 @@ bool GPU::WriteDisplayTextureToFile(std::string filename) constexpr bool clear_alpha = true; const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); - return CompressAndWriteTextureToFile( - read_width, read_height, std::move(filename), std::move(fp), g_settings.display_screenshot_quality, clear_alpha, - flip_y, std::move(texture_data), 
texture_data_stride, m_display_texture->GetFormat(), std::string()); + return CompressAndWriteTextureToFile(read_width, read_height, std::move(filename), std::move(fp), + g_settings.display_screenshot_quality, clear_alpha, flip_y, std::move(image), + std::string()); } bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx, std::vector* out_pixels, u32* out_stride, - GPUTexture::Format* out_format) + bool postfx, Image* out_image) { const GPUTexture::Format hdformat = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8; + const ImageFormat image_format = GPUTexture::GetImageFormatForTextureFormat(hdformat); + if (image_format == ImageFormat::None) + return false; auto render_texture = g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat, GPUTexture::Flags::None); @@ -2552,34 +2541,33 @@ bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i displ // TODO: this should use copy shader instead. 
RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx); - const u32 stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(hdformat) * width, sizeof(u32)); - out_pixels->resize((height * stride) / sizeof(u32)); + Image image(width, height, image_format); + Error error; std::unique_ptr dltex; if (g_gpu_device->GetFeatures().memory_import) { - dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(), - out_pixels->size() * sizeof(u32), stride); + dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, image.GetPixels(), image.GetStorageSize(), + image.GetPitch(), &error); } if (!dltex) { - if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat))) + if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, &error))) { - ERROR_LOG("Failed to create {}x{} download texture", width, height); + ERROR_LOG("Failed to create {}x{} download texture: {}", width, height, error.GetDescription()); return false; } } dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); - if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride)) + if (!dltex->ReadTexels(0, 0, width, height, image.GetPixels(), image.GetPitch())) { RestoreDeviceContext(); return false; } - *out_stride = stride; - *out_format = hdformat; RestoreDeviceContext(); + *out_image = std::move(image); return true; } @@ -2656,11 +2644,8 @@ bool GPU::RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u if (width == 0 || height == 0) return false; - std::vector pixels; - u32 pixels_stride; - GPUTexture::Format pixels_format; - if (!RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &pixels, &pixels_stride, - &pixels_format)) + Image image; + if (!RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &image)) { ERROR_LOG("Failed to render {}x{} screenshot", width, height); return false; @@ -2687,10 +2672,10 
@@ bool GPU::RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u if (compress_on_thread) { System::QueueTaskOnThread([width, height, path = std::move(path), fp = fp.release(), quality, - flip_y = g_gpu_device->UsesLowerLeftOrigin(), pixels = std::move(pixels), pixels_stride, - pixels_format, osd_key = std::move(osd_key)]() mutable { + flip_y = g_gpu_device->UsesLowerLeftOrigin(), image = std::move(image), + osd_key = std::move(osd_key)]() mutable { CompressAndWriteTextureToFile(width, height, std::move(path), FileSystem::ManagedCFilePtr(fp), quality, true, - flip_y, std::move(pixels), pixels_stride, pixels_format, std::move(osd_key)); + flip_y, std::move(image), std::move(osd_key)); System::RemoveSelfFromTaskThreads(); }); @@ -2699,8 +2684,7 @@ bool GPU::RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u else { return CompressAndWriteTextureToFile(width, height, std::move(path), std::move(fp), quality, true, - g_gpu_device->UsesLowerLeftOrigin(), std::move(pixels), pixels_stride, - pixels_format, std::move(osd_key)); + g_gpu_device->UsesLowerLeftOrigin(), std::move(image), std::move(osd_key)); } } @@ -2726,20 +2710,23 @@ bool GPU::DumpVRAMToFile(const char* filename) bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha) { - RGBA8Image image(width, height); + Image image(width, height, ImageFormat::RGBA8); const char* ptr_in = static_cast(buffer); for (u32 row = 0; row < height; row++) { const char* row_ptr_in = ptr_in; - u32* ptr_out = image.GetRowPixels(row); + u8* ptr_out = image.GetRowPixels(row); for (u32 col = 0; col < width; col++) { u16 src_col; std::memcpy(&src_col, row_ptr_in, sizeof(u16)); row_ptr_in += sizeof(u16); - *(ptr_out++) = VRAMRGBA5551ToRGBA8888(remove_alpha ? (src_col | u16(0x8000)) : src_col); + + const u32 pixel32 = VRAMRGBA5551ToRGBA8888(remove_alpha ? 
(src_col | u16(0x8000)) : src_col); + std::memcpy(ptr_out, &pixel32, sizeof(pixel32)); + ptr_out += sizeof(pixel32); } ptr_in += stride; diff --git a/src/core/gpu.h b/src/core/gpu.h index a98b20cbb..5c18c9f98 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -24,6 +24,7 @@ #include class Error; +class Image; class SmallStringBase; class StateWrapper; @@ -233,8 +234,7 @@ public: /// Renders the display, optionally with postprocessing to the specified image. bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, - bool postfx, std::vector* out_pixels, u32* out_stride, - GPUTexture::Format* out_format); + bool postfx, Image* out_image); /// Helper function to save screenshot to PNG. bool RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index 0f6ecc124..3c06e6f3f 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -303,7 +303,7 @@ static void FindTextureReplacements(bool load_vram_write_replacements, bool load static void LoadTextureReplacementAliases(const ryml::ConstNodeRef& root, bool load_vram_write_replacement_aliases, bool load_texture_replacement_aliases); -static const TextureReplacementImage* GetTextureReplacementImage(const std::string& filename); +static const TextureReplacementImage* GetTextureReplacementImage(const std::string& path); static void PreloadReplacementTextures(); static void PurgeUnreferencedTexturesFromCache(); @@ -2493,28 +2493,23 @@ void GPUTextureCache::DumpVRAMWrite(u32 width, u32 height, const void* pixels) if (filename.empty() || FileSystem::FileExists(filename.c_str())) return; - RGBA8Image image; - image.SetSize(width, height); + Image image(width, height, ImageFormat::RGBA8); const u16* src_pixels = reinterpret_cast(pixels); for (u32 y = 0; y < height; y++) { + u8* row_ptr = image.GetPixels(); for 
(u32 x = 0; x < width; x++) { - image.SetPixel(x, y, VRAMRGBA5551ToRGBA8888(*src_pixels)); - src_pixels++; + const u32 pixel32 = VRAMRGBA5551ToRGBA8888(*(src_pixels++)); + std::memcpy(row_ptr, &pixel32, sizeof(pixel32)); + row_ptr += sizeof(pixel32); } } if (s_state.config.dump_vram_write_force_alpha_channel) - { - for (u32 y = 0; y < height; y++) - { - for (u32 x = 0; x < width; x++) - image.SetPixel(x, y, image.GetPixel(x, y) | 0xFF000000u); - } - } + image.SetAllPixelsOpaque(); INFO_LOG("Dumping {}x{} VRAM write to '{}'", width, height, Path::GetFileName(filename)); if (!image.SaveToFile(filename.c_str())) [[unlikely]] @@ -2599,12 +2594,13 @@ void GPUTextureCache::DumpTexture(TextureReplacementType type, u32 offset_x, u32 DEV_LOG("Dumping VRAM write {:016X} [{}x{}] at {}", src_hash, width, height, rect); - RGBA8Image image(width, height); - GPUTextureCache::DecodeTexture(mode, &g_vram[rect.top * VRAM_WIDTH + rect.left], palette_data, image.GetPixels(), - image.GetPitch(), width, height); + Image image(width, height, ImageFormat::RGBA8); + GPUTextureCache::DecodeTexture(mode, &g_vram[rect.top * VRAM_WIDTH + rect.left], palette_data, + reinterpret_cast(image.GetPixels()), image.GetPitch(), width, height); - u32* image_pixels = image.GetPixels(); - const u32* image_pixels_end = image.GetPixels() + (width * height); + // TODO: Vectorize this. 
+ u32* image_pixels = reinterpret_cast(image.GetPixels()); + const u32* image_pixels_end = image_pixels + (width * height); if (s_state.config.dump_texture_force_alpha_channel) { for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++) @@ -2970,21 +2966,23 @@ void GPUTextureCache::LoadTextureReplacementAliases(const ryml::ConstNodeRef& ro s_state.game_id); } -const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetTextureReplacementImage(const std::string& filename) +const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetTextureReplacementImage(const std::string& path) { - auto it = s_state.replacement_image_cache.find(filename); + auto it = s_state.replacement_image_cache.find(path); if (it != s_state.replacement_image_cache.end()) return &it->second; - RGBA8Image image; - if (!image.LoadFromFile(filename.c_str())) + Image image; + Error error; + if (!image.LoadFromFile(path.c_str(), &error)) { - ERROR_LOG("Failed to load '{}'", Path::GetFileName(filename)); + ERROR_LOG("Failed to load '{}': {}", Path::GetFileName(path), error.GetDescription()); return nullptr; } - VERBOSE_LOG("Loaded '{}': {}x{}", Path::GetFileName(filename), image.GetWidth(), image.GetHeight()); - it = s_state.replacement_image_cache.emplace(filename, std::move(image)).first; + VERBOSE_LOG("Loaded '{}': {}x{} {}", Path::GetFileName(path), image.GetWidth(), image.GetHeight(), + Image::GetFormatName(image.GetFormat())); + it = s_state.replacement_image_cache.emplace(path, std::move(image)).first; return &it->second; } @@ -3206,14 +3204,14 @@ void GPUTextureCache::ReloadTextureReplacements(bool show_info) void GPUTextureCache::PurgeUnreferencedTexturesFromCache() { TextureCache old_map = std::move(s_state.replacement_image_cache); - s_state.replacement_image_cache = {}; + s_state.replacement_image_cache = TextureCache(); for (const auto& it : s_state.vram_replacements) { const auto it2 = old_map.find(it.second); if (it2 != old_map.end()) { - 
s_state.replacement_image_cache[it.second] = std::move(it2->second); + s_state.replacement_image_cache.emplace(it.second, std::move(it2->second)); old_map.erase(it2); } } @@ -3225,7 +3223,7 @@ void GPUTextureCache::PurgeUnreferencedTexturesFromCache() const auto it2 = old_map.find(it.second.second); if (it2 != old_map.end()) { - s_state.replacement_image_cache[it.second.second] = std::move(it2->second); + s_state.replacement_image_cache.emplace(it.second.second, std::move(it2->second)); old_map.erase(it2); } } @@ -3319,9 +3317,8 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, for (const TextureReplacementSubImage& si : subimages) { - const auto temp_texture = g_gpu_device->FetchAutoRecycleTexture( - si.image.GetWidth(), si.image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT, - GPUTexture::Flags::None, si.image.GetPixels(), si.image.GetPitch()); + std::unique_ptr temp_texture = + g_gpu_device->FetchAndUploadTextureImage(si.image, GPUTexture::Flags::None); if (!temp_texture) continue; @@ -3334,6 +3331,8 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, g_gpu_device->SetPipeline(si.invert_alpha ? 
s_state.replacement_semitransparent_draw_pipeline.get() : s_state.replacement_draw_pipeline.get()); g_gpu_device->Draw(3, 0); + + g_gpu_device->RecycleTexture(std::move(temp_texture)); } g_gpu_device->CopyTextureRegion(replacement_tex.get(), 0, 0, 0, 0, s_state.replacement_texture_render_target.get(), 0, diff --git a/src/core/gpu_hw_texture_cache.h b/src/core/gpu_hw_texture_cache.h index 8fc601e64..1c071c920 100644 --- a/src/core/gpu_hw_texture_cache.h +++ b/src/core/gpu_hw_texture_cache.h @@ -5,8 +5,8 @@ #include "gpu_types.h" +class Image; class GPUTexture; -class RGBA8Image; class StateWrapper; struct Settings; @@ -29,7 +29,7 @@ enum class PaletteRecordFlags : u32 IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(PaletteRecordFlags); using HashType = u64; -using TextureReplacementImage = RGBA8Image; +using TextureReplacementImage = Image; struct Source; struct HashCacheEntry; diff --git a/src/core/system.cpp b/src/core/system.cpp index 947ef9ac3..8ee6aa5e9 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -128,7 +128,7 @@ struct SaveStateBuffer std::string media_path; u32 media_subimage_index; u32 version; - RGBA8Image screenshot; + Image screenshot; DynamicHeapArray state_data; size_t state_size; }; @@ -2916,15 +2916,14 @@ bool System::LoadStateBufferFromFile(SaveStateBuffer* buffer, std::FILE* fp, Err // Read screenshot if requested. if (read_screenshot) { - buffer->screenshot.SetSize(header.screenshot_width, header.screenshot_height); - const u32 uncompressed_size = buffer->screenshot.GetPitch() * buffer->screenshot.GetHeight(); - const u32 compressed_size = (header.version >= 69) ? header.screenshot_compressed_size : uncompressed_size; + buffer->screenshot.Resize(header.screenshot_width, header.screenshot_height, ImageFormat::RGBA8, true); + const u32 compressed_size = + (header.version >= 69) ? header.screenshot_compressed_size : buffer->screenshot.GetStorageSize(); const SAVE_STATE_HEADER::CompressionType compression_type = (header.version >= 69) ? 
static_cast(header.screenshot_compression_type) : SAVE_STATE_HEADER::CompressionType::None; - if (!ReadAndDecompressStateData( - fp, std::span(reinterpret_cast(buffer->screenshot.GetPixels()), uncompressed_size), - header.offset_to_screenshot, compressed_size, compression_type, error)) [[unlikely]] + if (!ReadAndDecompressStateData(fp, buffer->screenshot.GetPixelsSpan(), header.offset_to_screenshot, + compressed_size, compression_type, error)) [[unlikely]] { return false; } @@ -3104,29 +3103,27 @@ bool System::SaveStateToBuffer(SaveStateBuffer* buffer, Error* error, u32 screen screenshot_display_rect = screenshot_display_rect.sub32(screenshot_display_rect.xyxy()); VERBOSE_LOG("Saving {}x{} screenshot for state", screenshot_width, screenshot_height); - std::vector screenshot_buffer; - u32 screenshot_stride; - GPUTexture::Format screenshot_format; if (g_gpu->RenderScreenshotToBuffer(screenshot_width, screenshot_height, screenshot_display_rect, - screenshot_draw_rect, false, &screenshot_buffer, &screenshot_stride, - &screenshot_format) && - GPUTexture::ConvertTextureDataToRGBA8(screenshot_width, screenshot_height, screenshot_buffer, screenshot_stride, - screenshot_format)) + screenshot_draw_rect, false, &buffer->screenshot)) { - if (screenshot_stride != (screenshot_width * sizeof(u32))) - { - WARNING_LOG("Failed to save {}x{} screenshot for save state due to incorrect stride({})", screenshot_width, - screenshot_height, screenshot_stride); - } - else - { - if (g_gpu_device->UsesLowerLeftOrigin()) - { - GPUTexture::FlipTextureDataRGBA8(screenshot_width, screenshot_height, - reinterpret_cast(screenshot_buffer.data()), screenshot_stride); - } + if (g_gpu_device->UsesLowerLeftOrigin()) + buffer->screenshot.FlipY(); - buffer->screenshot.SetPixels(screenshot_width, screenshot_height, std::move(screenshot_buffer)); + // Ensure it's RGBA8. 
+ if (buffer->screenshot.GetFormat() != ImageFormat::RGBA8) + { + Error convert_error; + std::optional screenshot_rgba8 = buffer->screenshot.ConvertToRGBA8(&convert_error); + if (!screenshot_rgba8.has_value()) + { + ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", + Image::GetFormatName(buffer->screenshot.GetFormat()), convert_error.GetDescription()); + buffer->screenshot.Invalidate(); + } + else + { + buffer->screenshot = std::move(screenshot_rgba8.value()); + } } } else diff --git a/src/core/system.h b/src/core/system.h index a07350e22..c61a715c2 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -74,7 +74,7 @@ struct ExtendedSaveStateInfo std::string media_path; std::time_t timestamp; - RGBA8Image screenshot; + Image screenshot; }; namespace System { diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 3c6c2fb25..44e3faf50 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -4,6 +4,7 @@ #include "gpu_device.h" #include "compress_helpers.h" #include "gpu_framebuffer_manager.h" +#include "image.h" #include "shadergen.h" #include "common/assert.h" @@ -1050,6 +1051,27 @@ GPUDevice::FetchAutoRecycleTexture(u32 width, u32 height, u32 layers, u32 levels return std::unique_ptr(ret.release()); } +std::unique_ptr GPUDevice::FetchAndUploadTextureImage(const Image& image, + GPUTexture::Flags flags /*= GPUTexture::Flags::None*/, + Error* error /*= nullptr*/) +{ + const Image* image_to_upload = ℑ + GPUTexture::Format gpu_format = GPUTexture::GetTextureFormatForImageFormat(image.GetFormat()); + std::optional converted_image; + if (!SupportsTextureFormat(gpu_format)) + { + converted_image = image.ConvertToRGBA8(error); + if (!converted_image.has_value()) + return nullptr; + + image_to_upload = &converted_image.value(); + gpu_format = GPUTexture::GetTextureFormatForImageFormat(converted_image->GetFormat()); + } + + return FetchTexture(image_to_upload->GetWidth(), image_to_upload->GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, + 
gpu_format, flags, image_to_upload->GetPixels(), image_to_upload->GetPitch(), error); +} + void GPUDevice::RecycleTexture(std::unique_ptr texture) { if (!texture) diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 04efc1fc6..02ae401c7 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -24,6 +24,7 @@ #include class Error; +class Image; enum class RenderAPI : u8 { @@ -707,6 +708,9 @@ public: FetchAutoRecycleTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags, const void* data = nullptr, u32 data_stride = 0, Error* error = nullptr); + std::unique_ptr FetchAndUploadTextureImage(const Image& image, + GPUTexture::Flags flags = GPUTexture::Flags::None, + Error* error = nullptr); void RecycleTexture(std::unique_ptr texture); void PurgeTexturePool(); diff --git a/src/util/gpu_texture.cpp b/src/util/gpu_texture.cpp index c1bad139b..a913d33b4 100644 --- a/src/util/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -3,6 +3,7 @@ #include "gpu_texture.h" #include "gpu_device.h" +#include "image.h" #include "common/align.h" #include "common/assert.h" @@ -123,6 +124,56 @@ u32 GPUTexture::GetFullMipmapCount(u32 width, u32 height) return (std::countr_zero(max_dim) + 1); } +GPUTexture::Format GPUTexture::GetTextureFormatForImageFormat(ImageFormat format) +{ + static constexpr const std::array(ImageFormat::MaxCount)> mapping = {{ + Format::Unknown, // None + Format::RGBA8, // RGBA8 + Format::BGRA8, // BGRA8 + Format::RGB565, // RGB565 + Format::Unknown, // RGBA5551 + Format::Unknown, // BC1 + Format::Unknown, // BC2 + Format::Unknown, // BC3 + Format::Unknown, // BC7 + }}; + + return mapping[static_cast(format)]; +} + +ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format) +{ + static constexpr const std::array(Format::MaxCount)> mapping = {{ + ImageFormat::None, // Unknown + ImageFormat::RGBA8, // RGBA8 + ImageFormat::BGRA8, // BGRA8 + 
ImageFormat::RGB565, // RGB565 + ImageFormat::RGBA5551, // RGBA5551 + ImageFormat::None, // R8 + ImageFormat::None, // D16 + ImageFormat::None, // D24S8 + ImageFormat::None, // D32F + ImageFormat::None, // D32FS8 + ImageFormat::None, // R16 + ImageFormat::None, // R16I + ImageFormat::None, // R16U + ImageFormat::None, // R16F + ImageFormat::None, // R32I + ImageFormat::None, // R32U + ImageFormat::None, // R32F + ImageFormat::None, // RG8 + ImageFormat::None, // RG16 + ImageFormat::None, // RG16F + ImageFormat::None, // RG32F + ImageFormat::None, // RGBA16 + ImageFormat::None, // RGBA16F + ImageFormat::None, // RGBA32F + ImageFormat::None, // RGB10A2 + }}; + + return mapping[static_cast(format)]; +} + std::array GPUTexture::GetUNormClearColor() const { return GPUDevice::RGBA8ToFloat(m_clear_value.color); @@ -270,111 +321,6 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u return true; } -bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector& texture_data, - u32& texture_data_stride, GPUTexture::Format format) -{ - switch (format) - { - case Format::BGRA8: - { - for (u32 y = 0; y < height; y++) - { - u8* pixels = reinterpret_cast(texture_data.data()) + (y * texture_data_stride); - for (u32 x = 0; x < width; x++) - { - u32 pixel; - std::memcpy(&pixel, pixels, sizeof(pixel)); - pixel = (pixel & 0xFF00FF00) | ((pixel & 0xFF) << 16) | ((pixel >> 16) & 0xFF); - std::memcpy(pixels, &pixel, sizeof(pixel)); - pixels += sizeof(pixel); - } - } - - return true; - } - - case Format::RGBA8: - return true; - - case Format::RGB565: - { - std::vector temp(width * height); - - for (u32 y = 0; y < height; y++) - { - const u8* pixels_in = reinterpret_cast(texture_data.data()) + (y * texture_data_stride); - u8* pixels_out = reinterpret_cast(temp.data()) + (y * width * sizeof(u32)); - - for (u32 x = 0; x < width; x++) - { - // RGB565 -> RGBA8 - u16 pixel_in; - std::memcpy(&pixel_in, pixels_in, sizeof(u16)); - pixels_in += 
sizeof(u16); - const u8 r5 = Truncate8(pixel_in >> 11); - const u8 g6 = Truncate8((pixel_in >> 5) & 0x3F); - const u8 b5 = Truncate8(pixel_in & 0x1F); - const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 2) | (g6 & 3)) << 8) | - (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (0xFF000000u); - std::memcpy(pixels_out, &rgba8, sizeof(u32)); - pixels_out += sizeof(u32); - } - } - - texture_data = std::move(temp); - texture_data_stride = sizeof(u32) * width; - return true; - } - - case Format::RGBA5551: - { - std::vector temp(width * height); - - for (u32 y = 0; y < height; y++) - { - const u8* pixels_in = reinterpret_cast(texture_data.data()) + (y * texture_data_stride); - u8* pixels_out = reinterpret_cast(temp.data()) + (y * width * sizeof(u32)); - - for (u32 x = 0; x < width; x++) - { - // RGBA5551 -> RGBA8 - u16 pixel_in; - std::memcpy(&pixel_in, pixels_in, sizeof(u16)); - pixels_in += sizeof(u16); - const u8 a1 = Truncate8(pixel_in >> 15); - const u8 r5 = Truncate8((pixel_in >> 10) & 0x1F); - const u8 g6 = Truncate8((pixel_in >> 5) & 0x1F); - const u8 b5 = Truncate8(pixel_in & 0x1F); - const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 3) | (g6 & 7)) << 8) | - (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (a1 ? 
0xFF000000u : 0u); - std::memcpy(pixels_out, &rgba8, sizeof(u32)); - pixels_out += sizeof(u32); - } - } - - texture_data = std::move(temp); - texture_data_stride = sizeof(u32) * width; - return true; - } - - default: - return false; - } -} - -void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, u8* texture_data, u32 texture_data_stride) -{ - std::unique_ptr temp = std::make_unique(texture_data_stride); - for (u32 flip_row = 0; flip_row < (height / 2); flip_row++) - { - u8* top_ptr = &texture_data[flip_row * texture_data_stride]; - u8* bottom_ptr = &texture_data[((height - 1) - flip_row) * texture_data_stride]; - std::memcpy(temp.get(), top_ptr, texture_data_stride); - std::memcpy(top_ptr, bottom_ptr, texture_data_stride); - std::memcpy(bottom_ptr, temp.get(), texture_data_stride); - } -} - void GPUTexture::MakeReadyForSampling() { } diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index 5139c74b5..c85113a91 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -13,6 +13,8 @@ class Error; +enum class ImageFormat : u8; + class GPUTexture { public: @@ -100,13 +102,12 @@ public: static u32 CalcUploadSize(Format format, u32 height, u32 pitch); static u32 GetFullMipmapCount(u32 width, u32 height); + static Format GetTextureFormatForImageFormat(ImageFormat format); + static ImageFormat GetImageFormatForTextureFormat(Format format); + static bool ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, Flags flags, Error* error); - static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector& texture_data, u32& texture_data_stride, - Format format); - static void FlipTextureDataRGBA8(u32 width, u32 height, u8* texture_data, u32 texture_data_stride); - ALWAYS_INLINE u32 GetWidth() const { return m_width; } ALWAYS_INLINE u32 GetHeight() const { return m_height; } ALWAYS_INLINE u32 GetLayers() const { return m_layers; } diff --git a/src/util/image.cpp b/src/util/image.cpp index 
5d31f607d..c3f5dc212 100644 --- a/src/util/image.cpp +++ b/src/util/image.cpp @@ -10,6 +10,7 @@ #include "common/file_system.h" #include "common/gsvector.h" #include "common/heap_array.h" +#include "common/intrin.h" #include "common/log.h" #include "common/path.h" #include "common/scoped_guard.h" @@ -30,28 +31,28 @@ LOG_CHANNEL(Image); -static bool PNGBufferLoader(RGBA8Image* image, std::span data, Error* error); -static bool PNGBufferSaver(const RGBA8Image& image, DynamicHeapArray* data, u8 quality, Error* error); -static bool PNGFileLoader(RGBA8Image* image, std::string_view filename, std::FILE* fp, Error* error); -static bool PNGFileSaver(const RGBA8Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); +static bool PNGBufferLoader(Image* image, std::span data, Error* error); +static bool PNGBufferSaver(const Image& image, DynamicHeapArray* data, u8 quality, Error* error); +static bool PNGFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error); +static bool PNGFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); -static bool JPEGBufferLoader(RGBA8Image* image, std::span data, Error* error); -static bool JPEGBufferSaver(const RGBA8Image& image, DynamicHeapArray* data, u8 quality, Error* error); -static bool JPEGFileLoader(RGBA8Image* image, std::string_view filename, std::FILE* fp, Error* error); -static bool JPEGFileSaver(const RGBA8Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); +static bool JPEGBufferLoader(Image* image, std::span data, Error* error); +static bool JPEGBufferSaver(const Image& image, DynamicHeapArray* data, u8 quality, Error* error); +static bool JPEGFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error); +static bool JPEGFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); -static bool WebPBufferLoader(RGBA8Image* image, std::span data, 
Error* error); -static bool WebPBufferSaver(const RGBA8Image& image, DynamicHeapArray* data, u8 quality, Error* error); -static bool WebPFileLoader(RGBA8Image* image, std::string_view filename, std::FILE* fp, Error* error); -static bool WebPFileSaver(const RGBA8Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); +static bool WebPBufferLoader(Image* image, std::span data, Error* error); +static bool WebPBufferSaver(const Image& image, DynamicHeapArray* data, u8 quality, Error* error); +static bool WebPFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error); +static bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); struct FormatHandler { const char* extension; - bool (*buffer_loader)(RGBA8Image*, std::span, Error*); - bool (*buffer_saver)(const RGBA8Image&, DynamicHeapArray*, u8, Error*); - bool (*file_loader)(RGBA8Image*, std::string_view, std::FILE*, Error*); - bool (*file_saver)(const RGBA8Image&, std::string_view, std::FILE*, u8, Error*); + bool (*buffer_loader)(Image*, std::span, Error*); + bool (*buffer_saver)(const Image&, DynamicHeapArray*, u8, Error*); + bool (*file_loader)(Image*, std::string_view, std::FILE*, Error*); + bool (*file_saver)(const Image&, std::string_view, std::FILE*, u8, Error*); }; static constexpr FormatHandler s_format_handlers[] = { @@ -72,41 +73,254 @@ static const FormatHandler* GetFormatHandler(std::string_view extension) return nullptr; } -RGBA8Image::RGBA8Image() = default; +static void SwapBGRAToRGBA(void* pixels_out, u32 pixels_out_pitch, const void* pixels_in, u32 pixels_in_pitch, + u32 width, u32 height); -RGBA8Image::RGBA8Image(const RGBA8Image& copy) : Image(copy) +Image::Image() = default; + +Image::Image(const Image& copy) +{ + SetPixels(copy.m_width, copy.m_height, copy.m_format, copy.m_pixels.get(), copy.m_pitch); +} + +Image::Image(u32 width, u32 height, ImageFormat format, const void* pixels, u32 pitch) +{ + 
SetPixels(width, height, format, pixels, pitch); +} + +Image::Image(u32 width, u32 height, ImageFormat format, PixelStorage pixels, u32 pitch) + : m_width(width), m_height(height), m_pitch(pitch), m_format(format), m_pixels(std::move(pixels)) { } -RGBA8Image::RGBA8Image(u32 width, u32 height, const u32* pixels) : Image(width, height, pixels) +Image::Image(u32 width, u32 height, ImageFormat format) { + Resize(width, height, format, false); } -RGBA8Image::RGBA8Image(RGBA8Image&& move) : Image(move) +Image::Image(Image&& move) { + m_width = std::exchange(move.m_width, 0); + m_height = std::exchange(move.m_height, 0); + m_pitch = std::exchange(move.m_pitch, 0); + m_format = std::exchange(move.m_format, ImageFormat::None); + m_pixels = std::move(move.m_pixels); } -RGBA8Image::RGBA8Image(u32 width, u32 height) : Image(width, height) +void Image::Resize(u32 new_width, u32 new_height, bool preserve) { + Resize(new_width, new_height, m_format, preserve); } -RGBA8Image::RGBA8Image(u32 width, u32 height, std::vector pixels) : Image(width, height, std::move(pixels)) +void Image::Resize(u32 new_width, u32 new_height, ImageFormat format, bool preserve) { + if (m_width == new_width && m_height == new_height && m_format == format) + return; + + if (!preserve) + m_pixels.reset(); + + const u32 old_blocks_y = GetBlockYCount(); + const u32 old_pitch = m_pitch; + PixelStorage old_pixels = + std::exchange(m_pixels, Common::make_unique_aligned_for_overwrite( + VECTOR_ALIGNMENT, CalculateStorageSize(new_width, new_height, format))); + + m_width = new_width; + m_height = new_height; + m_format = format; + m_pitch = CalculatePitch(new_width, new_height, format); + if (preserve && old_pixels) + { + StringUtil::StrideMemCpy(m_pixels.get(), m_pitch, old_pixels.get(), old_pitch, std::min(old_pitch, m_pitch), + std::min(old_blocks_y, GetBlockYCount())); + } } -RGBA8Image& RGBA8Image::operator=(const RGBA8Image& copy) +Image& Image::operator=(const Image& copy) { - Image::operator=(copy); + 
SetPixels(copy.m_width, copy.m_height, copy.m_format, copy.m_pixels.get(), copy.m_pitch); return *this; } -RGBA8Image& RGBA8Image::operator=(RGBA8Image&& move) +Image& Image::operator=(Image&& move) { - Image::operator=(move); + m_width = std::exchange(move.m_width, 0); + m_height = std::exchange(move.m_height, 0); + m_pitch = std::exchange(move.m_pitch, 0); + m_format = std::exchange(move.m_format, ImageFormat::None); + m_pixels = std::move(move.m_pixels); return *this; } -bool RGBA8Image::LoadFromFile(const char* filename, Error* error /* = nullptr */) +const char* Image::GetFormatName(ImageFormat format) +{ + static constexpr std::array(ImageFormat::MaxCount)> names = { + "None", // None + "RGBA8", // RGBA8 + "BGRA8", // BGRA8 + "RGB565", // RGB565 + "RGB5551", // RGBA5551 + "BC1", // BC1 + "BC2", // BC2 + "BC3", // BC3 + "BC7", // BC7 + }; + + return names[static_cast(format)]; +} + +u32 Image::GetPixelSize(ImageFormat format) +{ + static constexpr std::array(ImageFormat::MaxCount)> sizes = {{ + 0, // Unknown + 4, // RGBA8 + 4, // BGRA8 + 2, // RGB565 + 2, // RGBA5551 + 8, // BC1 - 16 pixels in 64 bits + 16, // BC2 - 16 pixels in 128 bits + 16, // BC3 - 16 pixels in 128 bits + 16, // BC4 - 16 pixels in 128 bits + }}; + + return sizes[static_cast(format)]; +} + +bool Image::IsCompressedFormat(ImageFormat format) +{ + return (format >= ImageFormat::BC1); +} + +u32 Image::CalculatePitch(u32 width, u32 height, ImageFormat format) +{ + const u32 pixel_size = GetPixelSize(format); + if (!IsCompressedFormat(format)) + return Common::AlignUpPow2(width * pixel_size, 4); + + // All compressed formats use a block size of 4. 
+ const u32 blocks_wide = Common::AlignUpPow2(width, 4) / 4; + return blocks_wide * pixel_size; +} + +u32 Image::CalculateStorageSize(u32 width, u32 height, ImageFormat format) +{ + const u32 pixel_size = GetPixelSize(format); + if (!IsCompressedFormat(format)) + return Common::AlignUpPow2(width * pixel_size, 4) * height; + + const u32 blocks_wide = Common::AlignUpPow2(width, 4) / 4; + const u32 blocks_high = Common::AlignUpPow2(height, 4) / 4; + return (blocks_wide * pixel_size) * blocks_high; +} + +u32 Image::CalculateStorageSize(u32 width, u32 height, u32 pitch, ImageFormat format) +{ + height = IsCompressedFormat(format) ? (Common::AlignUpPow2(height, 4) / 4) : height; + return pitch * height; +} + +u32 Image::GetBlockXCount() const +{ + return IsCompressedFormat(m_format) ? (Common::AlignUpPow2(m_width, 4) / 4) : m_width; +} + +u32 Image::GetBlockYCount() const +{ + return IsCompressedFormat(m_format) ? (Common::AlignUpPow2(m_height, 4) / 4) : m_height; +} + +u32 Image::GetStorageSize() const +{ + return GetBlockYCount() * m_pitch; +} + +std::span Image::GetPixelsSpan() const +{ + return std::span(m_pixels.get(), GetStorageSize()); +} + +std::span Image::GetPixelsSpan() +{ + return std::span(m_pixels.get(), GetStorageSize()); +} + +void Image::Clear() +{ + std::memset(m_pixels.get(), 0, CalculateStorageSize(m_width, m_height, m_pitch, m_format)); +} + +void Image::Invalidate() +{ + m_width = 0; + m_height = 0; + m_pitch = 0; + m_format = ImageFormat::None; + m_pixels.reset(); +} + +void Image::SetPixels(u32 width, u32 height, ImageFormat format, const void* pixels, u32 pitch) +{ + Resize(width, height, format, false); + if (m_pixels) + StringUtil::StrideMemCpy(m_pixels.get(), m_pitch, pixels, pitch, m_pitch, GetBlockYCount()); +} + +void Image::SetPixels(u32 width, u32 height, ImageFormat format, PixelStorage pixels, u32 pitch) +{ + m_width = width; + m_height = height; + m_format = format; + m_pitch = pitch; + m_pixels = std::move(pixels); +} + +bool 
Image::SetAllPixelsOpaque() +{ + if (m_format == ImageFormat::RGBA8 || m_format == ImageFormat::BGRA8) + { + for (u32 y = 0; y < m_height; y++) + { + u8* row = GetRowPixels(y); + for (u32 x = 0; x < m_width; x++, row += sizeof(u32)) + row[3] = 0xFF; + } + + return true; + } + else if (m_format == ImageFormat::RGBA5551) + { + for (u32 y = 0; y < m_height; y++) + { + u8* row = GetRowPixels(y); + for (u32 x = 0; x < m_width; x++, row += sizeof(u32)) + row[1] |= 0x80; + } + + return true; + } + else if (m_format == ImageFormat::RGB565) + { + // Already opaque + return true; + } + else + { + // Unhandled format + return false; + } +} + +Image::PixelStorage Image::TakePixels() +{ + m_width = 0; + m_height = 0; + m_format = ImageFormat::None; + m_pitch = 0; + return std::move(m_pixels); +} + +bool Image::LoadFromFile(const char* filename, Error* error /* = nullptr */) { auto fp = FileSystem::OpenManagedCFile(filename, "rb", error); if (!fp) @@ -115,8 +329,8 @@ bool RGBA8Image::LoadFromFile(const char* filename, Error* error /* = nullptr */ return LoadFromFile(filename, fp.get(), error); } -bool RGBA8Image::SaveToFile(const char* filename, u8 quality /* = DEFAULT_SAVE_QUALITY */, - Error* error /* = nullptr */) const +bool Image::SaveToFile(const char* filename, u8 quality /* = DEFAULT_SAVE_QUALITY */, + Error* error /* = nullptr */) const { auto fp = FileSystem::OpenManagedCFile(filename, "wb", error); if (!fp) @@ -131,7 +345,7 @@ bool RGBA8Image::SaveToFile(const char* filename, u8 quality /* = DEFAULT_SAVE_Q return false; } -bool RGBA8Image::LoadFromFile(std::string_view filename, std::FILE* fp, Error* error /* = nullptr */) +bool Image::LoadFromFile(std::string_view filename, std::FILE* fp, Error* error /* = nullptr */) { const std::string_view extension(Path::GetExtension(filename)); const FormatHandler* handler = GetFormatHandler(extension); @@ -144,7 +358,7 @@ bool RGBA8Image::LoadFromFile(std::string_view filename, std::FILE* fp, Error* e return 
handler->file_loader(this, filename, fp, error); } -bool RGBA8Image::LoadFromBuffer(std::string_view filename, std::span data, Error* error /* = nullptr */) +bool Image::LoadFromBuffer(std::string_view filename, std::span data, Error* error /* = nullptr */) { const std::string_view extension(Path::GetExtension(filename)); const FormatHandler* handler = GetFormatHandler(extension); @@ -157,7 +371,7 @@ bool RGBA8Image::LoadFromBuffer(std::string_view filename, std::span d return handler->buffer_loader(this, data, error); } -bool RGBA8Image::RasterizeSVG(const std::span data, u32 width, u32 height, Error* error) +bool Image::RasterizeSVG(const std::span data, u32 width, u32 height, Error* error) { if (width == 0 || height == 0) { @@ -181,13 +395,15 @@ bool RGBA8Image::RasterizeSVG(const std::span data, u32 width, u32 hei return false; } - SetPixels(width, height, lunasvg_bitmap_data(bitmap.get()), lunasvg_bitmap_stride(bitmap.get())); - SwapBGRAToRGBA(m_pixels.data(), m_width, m_height, GetPitch()); + // lunasvg works in BGRA, swap to RGBA + Resize(width, height, ImageFormat::RGBA8, false); + SwapBGRAToRGBA(m_pixels.get(), m_pitch, lunasvg_bitmap_data(bitmap.get()), lunasvg_bitmap_stride(bitmap.get()), width, + height); return true; } -bool RGBA8Image::SaveToFile(std::string_view filename, std::FILE* fp, u8 quality /* = DEFAULT_SAVE_QUALITY */, - Error* error /* = nullptr */) const +bool Image::SaveToFile(std::string_view filename, std::FILE* fp, u8 quality /* = DEFAULT_SAVE_QUALITY */, + Error* error /* = nullptr */) const { const std::string_view extension(Path::GetExtension(filename)); const FormatHandler* handler = GetFormatHandler(extension); @@ -209,9 +425,9 @@ bool RGBA8Image::SaveToFile(std::string_view filename, std::FILE* fp, u8 quality return true; } -std::optional> RGBA8Image::SaveToBuffer(std::string_view filename, - u8 quality /* = DEFAULT_SAVE_QUALITY */, - Error* error /* = nullptr */) const +std::optional> Image::SaveToBuffer(std::string_view 
filename, + u8 quality /* = DEFAULT_SAVE_QUALITY */, + Error* error /* = nullptr */) const { std::optional> ret; @@ -230,81 +446,154 @@ std::optional> RGBA8Image::SaveToBuffer(std::string_view fi return ret; } -void RGBA8Image::SwapBGRAToRGBA(void* pixels, u32 width, u32 height, u32 pitch) +void SwapBGRAToRGBA(void* pixels_out, u32 pixels_out_pitch, const void* pixels_in, u32 pixels_in_pitch, u32 width, + u32 height) { #ifdef GSVECTOR_HAS_FAST_INT_SHUFFLE8 constexpr u32 pixels_per_vec = sizeof(GSVector4i) / 4; const u32 aligned_width = Common::AlignDownPow2(width, pixels_per_vec); #endif - u8* pixels_ptr = static_cast(pixels); + const u8* pixels_in_ptr = static_cast(pixels_in); + u8* pixels_out_ptr = static_cast(pixels_out); for (u32 y = 0; y < height; y++) { - u8* row_pixels_ptr = pixels_ptr; - u32 x; + const u8* row_pixels_in_ptr = pixels_in_ptr; + u8* row_pixels_out_ptr = pixels_out_ptr; + u32 x = 0; #ifdef GSVECTOR_HAS_FAST_INT_SHUFFLE8 - for (x = 0; x < aligned_width; x += pixels_per_vec) + for (; x < aligned_width; x += pixels_per_vec) { static constexpr GSVector4i mask = GSVector4i::cxpr8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); - GSVector4i::store(row_pixels_ptr, GSVector4i::load(row_pixels_ptr).shuffle8(mask)); - row_pixels_ptr += sizeof(GSVector4i); + GSVector4i::store(row_pixels_out_ptr, GSVector4i::load(row_pixels_in_ptr).shuffle8(mask)); + row_pixels_in_ptr += sizeof(GSVector4i); + row_pixels_out_ptr += sizeof(GSVector4i); } #endif - for (x = 0; x < width; x++) + for (; x < width; x++) { u32 pixel; - std::memcpy(&pixel, row_pixels_ptr, sizeof(pixel)); + std::memcpy(&pixel, row_pixels_in_ptr, sizeof(pixel)); pixel = (pixel & 0xFF00FF00) | ((pixel & 0xFF) << 16) | ((pixel >> 16) & 0xFF); - std::memcpy(row_pixels_ptr, &pixel, sizeof(pixel)); - row_pixels_ptr += sizeof(pixel); + std::memcpy(row_pixels_out_ptr, &pixel, sizeof(pixel)); + row_pixels_in_ptr += sizeof(pixel); + row_pixels_out_ptr += sizeof(pixel); } - pixels_ptr += pitch; + 
pixels_in_ptr += pixels_in_pitch; + pixels_out_ptr += pixels_out_pitch; } } -#if 0 - -void RGBA8Image::Resize(u32 new_width, u32 new_height) +std::optional Image::ConvertToRGBA8(Error* error) const { - if (m_width == new_width && m_height == new_height) - return; + std::optional ret; - std::vector resized_texture_data(new_width * new_height); - u32 resized_texture_stride = sizeof(u32) * new_width; - if (!stbir_resize_uint8(reinterpret_cast(m_pixels.data()), m_width, m_height, GetPitch(), - reinterpret_cast(resized_texture_data.data()), new_width, new_height, - resized_texture_stride, 4)) + if (!IsValid()) { - Panic("stbir_resize_uint8 failed"); - return; + Error::SetStringView(error, "Image is not valid."); + return ret; } - SetPixels(new_width, new_height, std::move(resized_texture_data)); + switch (m_format) + { + case ImageFormat::BGRA8: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + SwapBGRAToRGBA(ret->GetPixels(), ret->GetPitch(), m_pixels.get(), m_pitch, m_width, m_height); + } + break; + + case ImageFormat::RGBA8: + { + ret = Image(m_width, m_height, m_format, m_pixels.get(), m_pitch); + } + break; + + case ImageFormat::RGB565: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + for (u32 y = 0; y < m_height; y++) + { + const u8* pixels_in = GetRowPixels(y); + u8* pixels_out = ret->GetRowPixels(y); + + for (u32 x = 0; x < m_width; x++) + { + // RGB565 -> RGBA8 + u16 pixel_in; + std::memcpy(&pixel_in, pixels_in, sizeof(u16)); + pixels_in += sizeof(u16); + const u8 r5 = Truncate8(pixel_in >> 11); + const u8 g6 = Truncate8((pixel_in >> 5) & 0x3F); + const u8 b5 = Truncate8(pixel_in & 0x1F); + const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 2) | (g6 & 3)) << 8) | + (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (0xFF000000u); + std::memcpy(pixels_out, &rgba8, sizeof(u32)); + pixels_out += sizeof(u32); + } + } + } + break; + + case ImageFormat::RGBA5551: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + for 
(u32 y = 0; y < m_height; y++) + { + const u8* pixels_in = GetRowPixels(y); + u8* pixels_out = ret->GetRowPixels(y); + + for (u32 x = 0; x < m_width; x++) + { + // RGBA5551 -> RGBA8 + u16 pixel_in; + std::memcpy(&pixel_in, pixels_in, sizeof(u16)); + pixels_in += sizeof(u16); + const u8 a1 = Truncate8(pixel_in >> 15); + const u8 r5 = Truncate8((pixel_in >> 10) & 0x1F); + const u8 g6 = Truncate8((pixel_in >> 5) & 0x1F); + const u8 b5 = Truncate8(pixel_in & 0x1F); + const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 3) | (g6 & 7)) << 8) | + (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (a1 ? 0xFF000000u : 0u); + std::memcpy(pixels_out, &rgba8, sizeof(u32)); + pixels_out += sizeof(u32); + } + } + } + break; + + // TODO: Block format decompression + + default: + { + Error::SetStringFmt(error, "Unhandled format {}", GetFormatName(m_format)); + } + break; + } + + return ret; } -void RGBA8Image::Resize(const RGBA8Image* src_image, u32 new_width, u32 new_height) +void Image::FlipY() { - if (src_image->m_width == new_width && src_image->m_height == new_height) - { - SetPixels(src_image->m_width, src_image->m_height, src_image->m_pixels.data()); + if (!IsValid()) return; - } - SetSize(new_width, new_height); - if (!stbir_resize_uint8(reinterpret_cast(src_image->m_pixels.data()), src_image->m_width, - src_image->m_height, src_image->GetPitch(), reinterpret_cast(m_pixels.data()), new_width, - new_height, GetPitch(), 4)) + PixelStorage temp = Common::make_unique_aligned_for_overwrite(VECTOR_ALIGNMENT, m_pitch); + const u32 half_height = m_height / 2; + for (u32 flip_row = 0; flip_row < half_height; flip_row++) { - Panic("stbir_resize_uint8 failed"); - return; + u8* top_ptr = &m_pixels[flip_row * m_pitch]; + u8* bottom_ptr = &m_pixels[((m_height - 1) - flip_row) * m_pitch]; + std::memcpy(temp.get(), top_ptr, m_pitch); + std::memcpy(top_ptr, bottom_ptr, m_pitch); + std::memcpy(bottom_ptr, temp.get(), m_pitch); } } -#endif - static void 
PNGSetErrorFunction(png_structp png_ptr, Error* error) { png_set_error_fn( @@ -316,8 +605,7 @@ static void PNGSetErrorFunction(png_structp png_ptr, Error* error) [](png_structp png_ptr, png_const_charp message) { WARNING_LOG("libpng warning: {}", message); }); } -static bool PNGCommonLoader(RGBA8Image* image, png_structp png_ptr, png_infop info_ptr, std::vector& new_data, - std::vector& row_pointers) +static bool PNGCommonLoader(Image* image, png_structp png_ptr, png_infop info_ptr, std::vector& row_pointers) { png_read_info(png_ptr, info_ptr); @@ -351,17 +639,16 @@ static bool PNGCommonLoader(RGBA8Image* image, png_structp png_ptr, png_infop in png_read_update_info(png_ptr, info_ptr); - new_data.resize(width * height); + image->Resize(width, height, ImageFormat::RGBA8, false); row_pointers.reserve(height); for (u32 y = 0; y < height; y++) - row_pointers.push_back(reinterpret_cast(new_data.data() + y * width)); + row_pointers.push_back(reinterpret_cast(image->GetRowPixels(y))); png_read_image(png_ptr, row_pointers.data()); - image->SetPixels(width, height, std::move(new_data)); return true; } -bool PNGFileLoader(RGBA8Image* image, std::string_view filename, std::FILE* fp, Error* error) +bool PNGFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error) { png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); if (!png_ptr) @@ -380,12 +667,14 @@ bool PNGFileLoader(RGBA8Image* image, std::string_view filename, std::FILE* fp, ScopedGuard cleanup([&png_ptr, &info_ptr]() { png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); }); - std::vector new_data; std::vector row_pointers; PNGSetErrorFunction(png_ptr, error); if (setjmp(png_jmpbuf(png_ptr))) + { + image->Invalidate(); return false; + } png_set_read_fn(png_ptr, fp, [](png_structp png_ptr, png_bytep data_ptr, png_size_t size) { std::FILE* fp = static_cast(png_get_io_ptr(png_ptr)); @@ -393,10 +682,10 @@ bool PNGFileLoader(RGBA8Image* image, 
std::string_view filename, std::FILE* fp, png_error(png_ptr, "fread() failed"); }); - return PNGCommonLoader(image, png_ptr, info_ptr, new_data, row_pointers); + return PNGCommonLoader(image, png_ptr, info_ptr, row_pointers); } -bool PNGBufferLoader(RGBA8Image* image, std::span data, Error* error) +bool PNGBufferLoader(Image* image, std::span data, Error* error) { png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); if (!png_ptr) @@ -415,12 +704,14 @@ bool PNGBufferLoader(RGBA8Image* image, std::span data, Error* error) ScopedGuard cleanup([&png_ptr, &info_ptr]() { png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); }); - std::vector new_data; std::vector row_pointers; PNGSetErrorFunction(png_ptr, error); if (setjmp(png_jmpbuf(png_ptr))) + { + image->Invalidate(); return false; + } struct IOData { @@ -439,10 +730,10 @@ bool PNGBufferLoader(RGBA8Image* image, std::span data, Error* error) } }); - return PNGCommonLoader(image, png_ptr, info_ptr, new_data, row_pointers); + return PNGCommonLoader(image, png_ptr, info_ptr, row_pointers); } -static void PNGSaveCommon(const RGBA8Image& image, png_structp png_ptr, png_infop info_ptr, u8 quality) +static void PNGSaveCommon(const Image& image, png_structp png_ptr, png_infop info_ptr, u8 quality) { png_set_compression_level(png_ptr, std::clamp(quality / 10, 0, 9)); png_set_IHDR(png_ptr, info_ptr, image.GetWidth(), image.GetHeight(), 8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE, @@ -455,7 +746,7 @@ static void PNGSaveCommon(const RGBA8Image& image, png_structp png_ptr, png_info png_write_end(png_ptr, nullptr); } -bool PNGFileSaver(const RGBA8Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error) +bool PNGFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error) { png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); png_infop info_ptr = nullptr; @@ -493,7 +784,7 @@ bool 
PNGFileSaver(const RGBA8Image& image, std::string_view filename, std::FILE* return true; } -bool PNGBufferSaver(const RGBA8Image& image, DynamicHeapArray* data, u8 quality, Error* error) +bool PNGBufferSaver(const Image& image, DynamicHeapArray* data, u8 quality, Error* error) { png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); png_infop info_ptr = nullptr; @@ -570,7 +861,7 @@ struct JPEGErrorHandler } // namespace template -static bool WrapJPEGDecompress(RGBA8Image* image, Error* error, T setup_func) +static bool WrapJPEGDecompress(Image* image, Error* error, T setup_func) { std::vector scanline; jpeg_decompress_struct info = {}; @@ -611,7 +902,7 @@ static bool WrapJPEGDecompress(RGBA8Image* image, Error* error, T setup_func) return false; } - image->SetSize(info.image_width, info.image_height); + image->Resize(info.image_width, info.image_height, ImageFormat::RGBA8, false); scanline.resize(info.image_width * 3); u8* scanline_buffer[1] = {scanline.data()}; @@ -627,11 +918,13 @@ static bool WrapJPEGDecompress(RGBA8Image* image, Error* error, T setup_func) // RGB -> RGBA const u8* src_ptr = scanline.data(); - u32* dst_ptr = image->GetRowPixels(y); + u8* dst_ptr = image->GetRowPixels(y); for (u32 x = 0; x < info.image_width; x++) { - *(dst_ptr++) = + const u32 pixel32 = (ZeroExtend32(src_ptr[0]) | (ZeroExtend32(src_ptr[1]) << 8) | (ZeroExtend32(src_ptr[2]) << 16) | 0xFF000000u); + std::memcpy(dst_ptr, &pixel32, sizeof(pixel32)); + dst_ptr += sizeof(pixel32); src_ptr += 3; } } @@ -641,14 +934,14 @@ static bool WrapJPEGDecompress(RGBA8Image* image, Error* error, T setup_func) return result; } -bool JPEGBufferLoader(RGBA8Image* image, std::span data, Error* error) +bool JPEGBufferLoader(Image* image, std::span data, Error* error) { return WrapJPEGDecompress(image, error, [data](jpeg_decompress_struct& info) { jpeg_mem_src(&info, static_cast(data.data()), static_cast(data.size())); }); } -bool JPEGFileLoader(RGBA8Image* 
image, std::string_view filename, std::FILE* fp, Error* error) +bool JPEGFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error) { static constexpr u32 BUFFER_SIZE = 16384; @@ -713,7 +1006,7 @@ bool JPEGFileLoader(RGBA8Image* image, std::string_view filename, std::FILE* fp, } template -static bool WrapJPEGCompress(const RGBA8Image& image, u8 quality, Error* error, T setup_func) +static bool WrapJPEGCompress(const Image& image, u8 quality, Error* error, T setup_func) { std::vector scanline; jpeg_compress_struct info = {}; @@ -747,10 +1040,12 @@ static bool WrapJPEGCompress(const RGBA8Image& image, u8 quality, Error* error, { // RGBA -> RGB u8* dst_ptr = scanline.data(); - const u32* src_ptr = image.GetRowPixels(y); + const u8* src_ptr = image.GetRowPixels(y); for (u32 x = 0; x < info.image_width; x++) { - const u32 rgba = *(src_ptr++); + u32 rgba; + std::memcpy(&rgba, src_ptr, sizeof(rgba)); + src_ptr += sizeof(rgba); *(dst_ptr++) = Truncate8(rgba); *(dst_ptr++) = Truncate8(rgba >> 8); *(dst_ptr++) = Truncate8(rgba >> 16); @@ -769,7 +1064,7 @@ static bool WrapJPEGCompress(const RGBA8Image& image, u8 quality, Error* error, return result; } -bool JPEGBufferSaver(const RGBA8Image& image, DynamicHeapArray* buffer, u8 quality, Error* error) +bool JPEGBufferSaver(const Image& image, DynamicHeapArray* buffer, u8 quality, Error* error) { // give enough space to avoid reallocs buffer->resize(image.GetWidth() * image.GetHeight() * 2); @@ -807,7 +1102,7 @@ bool JPEGBufferSaver(const RGBA8Image& image, DynamicHeapArray* buffer, u8 q return WrapJPEGCompress(image, quality, error, [&cb](jpeg_compress_struct& info) { info.dest = &cb.mgr; }); } -bool JPEGFileSaver(const RGBA8Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error) +bool JPEGFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error) { static constexpr u32 BUFFER_SIZE = 16384; @@ -864,7 +1159,7 @@ bool JPEGFileSaver(const 
RGBA8Image& image, std::string_view filename, std::FILE !cb.write_error); } -bool WebPBufferLoader(RGBA8Image* image, std::span data, Error* error) +bool WebPBufferLoader(Image* image, std::span data, Error* error) { int width, height; if (!WebPGetInfo(data.data(), data.size(), &width, &height) || width <= 0 || height <= 0) @@ -873,20 +1168,18 @@ bool WebPBufferLoader(RGBA8Image* image, std::span data, Error* error) return false; } - std::vector pixels; - pixels.resize(static_cast(width) * static_cast(height)); - if (!WebPDecodeRGBAInto(data.data(), data.size(), reinterpret_cast(pixels.data()), sizeof(u32) * pixels.size(), - sizeof(u32) * static_cast(width))) + image->Resize(static_cast(width), static_cast(height), ImageFormat::RGBA8, false); + if (!WebPDecodeRGBAInto(data.data(), data.size(), image->GetPixels(), image->GetStorageSize(), image->GetPitch())) { Error::SetStringView(error, "WebPDecodeRGBAInto() failed"); + image->Invalidate(); return false; } - image->SetPixels(static_cast(width), static_cast(height), std::move(pixels)); return true; } -bool WebPBufferSaver(const RGBA8Image& image, DynamicHeapArray* data, u8 quality, Error* error) +bool WebPBufferSaver(const Image& image, DynamicHeapArray* data, u8 quality, Error* error) { u8* encoded_data; const size_t encoded_size = @@ -904,7 +1197,7 @@ bool WebPBufferSaver(const RGBA8Image& image, DynamicHeapArray* data, u8 qua return true; } -bool WebPFileLoader(RGBA8Image* image, std::string_view filename, std::FILE* fp, Error* error) +bool WebPFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error) { std::optional> data = FileSystem::ReadBinaryFile(fp, error); if (!data.has_value()) @@ -913,7 +1206,7 @@ bool WebPFileLoader(RGBA8Image* image, std::string_view filename, std::FILE* fp, return WebPBufferLoader(image, data->cspan(), error); } -bool WebPFileSaver(const RGBA8Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error) +bool WebPFileSaver(const Image& image, 
std::string_view filename, std::FILE* fp, u8 quality, Error* error) { DynamicHeapArray buffer; if (!WebPBufferSaver(image, &buffer, quality, error)) @@ -926,4 +1219,4 @@ bool WebPFileSaver(const RGBA8Image& image, std::string_view filename, std::FILE } return true; -} \ No newline at end of file +} diff --git a/src/util/image.h b/src/util/image.h index efc272eda..49a7334a1 100644 --- a/src/util/image.h +++ b/src/util/image.h @@ -3,154 +3,87 @@ #pragma once +#include "common/align.h" #include "common/heap_array.h" #include "common/types.h" -#include #include -#include #include #include #include -#include class Error; -template -class Image +enum class ImageFormat : u8 { -public: - Image() = default; - Image(u32 width, u32 height) { SetSize(width, height); } - Image(u32 width, u32 height, const PixelType* pixels) { SetPixels(width, height, pixels); } - Image(u32 width, u32 height, std::vector pixels) { SetPixels(width, height, std::move(pixels)); } - Image(const Image& copy) - { - m_width = copy.m_width; - m_height = copy.m_height; - m_pixels = copy.m_pixels; - } - Image(Image&& move) - { - m_width = move.m_width; - m_height = move.m_height; - m_pixels = std::move(move.m_pixels); - move.m_width = 0; - move.m_height = 0; - } - - Image& operator=(const Image& copy) - { - m_width = copy.m_width; - m_height = copy.m_height; - m_pixels = copy.m_pixels; - return *this; - } - Image& operator=(Image&& move) - { - m_width = move.m_width; - m_height = move.m_height; - m_pixels = std::move(move.m_pixels); - move.m_width = 0; - move.m_height = 0; - return *this; - } - - ALWAYS_INLINE bool IsValid() const { return (m_width > 0 && m_height > 0); } - ALWAYS_INLINE u32 GetWidth() const { return m_width; } - ALWAYS_INLINE u32 GetHeight() const { return m_height; } - ALWAYS_INLINE u32 GetPitch() const { return (sizeof(PixelType) * m_width); } - ALWAYS_INLINE const PixelType* GetPixels() const { return m_pixels.data(); } - ALWAYS_INLINE PixelType* GetPixels() { return m_pixels.data(); 
} - ALWAYS_INLINE const PixelType* GetRowPixels(u32 y) const { return &m_pixels[y * m_width]; } - ALWAYS_INLINE PixelType* GetRowPixels(u32 y) { return &m_pixels[y * m_width]; } - ALWAYS_INLINE void SetPixel(u32 x, u32 y, PixelType pixel) { m_pixels[y * m_width + x] = pixel; } - ALWAYS_INLINE PixelType GetPixel(u32 x, u32 y) const { return m_pixels[y * m_width + x]; } - - void Clear(PixelType fill_value = static_cast(0)) - { - std::fill(m_pixels.begin(), m_pixels.end(), fill_value); - } - - void Invalidate() - { - m_width = 0; - m_height = 0; - m_pixels.clear(); - } - - void SetSize(u32 new_width, u32 new_height, PixelType fill_value = static_cast(0)) - { - m_width = new_width; - m_height = new_height; - m_pixels.resize(new_width * new_height); - Clear(fill_value); - } - - void SetPixels(u32 width, u32 height, const PixelType* pixels) - { - m_width = width; - m_height = height; - m_pixels.resize(width * height); - std::memcpy(m_pixels.data(), pixels, width * height * sizeof(PixelType)); - } - - void SetPixels(u32 width, u32 height, std::vector pixels) - { - m_width = width; - m_height = height; - m_pixels = std::move(pixels); - } - - void SetPixels(u32 width, u32 height, const void* data, u32 stride) - { - const u32 copy_width = width * sizeof(PixelType); - if (stride == copy_width) - { - SetPixels(width, height, static_cast(data)); - return; - } - - m_width = width; - m_height = height; - m_pixels.resize(width, height); - PixelType* out_ptr = m_pixels.data(); - const u8* in_ptr = static_cast(data); - for (u32 row = 0; row < height; row++) - { - std::memcpy(out_ptr, in_ptr, copy_width); - out_ptr += width; - in_ptr += stride; - } - } - - std::vector TakePixels() - { - m_width = 0; - m_height = 0; - return std::move(m_pixels); - } - -protected: - u32 m_width = 0; - u32 m_height = 0; - std::vector m_pixels; + None, + RGBA8, + BGRA8, + RGB565, + RGBA5551, + BC1, + BC2, + BC3, + BC7, + MaxCount, }; -class RGBA8Image : public Image +class Image { public: static 
constexpr u8 DEFAULT_SAVE_QUALITY = 85; - RGBA8Image(); - RGBA8Image(u32 width, u32 height); - RGBA8Image(u32 width, u32 height, const u32* pixels); - RGBA8Image(u32 width, u32 height, std::vector pixels); - RGBA8Image(const RGBA8Image& copy); - RGBA8Image(RGBA8Image&& move); +public: + using PixelStorage = Common::unique_aligned_ptr; - RGBA8Image& operator=(const RGBA8Image& copy); - RGBA8Image& operator=(RGBA8Image&& move); + Image(); + Image(u32 width, u32 height, ImageFormat format); + Image(u32 width, u32 height, ImageFormat format, const void* pixels, u32 pitch); + Image(u32 width, u32 height, ImageFormat format, PixelStorage pixels, u32 pitch); + Image(const Image& copy); + Image(Image&& move); + + Image& operator=(const Image& copy); + Image& operator=(Image&& move); + + static const char* GetFormatName(ImageFormat format); + static u32 GetPixelSize(ImageFormat format); + static bool IsCompressedFormat(ImageFormat format); + static u32 CalculatePitch(u32 width, u32 height, ImageFormat format); + static u32 CalculateStorageSize(u32 width, u32 height, ImageFormat format); + static u32 CalculateStorageSize(u32 width, u32 height, u32 pitch, ImageFormat format); + + ALWAYS_INLINE bool IsValid() const { return (m_width > 0 && m_height > 0); } + ALWAYS_INLINE u32 GetWidth() const { return m_width; } + ALWAYS_INLINE u32 GetHeight() const { return m_height; } + ALWAYS_INLINE u32 GetPitch() const { return m_pitch; } + ALWAYS_INLINE ImageFormat GetFormat() const { return m_format; } + ALWAYS_INLINE const u8* GetPixels() const { return m_pixels.get(); } + ALWAYS_INLINE u8* GetPixels() { return m_pixels.get(); } + ALWAYS_INLINE const u8* GetRowPixels(u32 y) const { return &m_pixels[y * m_pitch]; } + ALWAYS_INLINE u8* GetRowPixels(u32 y) { return &m_pixels[y * m_pitch]; } + // ALWAYS_INLINE void SetPixel(u32 x, u32 y, PixelType pixel) { m_pixels[y * m_width + x] = pixel; } + // ALWAYS_INLINE PixelType GetPixel(u32 x, u32 y) const { return m_pixels[y * m_width + x]; } + + 
u32 GetBlockXCount() const; + u32 GetBlockYCount() const; + u32 GetStorageSize() const; + + std::span GetPixelsSpan() const; + std::span GetPixelsSpan(); + + void Clear(); + void Invalidate(); + + void Resize(u32 new_width, u32 new_height, bool preserve); + void Resize(u32 new_width, u32 new_height, ImageFormat format, bool preserve); + + void SetPixels(u32 width, u32 height, ImageFormat format, const void* pixels, u32 pitch); + void SetPixels(u32 width, u32 height, ImageFormat format, PixelStorage pixels, u32 pitch); + + bool SetAllPixelsOpaque(); + + PixelStorage TakePixels(); bool LoadFromFile(const char* filename, Error* error = nullptr); bool LoadFromFile(std::string_view filename, std::FILE* fp, Error* error = nullptr); @@ -164,5 +97,14 @@ public: std::optional> SaveToBuffer(std::string_view filename, u8 quality = DEFAULT_SAVE_QUALITY, Error* error = nullptr) const; - static void SwapBGRAToRGBA(void* pixels, u32 width, u32 height, u32 pitch); + std::optional ConvertToRGBA8(Error* error) const; + + void FlipY(); + +protected: + u32 m_width = 0; + u32 m_height = 0; + u32 m_pitch = 0; + ImageFormat m_format = ImageFormat::None; + PixelStorage m_pixels; }; diff --git a/src/util/imgui_fullscreen.cpp b/src/util/imgui_fullscreen.cpp index b7a6b41ce..39b3705ef 100644 --- a/src/util/imgui_fullscreen.cpp +++ b/src/util/imgui_fullscreen.cpp @@ -43,8 +43,8 @@ using MessageDialogCallbackVariant = std::variant LoadTextureImage(std::string_view path, u32 svg_width, u32 svg_height); -static std::shared_ptr UploadTexture(std::string_view path, const RGBA8Image& image); +static std::optional LoadTextureImage(std::string_view path, u32 svg_width, u32 svg_height); +static std::shared_ptr UploadTexture(std::string_view path, const Image& image); static void TextureLoaderThread(); static void DrawFileSelector(); @@ -100,7 +100,7 @@ static std::atomic_bool s_texture_load_thread_quit{false}; static std::mutex s_texture_load_mutex; static std::condition_variable s_texture_load_cv; 
static std::deque s_texture_load_queue; -static std::deque> s_texture_upload_queue; +static std::deque> s_texture_upload_queue; static std::thread s_texture_load_thread; static SmallString s_fullscreen_footer_text; @@ -288,19 +288,9 @@ const std::shared_ptr& ImGuiFullscreen::GetPlaceholderTexture() return s_placeholder_texture; } -std::unique_ptr ImGuiFullscreen::CreateTextureFromImage(const RGBA8Image& image) +std::optional ImGuiFullscreen::LoadTextureImage(std::string_view path, u32 svg_width, u32 svg_height) { - std::unique_ptr ret = g_gpu_device->CreateTexture( - image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, - GPUTexture::Flags::None, image.GetPixels(), image.GetPitch()); - if (!ret) [[unlikely]] - ERROR_LOG("Failed to upload {}x{} RGBA8Image to GPU", image.GetWidth(), image.GetHeight()); - return ret; -} - -std::optional ImGuiFullscreen::LoadTextureImage(std::string_view path, u32 svg_width, u32 svg_height) -{ - std::optional image; + std::optional image; Error error; if (StringUtil::EqualNoCase(Path::GetExtension(path), "svg")) @@ -313,7 +303,7 @@ std::optional ImGuiFullscreen::LoadTextureImage(std::string_view pat if (svg_data.has_value()) { - image = RGBA8Image(); + image = Image(); if (!image->RasterizeSVG(svg_data->cspan(), svg_width, svg_height)) { ERROR_LOG("Failed to rasterize SVG texture file '{}': {}", path, error.GetDescription()); @@ -331,7 +321,7 @@ std::optional ImGuiFullscreen::LoadTextureImage(std::string_view pat auto fp = FileSystem::OpenManagedCFile(path_str.c_str(), "rb", &error); if (fp) { - image = RGBA8Image(); + image = Image(); if (!image->LoadFromFile(path_str.c_str(), fp.get(), &error)) { ERROR_LOG("Failed to read texture file '{}': {}", path, error.GetDescription()); @@ -348,7 +338,7 @@ std::optional ImGuiFullscreen::LoadTextureImage(std::string_view pat std::optional> data = Host::ReadResourceFile(path, true, &error); if (data.has_value()) { - image = RGBA8Image(); + image = 
Image(); if (!image->LoadFromBuffer(path, data->cspan(), &error)) { ERROR_LOG("Failed to read texture resource '{}': {}", path, error.GetDescription()); @@ -364,14 +354,13 @@ std::optional ImGuiFullscreen::LoadTextureImage(std::string_view pat return image; } -std::shared_ptr ImGuiFullscreen::UploadTexture(std::string_view path, const RGBA8Image& image) +std::shared_ptr ImGuiFullscreen::UploadTexture(std::string_view path, const Image& image) { - std::unique_ptr texture = - g_gpu_device->FetchTexture(image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, - GPUTexture::Format::RGBA8, GPUTexture::Flags::None, image.GetPixels(), image.GetPitch()); + Error error; + std::unique_ptr texture = g_gpu_device->FetchAndUploadTextureImage(image, GPUTexture::Flags::None, &error); if (!texture) { - ERROR_LOG("Failed to create {}x{} texture for resource", image.GetWidth(), image.GetHeight()); + ERROR_LOG("Failed to upload texture '{}': {}", Path::GetFileTitle(path), error.GetDescription()); return {}; } @@ -381,7 +370,7 @@ std::shared_ptr ImGuiFullscreen::UploadTexture(std::string_view path std::shared_ptr ImGuiFullscreen::LoadTexture(std::string_view path, u32 width_hint, u32 height_hint) { - std::optional image(LoadTextureImage(path, width_hint, height_hint)); + std::optional image(LoadTextureImage(path, width_hint, height_hint)); if (image.has_value()) { std::shared_ptr ret(UploadTexture(path, image.value())); @@ -447,7 +436,7 @@ void ImGuiFullscreen::UploadAsyncTextures() std::unique_lock lock(s_texture_load_mutex); while (!s_texture_upload_queue.empty()) { - std::pair it(std::move(s_texture_upload_queue.front())); + std::pair it(std::move(s_texture_upload_queue.front())); s_texture_upload_queue.pop_front(); lock.unlock(); @@ -480,7 +469,7 @@ void ImGuiFullscreen::TextureLoaderThread() s_texture_load_queue.pop_front(); lock.unlock(); - std::optional image(LoadTextureImage(path.c_str(), 0, 0)); + std::optional image(LoadTextureImage(path.c_str(), 0, 0)); 
lock.lock(); // don't bother queuing back if it doesn't exist diff --git a/src/util/imgui_fullscreen.h b/src/util/imgui_fullscreen.h index c28881642..8f1fecbfa 100644 --- a/src/util/imgui_fullscreen.h +++ b/src/util/imgui_fullscreen.h @@ -18,7 +18,7 @@ #include #include -class RGBA8Image; +class Image; class GPUTexture; class SmallStringBase; @@ -129,7 +129,6 @@ void Shutdown(); /// Texture cache. const std::shared_ptr& GetPlaceholderTexture(); -std::unique_ptr CreateTextureFromImage(const RGBA8Image& image); std::shared_ptr LoadTexture(std::string_view path, u32 svg_width = 0, u32 svg_height = 0); GPUTexture* GetCachedTexture(std::string_view name); GPUTexture* GetCachedTexture(std::string_view name, u32 svg_width, u32 svg_height); diff --git a/src/util/imgui_manager.cpp b/src/util/imgui_manager.cpp index 3e6a6cfd6..9a68bf6c2 100644 --- a/src/util/imgui_manager.cpp +++ b/src/util/imgui_manager.cpp @@ -1242,7 +1242,7 @@ void ImGuiManager::UpdateSoftwareCursorTexture(u32 index) } Error error; - RGBA8Image image; + Image image; if (!image.LoadFromFile(sc.image_path.c_str(), &error)) { ERROR_LOG("Failed to load software cursor {} image '{}': {}", index, sc.image_path, error.GetDescription()); diff --git a/src/util/postprocessing_shader_fx.cpp b/src/util/postprocessing_shader_fx.cpp index 2f3f945e6..1fdb70490 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -1121,7 +1121,7 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer return false; } - RGBA8Image image; + Image image; if (const std::string image_path = Path::Combine(EmuFolders::Shaders, Path::Combine("reshade" FS_OSPATH_SEPARATOR_STR "Textures", source)); !image.LoadFromFile(image_path.c_str())) From 7eb1d4e092587d5f8fd20669d2cd9103ac17285c Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 18:10:59 +1000 Subject: [PATCH 07/35] GPUDevice: Support compressed textures --- src/util/d3d11_device.cpp | 7 + 
src/util/d3d11_texture.cpp | 27 ++- src/util/d3d12_device.cpp | 7 + src/util/d3d12_texture.cpp | 42 ++-- src/util/d3d12_texture.h | 3 +- src/util/d3d_common.cpp | 4 + src/util/gpu_device.cpp | 16 +- src/util/gpu_device.h | 3 + src/util/gpu_texture.cpp | 123 ++++++---- src/util/gpu_texture.h | 15 +- src/util/image.cpp | 444 +++++++++++++++++++++++++++++++++++- src/util/image.h | 1 + src/util/metal_device.mm | 32 ++- src/util/opengl_device.cpp | 6 + src/util/opengl_texture.cpp | 260 ++++++++++++++------- src/util/vulkan_device.cpp | 9 + src/util/vulkan_texture.cpp | 34 ++- src/util/vulkan_texture.h | 4 +- 18 files changed, 841 insertions(+), 196 deletions(-) diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index c04a1e417..b5ea7c7a3 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -200,6 +200,13 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features) (SUCCEEDED(m_device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &data, sizeof(data))) && data.ROVsSupported); } + + m_features.dxt_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && + (SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) && + SupportsTextureFormat(GPUTexture::Format::BC3))); + m_features.bptc_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7)); } D3D11SwapChain::D3D11SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle, diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp index 78b441069..cd5fd2931 100644 --- a/src/util/d3d11_texture.cpp +++ b/src/util/d3d11_texture.cpp @@ -147,23 +147,24 @@ bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, if (HasFlag(Flags::AllowMap)) { void* map; - u32 map_stride; - if (!Map(&map, &map_stride, x, y, width, height, layer, level)) + u32 map_pitch; + if (!Map(&map, &map_pitch, x, y, width, height, layer, 
level)) return false; - StringUtil::StrideMemCpy(map, map_stride, data, pitch, GetPixelSize() * width, height); + CopyTextureDataForUpload(width, height, m_format, map, map_pitch, data, pitch); Unmap(); return true; } - const CD3D11_BOX box(static_cast(x), static_cast(y), 0, static_cast(x + width), - static_cast(y + height), 1); + const u32 bs = GetBlockSize(); + const D3D11_BOX box = {Common::AlignDownPow2(x, bs), Common::AlignDownPow2(y, bs), 0U, + Common::AlignUpPow2(x + width, bs), Common::AlignUpPow2(y + height, bs), 1U}; const u32 srnum = D3D11CalcSubresource(level, layer, m_levels); ID3D11DeviceContext1* context = D3D11Device::GetD3DContext(); CommitClear(context); - GPUDevice::GetStatistics().buffer_streamed += height * pitch; + GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, pitch); GPUDevice::GetStatistics().num_uploads++; context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0); @@ -194,10 +195,18 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 return false; } - GPUDevice::GetStatistics().buffer_streamed += height * sr.RowPitch; + GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, sr.RowPitch); GPUDevice::GetStatistics().num_uploads++; - *map = static_cast(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize()); + if (IsCompressedFormat(m_format)) + { + *map = static_cast(sr.pData) + ((y / GetBlockSize()) * sr.RowPitch) + + ((x / GetBlockSize()) * GetPixelSize()); + } + else + { + *map = static_cast(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize()); + } *map_stride = sr.RowPitch; m_mapped_subresource = srnum; m_state = GPUTexture::State::Dirty; @@ -294,7 +303,7 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid if (initial_data) { - GPUDevice::GetStatistics().buffer_streamed += height * initial_data_stride; + GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, initial_data_stride); 
GPUDevice::GetStatistics().num_uploads++; } diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 964d067cc..f0e770010 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1366,6 +1366,13 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) && options.ROVsSupported; } + + m_features.dxt_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && + (SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) && + SupportsTextureFormat(GPUTexture::Format::BC3))); + m_features.bptc_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7)); } void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, diff --git a/src/util/d3d12_texture.cpp b/src/util/d3d12_texture.cpp index 4305f7afd..1d7500dfd 100644 --- a/src/util/d3d12_texture.cpp +++ b/src/util/d3d12_texture.cpp @@ -340,24 +340,24 @@ ID3D12GraphicsCommandList4* D3D12Texture::GetCommandBufferForUpdate() return dev.GetInitCommandList(); } -void D3D12Texture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, - u32 upload_pitch) const -{ - StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height); -} - ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, - u32 height) const + u32 height, u32 buffer_size) const { - const u32 size = upload_pitch * height; ComPtr resource; ComPtr allocation; const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE, nullptr, nullptr}; - const D3D12_RESOURCE_DESC resource_desc = { - D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, 
D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - D3D12_RESOURCE_FLAG_NONE}; + const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + buffer_size, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource( &allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.GetAddressOf(), IID_PPV_ARGS(resource.GetAddressOf())); @@ -375,9 +375,9 @@ ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 return nullptr; } - CopyTextureDataForUpload(map_ptr, data, width, height, pitch, upload_pitch); + CopyTextureDataForUpload(width, height, m_format, map_ptr, upload_pitch, data, pitch); - const D3D12_RANGE write_range = {0, size}; + const D3D12_RANGE write_range = {0, buffer_size}; resource->Unmap(0, &write_range); // Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy. @@ -395,8 +395,8 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, D3D12Device& dev = D3D12Device::GetInstance(); D3D12StreamBuffer& sbuffer = dev.GetTextureUploadBuffer(); - const u32 upload_pitch = Common::AlignUpPow2(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 required_size = height * upload_pitch; + const u32 upload_pitch = Common::AlignUpPow2(CalcUploadPitch(width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 required_size = CalcUploadSize(height, upload_pitch); D3D12_TEXTURE_COPY_LOCATION srcloc; srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; @@ -410,7 +410,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, // Otherwise allocation will either fail, or require lots of cmdbuffer submissions. 
if (required_size > (sbuffer.GetSize() / 2)) { - srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height); + srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size); if (!srcloc.pResource) return false; @@ -431,7 +431,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, srcloc.pResource = sbuffer.GetBuffer(); srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset(); - CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch); + CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch); sbuffer.CommitMemory(required_size); } @@ -482,8 +482,8 @@ bool D3D12Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 CommitClear(GetCommandBufferForUpdate()); // see note in Update() for the reason why. - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 req_size = height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(m_height, aligned_pitch); D3D12StreamBuffer& buffer = dev.GetTextureUploadBuffer(); if (req_size >= (buffer.GetSize() / 2)) return false; @@ -512,8 +512,8 @@ void D3D12Texture::Unmap() { D3D12Device& dev = D3D12Device::GetInstance(); D3D12StreamBuffer& sb = dev.GetTextureUploadBuffer(); - const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 req_size = m_map_height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch); const u32 offset = sb.GetCurrentOffset(); sb.CommitMemory(req_size); diff --git a/src/util/d3d12_texture.h b/src/util/d3d12_texture.h 
index 051d1ca0d..96ccd0f3d 100644 --- a/src/util/d3d12_texture.h +++ b/src/util/d3d12_texture.h @@ -80,8 +80,7 @@ private: ID3D12GraphicsCommandList4* GetCommandBufferForUpdate(); ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, - u32 height) const; - void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const; + u32 height, u32 buffer_size) const; void ActuallyCommitClear(ID3D12GraphicsCommandList* cmdlist); ComPtr m_resource; diff --git a/src/util/d3d_common.cpp b/src/util/d3d_common.cpp index 7736bccca..e99d8ce46 100644 --- a/src/util/d3d_common.cpp +++ b/src/util/d3d_common.cpp @@ -650,6 +650,10 @@ static constexpr std::array(GPUTe {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA16F {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA32F {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB10A2 + {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC1 + {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC2 + {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC3 + {DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC7 // clang-format on }}; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 44e3faf50..bd7acad9e 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -1057,8 +1057,22 @@ std::unique_ptr GPUDevice::FetchAndUploadTextureImage(const Image& i { const Image* image_to_upload = ℑ GPUTexture::Format gpu_format = GPUTexture::GetTextureFormatForImageFormat(image.GetFormat()); + bool gpu_format_supported; + + // avoid device query 
for compressed formats that we've already pretested + if (gpu_format >= GPUTexture::Format::BC1 && gpu_format <= GPUTexture::Format::BC3) + gpu_format_supported = m_features.dxt_textures; + else if (gpu_format == GPUTexture::Format::BC7) + gpu_format_supported = m_features.bptc_textures; + else if (gpu_format == GPUTexture::Format::RGBA8) // always supported + gpu_format_supported = true; + else if (gpu_format != GPUTexture::Format::Unknown) + gpu_format_supported = SupportsTextureFormat(gpu_format); + else + gpu_format_supported = false; + std::optional converted_image; - if (!SupportsTextureFormat(gpu_format)) + if (!gpu_format_supported) { converted_image = image.ConvertToRGBA8(error); if (!converted_image.has_value()) diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 02ae401c7..cd66311a8 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -515,6 +515,7 @@ public: FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6), FEATURE_MASK_MEMORY_IMPORT = (1 << 7), FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8), + FEATURE_MASK_COMPRESSED_TEXTURES = (1 << 9), }; enum class DrawBarrier : u32 @@ -553,6 +554,8 @@ public: bool pipeline_cache : 1; bool prefer_unused_textures : 1; bool raster_order_views : 1; + bool dxt_textures : 1; + bool bptc_textures : 1; }; struct Statistics diff --git a/src/util/gpu_texture.cpp b/src/util/gpu_texture.cpp index a913d33b4..c10e6faff 100644 --- a/src/util/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -25,7 +25,7 @@ GPUTexture::~GPUTexture() const char* GPUTexture::GetFormatName(Format format) { - static constexpr const char* format_names[static_cast(Format::MaxCount)] = { + static constexpr const std::array(Format::MaxCount)> format_names = {{ "Unknown", // Unknown "RGBA8", // RGBA8 "BGRA8", // BGRA8 @@ -51,43 +51,35 @@ const char* GPUTexture::GetFormatName(Format format) "RGBA16F", // RGBA16F "RGBA32F", // RGBA32F "RGB10A2", // RGB10A2 - }; + "BC1", // BC1 + "BC2", // BC2 + "BC3", // BC3 + "BC7", // BC7 + }}; return 
format_names[static_cast(format)]; } -u32 GPUTexture::GetCompressedBytesPerBlock() const +u32 GPUTexture::GetBlockSize() const { - return GetCompressedBytesPerBlock(m_format); + return GetBlockSize(m_format); } -u32 GPUTexture::GetCompressedBytesPerBlock(Format format) +u32 GPUTexture::GetBlockSize(Format format) { - // TODO: Implement me - return GetPixelSize(format); -} - -u32 GPUTexture::GetCompressedBlockSize() const -{ - return GetCompressedBlockSize(m_format); -} - -u32 GPUTexture::GetCompressedBlockSize(Format format) -{ - // TODO: Implement me - /*if (format >= Format::BC1 && format <= Format::BC7) + if (format >= Format::BC1 && format <= Format::BC7) return 4; - else*/ - return 1; + else + return 1; } u32 GPUTexture::CalcUploadPitch(Format format, u32 width) { - /* + // convert to blocks if (format >= Format::BC1 && format <= Format::BC7) width = Common::AlignUpPow2(width, 4) / 4; - */ - return width * GetCompressedBytesPerBlock(format); + + return width * GetPixelSize(format); } u32 GPUTexture::CalcUploadPitch(u32 width) const @@ -102,9 +94,11 @@ u32 GPUTexture::CalcUploadRowLengthFromPitch(u32 pitch) const u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch) { - const u32 block_size = GetCompressedBlockSize(format); - const u32 bytes_per_block = GetCompressedBytesPerBlock(format); - return ((pitch + (bytes_per_block - 1)) / bytes_per_block) * block_size; + const u32 pixel_size = GetPixelSize(format); + if (IsCompressedFormat(format)) + return (Common::AlignUpPow2(pitch, pixel_size) / pixel_size) * 4; + else + return pitch / pixel_size; } u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const @@ -114,36 +108,64 @@ u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch) { - const u32 block_size = GetCompressedBlockSize(format); + const u32 block_size = GetBlockSize(format); return pitch * ((static_cast(height) + (block_size - 1)) / block_size); } +bool 
GPUTexture::IsCompressedFormat(Format format) +{ + return (format >= Format::BC1); +} + +bool GPUTexture::IsCompressedFormat() const +{ + return IsCompressedFormat(m_format); +} + u32 GPUTexture::GetFullMipmapCount(u32 width, u32 height) { const u32 max_dim = Common::PreviousPow2(std::max(width, height)); return (std::countr_zero(max_dim) + 1); } +void GPUTexture::CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch, + const void* src, u32 src_pitch) +{ + if (IsCompressedFormat(format)) + { + const u32 blocks_wide = Common::AlignUpPow2(width, 4) / 4; + const u32 blocks_high = Common::AlignUpPow2(height, 4) / 4; + const u32 block_size = GetPixelSize(format); + StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, block_size * blocks_wide, blocks_high); + } + else + { + StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, width * GetPixelSize(format), height); + } +} + GPUTexture::Format GPUTexture::GetTextureFormatForImageFormat(ImageFormat format) { - static constexpr const std::array(ImageFormat::MaxCount)> mapping = {{ - Format::Unknown, // None - Format::RGBA8, // RGBA8 - Format::BGRA8, // BGRA8 - Format::RGB565, // RGB565 - Format::Unknown, // RGBA5551 - Format::Unknown, // BC1 - Format::Unknown, // BC2 - Format::Unknown, // BC3 - Format::Unknown, // BC7 - }}; + static constexpr const std::array mapping = { + Format::Unknown, // None + Format::RGBA8, // RGBA8 + Format::BGRA8, // BGRA8 + Format::RGB565, // RGB565 + Format::RGBA5551, // RGBA5551 + Format::Unknown, // BGR8 + Format::BC1, // BC1 + Format::BC2, // BC2 + Format::BC3, // BC3 + Format::BC7, // BC7 + }; + static_assert(mapping.size() == static_cast(ImageFormat::MaxCount)); return mapping[static_cast(format)]; } ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format) { - static constexpr const std::array(Format::MaxCount)> mapping = {{ + static constexpr const std::array mapping = { ImageFormat::None, // Unknown ImageFormat::RGBA8, // RGBA8 
ImageFormat::BGRA8, // BGRA8 @@ -169,7 +191,12 @@ ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format) ImageFormat::None, // RGBA16F ImageFormat::None, // RGBA32F ImageFormat::None, // RGB10A2 - }}; + ImageFormat::BC1, // BC1 + ImageFormat::BC2, // BC2 + ImageFormat::BC3, // BC3 + ImageFormat::BC7, // BC7 + }; + static_assert(mapping.size() == static_cast(Format::MaxCount)); return mapping[static_cast(format)]; } @@ -226,6 +253,10 @@ u32 GPUTexture::GetPixelSize(GPUTexture::Format format) 8, // RGBA16F 16, // RGBA32F 4, // RGB10A2 + 8, // BC1 - 16 pixels in 64 bits + 16, // BC2 - 16 pixels in 128 bits + 16, // BC3 - 16 pixels in 128 bits + 16, // BC7 - 16 pixels in 128 bits }}; return sizes[static_cast(format)]; @@ -241,12 +272,6 @@ bool GPUTexture::IsDepthStencilFormat(Format format) return (format == Format::D24S8 || format == Format::D32FS8); } -bool GPUTexture::IsCompressedFormat(Format format) -{ - // TODO: Implement me - return false; -} - bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, Flags flags, Error* error) { @@ -318,6 +343,12 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u return false; } + if (IsCompressedFormat(format) && (type != Type::Texture || ((flags & Flags::AllowBindAsImage) != Flags::None))) + { + Error::SetStringView(error, "Compressed formats are only supported for textures."); + return false; + } + return true; } diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index c85113a91..700d6dca3 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -61,7 +61,11 @@ public: RGBA16F, RGBA32F, RGB10A2, - MaxCount + BC1, ///< BC1, aka DXT1 compressed texture + BC2, ///< BC2, aka DXT2/3 compressed texture + BC3, ///< BC3, aka DXT4/5 compressed texture + BC7, ///< BC7, aka BPTC compressed texture + MaxCount, }; enum class State : u8 @@ -95,12 +99,13 @@ public: static bool IsDepthFormat(Format format); static
bool IsDepthStencilFormat(Format format); static bool IsCompressedFormat(Format format); - static u32 GetCompressedBytesPerBlock(Format format); - static u32 GetCompressedBlockSize(Format format); + static u32 GetBlockSize(Format format); static u32 CalcUploadPitch(Format format, u32 width); static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch); static u32 CalcUploadSize(Format format, u32 height, u32 pitch); static u32 GetFullMipmapCount(u32 width, u32 height); + static void CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch, const void* src, + u32 src_pitch); static Format GetTextureFormatForImageFormat(ImageFormat format); static ImageFormat GetImageFormatForTextureFormat(Format format); @@ -160,8 +165,8 @@ public: size_t GetVRAMUsage() const; - u32 GetCompressedBytesPerBlock() const; - u32 GetCompressedBlockSize() const; + bool IsCompressedFormat() const; + u32 GetBlockSize() const; u32 CalcUploadPitch(u32 width) const; u32 CalcUploadRowLengthFromPitch(u32 pitch) const; u32 CalcUploadSize(u32 height, u32 pitch) const; diff --git a/src/util/image.cpp b/src/util/image.cpp index c3f5dc212..4a53fa4df 100644 --- a/src/util/image.cpp +++ b/src/util/image.cpp @@ -46,6 +46,10 @@ static bool WebPBufferSaver(const Image& image, DynamicHeapArray* data, u8 q static bool WebPFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error); static bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); +static bool DDSBufferLoader(Image* image, std::span data, Error* error); +static bool DDSFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error); + +namespace { struct FormatHandler { const char* extension; @@ -54,12 +58,14 @@ struct FormatHandler bool (*file_loader)(Image*, std::string_view, std::FILE*, Error*); bool (*file_saver)(const Image&, std::string_view, std::FILE*, u8, Error*); }; +} // namespace static constexpr FormatHandler 
s_format_handlers[] = { {"png", PNGBufferLoader, PNGBufferSaver, PNGFileLoader, PNGFileSaver}, {"jpg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver}, {"jpeg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver}, {"webp", WebPBufferLoader, WebPBufferSaver, WebPFileLoader, WebPFileSaver}, + {"dds", DDSBufferLoader, nullptr, DDSFileLoader, nullptr}, }; static const FormatHandler* GetFormatHandler(std::string_view extension) @@ -155,17 +161,19 @@ Image& Image::operator=(Image&& move) const char* Image::GetFormatName(ImageFormat format) { - static constexpr std::array(ImageFormat::MaxCount)> names = { + static constexpr std::array names = { "None", // None "RGBA8", // RGBA8 "BGRA8", // BGRA8 "RGB565", // RGB565 "RGB5551", // RGBA5551 + "BGR8", // BGR8 "BC1", // BC1 "BC2", // BC2 "BC3", // BC3 "BC7", // BC7 }; + static_assert(names.size() == static_cast(ImageFormat::MaxCount)); return names[static_cast(format)]; } @@ -178,6 +186,7 @@ u32 Image::GetPixelSize(ImageFormat format) 4, // BGRA8 2, // RGB565 2, // RGBA5551 + 3, // BGR8 8, // BC1 - 16 pixels in 64 bits 16, // BC2 - 16 pixels in 128 bits 16, // BC3 - 16 pixels in 128 bits @@ -563,6 +572,27 @@ std::optional Image::ConvertToRGBA8(Error* error) const } } } + break; + + case ImageFormat::BGR8: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + for (u32 y = 0; y < m_height; y++) + { + const u8* pixels_in = GetRowPixels(y); + u8* pixels_out = ret->GetRowPixels(y); + + for (u32 x = 0; x < m_width; x++) + { + // Set alpha channel to full intensity. 
+ const u32 rgba = (ZeroExtend32(pixels_in[2]) | (ZeroExtend32(pixels_in[1]) << 8) | + (ZeroExtend32(pixels_in[0]) << 16) | 0xFF000000u); + std::memcpy(pixels_out, &rgba, sizeof(rgba)); + pixels_in += 3; // source pixel is three packed bytes in B, G, R order + pixels_out += sizeof(rgba); + } + } + } break; // TODO: Block format decompression @@ -1220,3 +1250,415 @@ bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, return true; } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// DDS Handler +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// From https://raw.githubusercontent.com/Microsoft/DirectXTex/master/DirectXTex/DDS.h +// +// This header defines constants and structures that are useful when parsing +// DDS files. DDS files were originally designed to use several structures +// and constants that are native to DirectDraw and are defined in ddraw.h, +// such as DDSURFACEDESC2 and DDSCAPS2. This file defines similar +// (compatible) constants and structures so that one can use DDS files +// without needing to include ddraw.h. +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved.
+// +// http://go.microsoft.com/fwlink/?LinkId=248926 + +#pragma pack(push, 1) + +static constexpr uint32_t DDS_MAGIC = 0x20534444; // "DDS " + +struct DDS_PIXELFORMAT +{ + uint32_t dwSize; + uint32_t dwFlags; + uint32_t dwFourCC; + uint32_t dwRGBBitCount; + uint32_t dwRBitMask; + uint32_t dwGBitMask; + uint32_t dwBBitMask; + uint32_t dwABitMask; +}; + +#define DDS_FOURCC 0x00000004 // DDPF_FOURCC +#define DDS_RGB 0x00000040 // DDPF_RGB +#define DDS_RGBA 0x00000041 // DDPF_RGB | DDPF_ALPHAPIXELS +#define DDS_LUMINANCE 0x00020000 // DDPF_LUMINANCE +#define DDS_LUMINANCEA 0x00020001 // DDPF_LUMINANCE | DDPF_ALPHAPIXELS +#define DDS_ALPHA 0x00000002 // DDPF_ALPHA +#define DDS_PAL8 0x00000020 // DDPF_PALETTEINDEXED8 +#define DDS_PAL8A 0x00000021 // DDPF_PALETTEINDEXED8 | DDPF_ALPHAPIXELS +#define DDS_BUMPDUDV 0x00080000 // DDPF_BUMPDUDV + +#ifndef MAKEFOURCC +#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ + ((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) | \ + ((uint32_t)(uint8_t)(ch3) << 24)) +#endif /* defined(MAKEFOURCC) */ + +#define DDS_HEADER_FLAGS_TEXTURE 0x00001007 // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT +#define DDS_HEADER_FLAGS_MIPMAP 0x00020000 // DDSD_MIPMAPCOUNT +#define DDS_HEADER_FLAGS_VOLUME 0x00800000 // DDSD_DEPTH +#define DDS_HEADER_FLAGS_PITCH 0x00000008 // DDSD_PITCH +#define DDS_HEADER_FLAGS_LINEARSIZE 0x00080000 // DDSD_LINEARSIZE +#define DDS_MAX_TEXTURE_SIZE 32768 + +// Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION +enum DDS_RESOURCE_DIMENSION +{ + DDS_DIMENSION_TEXTURE1D = 2, + DDS_DIMENSION_TEXTURE2D = 3, + DDS_DIMENSION_TEXTURE3D = 4, +}; + +struct DDS_HEADER +{ + uint32_t dwSize; + uint32_t dwFlags; + uint32_t dwHeight; + uint32_t dwWidth; + uint32_t dwPitchOrLinearSize; + uint32_t dwDepth; // only if DDS_HEADER_FLAGS_VOLUME is set in dwFlags + uint32_t dwMipMapCount; + uint32_t dwReserved1[11]; + DDS_PIXELFORMAT ddspf; + uint32_t dwCaps; + uint32_t 
dwCaps2; + uint32_t dwCaps3; + uint32_t dwCaps4; + uint32_t dwReserved2; +}; + +struct DDS_HEADER_DXT10 +{ + uint32_t dxgiFormat; + uint32_t resourceDimension; + uint32_t miscFlag; // see DDS_RESOURCE_MISC_FLAG + uint32_t arraySize; + uint32_t miscFlags2; // see DDS_MISC_FLAGS2 +}; + +#pragma pack(pop) + +static_assert(sizeof(DDS_HEADER) == 124, "DDS Header size mismatch"); +static_assert(sizeof(DDS_HEADER_DXT10) == 20, "DDS DX10 Extended Header size mismatch"); + +constexpr DDS_PIXELFORMAT DDSPF_A8R8G8B8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000}; +constexpr DDS_PIXELFORMAT DDSPF_X8R8G8B8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}; +constexpr DDS_PIXELFORMAT DDSPF_A8B8G8R8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000}; +constexpr DDS_PIXELFORMAT DDSPF_X8B8G8R8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000}; +constexpr DDS_PIXELFORMAT DDSPF_R8G8B8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 24, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}; + +// End of Microsoft code from DDS.h. 
+ +static bool DDSPixelFormatMatches(const DDS_PIXELFORMAT& pf1, const DDS_PIXELFORMAT& pf2) +{ + return std::tie(pf1.dwSize, pf1.dwFlags, pf1.dwFourCC, pf1.dwRGBBitCount, pf1.dwRBitMask, pf1.dwGBitMask, + pf1.dwBBitMask, + pf1.dwABitMask) == std::tie(pf2.dwSize, pf2.dwFlags, pf2.dwFourCC, pf2.dwRGBBitCount, pf2.dwRBitMask, + pf2.dwGBitMask, pf2.dwBBitMask, pf2.dwABitMask); +} + +struct DDSLoadInfo +{ + u32 block_size = 1; + u32 bytes_per_block = 4; + u32 width = 0; + u32 height = 0; + u32 mip_count = 0; + ImageFormat format = ImageFormat::RGBA8; + s64 base_image_offset = 0; + u32 base_image_size = 0; + u32 base_image_pitch = 0; + bool clear_alpha = false; +}; + +template +static bool ParseDDSHeader(const ReadFunction& RF, DDSLoadInfo* info, Error* error) +{ + u32 magic; + if (!RF(&magic, sizeof(magic), error) || magic != DDS_MAGIC) + { + Error::AddPrefix(error, "Failed to read magic: "); + return false; + } + + DDS_HEADER header; + u32 header_size = sizeof(header); + if (!RF(&header, header_size, error) || header.dwSize < header_size) + { + Error::AddPrefix(error, "Failed to read header: "); + return false; + } + + // We should check for DDS_HEADER_FLAGS_TEXTURE here, but some tools don't seem + // to set it (e.g. compressonator). But we can still validate the size. + if (header.dwWidth == 0 || header.dwWidth >= DDS_MAX_TEXTURE_SIZE || header.dwHeight == 0 || + header.dwHeight >= DDS_MAX_TEXTURE_SIZE) + { + Error::SetStringFmt(error, "Size is invalid: {}x{}", header.dwWidth, header.dwHeight); + return false; + } + + // Image should be 2D. + if (header.dwFlags & DDS_HEADER_FLAGS_VOLUME) + { + Error::SetStringView(error, "Volume textures are not supported."); + return false; + } + + // Presence of width/height fields is already tested by DDS_HEADER_FLAGS_TEXTURE. + info->width = header.dwWidth; + info->height = header.dwHeight; + + // Check for mip levels.
+ if (header.dwFlags & DDS_HEADER_FLAGS_MIPMAP) + { + // A zero count with the mipmap flag set falls back to a full chain computed from the dimensions. + if (header.dwMipMapCount != 0) + { + info->mip_count = header.dwMipMapCount; + } + else + { + const u32 max_dim = Common::PreviousPow2(std::max(header.dwWidth, header.dwHeight)); + info->mip_count = (std::countr_zero(max_dim) + 1); + } + } + else + { + info->mip_count = 1; + } + + // Handle fourcc formats vs uncompressed formats. + const bool has_fourcc = (header.ddspf.dwFlags & DDS_FOURCC) != 0; + if (has_fourcc) + { + // Handle DX10 extension header. + u32 dxt10_format = 0; + if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', '1', '0')) + { + DDS_HEADER_DXT10 dxt10_header; + if (!RF(&dxt10_header, sizeof(dxt10_header), error)) + { + Error::AddPrefix(error, "Failed to read DXT10 header: "); + return false; + } + + // Can't handle array textures here. Doesn't make sense to use them, anyway. + if (dxt10_header.resourceDimension != DDS_DIMENSION_TEXTURE2D || dxt10_header.arraySize != 1) + { + Error::SetStringView(error, "Only 2D textures are supported."); + return false; + } + + header_size += sizeof(dxt10_header); + dxt10_format = dxt10_header.dxgiFormat; + } + + if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '1') || dxt10_format == 71) + { + info->format = ImageFormat::BC1; + info->block_size = 4; + info->bytes_per_block = 8; + } + else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '2') || + header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '3') || dxt10_format == 74) + { + info->format = ImageFormat::BC2; + info->block_size = 4; + info->bytes_per_block = 16; + } + else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '4') || + header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '5') || dxt10_format == 77) + { + info->format = ImageFormat::BC3; + info->block_size = 4; + info->bytes_per_block = 16; + } + else if (dxt10_format == 98) + { + info->format = ImageFormat::BC7; + info->block_size = 4; + info->bytes_per_block = 16; + } + else + { + 
Error::SetStringFmt(error, "Unknown format with FOURCC 0x{:08X} / DXT10 format {}", header.ddspf.dwFourCC, + dxt10_format); + return false; + } + } + else + { + if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8R8G8B8)) + { + info->format = ImageFormat::BGRA8; + } + else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8R8G8B8)) + { + info->format = ImageFormat::BGRA8; + info->clear_alpha = true; + } + else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8B8G8R8)) + { + info->format = ImageFormat::RGBA8; + info->clear_alpha = true; + } + else if (DDSPixelFormatMatches(header.ddspf, DDSPF_R8G8B8)) + { + info->format = ImageFormat::BGR8; + info->clear_alpha = true; + } + else if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8B8G8R8)) + { + info->format = ImageFormat::RGBA8; + } + else + { + Error::SetStringFmt(error, "Unhandled format with FOURCC 0x{:08X}", header.ddspf.dwFourCC); + return false; + } + + // All these formats are RGBA, just with byte swapping. + info->block_size = 1; + info->bytes_per_block = header.ddspf.dwRGBBitCount / 8; + } + + // Mip levels smaller than the block size are padded to multiples of the block size. + const u32 blocks_wide = Common::AlignUpPow2(info->width, info->block_size) / info->block_size; + const u32 blocks_high = Common::AlignUpPow2(info->height, info->block_size) / info->block_size; + + // Pitch can be specified in the header, otherwise we can derive it from the dimensions. For + // compressed formats, both DDS_HEADER_FLAGS_LINEARSIZE and DDS_HEADER_FLAGS_PITCH should be + // set. See https://msdn.microsoft.com/en-us/library/windows/desktop/bb943982(v=vs.85).aspx + if (header.dwFlags & DDS_HEADER_FLAGS_PITCH && header.dwFlags & DDS_HEADER_FLAGS_LINEARSIZE) + { + // Convert pitch (in bytes) to texels/row length. + if (header.dwPitchOrLinearSize < info->bytes_per_block) + { + // Likely a corrupted or invalid file.
+ Error::SetStringFmt(error, "Invalid pitch: {}", header.dwPitchOrLinearSize); + return false; + } + + info->base_image_pitch = header.dwPitchOrLinearSize; + info->base_image_size = info->base_image_pitch * blocks_high; + } + else + { + // Assume no padding between rows of blocks. + info->base_image_pitch = blocks_wide * info->bytes_per_block; + info->base_image_size = info->base_image_pitch * blocks_high; + } + + info->base_image_offset = sizeof(magic) + header_size; + +#if 0 + // D3D11 cannot handle block compressed textures where the first mip level is not a multiple of the block size. + if (mip_level == 0 && info.block_size > 1 && ((width % info.block_size) != 0 || (height % info.block_size) != 0)) + { + Error::SetStringFmt(error, + "Invalid dimensions for DDS texture. For compressed textures of this format, " + "the width/height of the first mip level must be a multiple of {}.", + info.block_size); + return false; + } +#endif + + return true; +} + +bool DDSFileLoader(Image* image, std::string_view path, std::FILE* fp, Error* error) +{ + const auto header_reader = [fp](void* buffer, size_t size, Error* error) { + if (std::fread(buffer, size, 1, fp) == 1) + return true; + + Error::SetErrno(error, "fread() failed: ", errno); + return false; + }; + + DDSLoadInfo info; + if (!ParseDDSHeader(header_reader, &info, error)) + return false; + + // always load the base image + if (!FileSystem::FSeek64(fp, info.base_image_offset, SEEK_SET, error)) + return false; + + image->Resize(info.width, info.height, info.format, false); + const u32 blocks = image->GetBlockYCount(); + + // The pitch comes from the file header, so it is untrusted and may differ from the image's own + // row pitch. Never read more than one destination row per fread(); a larger file pitch would + // otherwise write past the end of the row. Surplus bytes in each file row are skipped instead. + const u32 image_pitch = image->GetPitch(); + const u32 row_size = (info.base_image_pitch < image_pitch) ? info.base_image_pitch : image_pitch; + const u32 row_padding = info.base_image_pitch - row_size; + for (u32 y = 0; y < blocks; y++) + { + if (std::fread(image->GetRowPixels(y), row_size, 1, fp) != 1) + { + Error::SetErrno(error, "fread() failed: ", errno); + 
return false; + } + + // Skip the part of the file row that does not fit in the image row. + if (row_padding != 0 && !FileSystem::FSeek64(fp, row_padding, SEEK_CUR, error)) + return false; + } + + if (info.clear_alpha) + image->SetAllPixelsOpaque(); + + return true; +} + +bool DDSBufferLoader(Image* image, std::span data, Error* error) +{ + size_t data_pos = 0; + const auto header_reader = [&data, &data_pos](void* buffer, size_t size, Error* error) { + if ((data_pos + size) > data.size()) + { + Error::SetStringView(error, "Buffer does not contain sufficient data."); + return false; + } + + std::memcpy(buffer, &data[data_pos], size); + data_pos += size; + return true; + }; + + DDSLoadInfo info; + if (!ParseDDSHeader(header_reader, &info, error)) + return false; + + if ((static_cast(info.base_image_offset) + info.base_image_size) > data.size()) + { + Error::SetStringFmt(error, "Buffer does not contain complete base image."); + return false; + } + + image->SetPixels(info.width, info.height, info.format, &data[static_cast(info.base_image_offset)], + info.base_image_pitch); + + if (info.clear_alpha) + image->SetAllPixelsOpaque(); + + return true; +} diff --git a/src/util/image.h b/src/util/image.h index 49a7334a1..6f30c0064 100644 --- a/src/util/image.h +++ b/src/util/image.h @@ -21,6 +21,7 @@ enum class ImageFormat : u8 BGRA8, RGB565, RGBA5551, + BGR8, BC1, BC2, BC3, diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 251f49904..14584bbea 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -71,6 +71,11 @@ static constexpr std::array(GPUTexture::Format: MTLPixelFormatRGBA16Float, // RGBA16F MTLPixelFormatRGBA32Float, // RGBA32F MTLPixelFormatBGR10A2Unorm, // RGB10A2 + MTLPixelFormatBC1_RGBA, // BC1 + MTLPixelFormatBC2_RGBA, // BC2 + MTLPixelFormatBC3_RGBA, // BC3 + MTLPixelFormatBC7_RGBAUnorm, // BC7 + }; static void LogNSError(NSError* error, std::string_view message) @@ -385,6 +390,10 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features) m_features.pipeline_cache = true; m_features.prefer_unused_textures = true; + // Same feature bit for both.
+ m_features.dxt_textures = m_features.bptc_textures = + !(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && m_device.supportsBCTextureCompression; + // Disable pipeline cache on Intel, apparently it's buggy. if ([[m_device name] containsString:@"Intel"]) { @@ -995,8 +1004,8 @@ MetalTexture::~MetalTexture() bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, u32 level /*= 0*/) { - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(height, aligned_pitch); GPUDevice::GetStatistics().buffer_streamed += req_size; GPUDevice::GetStatistics().num_uploads++; @@ -1013,7 +1022,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options]; actual_offset = 0; actual_pitch = pitch; - if (actual_buffer == nil) + if (actual_buffer == nil) [[unlikely]] { Panic("Failed to allocate temporary buffer."); return false; @@ -1026,7 +1035,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) { dev.SubmitCommandBuffer(); - if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) [[unlikely]] { Panic("Failed to reserve texture upload space."); return false; @@ -1034,7 +1043,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, } actual_offset = sb.GetCurrentOffset(); - StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); + CopyTextureDataForUpload(width, height, m_format, sb.GetCurrentHostPointer(), aligned_pitch, data, pitch); 
sb.CommitMemory(req_size); actual_buffer = sb.GetBuffer(); actual_pitch = aligned_pitch; @@ -1065,8 +1074,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) return false; - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(height, aligned_pitch); MetalDevice& dev = MetalDevice::GetInstance(); if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) @@ -1097,8 +1106,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 void MetalTexture::Unmap() { - const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = m_map_height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch); GPUDevice::GetStatistics().buffer_streamed += req_size; GPUDevice::GetStatistics().num_uploads++; @@ -1488,6 +1497,11 @@ bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const if (![m_device supportsFamily:MTLGPUFamilyApple2]) return false; } + else if (format >= GPUTexture::Format::BC1 && format <= GPUTexture::Format::BC7) + { + if (!m_device.supportsBCTextureCompression) + return false; + } return (s_pixel_format_mapping[static_cast(format)] != MTLPixelFormatInvalid); } diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index 97d169b15..79a1474d5 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -506,6 +506,12 @@ bool OpenGLDevice::CheckFeatures(FeatureMask 
disabled_features) m_features.shader_cache = false; + m_features.dxt_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && GLAD_GL_EXT_texture_compression_s3tc); + m_features.bptc_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && + (GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc)); + m_features.pipeline_cache = m_gl_context->IsGLES() || GLAD_GL_ARB_get_program_binary; if (m_features.pipeline_cache) { diff --git a/src/util/opengl_texture.cpp b/src/util/opengl_texture.cpp index 65e1d3ba9..f26b68c39 100644 --- a/src/util/opengl_texture.cpp +++ b/src/util/opengl_texture.cpp @@ -33,61 +33,69 @@ const std::tuple& OpenGLTexture::GetPixelFormatMapping(G { static constexpr std::array, static_cast(GPUTexture::Format::MaxCount)> mapping = {{ - {}, // Unknown - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16 - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U - {GL_R32F, GL_RED, GL_FLOAT}, // R32F - {GL_RG8, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // RG8 - {GL_RG16F, GL_RG, GL_UNSIGNED_SHORT}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F - {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA16 - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // 
RGBA16F - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F - {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + {}, // Unknown + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16 + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U + {GL_R32F, GL_RED, GL_FLOAT}, // R32F + {GL_RG8, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // RG8 + {GL_RG16F, GL_RG, GL_UNSIGNED_SHORT}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F + {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA16 + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F + {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE}, // BC1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE}, // BC2 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE}, // BC3 + {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7 }}; // GLES doesn't have the non-normalized 16-bit formats.. use float and hope for the best, lol. 
static constexpr std::array, static_cast(GPUTexture::Format::MaxCount)> mapping_gles = {{ - {}, // Unknown - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16 - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U - {GL_R32F, GL_RED, GL_FLOAT}, // R32F - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F - {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16 - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F - {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + {}, // Unknown + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16 + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I + 
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U + {GL_R32F, GL_RED, GL_FLOAT}, // R32F + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F + {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16 + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F + {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE}, // BC1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE}, // BC2 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE}, // BC3 + {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7 }}; return gles ? 
mapping_gles[static_cast(format)] : mapping[static_cast(format)]; @@ -169,6 +177,7 @@ std::unique_ptr OpenGLTexture::Create(u32 width, u32 height, u32 else { const bool use_texture_storage = UseTextureStorage(false); + const bool is_compressed = IsCompressedFormat(format); if (use_texture_storage) { if (layers > 1) @@ -183,10 +192,10 @@ std::unique_ptr OpenGLTexture::Create(u32 width, u32 height, u32 const u32 alignment = GetUploadAlignment(data_pitch); if (data) { - GPUDevice::GetStatistics().buffer_streamed += data_pitch * height; + GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, data_pitch); GPUDevice::GetStatistics().num_uploads++; - glPixelStorei(GL_UNPACK_ROW_LENGTH, data_pitch / pixel_size); + glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(format, data_pitch)); if (alignment != DEFAULT_UPLOAD_ALIGNMENT) glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); } @@ -198,18 +207,55 @@ std::unique_ptr OpenGLTexture::Create(u32 width, u32 height, u32 { if (use_texture_storage) { - if (layers > 1) - glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr); + if (is_compressed) + { + const u32 size = CalcUploadSize(format, current_height, data_pitch); + if (layers > 1) + { + glCompressedTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, size, + data_ptr); + } + else + { + glCompressedTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, size, data_ptr); + } + } else - glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr); + { + if (layers > 1) + glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr); + else + glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr); + } } else { - if (layers > 1) - glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type, - 
data_ptr); + if (is_compressed) + { + const u32 size = CalcUploadSize(format, current_height, data_pitch); + if (layers > 1) + { + glCompressedTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, size, + data_ptr); + } + else + { + glCompressedTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, size, data_ptr); + } + } else - glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type, data_ptr); + { + if (layers > 1) + { + glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type, + data_ptr); + } + else + { + glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type, + data_ptr); + } + } } if (data_ptr) @@ -257,14 +303,11 @@ void OpenGLTexture::CommitClear() bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, u32 level /*= 0*/) { - // TODO: perf counters - // Worth using the PBO? Driver probably knows better... 
const GLenum target = GetGLTarget(); const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES()); - const u32 pixel_size = GetPixelSize(); - const u32 preferred_pitch = Common::AlignUpPow2(static_cast(width) * pixel_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 map_size = preferred_pitch * static_cast(height); + const u32 preferred_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 map_size = CalcUploadSize(height, pitch); OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); CommitClear(); @@ -283,8 +326,22 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data if (alignment != DEFAULT_UPLOAD_ALIGNMENT) glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / pixel_size); - glTexSubImage2D(target, layer, x, y, width, height, gl_format, gl_type, data); + glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch)); + if (IsCompressedFormat()) + { + const u32 size = CalcUploadSize(height, pitch); + if (IsTextureArray()) + glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size, data); + else + glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size, data); + } + else + { + if (IsTextureArray()) + glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type, data); + else + glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type, data); + } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); if (alignment != DEFAULT_UPLOAD_ALIGNMENT) @@ -293,13 +350,39 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data else { const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); - StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, width * pixel_size, height); + CopyTextureDataForUpload(width, height, m_format, map.pointer, preferred_pitch, data, pitch); 
sb->Unmap(map_size); sb->Bind(); - glPixelStorei(GL_UNPACK_ROW_LENGTH, preferred_pitch / pixel_size); - glTexSubImage2D(GL_TEXTURE_2D, layer, x, y, width, height, gl_format, gl_type, - reinterpret_cast(static_cast(map.buffer_offset))); + glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(preferred_pitch)); + if (IsCompressedFormat()) + { + const u32 size = CalcUploadSize(height, pitch); + if (IsTextureArray()) + { + glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size, + reinterpret_cast(static_cast(map.buffer_offset))); + } + else + { + glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size, + reinterpret_cast(static_cast(map.buffer_offset))); + } + } + else + { + if (IsTextureArray()) + { + glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type, + reinterpret_cast(static_cast(map.buffer_offset))); + } + else + { + glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type, + reinterpret_cast(static_cast(map.buffer_offset))); + } + } + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); sb->Unbind(); @@ -315,8 +398,8 @@ bool OpenGLTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3 if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) return false; - const u32 pitch = Common::AlignUpPow2(static_cast(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 upload_size = pitch * static_cast(height); + const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 upload_size = CalcUploadSize(height, pitch); OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); if (!sb || upload_size > sb->GetSize()) return false; @@ -339,8 +422,8 @@ void OpenGLTexture::Unmap() { CommitClear(); - const u32 pitch = Common::AlignUpPow2(static_cast(m_map_width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 upload_size = pitch * 
static_cast(m_map_height); + const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 upload_size = CalcUploadSize(m_map_height, pitch); GPUDevice::GetStatistics().buffer_streamed += upload_size; GPUDevice::GetStatistics().num_uploads++; @@ -354,18 +437,35 @@ void OpenGLTexture::Unmap() const GLenum target = GetGLTarget(); glBindTexture(target, m_id); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / GetPixelSize()); + glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch)); const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES()); - if (IsTextureArray()) + if (IsCompressedFormat()) { - glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format, - gl_type, reinterpret_cast(static_cast(m_map_offset))); + const u32 size = CalcUploadSize(m_map_height, pitch); + if (IsTextureArray()) + { + glCompressedTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, + gl_format, size, reinterpret_cast(static_cast(m_map_offset))); + } + else + { + glCompressedTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, size, + reinterpret_cast(static_cast(m_map_offset))); + } } else { - glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type, - reinterpret_cast(static_cast(m_map_offset))); + if (IsTextureArray()) + { + glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format, + gl_type, reinterpret_cast(static_cast(m_map_offset))); + } + else + { + glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type, + reinterpret_cast(static_cast(m_map_offset))); + } } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 11039075d..b6061b8a6 100644 --- 
a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -96,6 +96,10 @@ const std::array(GPUTexture::Format::MaxCount)> Vulka VK_FORMAT_R16G16B16A16_SFLOAT, // RGBA16F VK_FORMAT_R32G32B32A32_SFLOAT, // RGBA32F VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2 + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, // BC1 + VK_FORMAT_BC2_UNORM_BLOCK, // BC2 + VK_FORMAT_BC3_UNORM_BLOCK, // BC3 + VK_FORMAT_BC7_UNORM_BLOCK, // BC7 }; // Handles are always 64-bit, even on 32-bit platforms. @@ -640,6 +644,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay enabled_features.sampleRateShading = available_features.sampleRateShading; enabled_features.geometryShader = available_features.geometryShader; enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics; + enabled_features.textureCompressionBC = available_features.textureCompressionBC; device_info.pEnabledFeatures = &enabled_features; VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { @@ -2456,6 +2461,10 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe m_features.raster_order_views = (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics && m_optional_extensions.vk_ext_fragment_shader_interlock); + + // Same feature bit for both. 
+ m_features.dxt_textures = m_features.bptc_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && vk_features.textureCompressionBC); } void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp index 12c833635..8d7061806 100644 --- a/src/util/vulkan_texture.cpp +++ b/src/util/vulkan_texture.cpp @@ -230,20 +230,13 @@ VkCommandBuffer VulkanTexture::GetCommandBufferForUpdate() return dev.GetCurrentInitCommandBuffer(); } -void VulkanTexture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, - u32 upload_pitch) const -{ - StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height); -} - VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, - u32 height) const + u32 height, u32 buffer_size) const { - const u32 size = upload_pitch * height; const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, - static_cast(size), + static_cast(buffer_size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, @@ -270,8 +263,8 @@ VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, VulkanDevice::GetInstance().DeferBufferDestruction(buffer, allocation); // And write the data. 
- CopyTextureDataForUpload(ai.pMappedData, data, width, height, pitch, upload_pitch); - vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, size); + CopyTextureDataForUpload(width, height, m_format, ai.pMappedData, upload_pitch, data, pitch); + vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, buffer_size); return buffer; } @@ -282,7 +275,7 @@ void VulkanTexture::UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 w if (old_layout != Layout::TransferDst) TransitionSubresourcesToLayout(cmdbuf, layer, 1, level, 1, old_layout, Layout::TransferDst); - const u32 row_length = pitch / GetPixelSize(); + const u32 row_length = CalcUploadRowLengthFromPitch(pitch); const VkBufferImageCopy bic = {static_cast(buffer_offset), row_length, @@ -302,8 +295,9 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data DebugAssert(layer < m_layers && level < m_levels); DebugAssert((x + width) <= GetMipWidth(level) && (y + height) <= GetMipHeight(level)); - const u32 upload_pitch = Common::AlignUpPow2(pitch, VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment()); - const u32 required_size = height * upload_pitch; + const u32 upload_pitch = + Common::AlignUpPow2(CalcUploadPitch(width), VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment()); + const u32 required_size = CalcUploadSize(height, upload_pitch); VulkanDevice& dev = VulkanDevice::GetInstance(); VulkanStreamBuffer& sbuffer = dev.GetTextureUploadBuffer(); @@ -314,7 +308,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data if (required_size > (sbuffer.GetCurrentSize() / 2)) { buffer_offset = 0; - buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height); + buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size); if (buffer == VK_NULL_HANDLE) return false; } @@ -332,7 +326,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, 
const void* data buffer = sbuffer.GetBuffer(); buffer_offset = sbuffer.GetCurrentOffset(); - CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch); + CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch); sbuffer.CommitMemory(required_size); } @@ -372,8 +366,8 @@ bool VulkanTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3 CommitClear(GetCommandBufferForUpdate()); // see note in Update() for the reason why. - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment()); - const u32 req_size = height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), dev.GetBufferCopyRowPitchAlignment()); + const u32 req_size = CalcUploadSize(height, aligned_pitch); VulkanStreamBuffer& buffer = dev.GetTextureUploadBuffer(); if (req_size >= (buffer.GetCurrentSize() / 2)) return false; @@ -402,8 +396,8 @@ void VulkanTexture::Unmap() { VulkanDevice& dev = VulkanDevice::GetInstance(); VulkanStreamBuffer& sb = dev.GetTextureUploadBuffer(); - const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment()); - const u32 req_size = m_map_height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), dev.GetBufferCopyRowPitchAlignment()); + const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch); const u32 offset = sb.GetCurrentOffset(); sb.CommitMemory(req_size); diff --git a/src/util/vulkan_texture.h b/src/util/vulkan_texture.h index f7dd8b601..b30ba7dfb 100644 --- a/src/util/vulkan_texture.h +++ b/src/util/vulkan_texture.h @@ -85,8 +85,8 @@ private: VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format); VkCommandBuffer GetCommandBufferForUpdate(); - void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const; - 
VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height) const; + VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height, + u32 buffer_size) const; void UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level, u32 pitch, VkBuffer buffer, u32 buffer_offset); From 8567293103ec4c307c970f205cfbcc0d35213274 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 18:38:15 +1000 Subject: [PATCH 08/35] Util: Add TextureDecompress --- data/resources/thirdparty.html | 25 + src/util/CMakeLists.txt | 2 + src/util/texture_decompress.cpp | 1151 +++++++++++++++++++++++++++++++ src/util/texture_decompress.h | 198 ++++++ src/util/util.vcxproj | 2 + src/util/util.vcxproj.filters | 2 + 6 files changed, 1380 insertions(+) create mode 100644 src/util/texture_decompress.cpp create mode 100644 src/util/texture_decompress.h diff --git a/data/resources/thirdparty.html b/data/resources/thirdparty.html index 231d40d31..6b5b5e307 100644 --- a/data/resources/thirdparty.html +++ b/data/resources/thirdparty.html @@ -2563,6 +2563,31 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +

Texture Decompression Routines

+
+Copyright (C) 2009 Benjamin Dobell, Glass Echidna
+Copyright (C) 2012 - 2022, Matthäus G. "Anteru" Chajdas (https://anteru.net)
+Copyright (C) 2020 Richard Geldreich, Jr.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+ Some shaders provided with the application are sourced from:
  • https://github.com/Matsilagi/RSRetroArch/
  • diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 96c3ee109..f1c6e91ec 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -65,6 +65,8 @@ add_library(util sockets.h state_wrapper.cpp state_wrapper.h + texture_decompress.cpp + texture_decompress.h wav_reader_writer.cpp wav_reader_writer.h window_info.cpp diff --git a/src/util/texture_decompress.cpp b/src/util/texture_decompress.cpp new file mode 100644 index 000000000..a03a046df --- /dev/null +++ b/src/util/texture_decompress.cpp @@ -0,0 +1,1151 @@ +#include "texture_decompress.h" + +/* +DXT1/DXT3/DXT5 texture decompression + +The original code is from Benjamin Dobell, see below for details. Compared to +the original the code is now valid C89, has support for 64-bit architectures +and has been refactored. It also has support for additional formats and uses +a different PackRGBA order. + +--- + +Copyright (c) 2012 - 2022, Matthäus G. "Anteru" Chajdas (https://anteru.net) + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--- + +Copyright (C) 2009 Benjamin Dobell, Glass Echidna + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--- +*/ +static uint32_t PackRGBA (uint8_t r, uint8_t g, uint8_t b, uint8_t a) +{ + return r | (g << 8) | (b << 16) | (a << 24); +} + +static float Int8ToFloat_SNORM (const uint8_t input) +{ + return (float)((int8_t)input) / 127.0f; +} + +static float Int8ToFloat_UNORM (const uint8_t input) +{ + return (float)input / 255.0f; +} + +/** +Decompress a BC 16x3 index block stored as +h g f e +d c b a +p o n m +l k j i + +Bits packed as + +| h | g | f | e | d | c | b | a | // Entry +|765 432 107 654 321 076 543 210| // Bit +|0000000000111111111112222222222| // Byte + +into 16 8-bit indices. 
+*/ +static void Decompress16x3bitIndices (const uint8_t* packed, uint8_t* unpacked) +{ + uint32_t tmp, block, i; + + for (block = 0; block < 2; ++block) { + tmp = 0; + + // Read three bytes + for (i = 0; i < 3; ++i) { + tmp |= ((uint32_t)packed [i]) << (i * 8); + } + + // Unpack 8x3 bit from last 3 byte block + for (i = 0; i < 8; ++i) { + unpacked [i] = (tmp >> (i*3)) & 0x7; + } + + packed += 3; + unpacked += 8; + } +} + +static void DecompressBlockBC1Internal (const uint8_t* block, + unsigned char* output, uint32_t outputStride, const uint8_t* alphaValues) +{ + uint32_t temp, code; + + uint16_t color0, color1; + uint8_t r0, g0, b0, r1, g1, b1; + + int i, j; + + color0 = *(const uint16_t*)(block); + color1 = *(const uint16_t*)(block + 2); + + temp = (color0 >> 11) * 255 + 16; + r0 = (uint8_t)((temp/32 + temp)/32); + temp = ((color0 & 0x07E0) >> 5) * 255 + 32; + g0 = (uint8_t)((temp/64 + temp)/64); + temp = (color0 & 0x001F) * 255 + 16; + b0 = (uint8_t)((temp/32 + temp)/32); + + temp = (color1 >> 11) * 255 + 16; + r1 = (uint8_t)((temp/32 + temp)/32); + temp = ((color1 & 0x07E0) >> 5) * 255 + 32; + g1 = (uint8_t)((temp/64 + temp)/64); + temp = (color1 & 0x001F) * 255 + 16; + b1 = (uint8_t)((temp/32 + temp)/32); + + code = *(const uint32_t*)(block + 4); + + if (color0 > color1) { + for (j = 0; j < 4; ++j) { + for (i = 0; i < 4; ++i) { + uint32_t finalColor, positionCode; + uint8_t alpha; + + alpha = alphaValues [j*4+i]; + + finalColor = 0; + positionCode = (code >> 2*(4*j+i)) & 0x03; + + switch (positionCode) { + case 0: + finalColor = PackRGBA(r0, g0, b0, alpha); + break; + case 1: + finalColor = PackRGBA(r1, g1, b1, alpha); + break; + case 2: + finalColor = PackRGBA((2*r0+r1)/3, (2*g0+g1)/3, (2*b0+b1)/3, alpha); + break; + case 3: + finalColor = PackRGBA((r0+2*r1)/3, (g0+2*g1)/3, (b0+2*b1)/3, alpha); + break; + } + + *(uint32_t*)(output + j*outputStride + i * sizeof (uint32_t)) = finalColor; + } + } + } else { + for (j = 0; j < 4; ++j) { + for (i = 0; i < 4; ++i) { 
+ uint32_t finalColor, positionCode; + uint8_t alpha; + + alpha = alphaValues [j*4+i]; + + finalColor = 0; + positionCode = (code >> 2*(4*j+i)) & 0x03; + + switch (positionCode) { + case 0: + finalColor = PackRGBA(r0, g0, b0, alpha); + break; + case 1: + finalColor = PackRGBA(r1, g1, b1, alpha); + break; + case 2: + finalColor = PackRGBA((r0+r1)/2, (g0+g1)/2, (b0+b1)/2, alpha); + break; + case 3: + finalColor = PackRGBA(0, 0, 0, alpha); + break; + } + + *(uint32_t*)(output + j*outputStride + i * sizeof (uint32_t)) = finalColor; + } + } + } +} + +/* +Decompresses one block of a BC1 (DXT1) texture and stores the resulting pixels at the appropriate offset in 'image'. + +uint32_t x: x-coordinate of the first pixel in the block. +uint32_t y: y-coordinate of the first pixel in the block. +uint32_t stride: stride of a scanline in bytes. +const uint8_t* blockStorage: pointer to the block to decompress. +uint32_t* image: pointer to image where the decompressed pixel data should be stored. +*/ +void DecompressBlockBC1 (uint32_t x, uint32_t y, uint32_t stride, + const uint8_t* blockStorage, unsigned char* image) +{ + static const uint8_t const_alpha [] = { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255 + }; + + DecompressBlockBC1Internal (blockStorage, + image + x * sizeof (uint32_t) + (y * stride), stride, const_alpha); +} + +/* +Decompresses one block of a BC3 (DXT5) texture and stores the resulting pixels at the appropriate offset in 'image'. + +uint32_t x: x-coordinate of the first pixel in the block. +uint32_t y: y-coordinate of the first pixel in the block. +uint32_t stride: stride of a scanline in bytes. +const uint8_t *blockStorage: pointer to the block to decompress. +uint32_t *image: pointer to image where the decompressed pixel data should be stored. 
+*/ +void DecompressBlockBC3 (uint32_t x, uint32_t y, uint32_t stride, + const uint8_t* blockStorage, unsigned char* image) +{ + uint8_t alpha0, alpha1; + uint8_t alphaIndices [16]; + + uint16_t color0, color1; + uint8_t r0, g0, b0, r1, g1, b1; + + int i, j; + + uint32_t temp, code; + + alpha0 = *(blockStorage); + alpha1 = *(blockStorage + 1); + + Decompress16x3bitIndices (blockStorage + 2, alphaIndices); + + color0 = *(const uint16_t*)(blockStorage + 8); + color1 = *(const uint16_t*)(blockStorage + 10); + + temp = (color0 >> 11) * 255 + 16; + r0 = (uint8_t)((temp / 32 + temp) / 32); + temp = ((color0 & 0x07E0) >> 5) * 255 + 32; + g0 = (uint8_t)((temp / 64 + temp) / 64); + temp = (color0 & 0x001F) * 255 + 16; + b0 = (uint8_t)((temp / 32 + temp) / 32); + + temp = (color1 >> 11) * 255 + 16; + r1 = (uint8_t)((temp / 32 + temp) / 32); + temp = ((color1 & 0x07E0) >> 5) * 255 + 32; + g1 = (uint8_t)((temp / 64 + temp) / 64); + temp = (color1 & 0x001F) * 255 + 16; + b1 = (uint8_t)((temp / 32 + temp) / 32); + + code = *(const uint32_t*)(blockStorage + 12); + + for (j = 0; j < 4; j++) { + for (i = 0; i < 4; i++) { + uint8_t finalAlpha; + int alphaCode; + uint8_t colorCode; + uint32_t finalColor; + + alphaCode = alphaIndices [4 * j + i]; + + if (alphaCode == 0) { + finalAlpha = alpha0; + } else if (alphaCode == 1) { + finalAlpha = alpha1; + } else { + if (alpha0 > alpha1) { + finalAlpha = (uint8_t)(((8 - alphaCode)*alpha0 + (alphaCode - 1)*alpha1) / 7); + } else { + if (alphaCode == 6) { + finalAlpha = 0; + } else if (alphaCode == 7) { + finalAlpha = 255; + } else { + finalAlpha = (uint8_t)(((6 - alphaCode)*alpha0 + (alphaCode - 1)*alpha1) / 5); + } + } + } + + colorCode = (code >> 2 * (4 * j + i)) & 0x03; + finalColor = 0; + + switch (colorCode) { + case 0: + finalColor = PackRGBA (r0, g0, b0, finalAlpha); + break; + case 1: + finalColor = PackRGBA (r1, g1, b1, finalAlpha); + break; + case 2: + finalColor = PackRGBA ((2 * r0 + r1) / 3, (2 * g0 + g1) / 3, (2 * b0 + b1) / 3, 
finalAlpha); + break; + case 3: + finalColor = PackRGBA ((r0 + 2 * r1) / 3, (g0 + 2 * g1) / 3, (b0 + 2 * b1) / 3, finalAlpha); + break; + } + + + *(uint32_t*)(image + sizeof (uint32_t) * (i + x) + (stride * (y + j))) = finalColor; + } + } +} + +/* +Decompresses one block of a BC2 (DXT3) texture and stores the resulting pixels at the appropriate offset in 'image'. + +uint32_t x: x-coordinate of the first pixel in the block. +uint32_t y: y-coordinate of the first pixel in the block. +uint32_t stride: stride of a scanline in bytes. +const uint8_t *blockStorage: pointer to the block to decompress. +uint32_t *image: pointer to image where the decompressed pixel data should be stored. +*/ +void DecompressBlockBC2 (uint32_t x, uint32_t y, uint32_t stride, + const uint8_t* blockStorage, unsigned char* image) +{ + int i; + + uint8_t alphaValues [16] = { 0 }; + + for (i = 0; i < 4; ++i) { + const uint16_t* alphaData = (const uint16_t*)(blockStorage); + + alphaValues [i * 4 + 0] = (((*alphaData) >> 0) & 0xF) * 17; + alphaValues [i * 4 + 1] = (((*alphaData) >> 4) & 0xF) * 17; + alphaValues [i * 4 + 2] = (((*alphaData) >> 8) & 0xF) * 17; + alphaValues [i * 4 + 3] = (((*alphaData) >> 12) & 0xF) * 17; + + blockStorage += 2; + } + + DecompressBlockBC1Internal (blockStorage, + image + x * sizeof (uint32_t) + (y * stride), stride, alphaValues); +} + +static void DecompressBlockBC4Internal ( + const uint8_t* block, unsigned char* output, + uint32_t outputStride, const float* colorTable) +{ + uint8_t indices [16]; + int x, y; + + Decompress16x3bitIndices (block + 2, indices); + + for (y = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x) { + *(float*)(output + x * sizeof (float)) = colorTable [indices [y*4 + x]]; + } + + output += outputStride; + } +} + +/* +Decompresses one block of a BC4 texture and stores the resulting pixels at the appropriate offset in 'image'. + +uint32_t x: x-coordinate of the first pixel in the block. +uint32_t y: y-coordinate of the first pixel in the block. 
+uint32_t stride: stride of a scanline in bytes. +const uint8_t* blockStorage: pointer to the block to decompress. +float* image: pointer to image where the decompressed pixel data should be stored. +*/ +void DecompressBlockBC4 (uint32_t x, uint32_t y, uint32_t stride, enum BC4Mode mode, + const uint8_t* blockStorage, unsigned char* image) +{ + float colorTable [8]; + float r0, r1; + + if (mode == BC4_UNORM) { + r0 = Int8ToFloat_UNORM (blockStorage [0]); + r1 = Int8ToFloat_UNORM (blockStorage [1]); + + colorTable [0] = r0; + colorTable [1] = r1; + + if (r0 > r1) { + // 6 interpolated color values + colorTable [2] = (6*r0 + 1*r1)/7.0f; // bit code 010 + colorTable [3] = (5*r0 + 2*r1)/7.0f; // bit code 011 + colorTable [4] = (4*r0 + 3*r1)/7.0f; // bit code 100 + colorTable [5] = (3*r0 + 4*r1)/7.0f; // bit code 101 + colorTable [6] = (2*r0 + 5*r1)/7.0f; // bit code 110 + colorTable [7] = (1*r0 + 6*r1)/7.0f; // bit code 111 + } else { + // 4 interpolated color values + colorTable [2] = (4*r0 + 1*r1)/5.0f; // bit code 010 + colorTable [3] = (3*r0 + 2*r1)/5.0f; // bit code 011 + colorTable [4] = (2*r0 + 3*r1)/5.0f; // bit code 100 + colorTable [5] = (1*r0 + 4*r1)/5.0f; // bit code 101 + colorTable [6] = 0.0f; // bit code 110 + colorTable [7] = 1.0f; // bit code 111 + } + } else if (mode == BC4_SNORM) { + r0 = Int8ToFloat_SNORM (blockStorage [0]); + r1 = Int8ToFloat_SNORM (blockStorage [1]); + + colorTable [0] = r0; + colorTable [1] = r1; + + if (r0 > r1) { + // 6 interpolated color values + colorTable [2] = (6*r0 + 1*r1)/7.0f; // bit code 010 + colorTable [3] = (5*r0 + 2*r1)/7.0f; // bit code 011 + colorTable [4] = (4*r0 + 3*r1)/7.0f; // bit code 100 + colorTable [5] = (3*r0 + 4*r1)/7.0f; // bit code 101 + colorTable [6] = (2*r0 + 5*r1)/7.0f; // bit code 110 + colorTable [7] = (1*r0 + 6*r1)/7.0f; // bit code 111 + } else { + // 4 interpolated color values + colorTable [2] = (4*r0 + 1*r1)/5.0f; // bit code 010 + colorTable [3] = (3*r0 + 2*r1)/5.0f; // bit code 011 + 
colorTable [4] = (2*r0 + 3*r1)/5.0f; // bit code 100 + colorTable [5] = (1*r0 + 4*r1)/5.0f; // bit code 101 + colorTable [6] = -1.0f; // bit code 110 + colorTable [7] = 1.0f; // bit code 111 + } + } + + DecompressBlockBC4Internal (blockStorage, + image + x * sizeof (float) + (y * stride), stride, colorTable); +} + + +/* +Decompresses one block of a BC5 texture and stores the resulting pixels at the appropriate offset in 'image'. + +uint32_t x: x-coordinate of the first pixel in the block. +uint32_t y: y-coordinate of the first pixel in the block. +uint32_t stride: stride of a scanline in bytes. +const uint8_t* blockStorage: pointer to the block to decompress. +float* image: pointer to image where the decompressed pixel data should be stored. +*/ +void DecompressBlockBC5 (uint32_t x, uint32_t y, uint32_t stride, enum BC5Mode mode, + const uint8_t* blockStorage, unsigned char* image) +{ + // We decompress the two channels separately and interleave them when + // writing to the output + float c0 [16]; + float c1 [16]; + + int dx, dy; + + DecompressBlockBC4 (0, 0, 4 * sizeof (float), (enum BC4Mode)mode, + blockStorage, (unsigned char*)c0); + DecompressBlockBC4 (0, 0, 4 * sizeof (float), (enum BC4Mode)mode, + blockStorage + 8, (unsigned char*)c1); + + for (dy = 0; dy < 4; ++dy) { + for (dx = 0; dx < 4; ++dx) { + *(float*)(image + stride * (y + dy) + ((x + dx) * 2 + 0) * sizeof (float)) = c0 [dy * 4 + dx]; + *(float*)(image + stride * (y + dy) + ((x + dx) * 2 + 1) * sizeof (float)) = c1 [dy * 4 + dx]; + } + } +} + +// File: bc7decomp.c - Richard Geldreich, Jr. 
3/31/2020 - MIT license or public domain (see end of file) +#include + +#if (defined(_M_AMD64) || defined(__x86_64__) || defined(__SSE2__)) +# define BC7DECOMP_USE_SSE2 +#endif + +#ifdef BC7DECOMP_USE_SSE2 +#include +#include +#endif + +namespace bc7decomp +{ + +#ifdef BC7DECOMP_USE_SSE2 + const __m128i g_bc7_weights4_sse2[8] = + { + _mm_set_epi16(4, 4, 4, 4, 0, 0, 0, 0), + _mm_set_epi16(13, 13, 13, 13, 9, 9, 9, 9), + _mm_set_epi16(21, 21, 21, 21, 17, 17, 17, 17), + _mm_set_epi16(30, 30, 30, 30, 26, 26, 26, 26), + _mm_set_epi16(38, 38, 38, 38, 34, 34, 34, 34), + _mm_set_epi16(47, 47, 47, 47, 43, 43, 43, 43), + _mm_set_epi16(55, 55, 55, 55, 51, 51, 51, 51), + _mm_set_epi16(64, 64, 64, 64, 60, 60, 60, 60), + }; +#endif + +const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; +const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; +const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + +const uint8_t g_bc7_partition2[64 * 16] = +{ + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, + 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, + 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, + 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, + 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1 +}; + +const uint8_t g_bc7_partition3[64 * 16] = +{ + 0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, + 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 
0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, + 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, + 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, + 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, + 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, + 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0, +}; + +const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 }; + +const uint8_t g_bc7_table_anchor_index_third_subset_1[64] = +{ + 3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3 +}; + +const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = +{ + 15, 8, 8, 3,15,15, 3, 8, 
15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 +}; + +const uint8_t g_bc7_first_byte_to_mode[256] = +{ + 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +}; + +inline void insert_weight_zero(uint64_t& index_bits, uint32_t bits_per_index, uint32_t offset) +{ + uint64_t LOW_BIT_MASK = (static_cast(1) << ((bits_per_index * (offset + 1)) - 1)) - 1; + uint64_t HIGH_BIT_MASK = ~LOW_BIT_MASK; + + index_bits = ((index_bits & HIGH_BIT_MASK) << 1) | (index_bits & LOW_BIT_MASK); +} + +// BC7 mode 0-7 decompression. +// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. 
+ +static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } +static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } + +static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - g_bc7_weights2[w]) + h * g_bc7_weights2[w] + 32) >> 6; } +static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - g_bc7_weights3[w]) + h * g_bc7_weights3[w] + 32) >> 6; } +static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - g_bc7_weights4[w]) + h * g_bc7_weights4[w] + 32) >> 6; } +static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) +{ + assert(l <= 255 && h <= 255); + switch (bits) + { + case 2: return bc7_interp2(l, h, w); + case 3: return bc7_interp3(l, h, w); + case 4: return bc7_interp4(l, h, w); + default: + break; + } + return 0; +} + + +#ifdef BC7DECOMP_USE_SSE2 +static inline __m128i bc7_interp_sse2(__m128i l, __m128i h, __m128i w, __m128i iw) +{ + return _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(l, iw), _mm_mullo_epi16(h, w)), _mm_set1_epi16(32)), 6); +} + +static inline void bc7_interp2_sse2(const color_rgba* endpoint_pair, color_rgba* out_colors) +{ + __m128i endpoints = _mm_loadu_si64(endpoint_pair); + __m128i endpoints_16 = _mm_unpacklo_epi8(endpoints, _mm_setzero_si128()); + + __m128i endpoints_16_swapped = _mm_shuffle_epi32(endpoints_16, _MM_SHUFFLE(1, 0, 3, 2)); + + // Interpolated colors will be color 1 and 2 + __m128i interpolated_colors = 
bc7_interp_sse2(endpoints_16, endpoints_16_swapped, _mm_set1_epi16(21), _mm_set1_epi16(43)); + + // all_colors will be 1, 2, 0, 3 + __m128i all_colors = _mm_packus_epi16(interpolated_colors, endpoints_16); + + all_colors = _mm_shuffle_epi32(all_colors, _MM_SHUFFLE(3, 1, 0, 2)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors), all_colors); +} + +static inline void bc7_interp3_sse2(const color_rgba* endpoint_pair, color_rgba* out_colors) +{ + __m128i endpoints = _mm_loadu_si64(endpoint_pair); + __m128i endpoints_16bit = _mm_unpacklo_epi8(endpoints, _mm_setzero_si128()); + __m128i endpoints_16bit_swapped = _mm_shuffle_epi32(endpoints_16bit, _MM_SHUFFLE(1, 0, 3, 2)); + + __m128i interpolated_16 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set1_epi16(9), _mm_set1_epi16(55)); + __m128i interpolated_23 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set_epi16(37, 37, 37, 37, 18, 18, 18, 18), _mm_set_epi16(27, 27, 27, 27, 46, 46, 46, 46)); + __m128i interpolated_45 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set_epi16(18, 18, 18, 18, 37, 37, 37, 37), _mm_set_epi16(46, 46, 46, 46, 27, 27, 27, 27)); + + __m128i interpolated_01 = _mm_unpacklo_epi64(endpoints_16bit, interpolated_16); + __m128i interpolated_67 = _mm_unpackhi_epi64(interpolated_16, endpoints_16bit); + + __m128i all_colors_0 = _mm_packus_epi16(interpolated_01, interpolated_23); + __m128i all_colors_1 = _mm_packus_epi16(interpolated_45, interpolated_67); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors), all_colors_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors + 4), all_colors_1); +} +#endif + +bool unpack_bc7_mode0_2(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) +{ + //const uint32_t SUBSETS = 3; + const uint32_t ENDPOINTS = 6; + const uint32_t COMPS = 3; + const uint32_t WEIGHT_BITS = (mode == 0) ? 
3 : 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; + const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; + const uint32_t PBITS = (mode == 0) ? 6 : 0; +#ifndef BC7DECOMP_USE_SSE2 + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; +#endif + const uint32_t PART_BITS = (mode == 0) ? 4 : 6; + const uint32_t PART_MASK = (1 << PART_BITS) - 1; + + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; + + const uint32_t part = (low_chunk >> (mode + 1)) & PART_MASK; + + uint64_t channel_read_chunks[3] = { 0, 0, 0 }; + + if (mode == 0) + { + channel_read_chunks[0] = low_chunk >> 5; + channel_read_chunks[1] = low_chunk >> 29; + channel_read_chunks[2] = ((low_chunk >> 53) | (high_chunk << 11)); + } + else + { + channel_read_chunks[0] = low_chunk >> 9; + channel_read_chunks[1] = ((low_chunk >> 39) | (high_chunk << 25)); + channel_read_chunks[2] = high_chunk >> 5; + } + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + { + uint64_t channel_read_chunk = channel_read_chunks[c]; + for (uint32_t e = 0; e < ENDPOINTS; e++) + { + endpoints[e][c] = static_cast(channel_read_chunk & ENDPOINT_MASK); + channel_read_chunk >>= ENDPOINT_BITS; + } + } + + uint32_t pbits[6]; + if (mode == 0) + { + uint8_t p_bits_chunk = static_cast((high_chunk >> 13) & 0xff); + + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = (p_bits_chunk >> p) & 1; + } + + uint64_t weights_read_chunk = high_chunk >> (67 - 16 * WEIGHT_BITS); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, 0); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, std::min(g_bc7_table_anchor_index_third_subset_1[part], g_bc7_table_anchor_index_third_subset_2[part])); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, std::max(g_bc7_table_anchor_index_third_subset_1[part], g_bc7_table_anchor_index_third_subset_2[part])); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + { + weights[i] = 
static_cast(weights_read_chunk & WEIGHT_MASK); + weights_read_chunk >>= WEIGHT_BITS; + } + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = static_cast((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + + color_rgba block_colors[3][8]; + +#ifdef BC7DECOMP_USE_SSE2 + for (uint32_t s = 0; s < 3; s++) + { + if (WEIGHT_BITS == 2) + bc7_interp2_sse2(endpoints + s * 2, block_colors[s]); + else + bc7_interp3_sse2(endpoints + s * 2, block_colors[s]); + } +#else + for (uint32_t s = 0; s < 3; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < 3; c++) + block_colors[s][i][c] = static_cast(bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS)); + block_colors[s][i][3] = 255; + } +#endif + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[g_bc7_partition3[part * 16 + i]][weights[i]]; + + return true; +} + +bool unpack_bc7_mode1_3_7(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) +{ + //const uint32_t SUBSETS = 2; + const uint32_t ENDPOINTS = 4; + const uint32_t COMPS = (mode == 7) ? 4 : 3; + const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; + const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; + const uint32_t PBITS = (mode == 1) ? 2 : 4; + const uint32_t SHARED_PBITS = (mode == 1) ? 
true : false; +#ifndef BC7DECOMP_USE_SSE2 + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; +#endif + + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; + + const uint32_t part = ((low_chunk >> (mode + 1)) & 0x3f); + + color_rgba endpoints[ENDPOINTS]; + + uint64_t channel_read_chunks[4] = { 0, 0, 0, 0 }; + uint64_t p_read_chunk = 0; + channel_read_chunks[0] = (low_chunk >> (mode + 7)); + uint64_t weight_read_chunk; + + switch (mode) + { + case 1: + channel_read_chunks[1] = (low_chunk >> 32); + channel_read_chunks[2] = ((low_chunk >> 56) | (high_chunk << 8)); + p_read_chunk = high_chunk >> 16; + weight_read_chunk = high_chunk >> 18; + break; + case 3: + channel_read_chunks[1] = ((low_chunk >> 38) | (high_chunk << 26)); + channel_read_chunks[2] = high_chunk >> 2; + p_read_chunk = high_chunk >> 30; + weight_read_chunk = high_chunk >> 34; + break; + case 7: + channel_read_chunks[1] = low_chunk >> 34; + channel_read_chunks[2] = ((low_chunk >> 54) | (high_chunk << 10)); + channel_read_chunks[3] = high_chunk >> 10; + p_read_chunk = (high_chunk >> 30); + weight_read_chunk = (high_chunk >> 34); + break; + default: + return false; + }; + + for (uint32_t c = 0; c < COMPS; c++) + { + uint64_t channel_read_chunk = channel_read_chunks[c]; + for (uint32_t e = 0; e < ENDPOINTS; e++) + { + endpoints[e][c] = static_cast(channel_read_chunk & ENDPOINT_MASK); + channel_read_chunk >>= ENDPOINT_BITS; + } + } + + uint32_t pbits[4]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = (p_read_chunk >> p) & 1; + + insert_weight_zero(weight_read_chunk, WEIGHT_BITS, 0); + insert_weight_zero(weight_read_chunk, WEIGHT_BITS, g_bc7_table_anchor_index_second_subset[part]); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + { + weights[i] = static_cast(weight_read_chunk & WEIGHT_MASK); + weight_read_chunk >>= WEIGHT_BITS; + } + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = static_cast((mode != 7U 
&& c == 3U) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + + color_rgba block_colors[2][8]; +#ifdef BC7DECOMP_USE_SSE2 + for (uint32_t s = 0; s < 2; s++) + { + if (WEIGHT_BITS == 2) + bc7_interp2_sse2(endpoints + s * 2, block_colors[s]); + else + bc7_interp3_sse2(endpoints + s * 2, block_colors[s]); + } +#else + for (uint32_t s = 0; s < 2; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < COMPS; c++) + block_colors[s][i][c] = static_cast(bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS)); + block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; + } +#endif + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[g_bc7_partition2[part * 16 + i]][weights[i]]; + + return true; +} + +bool unpack_bc7_mode4_5(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) +{ + const uint32_t ENDPOINTS = 2; + //const uint32_t COMPS = 4; + const uint32_t WEIGHT_BITS = 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; + const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t A_WEIGHT_MASK = (1 << A_WEIGHT_BITS) - 1; + const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; + const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8; + const uint32_t A_ENDPOINT_MASK = (1 << A_ENDPOINT_BITS) - 1; + //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; + + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; + + const uint32_t comp_rot = (low_chunk >> (mode + 1)) & 0x3; + const uint32_t index_mode = (mode == 4) ? 
static_cast((low_chunk >> 7) & 1) : 0; + + uint64_t color_read_bits = low_chunk >> 8; + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < 3; c++) + { + for (uint32_t e = 0; e < ENDPOINTS; e++) + { + endpoints[e][c] = static_cast(color_read_bits & ENDPOINT_MASK); + color_read_bits >>= ENDPOINT_BITS; + } + } + + endpoints[0][3] = static_cast(color_read_bits & ENDPOINT_MASK); + + uint64_t rgb_weights_chunk; + uint64_t a_weights_chunk; + if (mode == 4) + { + endpoints[0][3] = static_cast(color_read_bits & A_ENDPOINT_MASK); + endpoints[1][3] = static_cast((color_read_bits >> A_ENDPOINT_BITS) & A_ENDPOINT_MASK); + rgb_weights_chunk = ((low_chunk >> 50) | (high_chunk << 14)); + a_weights_chunk = high_chunk >> 17; + } + else if (mode == 5) + { + endpoints[0][3] = static_cast(color_read_bits & A_ENDPOINT_MASK); + endpoints[1][3] = static_cast(((low_chunk >> 58) | (high_chunk << 6)) & A_ENDPOINT_MASK); + rgb_weights_chunk = high_chunk >> 2; + a_weights_chunk = high_chunk >> 33; + } + else + return false; + + insert_weight_zero(rgb_weights_chunk, WEIGHT_BITS, 0); + insert_weight_zero(a_weights_chunk, A_WEIGHT_BITS, 0); + + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; + const uint32_t weight_mask[2] = { index_mode ? A_WEIGHT_MASK : WEIGHT_MASK, index_mode ? WEIGHT_MASK : A_WEIGHT_MASK }; + + uint32_t weights[16], a_weights[16]; + + if (index_mode) + std::swap(rgb_weights_chunk, a_weights_chunk); + + for (uint32_t i = 0; i < 16; i++) + { + weights[i] = (rgb_weights_chunk & weight_mask[0]); + rgb_weights_chunk >>= weight_bits[0]; + } + + for (uint32_t i = 0; i < 16; i++) + { + a_weights[i] = (a_weights_chunk & weight_mask[1]); + a_weights_chunk >>= weight_bits[1]; + } + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = static_cast(bc7_dequant(endpoints[e][c], (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS)); + + color_rgba block_colors[8]; +#ifdef BC7DECOMP_USE_SSE2 + if (weight_bits[0] == 3) + bc7_interp3_sse2(endpoints, block_colors); + else + bc7_interp2_sse2(endpoints, block_colors); +#else + for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) + for (uint32_t c = 0; c < 3; c++) + block_colors[i][c] = static_cast(bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0])); +#endif + + for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) + block_colors[i][3] = static_cast(bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1])); + + for (uint32_t i = 0; i < 16; i++) + { + pPixels[i] = block_colors[weights[i]]; + pPixels[i].a = block_colors[a_weights[i]].a; + if (comp_rot >= 1) + std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); + } + + return true; +} + +struct bc7_mode_6 +{ + struct + { + uint64_t m_mode : 7; + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 7; + uint64_t m_a1 : 7; + uint64_t m_p0 : 1; + } m_lo; + + union + { + struct + { + uint64_t m_p1 : 1; + uint64_t m_s00 : 3; + uint64_t m_s10 : 4; + uint64_t m_s20 : 4; + uint64_t m_s30 : 4; + + uint64_t m_s01 : 4; + uint64_t m_s11 : 4; + uint64_t m_s21 : 4; + uint64_t m_s31 : 4; + + uint64_t m_s02 : 4; + uint64_t m_s12 : 4; + uint64_t m_s22 : 4; + uint64_t m_s32 : 4; + + uint64_t m_s03 : 4; + uint64_t m_s13 : 4; + uint64_t m_s23 : 4; + uint64_t m_s33 : 4; + + } m_hi; + + uint64_t m_hi_bits; + }; +}; + +bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) +{ + static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); + + const bc7_mode_6 &block = *static_cast(pBlock_bits); + + if (block.m_lo.m_mode != (1 << 6)) + return false; + + const uint32_t r0 = static_cast((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); + const uint32_t g0 = static_cast((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); + const uint32_t b0 = static_cast((block.m_lo.m_b0 << 1) 
| block.m_lo.m_p0); + const uint32_t a0 = static_cast((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); + const uint32_t r1 = static_cast((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); + const uint32_t g1 = static_cast((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); + const uint32_t b1 = static_cast((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); + const uint32_t a1 = static_cast((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); + + color_rgba vals[16]; +#ifdef BC7DECOMP_USE_SSE2 + __m128i vep0 = _mm_set_epi16((short)a0, (short)b0, (short)g0, (short)r0, (short)a0, (short)b0, (short)g0, (short)r0); + __m128i vep1 = _mm_set_epi16((short)a1, (short)b1, (short)g1, (short)r1, (short)a1, (short)b1, (short)g1, (short)r1); + + for (uint32_t i = 0; i < 16; i += 4) + { + const __m128i w0 = g_bc7_weights4_sse2[i / 4 * 2 + 0]; + const __m128i w1 = g_bc7_weights4_sse2[i / 4 * 2 + 1]; + + const __m128i iw0 = _mm_sub_epi16(_mm_set1_epi16(64), w0); + const __m128i iw1 = _mm_sub_epi16(_mm_set1_epi16(64), w1); + + __m128i first_half = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(vep0, iw0), _mm_mullo_epi16(vep1, w0)), _mm_set1_epi16(32)), 6); + __m128i second_half = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(vep0, iw1), _mm_mullo_epi16(vep1, w1)), _mm_set1_epi16(32)), 6); + __m128i combined = _mm_packus_epi16(first_half, second_half); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(vals + i), combined); + } +#else + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t w = g_bc7_weights4[i]; + const uint32_t iw = 64 - w; + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw + g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, + (a0 * iw + a1 * w + 32) >> 6); + } +#endif + + pPixels[0] = vals[block.m_hi.m_s00]; + pPixels[1] = vals[block.m_hi.m_s10]; + pPixels[2] = vals[block.m_hi.m_s20]; + pPixels[3] = vals[block.m_hi.m_s30]; + + pPixels[4] = vals[block.m_hi.m_s01]; + pPixels[5] = vals[block.m_hi.m_s11]; + pPixels[6] = vals[block.m_hi.m_s21]; + pPixels[7] = 
vals[block.m_hi.m_s31]; + + pPixels[8] = vals[block.m_hi.m_s02]; + pPixels[9] = vals[block.m_hi.m_s12]; + pPixels[10] = vals[block.m_hi.m_s22]; + pPixels[11] = vals[block.m_hi.m_s32]; + + pPixels[12] = vals[block.m_hi.m_s03]; + pPixels[13] = vals[block.m_hi.m_s13]; + pPixels[14] = vals[block.m_hi.m_s23]; + pPixels[15] = vals[block.m_hi.m_s33]; + + return true; +} + +bool unpack_bc7(const void *pBlock, color_rgba *pPixels) +{ + const uint8_t *block_bytes = static_cast(pBlock); + uint8_t mode = g_bc7_first_byte_to_mode[block_bytes[0]]; + + uint64_t data_chunks[2]; + + uint64_t endian_check = 1; + if (*reinterpret_cast(&endian_check) == 1) + memcpy(data_chunks, pBlock, 16); + else + { + data_chunks[0] = data_chunks[1] = 0; + for (int chunk_index = 0; chunk_index < 2; chunk_index++) + { + for (int byte_index = 0; byte_index < 8; byte_index++) + data_chunks[chunk_index] |= static_cast(block_bytes[chunk_index * 8 + byte_index]) << (byte_index * 8); + } + } + + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, data_chunks, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, data_chunks, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, data_chunks, pPixels); + case 6: + return unpack_bc7_mode6(data_chunks, pPixels); + default: + memset(pPixels, 0, sizeof(color_rgba) * 16); + break; + } + + return false; +} + +} // namespace bc7decomp + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright(c) 2020 Richard Geldreich, Jr. 
+Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files(the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions : +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain(www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non - commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain.We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors.We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ + diff --git a/src/util/texture_decompress.h b/src/util/texture_decompress.h new file mode 100644 index 000000000..f19719a19 --- /dev/null +++ b/src/util/texture_decompress.h @@ -0,0 +1,198 @@ +// See TextureDecompress.cpp for license info. + +#pragma once + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) // nonstandard extension used: nameless struct/union +#endif + +#include +#include +#include +#include +#include + +enum BC4Mode +{ + BC4_UNORM = 0, + BC4_SNORM = 1 +}; + +enum BC5Mode +{ + BC5_UNORM = 0, + BC5_SNORM = 1 +}; + +void DecompressBlockBC1(uint32_t x, uint32_t y, uint32_t stride, + const uint8_t* blockStorage, unsigned char* image); +void DecompressBlockBC2(uint32_t x, uint32_t y, uint32_t stride, + const uint8_t* blockStorage, unsigned char* image); +void DecompressBlockBC3(uint32_t x, uint32_t y, uint32_t stride, + const uint8_t* blockStorage, unsigned char* image); +void DecompressBlockBC4(uint32_t x, uint32_t y, uint32_t stride, + enum BC4Mode mode, const uint8_t* blockStorage, unsigned char* image); +void DecompressBlockBC5(uint32_t x, uint32_t y, uint32_t stride, + enum BC5Mode mode, const uint8_t* blockStorage, unsigned char* image); + +namespace bc7decomp +{ + +enum eNoClamp { cNoClamp }; + +template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? 
high : value); } + +class color_rgba +{ +public: + union + { + uint8_t m_comps[4]; + + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; + + inline color_rgba() = default; + + inline color_rgba(int y) + { + set(y); + } + + inline color_rgba(int y, int na) + { + set(y, na); + } + + inline color_rgba(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) + { + set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); + } + + inline color_rgba& set_noclamp_y(int y) + { + m_comps[0] = (uint8_t)y; + m_comps[1] = (uint8_t)y; + m_comps[2] = (uint8_t)y; + m_comps[3] = (uint8_t)255; + return *this; + } + + inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa) + { + m_comps[0] = (uint8_t)sr; + m_comps[1] = (uint8_t)sg; + m_comps[2] = (uint8_t)sb; + m_comps[3] = (uint8_t)sa; + return *this; + } + + inline color_rgba &set(int y) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = 255; + return *this; + } + + inline color_rgba &set(int y, int na) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = static_cast(clamp(na, 0, 255)); + return *this; + } + + inline color_rgba &set(int sr, int sg, int sb, int sa) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + m_comps[3] = static_cast(clamp(sa, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(int sr, int sg, int sb) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(const color_rgba &other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline const uint8_t &operator[] (uint32_t index) const { assert(index < 
4); return m_comps[index]; } + inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } + + inline void clear() + { + m_comps[0] = 0; + m_comps[1] = 0; + m_comps[2] = 0; + m_comps[3] = 0; + } + + inline bool operator== (const color_rgba &rhs) const + { + if (m_comps[0] != rhs.m_comps[0]) return false; + if (m_comps[1] != rhs.m_comps[1]) return false; + if (m_comps[2] != rhs.m_comps[2]) return false; + if (m_comps[3] != rhs.m_comps[3]) return false; + return true; + } + + inline bool operator!= (const color_rgba &rhs) const + { + return !(*this == rhs); + } + + inline bool operator<(const color_rgba &rhs) const + { + for (int i = 0; i < 4; i++) + { + if (m_comps[i] < rhs.m_comps[i]) + return true; + else if (m_comps[i] != rhs.m_comps[i]) + return false; + } + return false; + } + + inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + inline int get_luma(bool luma_601) const { return luma_601 ? 
get_601_luma() : get_709_luma(); } + + static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); } + static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); } +}; + +static_assert(sizeof(color_rgba) == 4); + +bool unpack_bc7(const void *pBlock, color_rgba *pPixels); + +} // namespace bc7decomp + +#ifdef _MSC_VER +#pragma warning(pop) +#endif \ No newline at end of file diff --git a/src/util/util.vcxproj b/src/util/util.vcxproj index d2d9f3176..e6b414fbd 100644 --- a/src/util/util.vcxproj +++ b/src/util/util.vcxproj @@ -93,6 +93,7 @@ + @@ -199,6 +200,7 @@ + diff --git a/src/util/util.vcxproj.filters b/src/util/util.vcxproj.filters index 3da4f0953..10c0f48f2 100644 --- a/src/util/util.vcxproj.filters +++ b/src/util/util.vcxproj.filters @@ -75,6 +75,7 @@ + @@ -156,6 +157,7 @@ + From ae14c8715d9aa2bf89df97d269475c5666821561 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 18:39:49 +1000 Subject: [PATCH 09/35] Image: Support decompressing BC1-7 --- src/util/image.cpp | 99 ++++++++++++++++++++++++++++++++++++++++++---- src/util/image.h | 4 +- 2 files changed, 93 insertions(+), 10 deletions(-) diff --git a/src/util/image.cpp b/src/util/image.cpp index 4a53fa4df..7d5e91d27 100644 --- a/src/util/image.cpp +++ b/src/util/image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "image.h" +#include "texture_decompress.h" #include "common/assert.h" #include "common/bitutils.h" @@ -126,7 +127,7 @@ void Image::Resize(u32 new_width, u32 new_height, ImageFormat format, bool prese if (!preserve) m_pixels.reset(); - const u32 old_blocks_y = GetBlockYCount(); + const u32 old_blocks_y = GetBlocksHigh(); const u32 old_pitch = m_pitch; PixelStorage old_pixels = std::exchange(m_pixels, 
Common::make_unique_aligned_for_overwrite( @@ -139,7 +140,7 @@ void Image::Resize(u32 new_width, u32 new_height, ImageFormat format, bool prese if (preserve && old_pixels) { StringUtil::StrideMemCpy(m_pixels.get(), m_pitch, old_pixels.get(), old_pitch, std::min(old_pitch, m_pitch), - std::min(old_blocks_y, GetBlockYCount())); + std::min(old_blocks_y, GetBlocksHigh())); } } @@ -229,19 +230,19 @@ u32 Image::CalculateStorageSize(u32 width, u32 height, u32 pitch, ImageFormat fo return pitch * height; } -u32 Image::GetBlockXCount() const +u32 Image::GetBlocksWide() const { return IsCompressedFormat(m_format) ? (Common::AlignUpPow2(m_width, 4) / 4) : m_width; } -u32 Image::GetBlockYCount() const +u32 Image::GetBlocksHigh() const { return IsCompressedFormat(m_format) ? (Common::AlignUpPow2(m_height, 4) / 4) : m_height; } u32 Image::GetStorageSize() const { - return GetBlockYCount() * m_pitch; + return GetBlocksHigh() * m_pitch; } std::span Image::GetPixelsSpan() const @@ -272,7 +273,7 @@ void Image::SetPixels(u32 width, u32 height, ImageFormat format, const void* pix { Resize(width, height, format, false); if (m_pixels) - StringUtil::StrideMemCpy(m_pixels.get(), m_pitch, pixels, pitch, m_pitch, GetBlockYCount()); + StringUtil::StrideMemCpy(m_pixels.get(), m_pitch, pixels, pitch, m_pitch, GetBlocksHigh()); } void Image::SetPixels(u32 width, u32 height, ImageFormat format, PixelStorage pixels, u32 pitch) @@ -496,6 +497,62 @@ void SwapBGRAToRGBA(void* pixels_out, u32 pixels_out_pitch, const void* pixels_i } } +template +static void DecompressBC(Image& image_out, const Image& image_in) +{ + constexpr u32 BC_BLOCK_SIZE = 4; + constexpr u32 BC_BLOCK_BYTES = 16; + + const u32 blocks_wide = image_in.GetBlocksWide(); + const u32 blocks_high = image_in.GetBlocksHigh(); + for (u32 y = 0; y < blocks_high; y++) + { + const u8* block_in = image_in.GetRowPixels(y); + for (u32 x = 0; x < blocks_wide; x++, block_in += BC_BLOCK_BYTES) + { + // decompress block + switch (format) + { + case 
ImageFormat::BC1: + { + DecompressBlockBC1(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, image_out.GetPitch(), block_in, + image_out.GetPixels()); + } + break; + case ImageFormat::BC2: + { + DecompressBlockBC2(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, image_out.GetPitch(), block_in, + image_out.GetPixels()); + } + break; + case ImageFormat::BC3: + { + DecompressBlockBC3(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, image_out.GetPitch(), block_in, + image_out.GetPixels()); + } + break; + + case ImageFormat::BC7: + { + u32 block_pixels_out[BC_BLOCK_SIZE * BC_BLOCK_SIZE]; + bc7decomp::unpack_bc7(block_in, reinterpret_cast(block_pixels_out)); + + // and write it to the new image + const u32* copy_in_ptr = block_pixels_out; + u8* copy_out_ptr = image_out.GetRowPixels(y * BC_BLOCK_SIZE) + (x * BC_BLOCK_SIZE * sizeof(u32)); + for (u32 sy = 0; sy < 4; sy++) + { + std::memcpy(copy_out_ptr, copy_in_ptr, sizeof(u32) * BC_BLOCK_SIZE); + copy_in_ptr += BC_BLOCK_SIZE; + copy_out_ptr += image_out.GetPitch(); + } + } + break; + } + } + } +} + std::optional Image::ConvertToRGBA8(Error* error) const { std::optional ret; @@ -595,7 +652,33 @@ std::optional Image::ConvertToRGBA8(Error* error) const } break; - // TODO: Block format decompression + case ImageFormat::BC1: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + DecompressBC(ret.value(), *this); + } + break; + + case ImageFormat::BC2: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + DecompressBC(ret.value(), *this); + } + break; + + case ImageFormat::BC3: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + DecompressBC(ret.value(), *this); + } + break; + + case ImageFormat::BC7: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + DecompressBC(ret.value(), *this); + } + break; default: { @@ -1602,7 +1685,7 @@ bool DDSFileLoader(Image* image, std::string_view path, std::FILE* fp, Error* er return false; image->Resize(info.width, info.height, info.format, false); - const u32 blocks = image->GetBlockYCount(); + const 
u32 blocks = image->GetBlocksHigh(); if (image->GetPitch() != info.base_image_pitch) { for (u32 y = 0; y < blocks; y++) diff --git a/src/util/image.h b/src/util/image.h index 6f30c0064..400ec0b42 100644 --- a/src/util/image.h +++ b/src/util/image.h @@ -66,8 +66,8 @@ public: // ALWAYS_INLINE void SetPixel(u32 x, u32 y, PixelType pixel) { m_pixels[y * m_width + x] = pixel; } // ALWAYS_INLINE PixelType GetPixel(u32 x, u32 y) const { return m_pixels[y * m_width + x]; } - u32 GetBlockXCount() const; - u32 GetBlockYCount() const; + u32 GetBlocksWide() const; + u32 GetBlocksHigh() const; u32 GetStorageSize() const; std::span GetPixelsSpan() const; From 7b230dc4c18e961a70be7bf5ecdd9dab08042162 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 22:40:59 +1000 Subject: [PATCH 10/35] GPU/HW: MSAA samples change needs to recreate pipelines Otherwise Metal/Vulkan go boom boom. --- src/core/gpu_hw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 4e4d6bf9e..b481929fe 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -417,7 +417,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || (!old_settings.gpu_texture_cache && g_settings.gpu_texture_cache)); const bool shaders_changed = - ((m_resolution_scale > 1) != (resolution_scale > 1) || (m_multisamples > 1) != (multisamples > 1) || + ((m_resolution_scale > 1) != (resolution_scale > 1) || m_multisamples != multisamples || m_true_color != g_settings.gpu_true_color || prev_force_progressive_scan != m_force_progressive_scan || (multisamples > 1 && g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) || (resolution_scale > 1 && g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || From 70a4b5c9f23b9be7b4ce605d5af9ea4ba6acc95d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 23:29:10 +1000 Subject: [PATCH 11/35] GameDatabase: Add 
missing flags --- data/resources/images/flags/{Finish.svg => Finnish.svg} | 0 data/resources/images/flags/Russian.svg | 2 ++ data/resources/images/flags/Turkish.svg | 5 +++++ data/resources/images/flags/sources.txt | 3 ++- src/core/game_database.cpp | 6 +++--- src/core/game_database.h | 1 + 6 files changed, 13 insertions(+), 4 deletions(-) rename data/resources/images/flags/{Finish.svg => Finnish.svg} (100%) create mode 100644 data/resources/images/flags/Russian.svg create mode 100644 data/resources/images/flags/Turkish.svg diff --git a/data/resources/images/flags/Finish.svg b/data/resources/images/flags/Finnish.svg similarity index 100% rename from data/resources/images/flags/Finish.svg rename to data/resources/images/flags/Finnish.svg diff --git a/data/resources/images/flags/Russian.svg b/data/resources/images/flags/Russian.svg new file mode 100644 index 000000000..553014621 --- /dev/null +++ b/data/resources/images/flags/Russian.svg @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/data/resources/images/flags/Turkish.svg b/data/resources/images/flags/Turkish.svg new file mode 100644 index 000000000..0ce226481 --- /dev/null +++ b/data/resources/images/flags/Turkish.svg @@ -0,0 +1,5 @@ + + Flag of Turkey + + + \ No newline at end of file diff --git a/data/resources/images/flags/sources.txt b/data/resources/images/flags/sources.txt index 3f2dc4f9f..9d41adee9 100644 --- a/data/resources/images/flags/sources.txt +++ b/data/resources/images/flags/sources.txt @@ -4,7 +4,7 @@ Czech.svg: https://commons.wikimedia.org/wiki/Flag_of_the_Czech_Republic.svg Danish.svg: https://commons.wikimedia.org/wiki/Flag_of_Denmark.svg Dutch.svg: https://commons.wikimedia.org/wiki/Flag_of_the_Netherlands.svg English.svg: https://commons.wikimedia.org/wiki/Flag_of_the_United_Kingdom_(1-2).svg -Finish.svg: https://commons.wikimedia.org/wiki/File:Flag_of_Finland.svg +Finnish.svg: https://commons.wikimedia.org/wiki/File:Flag_of_Finland.svg French.svg: 
https://commons.wikimedia.org/wiki/File:Flag_of_France.svg German.svg: https://commons.wikimedia.org/wiki/File:Flag_of_Germany.svg Greek.svg: https://commons.wikimedia.org/wiki/Flag_of_Greece.svg @@ -17,4 +17,5 @@ Polish.svg: https://commons.wikimedia.org/wiki/Flag_of_Poland.svg Portuguese.svg: https://commons.wikimedia.org/wiki/File:Flag_of_Portugal.svg Spanish.svg: https://commons.wikimedia.org/wiki/File:Flag_of_Spain.svg Swedish.svg: https://commons.wikimedia.org/wiki/File:Flag_of_Sweden.svg +Russian.svg: https://commons.wikimedia.org/wiki/File:Flag_of_Russia.svg Other.svg: https://en.wikipedia.org/wiki/File:Flag_with_question_mark.svg diff --git a/src/core/game_database.cpp b/src/core/game_database.cpp index 4e07a01b0..8ed973e14 100644 --- a/src/core/game_database.cpp +++ b/src/core/game_database.cpp @@ -138,9 +138,9 @@ static constexpr const std::array(Trait::MaxCou }}; static constexpr std::array(Language::MaxCount)> s_language_names = {{ - "Catalan", "Chinese", "Czech", "Danish", "Dutch", "English", "Finnish", - "French", "German", "Greek", "Hebrew", "Iranian", "Italian", "Japanese", - "Korean", "Norwegian", "Polish", "Portuguese", "Russian", "Spanish", "Swedish", + "Catalan", "Chinese", "Czech", "Danish", "Dutch", "English", "Finnish", "French", + "German", "Greek", "Hebrew", "Iranian", "Italian", "Japanese", "Korean", "Norwegian", + "Polish", "Portuguese", "Russian", "Spanish", "Swedish", "Turkish", }}; static constexpr const char* GAMEDB_YAML_FILENAME = "gamedb.yaml"; diff --git a/src/core/game_database.h b/src/core/game_database.h index b4b5d4753..6a17746c5 100644 --- a/src/core/game_database.h +++ b/src/core/game_database.h @@ -88,6 +88,7 @@ enum class Language : u8 Russian, Spanish, Swedish, + Turkish, MaxCount, }; From 852239ec8a7acf07be168a357c4a417eb755ef2a Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 23:24:39 +1000 Subject: [PATCH 12/35] Qt: Add game list language override option --- src/core/fullscreen_ui.cpp | 2 +- 
src/core/game_database.cpp | 19 ++ src/core/game_database.h | 4 +- src/core/game_list.cpp | 108 +++++--- src/core/game_list.h | 9 +- src/duckstation-qt/gamelistmodel.cpp | 2 +- src/duckstation-qt/gamesummarywidget.cpp | 31 ++- src/duckstation-qt/gamesummarywidget.h | 1 + src/duckstation-qt/gamesummarywidget.ui | 329 ++++++++++++----------- src/duckstation-qt/qtutils.cpp | 6 + src/duckstation-qt/qtutils.h | 1 + 11 files changed, 313 insertions(+), 199 deletions(-) diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index cdc45ab2d..e3cbf1844 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -6882,7 +6882,7 @@ void FullscreenUI::DrawGameList(const ImVec2& heading_size) const bool display_as_language = (selected_entry->dbentry && selected_entry->dbentry->HasAnyLanguage()); ImGui::TextUnformatted(display_as_language ? FSUI_CSTR("Language: ") : FSUI_CSTR("Region: ")); ImGui::SameLine(); - ImGui::Image(GetCachedTexture(selected_entry->GetLanguageIconFileName(), 23, 16), LayoutScale(23.0f, 16.0f)); + ImGui::Image(GetCachedTexture(selected_entry->GetLanguageIconName(), 23, 16), LayoutScale(23.0f, 16.0f)); ImGui::SameLine(); if (display_as_language) { diff --git a/src/core/game_database.cpp b/src/core/game_database.cpp index 8ed973e14..6d81b5da9 100644 --- a/src/core/game_database.cpp +++ b/src/core/game_database.cpp @@ -21,6 +21,7 @@ #include "ryml.hpp" +#include #include #include #include @@ -312,6 +313,24 @@ std::optional GameDatabase::ParseLanguageName(std::strin return std::nullopt; } +TinyString GameDatabase::GetLanguageFlagResourceName(std::string_view language_name) +{ + return TinyString::from_format("images/flags/{}.svg", language_name); +} + +std::string_view GameDatabase::Entry::GetLanguageFlagName(DiscRegion region) const +{ + // If there's only one language, this is the flag we want to use. + // Except if it's English, then we want to use the disc region's flag. 
+ std::string_view ret; + if (languages.count() == 1 && !languages.test(static_cast(GameDatabase::Language::English))) + ret = GameDatabase::GetLanguageName(static_cast(std::countr_zero(languages.to_ulong()))); + else + ret = Settings::GetDiscRegionName(region); + + return ret; +} + SmallString GameDatabase::Entry::GetLanguagesString() const { SmallString ret; diff --git a/src/core/game_database.h b/src/core/game_database.h index 6a17746c5..7ac805e86 100644 --- a/src/core/game_database.h +++ b/src/core/game_database.h @@ -130,8 +130,9 @@ struct Entry ALWAYS_INLINE bool HasTrait(Trait trait) const { return traits[static_cast(trait)]; } ALWAYS_INLINE bool HasLanguage(Language language) const { return languages.test(static_cast(language)); } - ALWAYS_INLINE bool HasAnyLanguage() const { return !languages.none(); } + ALWAYS_INLINE bool HasAnyLanguage() const { return languages.any(); } + std::string_view GetLanguageFlagName(DiscRegion region) const; SmallString GetLanguagesString() const; void ApplySettings(Settings& settings, bool display_osd_messages) const; @@ -156,6 +157,7 @@ const char* GetCompatibilityRatingDisplayName(CompatibilityRating rating); const char* GetLanguageName(Language language); std::optional ParseLanguageName(std::string_view str); +TinyString GetLanguageFlagResourceName(std::string_view language_name); /// Map of track hashes for image verification struct TrackData diff --git a/src/core/game_list.cpp b/src/core/game_list.cpp index 0ed468509..773cd457f 100644 --- a/src/core/game_list.cpp +++ b/src/core/game_list.cpp @@ -117,6 +117,8 @@ static PlayedTimeEntry UpdatePlayedTimeFile(const std::string& path, const std:: std::time_t add_time); static std::string GetCustomPropertiesFile(); +static bool PutCustomPropertiesField(INISettingsInterface& ini, const std::string& path, const char* field, + const char* value); static FileSystem::ManagedCFilePtr OpenMemoryCardTimestampCache(bool for_write); static bool UpdateMemcardTimestampCache(const 
MemcardTimestampCacheEntry& entry); @@ -627,6 +629,21 @@ void GameList::ApplyCustomAttributes(const std::string& path, Entry* entry, WARNING_LOG("Invalid region '{}' in custom attributes for '{}'", custom_region_str.value(), path); } } + const std::optional custom_language_str = + custom_attributes_ini.GetOptionalTinyStringValue(path.c_str(), "Language"); + if (custom_language_str.has_value()) + { + const std::optional custom_region = + GameDatabase::ParseLanguageName(custom_region_str.value()); + if (custom_region.has_value()) + { + entry->custom_language = custom_region.value(); + } + else + { + WARNING_LOG("Invalid language '{}' in custom attributes for '{}'", custom_region_str.value(), path); + } + } } std::unique_lock GameList::GetLock() @@ -990,26 +1007,20 @@ std::string GameList::GetNewCoverImagePathForEntry(const Entry* entry, const cha std::string_view GameList::Entry::GetLanguageIcon() const { - // If there's only one language, this is the flag we want to use. - // Except if it's English, then we want to use the disc region's flag. 
std::string_view ret; - if (dbentry && dbentry->languages.count() == 1 && - !dbentry->languages.test(static_cast(GameDatabase::Language::English))) - { - ret = GameDatabase::GetLanguageName( - static_cast(std::countr_zero(dbentry->languages.to_ulong()))); - } + if (custom_language != GameDatabase::Language::MaxCount) + ret = GameDatabase::GetLanguageName(custom_language); + else if (dbentry) + ret = dbentry->GetLanguageFlagName(region); else - { ret = Settings::GetDiscRegionName(region); - } return ret; } -TinyString GameList::Entry::GetLanguageIconFileName() const +TinyString GameList::Entry::GetLanguageIconName() const { - return TinyString::from_format("images/flags/{}.svg", GetLanguageIcon()); + return GameDatabase::GetLanguageFlagResourceName(GetLanguageIcon()); } TinyString GameList::Entry::GetCompatibilityIconFileName() const @@ -1518,28 +1529,37 @@ std::string GameList::GetCustomPropertiesFile() return Path::Combine(EmuFolders::DataRoot, "custom_properties.ini"); } -void GameList::SaveCustomTitleForPath(const std::string& path, const std::string& custom_title) +bool GameList::PutCustomPropertiesField(INISettingsInterface& ini, const std::string& path, const char* field, + const char* value) { - INISettingsInterface custom_attributes_ini(GetCustomPropertiesFile()); - custom_attributes_ini.Load(); + ini.Load(); - if (!custom_title.empty()) + if (value && *value != '\0') { - custom_attributes_ini.SetStringValue(path.c_str(), "Title", custom_title.c_str()); + ini.SetStringValue(path.c_str(), field, value); } else { - custom_attributes_ini.DeleteValue(path.c_str(), "Title"); - custom_attributes_ini.RemoveEmptySections(); + ini.DeleteValue(path.c_str(), field); + ini.RemoveEmptySections(); } Error error; - if (!custom_attributes_ini.Save(&error)) + if (!ini.Save(&error)) { ERROR_LOG("Failed to save custom attributes: {}", error.GetDescription()); - return; + return false; } + return true; +} + +bool GameList::SaveCustomTitleForPath(const std::string& path, const 
std::string& custom_title) +{ + INISettingsInterface custom_attributes_ini(GetCustomPropertiesFile()); + if (!PutCustomPropertiesField(custom_attributes_ini, path, "Title", custom_title.c_str())) + return false; + if (!custom_title.empty()) { // Can skip the rescan and just update the value directly. @@ -1556,28 +1576,18 @@ void GameList::SaveCustomTitleForPath(const std::string& path, const std::string // Let the cache update by rescanning. Only need to do this on deletion, to get the original value. RescanCustomAttributesForPath(path, custom_attributes_ini); } + + return true; } -void GameList::SaveCustomRegionForPath(const std::string& path, const std::optional custom_region) +bool GameList::SaveCustomRegionForPath(const std::string& path, const std::optional custom_region) { INISettingsInterface custom_attributes_ini(GetCustomPropertiesFile()); - custom_attributes_ini.Load(); - - if (custom_region.has_value()) + if (!PutCustomPropertiesField(custom_attributes_ini, path, "Region", + custom_region.has_value() ? Settings::GetDiscRegionName(custom_region.value()) : + nullptr)) { - custom_attributes_ini.SetStringValue(path.c_str(), "Region", Settings::GetDiscRegionName(custom_region.value())); - } - else - { - custom_attributes_ini.DeleteValue(path.c_str(), "Region"); - custom_attributes_ini.RemoveEmptySections(); - } - - Error error; - if (!custom_attributes_ini.Save(&error)) - { - ERROR_LOG("Failed to save custom attributes: {}", error.GetDescription()); - return; + return false; } if (custom_region.has_value()) @@ -1596,6 +1606,28 @@ void GameList::SaveCustomRegionForPath(const std::string& path, const std::optio // Let the cache update by rescanning. Only need to do this on deletion, to get the original value. 
RescanCustomAttributesForPath(path, custom_attributes_ini); } + + return true; +} + +bool GameList::SaveCustomLanguageForPath(const std::string& path, + const std::optional custom_language) +{ + INISettingsInterface custom_attributes_ini(GetCustomPropertiesFile()); + if (!PutCustomPropertiesField(custom_attributes_ini, path, "Language", + custom_language.has_value() ? GameDatabase::GetLanguageName(custom_language.value()) : + nullptr)) + { + return false; + } + + // Don't need to rescan, since there's no original value to restore. + auto lock = GetLock(); + Entry* entry = GetMutableEntryForPath(path); + if (entry) + entry->custom_language = custom_language.value_or(GameDatabase::Language::MaxCount); + + return true; } std::string GameList::GetCustomTitleForPath(const std::string_view path) diff --git a/src/core/game_list.h b/src/core/game_list.h index 1d87635b2..beb697f99 100644 --- a/src/core/game_list.h +++ b/src/core/game_list.h @@ -40,6 +40,7 @@ struct Entry bool disc_set_member = false; bool has_custom_title = false; bool has_custom_region = false; + GameDatabase::Language custom_language = GameDatabase::Language::MaxCount; std::string path; std::string serial; @@ -57,13 +58,14 @@ struct Entry std::string_view GetLanguageIcon() const; - TinyString GetLanguageIconFileName() const; + TinyString GetLanguageIconName() const; TinyString GetCompatibilityIconFileName() const; TinyString GetReleaseDateString() const; ALWAYS_INLINE bool IsDisc() const { return (type == EntryType::Disc); } ALWAYS_INLINE bool IsDiscSet() const { return (type == EntryType::DiscSet); } + ALWAYS_INLINE bool HasCustomLanguage() const { return (custom_language != GameDatabase::Language::MaxCount); } ALWAYS_INLINE EntryType GetSortType() const { return (type == EntryType::DiscSet) ? 
EntryType::Disc : type; } }; @@ -128,8 +130,9 @@ bool DownloadCovers(const std::vector& url_templates, bool use_seri std::function save_callback = {}); // Custom properties support -void SaveCustomTitleForPath(const std::string& path, const std::string& custom_title); -void SaveCustomRegionForPath(const std::string& path, const std::optional custom_region); +bool SaveCustomTitleForPath(const std::string& path, const std::string& custom_title); +bool SaveCustomRegionForPath(const std::string& path, const std::optional custom_region); +bool SaveCustomLanguageForPath(const std::string& path, const std::optional custom_language); std::string GetCustomTitleForPath(const std::string_view path); std::optional GetCustomRegionForPath(const std::string_view path); diff --git a/src/duckstation-qt/gamelistmodel.cpp b/src/duckstation-qt/gamelistmodel.cpp index ce496b384..3653600f4 100644 --- a/src/duckstation-qt/gamelistmodel.cpp +++ b/src/duckstation-qt/gamelistmodel.cpp @@ -294,7 +294,7 @@ const QPixmap& GameListModel::getFlagPixmapForEntry(const GameList::Entry* ge) c if (it != m_flag_pixmap_cache.end()) return it->second; - const QIcon icon(QString::fromStdString(QtHost::GetResourcePath(ge->GetLanguageIconFileName(), true))); + const QIcon icon(QString::fromStdString(QtHost::GetResourcePath(ge->GetLanguageIconName(), true))); it = m_flag_pixmap_cache.emplace(name, icon.pixmap(FLAG_PIXMAP_WIDTH, FLAG_PIXMAP_HEIGHT)).first; return it->second; } diff --git a/src/duckstation-qt/gamesummarywidget.cpp b/src/duckstation-qt/gamesummarywidget.cpp index 549f06112..aa72ccbba 100644 --- a/src/duckstation-qt/gamesummarywidget.cpp +++ b/src/duckstation-qt/gamesummarywidget.cpp @@ -52,6 +52,15 @@ GameSummaryWidget::GameSummaryWidget(const std::string& path, const std::string& static_cast(i)))); } + // I hate this so much. 
+ m_ui.customLanguage->addItem(QtUtils::GetIconForLanguage(entry->GetLanguageFlagName(region)), + tr("Show Default Flag")); + for (u32 i = 0; i < static_cast(GameDatabase::Language::MaxCount); i++) + { + const char* language_name = GameDatabase::GetLanguageName(static_cast(i)); + m_ui.customLanguage->addItem(QtUtils::GetIconForLanguage(language_name), QString::fromUtf8(language_name)); + } + populateUi(path, serial, region, entry); connect(m_ui.compatibilityComments, &QToolButton::clicked, this, &GameSummaryWidget::onCompatibilityCommentsClicked); @@ -69,6 +78,7 @@ GameSummaryWidget::GameSummaryWidget(const std::string& path, const std::string& connect(m_ui.restoreTitle, &QAbstractButton::clicked, this, [this]() { setCustomTitle(std::string()); }); connect(m_ui.region, &QComboBox::currentIndexChanged, this, [this](int index) { setCustomRegion(index); }); connect(m_ui.restoreRegion, &QAbstractButton::clicked, this, [this]() { setCustomRegion(-1); }); + connect(m_ui.customLanguage, &QComboBox::currentIndexChanged, this, &GameSummaryWidget::onCustomLanguageChanged); } GameSummaryWidget::~GameSummaryWidget() = default; @@ -147,6 +157,8 @@ void GameSummaryWidget::populateUi(const std::string& path, const std::string& s else m_ui.releaseInfo->setText(tr("Unknown")); + m_ui.languages->setText(QtUtils::StringViewToQString(entry->GetLanguagesString())); + QString controllers; if (entry->supported_controllers != 0 && entry->supported_controllers != static_cast(-1)) { @@ -201,7 +213,10 @@ void GameSummaryWidget::populateCustomAttributes() auto lock = GameList::GetLock(); const GameList::Entry* entry = GameList::GetEntryForPath(m_path); if (!entry || entry->IsDiscSet()) + { + m_ui.customLanguage->setEnabled(false); return; + } { QSignalBlocker sb(m_ui.title); @@ -214,6 +229,12 @@ void GameSummaryWidget::populateCustomAttributes() m_ui.region->setCurrentIndex(static_cast(entry->region)); m_ui.restoreRegion->setEnabled(entry->has_custom_region); } + + { + QSignalBlocker 
sb(m_ui.customLanguage); + m_ui.customLanguage->setCurrentIndex(entry->HasCustomLanguage() ? (static_cast(entry->custom_language) + 1) : + 0); + } } void GameSummaryWidget::updateWindowTitle() @@ -238,7 +259,15 @@ void GameSummaryWidget::setCustomRegion(int region) GameList::SaveCustomRegionForPath(m_path, (region >= 0) ? std::optional(static_cast(region)) : std::optional()); populateCustomAttributes(); - updateWindowTitle(); + g_main_window->refreshGameListModel(); +} + +void GameSummaryWidget::onCustomLanguageChanged(int language) +{ + GameList::SaveCustomLanguageForPath( + m_path, (language > 0) ? std::optional(static_cast(language - 1)) : + std::optional()); + populateCustomAttributes(); g_main_window->refreshGameListModel(); } diff --git a/src/duckstation-qt/gamesummarywidget.h b/src/duckstation-qt/gamesummarywidget.h index f9866362e..a96fa4404 100644 --- a/src/duckstation-qt/gamesummarywidget.h +++ b/src/duckstation-qt/gamesummarywidget.h @@ -27,6 +27,7 @@ public: void reloadGameSettings(); private Q_SLOTS: + void onCustomLanguageChanged(int language); void onCompatibilityCommentsClicked(); void onInputProfileChanged(int index); void onEditInputProfileClicked(); diff --git a/src/duckstation-qt/gamesummarywidget.ui b/src/duckstation-qt/gamesummarywidget.ui index c4f21e620..b8049a780 100644 --- a/src/duckstation-qt/gamesummarywidget.ui +++ b/src/duckstation-qt/gamesummarywidget.ui @@ -30,67 +30,36 @@ 0 - - + + true - - - - - - Clear the line to restore the original title... 
- - - - - - - false - - - Restore - - - - - - - - - - - - 0 - 0 - - - - - - - - false - - - Restore - - - - - - - - - Image Path: + + + + true - - + + + + Tracks: + + + + + + + Input Profile: + + + + + true @@ -103,7 +72,35 @@ - + + + + true + + + + + + + true + + + + + + + Controllers: + + + + + + + Release Info: + + + + QAbstractItemView::EditTrigger::NoEditTriggers @@ -146,77 +143,7 @@ - - - - Region: - - - - - - - Developer: - - - - - - - Controllers: - - - - - - - Tracks: - - - - - - - true - - - - - - - false - - - - - - - Release Info: - - - - - - - Input Profile: - - - - - - - true - - - - - - - Genre: - - - - + @@ -253,42 +180,42 @@ - - - - true - - + + + + + + Clear the line to restore the original title... + + + + + + + false + + + Restore + + + + - - + + - Type: + Image Path: - - - - Title: + + + + false - - - - true - - - - - - - Compatibility: - - - - + @@ -309,7 +236,28 @@ - + + + + Genre: + + + + + + + Developer: + + + + + + + Region: + + + + @@ -323,6 +271,79 @@ + + + + Compatibility: + + + + + + + Type: + + + + + + + + + + 0 + 0 + + + + + + + + false + + + Restore + + + + + + + + + true + + + + + + + Title: + + + + + + + Languages: + + + + + + + + + true + + + + + + + + diff --git a/src/duckstation-qt/qtutils.cpp b/src/duckstation-qt/qtutils.cpp index d2082af0f..89a561eef 100644 --- a/src/duckstation-qt/qtutils.cpp +++ b/src/duckstation-qt/qtutils.cpp @@ -312,6 +312,12 @@ QIcon QtUtils::GetIconForCompatibility(GameDatabase::CompatibilityRating rating) QtHost::GetResourcePath(TinyString::from_format("images/star-{}.svg", static_cast(rating)), true))); } +QIcon QtUtils::GetIconForLanguage(std::string_view language_name) +{ + return QIcon( + QString::fromStdString(QtHost::GetResourcePath(GameDatabase::GetLanguageFlagResourceName(language_name), true))); +} + qreal QtUtils::GetDevicePixelRatioForWidget(const QWidget* widget) { const QScreen* screen_for_ratio = widget->screen(); diff --git a/src/duckstation-qt/qtutils.h b/src/duckstation-qt/qtutils.h 
index 2999d1f6f..83b2563cf 100644 --- a/src/duckstation-qt/qtutils.h +++ b/src/duckstation-qt/qtutils.h @@ -113,6 +113,7 @@ QIcon GetIconForRegion(DiscRegion region); /// Returns icon for entry type. QIcon GetIconForEntryType(GameList::EntryType type); QIcon GetIconForCompatibility(GameDatabase::CompatibilityRating rating); +QIcon GetIconForLanguage(std::string_view language_name); /// Returns the pixel ratio/scaling factor for a widget. qreal GetDevicePixelRatioForWidget(const QWidget* widget); From 9fa8fee193611c7b456eaaec9f369e4940ca32d0 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 24 Nov 2024 23:56:16 +1000 Subject: [PATCH 13/35] Achievements: Use rc_client for pause throttling --- src/core/achievements.cpp | 9 +++++++++ src/core/achievements.h | 3 +++ src/core/hotkeys.cpp | 27 ++++++++------------------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/core/achievements.cpp b/src/core/achievements.cpp index 1ccadad2d..b8d75c51f 100644 --- a/src/core/achievements.cpp +++ b/src/core/achievements.cpp @@ -1969,6 +1969,15 @@ std::string Achievements::GetLoggedInUserBadgePath() return badge_path; } +u32 Achievements::GetPauseThrottleFrames() +{ + if (!IsActive() || !IsHardcoreModeActive() || IsUsingRAIntegration()) + return 0; + + u32 frames_remaining = 0; + return rc_client_can_pause(s_client, &frames_remaining) ? 0 : frames_remaining; +} + void Achievements::Logout() { if (IsActive()) diff --git a/src/core/achievements.h b/src/core/achievements.h index a572f5e60..7a2b5dc89 100644 --- a/src/core/achievements.h +++ b/src/core/achievements.h @@ -129,6 +129,9 @@ const char* GetLoggedInUserName(); /// Should be called with the lock held. std::string GetLoggedInUserBadgePath(); +/// Returns 0 if pausing is allowed, otherwise the number of frames until pausing is allowed. +u32 GetPauseThrottleFrames(); + /// Clears all cached state used to render the UI. 
void ClearUIState(); diff --git a/src/core/hotkeys.cpp b/src/core/hotkeys.cpp index 6fc333f4d..15b3bbaef 100644 --- a/src/core/hotkeys.cpp +++ b/src/core/hotkeys.cpp @@ -139,27 +139,16 @@ static void HotkeyToggleOSD() static bool CanPause() { - static constexpr const float PAUSE_INTERVAL = 3.0f; - static Common::Timer::Value s_last_pause_time = 0; - - if (!Achievements::IsHardcoreModeActive() || System::IsPaused()) + const u32 frames_until_pause_allowed = Achievements::GetPauseThrottleFrames(); + if (frames_until_pause_allowed == 0) return true; - const Common::Timer::Value time = Common::Timer::GetCurrentValue(); - const float delta = static_cast(Common::Timer::ConvertValueToSeconds(time - s_last_pause_time)); - if (delta < PAUSE_INTERVAL) - { - Host::AddIconOSDMessage("PauseCooldown", ICON_FA_CLOCK, - TRANSLATE_PLURAL_STR("Hotkeys", "You cannot pause until another %n second(s) have passed.", - "", static_cast(std::ceil(PAUSE_INTERVAL - delta))), - Host::OSD_QUICK_DURATION); - return false; - } - - Host::RemoveKeyedOSDMessage("PauseCooldown"); - s_last_pause_time = time; - - return true; + const float seconds = static_cast(frames_until_pause_allowed) / System::GetVideoFrameRate(); + Host::AddIconOSDMessage("PauseCooldown", ICON_FA_CLOCK, + TRANSLATE_PLURAL_STR("Hotkeys", "You cannot pause until another %n second(s) have passed.", + "", static_cast(std::ceil(seconds))), + std::max(seconds, Host::OSD_QUICK_DURATION)); + return false; } #endif From 40a1bee9ea05d5714f9131ae74a1734ab709befe Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 01:05:06 +1000 Subject: [PATCH 14/35] CDROM: Fix incorrect clmap in XA-ADPCM decoding Fixes crunchy audio in GT2: A-Spec. 
--- src/core/cdrom.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/cdrom.cpp b/src/core/cdrom.cpp index b9e423ab8..eb9731538 100644 --- a/src/core/cdrom.cpp +++ b/src/core/cdrom.cpp @@ -3460,7 +3460,7 @@ void CDROM::DecodeXAADPCMChunks(const u8* chunk_ptr, s16* samples) // mix in previous values s32* prev = IS_STEREO ? &s_state.xa_last_samples[(block & 1) * 2] : &s_state.xa_last_samples[0]; const s32 interp_sample = std::clamp( - static_cast(sample) + ((prev[0] * filter_pos) >> 6) + ((prev[1] * filter_neg) >> 6), -32767, 32768); + static_cast(sample) + ((prev[0] * filter_pos) >> 6) + ((prev[1] * filter_neg) >> 6), -32768, 32767); // update previous values prev[1] = prev[0]; From 0076af6974a285fe7d884b12d258323c0848f53c Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 01:14:48 +1000 Subject: [PATCH 15/35] SPU: Zero out upper ADPCM filters Also in CD-ROM. --- src/core/cdrom.cpp | 13 +++++-------- src/core/spu.cpp | 8 ++++---- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/core/cdrom.cpp b/src/core/cdrom.cpp index eb9731538..4cec6c0f2 100644 --- a/src/core/cdrom.cpp +++ b/src/core/cdrom.cpp @@ -263,7 +263,7 @@ union XA_ADPCMBlockHeader u8 bits; BitField shift; - BitField filter; + BitField filter; // For both 4bit and 8bit ADPCM, reserved shift values 13..15 will act same as shift=9). 
u8 GetShift() const @@ -3415,11 +3415,8 @@ s16 CDROM::SaturateVolume(s32 volume) template void CDROM::DecodeXAADPCMChunks(const u8* chunk_ptr, s16* samples) { - static constexpr std::array s_xa_adpcm_filter_table_pos = { - {0, 60, 115, 98, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; - - static constexpr std::array s_xa_adpcm_filter_table_neg = { - {0, 0, -52, -55, -60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + static constexpr std::array filter_table_pos = {{0, 60, 115, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + static constexpr std::array filter_table_neg = {{0, 0, -52, -55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; // The data layout is annoying here. Each word of data is interleaved with the other blocks, requiring multiple // passes to decode the whole chunk. @@ -3440,8 +3437,8 @@ void CDROM::DecodeXAADPCMChunks(const u8* chunk_ptr, s16* samples) const XA_ADPCMBlockHeader block_header{headers_ptr[block]}; const u8 shift = block_header.GetShift(); const u8 filter = block_header.GetFilter(); - const s32 filter_pos = s_xa_adpcm_filter_table_pos[filter]; - const s32 filter_neg = s_xa_adpcm_filter_table_neg[filter]; + const s32 filter_pos = filter_table_pos[filter]; + const s32 filter_neg = filter_table_neg[filter]; s16* out_samples_ptr = IS_STEREO ? &samples[(block / 2) * (WORDS_PER_BLOCK * 2) + (block % 2)] : &samples[block * WORDS_PER_BLOCK]; diff --git a/src/core/spu.cpp b/src/core/spu.cpp index f24c8c8b2..67b8083fa 100644 --- a/src/core/spu.cpp +++ b/src/core/spu.cpp @@ -189,7 +189,7 @@ struct ADPCMBlock u8 bits; BitField shift; - BitField filter; + BitField filter; } shift_filter; ADPCMFlags flags; u8 data[NUM_SAMPLES_PER_ADPCM_BLOCK / 2]; @@ -201,7 +201,7 @@ struct ADPCMBlock return (shift > 12) ? 
9 : shift; } - u8 GetFilter() const { return std::min(shift_filter.filter, 4); } + u8 GetFilter() const { return shift_filter.filter; } u8 GetNibble(u32 index) const { return (data[index / 2] >> ((index % 2) * 4)) & 0x0F; } }; @@ -1877,8 +1877,8 @@ void SPU::Voice::TickADSR() void SPU::Voice::DecodeBlock(const ADPCMBlock& block) { - static constexpr std::array filter_table_pos = {{0, 60, 115, 98, 122}}; - static constexpr std::array filter_table_neg = {{0, 0, -52, -55, -60}}; + static constexpr std::array filter_table_pos = {{0, 60, 115, 98, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + static constexpr std::array filter_table_neg = {{0, 0, -52, -55, -60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; // store samples needed for interpolation current_block_samples[2] = current_block_samples[NUM_SAMPLES_FROM_LAST_ADPCM_BLOCK + NUM_SAMPLES_PER_ADPCM_BLOCK - 1]; From 0ae8fcced305a721d98b7caa1a1d30503a6705b5 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 13:15:07 +1000 Subject: [PATCH 16/35] GPU/HW: Fix incorrect sampling at 1x with TC --- src/core/gpu_hw_shadergen.cpp | 16 ++++++++++------ src/core/shader_cache_version.h | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 997cd86e9..e85738c17 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -880,13 +880,17 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) float4 SampleFromPageTexture(float2 coords) { // Cached textures. -#if UPSCALED == 0 - float2 fpart = coords - roundEven(coords); -#else - float2 fpart = frac(coords); -#endif uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); - coords = (float2(icoord) + fpart) * (1.0f / 256.0f); +#if UPSCALED + float2 fpart = frac(coords); + coords = (float2(icoord) + fpart); +#else + // Drop fractional part. + coords = float2(icoord); +#endif + + // Normalize. 
+ coords = coords * (1.0f / 256.0f); return SAMPLE_TEXTURE(samp0, coords); } diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index 9d926866a..89fec6d32 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -5,4 +5,4 @@ #include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 20; +static constexpr u32 SHADER_CACHE_VERSION = 21; From a804801a1b148a357314c15c3a10388f98aece00 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 13:23:39 +1000 Subject: [PATCH 17/35] GPU/HW: Support filtering with texture cache --- src/core/gpu_hw_shadergen.cpp | 59 +++++++++++++++++++-------------- src/core/shader_cache_version.h | 2 +- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index e85738c17..64cdcb1ef 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -822,11 +822,34 @@ uint2 FloatToIntegerCoords(float2 coords) return uint2((UPSCALED == 0 || FORCE_ROUND_TEXCOORDS != 0) ? roundEven(coords) : floor(coords)); } -#if !PAGE_TEXTURE +#if PAGE_TEXTURE + +float4 SampleFromPageTexture(float2 coords) +{ + // Cached textures. + uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); +#if UPSCALED + float2 fpart = frac(coords); + coords = (float2(icoord) + fpart); +#else + // Drop fractional part. + coords = float2(icoord); +#endif + + // Normalize. + coords = coords * (1.0f / 256.0f); + return SAMPLE_TEXTURE(samp0, coords); +} + +#endif + +#if !PAGE_TEXTURE || TEXTURE_FILTERING float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) { - #if PALETTE + #if PAGE_TEXTURE + return SampleFromPageTexture(coords); + #elif PALETTE uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); uint2 vicoord; @@ -875,26 +898,7 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) #endif } -#else - -float4 SampleFromPageTexture(float2 coords) -{ - // Cached textures. 
- uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); -#if UPSCALED - float2 fpart = frac(coords); - coords = (float2(icoord) + fpart); -#else - // Drop fractional part. - coords = float2(icoord); -#endif - - // Normalize. - coords = coords * (1.0f / 256.0f); - return SAMPLE_TEXTURE(samp0, coords); -} - -#endif +#endif // !PAGE_TEXTURE || TEXTURE_FILTERING #endif // TEXTURED )"; @@ -902,6 +906,9 @@ float4 SampleFromPageTexture(float2 coords) const u32 num_fragment_outputs = use_rov ? 0 : (use_dual_source ? 2 : 1); if (textured && page_texture) { + if (texture_filtering != GPUTextureFilter::Nearest) + WriteBatchTextureFilter(ss, texture_filtering); + if (uv_limits) { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "float4 v_uv_limits"}}, true, num_fragment_outputs, @@ -960,7 +967,7 @@ float4 SampleFromPageTexture(float2 coords) #if TEXTURED float4 texcol; - #if PAGE_TEXTURE + #if PAGE_TEXTURE && !TEXTURE_FILTERING #if UV_LIMITS texcol = SampleFromPageTexture(clamp(v_tex0, v_uv_limits.xy, v_uv_limits.zw)); #else @@ -971,7 +978,11 @@ float4 SampleFromPageTexture(float2 coords) ialpha = 1.0; #elif TEXTURE_FILTERING - FilteredSampleFromVRAM(v_texpage, v_tex0, v_uv_limits, texcol, ialpha); + #if PAGE_TEXTURE + FilteredSampleFromVRAM(int2(0, 0), v_tex0, v_uv_limits, texcol, ialpha); + #else + FilteredSampleFromVRAM(v_texpage, v_tex0, v_uv_limits, texcol, ialpha); + #endif if (ialpha < 0.5) discard; #else diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index 89fec6d32..95a2b141b 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -5,4 +5,4 @@ #include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 21; +static constexpr u32 SHADER_CACHE_VERSION = 22; From dec468966c70c7e9575cabea8ed1d4610a1d4f2a Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 15:39:30 +1000 Subject: [PATCH 18/35] GPU/HW: Fix sprite mode triggering on screen-aligned UVs Some polygons just end 
up being perfect. For example, Croc 2 minecart level. --- src/core/gpu_hw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index b481929fe..3d32b949c 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -2551,7 +2551,7 @@ void GPU_HW::LoadVertices() if (m_resolution_scale > 1 && !is_3d && rc.quad_polygon) HandleFlippedQuadTextureCoordinates(vertices.data()); else if (m_allow_sprite_mode) - SetBatchSpriteMode((pgxp && !is_3d) || IsPossibleSpritePolygon(vertices.data())); + SetBatchSpriteMode(pgxp ? !is_3d : IsPossibleSpritePolygon(vertices.data())); if (m_sw_renderer) { From b180b26728aa8d98a303ef992fc483073826d914 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 16:22:31 +1000 Subject: [PATCH 19/35] GPU: Rework "All Borders" cropping to be aspect correct The "All Borders" crop mode was previously creating an aspect ratio that was completely incorrect when using modes outside of Auto/4:3. We now scale the aspect ratio relative to the PAL/NTSC aspect ratio to account for this, regardless of how much of a border the game configures. Overscan cropping also produced an incorrect aspect ratio outside of 4:3 mode, resulting in minor horizontal stretching. It is now correct, however, this results in black borders being added in 16:9 for most games. To remove these borders, you have two options: - Use the "Stretch to Fill" aspect ratio. This will scale the GTE aspect ratio to fill the screen. - Use the "Only Overscan Area (Aspect Uncorrected)" crop mode. This mode retains the "old" behaviour, resulting in a stretched image. 
--- src/core/fullscreen_ui.cpp | 2 +- src/core/gpu.cpp | 80 +++++++++++++------ src/core/gpu.h | 1 + src/core/gte.cpp | 40 ++-------- src/core/gte.h | 4 +- src/core/host.cpp | 16 ++-- src/core/settings.cpp | 28 +------ src/core/system.cpp | 56 ++++++++++--- src/core/system_private.h | 7 +- src/core/types.h | 3 +- src/duckstation-qt/graphicssettingswidget.cpp | 2 +- src/duckstation-qt/mainwindow.cpp | 2 +- 12 files changed, 131 insertions(+), 110 deletions(-) diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index e3cbf1844..6b3c97e2c 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -4323,7 +4323,7 @@ void FullscreenUI::DrawGraphicsSettingsPage() DrawEnumSetting(bsi, FSUI_ICONSTR(ICON_FA_CROP_ALT, "Crop Mode"), FSUI_CSTR("Determines how much of the area typically not visible on a consumer TV set to crop/hide."), "Display", "CropMode", Settings::DEFAULT_DISPLAY_CROP_MODE, &Settings::ParseDisplayCropMode, - &Settings::GetDisplayCropModeName, &Settings::GetDisplayCropModeDisplayName, DisplayCropMode::Count); + &Settings::GetDisplayCropModeName, &Settings::GetDisplayCropModeDisplayName, DisplayCropMode::MaxCount); DrawEnumSetting( bsi, FSUI_ICONSTR(ICON_FA_EXPAND, "Scaling"), diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 284218a45..4340c2042 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -594,45 +594,62 @@ float GPU::ComputeVerticalFrequency() const float GPU::ComputeDisplayAspectRatio() const { if (g_settings.debugging.show_vram) - { return static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT); - } - else if (g_settings.display_force_4_3_for_24bit && m_GPUSTAT.display_area_color_depth_24) - { + + // Display off => Doesn't matter. + if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0) return 4.0f / 3.0f; - } - else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Auto) + + // PAR 1:1 is not corrected. 
+ if (g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1) + return static_cast(m_crtc_state.display_width) / static_cast(m_crtc_state.display_height); + + float ar = 4.0f / 3.0f; + if (!g_settings.display_force_4_3_for_24bit || !m_GPUSTAT.display_area_color_depth_24) { - const CRTCState& cs = m_crtc_state; - float relative_width = static_cast(cs.horizontal_visible_end - cs.horizontal_visible_start); - float relative_height = static_cast(cs.vertical_visible_end - cs.vertical_visible_start); - - if (relative_width <= 0 || relative_height <= 0) - return 4.0f / 3.0f; - - if (m_GPUSTAT.pal_mode) + if (g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow && g_gpu_device->HasMainSwapChain()) { - relative_width /= static_cast(PAL_HORIZONTAL_ACTIVE_END - PAL_HORIZONTAL_ACTIVE_START); - relative_height /= static_cast(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START); + // Match window has already been corrected. + return static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()) / + static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); + } + else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Custom) + { + ar = static_cast(g_settings.display_aspect_ratio_custom_numerator) / + static_cast(g_settings.display_aspect_ratio_custom_denominator); } else { - relative_width /= static_cast(NTSC_HORIZONTAL_ACTIVE_END - NTSC_HORIZONTAL_ACTIVE_START); - relative_height /= static_cast(NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START); + ar = g_settings.GetDisplayAspectRatioValue(); } - return (relative_width / relative_height) * (4.0f / 3.0f); } - else if (g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1) - { - if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0) - return 4.0f / 3.0f; - return static_cast(m_crtc_state.display_width) / static_cast(m_crtc_state.display_height); + return ComputeAspectRatioCorrection() * ar; +} + +float GPU::ComputeAspectRatioCorrection() const +{ + const CRTCState& cs = 
m_crtc_state; + float relative_width = static_cast(cs.horizontal_visible_end - cs.horizontal_visible_start); + float relative_height = static_cast(cs.vertical_visible_end - cs.vertical_visible_start); + if (relative_width <= 0 || relative_height <= 0 || + g_settings.display_crop_mode == DisplayCropMode::OverscanUncorrected) + { + return 1.0f; + } + + if (m_GPUSTAT.pal_mode) + { + relative_width /= static_cast(PAL_HORIZONTAL_ACTIVE_END - PAL_HORIZONTAL_ACTIVE_START); + relative_height /= static_cast(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START); } else { - return g_settings.GetDisplayAspectRatioValue(); + relative_width /= static_cast(NTSC_HORIZONTAL_ACTIVE_END - NTSC_HORIZONTAL_ACTIVE_START); + relative_height /= static_cast(NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START); } + + return (relative_width / relative_height); } void GPU::UpdateCRTCConfig() @@ -725,6 +742,10 @@ void GPU::UpdateCRTCDisplayParameters() (std::min(cs.regs.X2, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; const u16 vertical_display_start = std::min(cs.regs.Y1, vertical_total); const u16 vertical_display_end = std::min(cs.regs.Y2, vertical_total); + const u16 old_horizontal_visible_start = cs.horizontal_visible_start; + const u16 old_horizontal_visible_end = cs.horizontal_visible_end; + const u16 old_vertical_visible_start = cs.vertical_visible_start; + const u16 old_vertical_visible_end = cs.vertical_visible_end; if (m_GPUSTAT.pal_mode) { @@ -739,6 +760,7 @@ void GPU::UpdateCRTCDisplayParameters() break; case DisplayCropMode::Overscan: + case DisplayCropMode::OverscanUncorrected: cs.horizontal_visible_start = static_cast(std::max(0, 628 + g_settings.display_active_start_offset)); cs.horizontal_visible_end = static_cast(std::max(cs.horizontal_visible_start, 3188 + g_settings.display_active_end_offset)); @@ -776,6 +798,7 @@ void GPU::UpdateCRTCDisplayParameters() break; case DisplayCropMode::Overscan: + case DisplayCropMode::OverscanUncorrected: 
cs.horizontal_visible_start = static_cast(std::max(0, 608 + g_settings.display_active_start_offset)); cs.horizontal_visible_end = static_cast(std::max(cs.horizontal_visible_start, 3168 + g_settings.display_active_end_offset)); @@ -872,6 +895,13 @@ void GPU::UpdateCRTCDisplayParameters() << height_shift; } + if (old_horizontal_visible_start != cs.horizontal_visible_start || + old_horizontal_visible_end != cs.horizontal_visible_end || + old_vertical_visible_start != cs.vertical_visible_start || old_vertical_visible_end != cs.vertical_visible_end) + { + System::UpdateGTEAspectRatio(); + } + if (cs.display_vram_width != old_vram_width || cs.display_vram_height != old_vram_height) UpdateResolutionScale(); } diff --git a/src/core/gpu.h b/src/core/gpu.h index 5c18c9f98..55c3b8d4c 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -184,6 +184,7 @@ public: float ComputeHorizontalFrequency() const; float ComputeVerticalFrequency() const; float ComputeDisplayAspectRatio() const; + float ComputeAspectRatioCorrection() const; static std::unique_ptr CreateHardwareRenderer(Error* error); static std::unique_ptr CreateSoftwareRenderer(Error* error); diff --git a/src/core/gte.cpp b/src/core/gte.cpp index f6d340f41..6b0dcb1ff 100644 --- a/src/core/gte.cpp +++ b/src/core/gte.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "gte.h" - #include "cpu_core.h" #include "cpu_core_private.h" #include "cpu_pgxp.h" @@ -227,47 +226,22 @@ bool GTE::DoState(StateWrapper& sw) return !sw.HasError(); } -void GTE::UpdateAspectRatio(u32 window_width, u32 window_height) +void GTE::SetAspectRatio(DisplayAspectRatio aspect, u32 custom_num, u32 custom_denom) { - if (!g_settings.gpu_widescreen_hack) - { - s_config.aspect_ratio = DisplayAspectRatio::R4_3; + s_config.aspect_ratio = aspect; + if (aspect != DisplayAspectRatio::Custom) return; - } - - s_config.aspect_ratio = g_settings.display_aspect_ratio; - - u32 num, denom; - switch (s_config.aspect_ratio) - { - case 
DisplayAspectRatio::MatchWindow: - { - num = window_width; - denom = window_height; - } - break; - - case DisplayAspectRatio::Custom: - { - num = g_settings.display_aspect_ratio_custom_numerator; - denom = g_settings.display_aspect_ratio_custom_denominator; - } - break; - - default: - return; - } // (4 / 3) / (num / denom) => gcd((4 * denom) / (3 * num)) - const u32 x = 4u * denom; - const u32 y = 3u * num; + const u32 x = 4u * custom_denom; + const u32 y = 3u * custom_num; const u32 gcd = std::gcd(x, y); s_config.custom_aspect_ratio_numerator = x / gcd; s_config.custom_aspect_ratio_denominator = y / gcd; s_config.custom_aspect_ratio_f = - static_cast((4.0 / 3.0) / (static_cast(num) / static_cast(denom))); + static_cast((4.0 / 3.0) / (static_cast(custom_num) / static_cast(custom_denom))); } u32 GTE::ReadRegister(u32 index) @@ -709,7 +683,6 @@ void GTE::RTPS(const s16 V[3], u8 shift, bool lm, bool last) break; case DisplayAspectRatio::Custom: - case DisplayAspectRatio::MatchWindow: Sx = ((((s64(result) * s64(REGS.IR1)) * s64(s_config.custom_aspect_ratio_numerator)) / s64(s_config.custom_aspect_ratio_denominator)) + s64(REGS.OFX)); @@ -764,7 +737,6 @@ void GTE::RTPS(const s16 V[3], u8 shift, bool lm, bool last) switch (s_config.aspect_ratio) { - case DisplayAspectRatio::MatchWindow: case DisplayAspectRatio::Custom: precise_x = precise_x * s_config.custom_aspect_ratio_f; break; diff --git a/src/core/gte.h b/src/core/gte.h index 444b39c10..eeb702251 100644 --- a/src/core/gte.h +++ b/src/core/gte.h @@ -6,12 +6,14 @@ class StateWrapper; +enum class DisplayAspectRatio : u8; + namespace GTE { void Initialize(); void Reset(); bool DoState(StateWrapper& sw); -void UpdateAspectRatio(u32 window_width, u32 window_height); +void SetAspectRatio(DisplayAspectRatio aspect, u32 custom_num, u32 custom_denom); // control registers are offset by +32 u32 ReadRegister(u32 index); diff --git a/src/core/host.cpp b/src/core/host.cpp index 1d26548be..22a23be10 100644 --- a/src/core/host.cpp 
+++ b/src/core/host.cpp @@ -465,13 +465,11 @@ void Host::UpdateDisplayWindow(bool fullscreen) return; } - const u32 new_width = g_gpu_device->GetMainSwapChain()->GetWidth(); - const u32 new_height = g_gpu_device->GetMainSwapChain()->GetHeight(); - const float f_width = static_cast(new_width); - const float f_height = static_cast(new_height); + const float f_width = static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()); + const float f_height = static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); ImGuiManager::WindowResized(f_width, f_height); InputManager::SetDisplayWindowSize(f_width, f_height); - System::DisplayWindowResized(new_width, new_height); + System::DisplayWindowResized(); } void Host::ResizeDisplayWindow(s32 width, s32 height, float scale) @@ -489,13 +487,11 @@ void Host::ResizeDisplayWindow(s32 width, s32 height, float scale) return; } - const u32 new_width = g_gpu_device->GetMainSwapChain()->GetWidth(); - const u32 new_height = g_gpu_device->GetMainSwapChain()->GetHeight(); - const float f_width = static_cast(new_width); - const float f_height = static_cast(new_height); + const float f_width = static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()); + const float f_height = static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); ImGuiManager::WindowResized(f_width, f_height); InputManager::SetDisplayWindowSize(f_width, f_height); - System::DisplayWindowResized(new_width, new_height); + System::DisplayWindowResized(); } void Host::ReleaseGPUDevice() diff --git a/src/core/settings.cpp b/src/core/settings.cpp index f3bce23b7..cbecae52e 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -1614,10 +1614,11 @@ const char* Settings::GetDisplayDeinterlacingModeDisplayName(DisplayDeinterlacin "DisplayDeinterlacingMode"); } -static constexpr const std::array s_display_crop_mode_names = {"None", "Overscan", "Borders"}; +static constexpr const std::array s_display_crop_mode_names = {"None", "Overscan", "OverscanUncorrected", 
"Borders"}; static constexpr const std::array s_display_crop_mode_display_names = { TRANSLATE_DISAMBIG_NOOP("Settings", "None", "DisplayCropMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "Only Overscan Area", "DisplayCropMode"), + TRANSLATE_DISAMBIG_NOOP("Settings", "Only Overscan Area (Aspect Uncorrected)", "DisplayCropMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "All Borders", "DisplayCropMode"), }; @@ -1662,7 +1663,7 @@ static constexpr const std::array s_display_aspect_ratio_names = { "20:9", "PAR 1:1"}; static constexpr const std::array s_display_aspect_ratio_values = { - -1.0f, -1.0f, -1.0f, 4.0f / 3.0f, 16.0f / 9.0f, 19.0f / 9.0f, 20.0f / 9.0f, -1.0f}; + 4.0f / 3.0f, 4.0f / 3.0f, 4.0f / 3.0f, 4.0f / 3.0f, 16.0f / 9.0f, 19.0f / 9.0f, 20.0f / 9.0f, -1.0f}; std::optional Settings::ParseDisplayAspectRatio(const char* str) { @@ -1691,28 +1692,7 @@ const char* Settings::GetDisplayAspectRatioDisplayName(DisplayAspectRatio ar) float Settings::GetDisplayAspectRatioValue() const { - switch (display_aspect_ratio) - { - case DisplayAspectRatio::MatchWindow: - { - if (!g_gpu_device || !g_gpu_device->HasMainSwapChain()) - return s_display_aspect_ratio_values[static_cast(DEFAULT_DISPLAY_ASPECT_RATIO)]; - - return static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()) / - static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); - } - - case DisplayAspectRatio::Custom: - { - return static_cast(display_aspect_ratio_custom_numerator) / - static_cast(display_aspect_ratio_custom_denominator); - } - - default: - { - return s_display_aspect_ratio_values[static_cast(display_aspect_ratio)]; - } - } + return s_display_aspect_ratio_values[static_cast(display_aspect_ratio)]; } static constexpr const std::array s_display_alignment_names = {"LeftOrTop", "Center", "RightOrBottom"}; diff --git a/src/core/system.cpp b/src/core/system.cpp index 8ee6aa5e9..ccc70d709 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -1941,9 +1941,6 @@ bool System::Initialize(std::unique_ptr disc, 
DiscRegion disc_region, b if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer, false, fullscreen, error)) return false; - if (GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain()) - GTE::UpdateAspectRatio(swap_chain->GetWidth(), swap_chain->GetHeight()); - if (g_settings.gpu_pgxp_enable) CPU::PGXP::Initialize(); @@ -1965,6 +1962,7 @@ bool System::Initialize(std::unique_ptr disc, DiscRegion disc_region, b s_state.cpu_thread_handle = Threading::ThreadHandle::GetForCallingThread(); + UpdateGTEAspectRatio(); UpdateThrottlePeriod(); UpdateMemorySaveStateSettings(); @@ -4402,8 +4400,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) (g_settings.display_aspect_ratio_custom_numerator != old_settings.display_aspect_ratio_custom_numerator || g_settings.display_aspect_ratio_custom_denominator != old_settings.display_aspect_ratio_custom_denominator))) { - if (GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain()) - GTE::UpdateAspectRatio(swap_chain->GetWidth(), swap_chain->GetHeight()); + UpdateGTEAspectRatio(); } if (g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable || @@ -5651,8 +5648,7 @@ void System::ToggleWidescreen() Settings::GetDisplayAspectRatioDisplayName(g_settings.display_aspect_ratio), 5.0f)); } - if (GPUSwapChain* swap_chain = g_gpu_device->GetMainSwapChain()) - GTE::UpdateAspectRatio(swap_chain->GetWidth(), swap_chain->GetHeight()); + UpdateGTEAspectRatio(); } void System::ToggleSoftwareRendering() @@ -5698,13 +5694,12 @@ void System::RequestDisplaySize(float scale /*= 0.0f*/) Host::RequestResizeHostDisplay(static_cast(requested_width), static_cast(requested_height)); } -void System::DisplayWindowResized(u32 width, u32 height) +void System::DisplayWindowResized() { if (!IsValid()) return; - if (g_settings.gpu_widescreen_hack && g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow) - GTE::UpdateAspectRatio(width, height); + UpdateGTEAspectRatio(); 
g_gpu->RestoreDeviceContext(); g_gpu->UpdateResolutionScale(); @@ -5719,6 +5714,47 @@ void System::DisplayWindowResized(u32 width, u32 height) } } +void System::UpdateGTEAspectRatio() +{ + if (!IsValid()) + return; + + DisplayAspectRatio gte_ar = g_settings.display_aspect_ratio; + u32 custom_num = 0; + u32 custom_denom = 0; + if (!g_settings.gpu_widescreen_hack) + { + // No WS hack => no correction. + gte_ar = DisplayAspectRatio::R4_3; + } + else if (gte_ar == DisplayAspectRatio::Custom) + { + // Custom AR => use values. + custom_num = g_settings.display_aspect_ratio_custom_numerator; + custom_denom = g_settings.display_aspect_ratio_custom_denominator; + } + else if (gte_ar == DisplayAspectRatio::MatchWindow) + { + if (const GPUSwapChain* main_swap_chain = g_gpu_device->GetMainSwapChain()) + { + // Pre-apply the native aspect ratio correction to the window size. + // MatchWindow does not correct the display aspect ratio, so we need to apply it here. + const float correction = g_gpu->ComputeAspectRatioCorrection(); + custom_num = + static_cast(std::max(std::round(static_cast(main_swap_chain->GetWidth()) / correction), 1.0f)); + custom_denom = std::max(main_swap_chain->GetHeight(), 1u); + gte_ar = DisplayAspectRatio::Custom; + } + else + { + // Assume 4:3 until we get a window. + gte_ar = DisplayAspectRatio::R4_3; + } + } + + GTE::SetAspectRatio(gte_ar, custom_num, custom_denom); +} + bool System::PresentDisplay(bool explicit_present, u64 present_time) { // acquire for IO.MousePos. diff --git a/src/core/system_private.h b/src/core/system_private.h index e47baf4c0..e79882ff3 100644 --- a/src/core/system_private.h +++ b/src/core/system_private.h @@ -33,8 +33,11 @@ void FrameDone(); GPUVSyncMode GetEffectiveVSyncMode(); bool ShouldAllowPresentThrottle(); -/// Call when host display size changes, use with "match display" aspect ratio setting. -void DisplayWindowResized(u32 width, u32 height); +/// Call when host display size changes. 
+void DisplayWindowResized(); + +/// Updates the internal GTE aspect ratio. Use with "match display" aspect ratio setting. +void UpdateGTEAspectRatio(); /// Performs mandatory hardware checks. bool PerformEarlyHardwareChecks(Error* error); diff --git a/src/core/types.h b/src/core/types.h index fae40cffa..4ebcc2dfb 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -141,8 +141,9 @@ enum class DisplayCropMode : u8 { None, Overscan, + OverscanUncorrected, Borders, - Count + MaxCount }; enum class DisplayAspectRatio : u8 diff --git a/src/duckstation-qt/graphicssettingswidget.cpp b/src/duckstation-qt/graphicssettingswidget.cpp index c0b52fd92..a3c540a50 100644 --- a/src/duckstation-qt/graphicssettingswidget.cpp +++ b/src/duckstation-qt/graphicssettingswidget.cpp @@ -671,7 +671,7 @@ void GraphicsSettingsWidget::setupAdditionalUi() QString::fromUtf8(Settings::GetDisplayDeinterlacingModeDisplayName(static_cast(i)))); } - for (u32 i = 0; i < static_cast(DisplayCropMode::Count); i++) + for (u32 i = 0; i < static_cast(DisplayCropMode::MaxCount); i++) { m_ui.displayCropMode->addItem( QString::fromUtf8(Settings::GetDisplayCropModeDisplayName(static_cast(i)))); diff --git a/src/duckstation-qt/mainwindow.cpp b/src/duckstation-qt/mainwindow.cpp index c0cd615ea..d01ed6eda 100644 --- a/src/duckstation-qt/mainwindow.cpp +++ b/src/duckstation-qt/mainwindow.cpp @@ -2030,7 +2030,7 @@ void MainWindow::connectSignals() Settings::DEFAULT_GPU_RENDERER, GPURenderer::Count); SettingWidgetBinder::BindMenuToEnumSetting( m_ui.menuCropMode, "Display", "CropMode", &Settings::ParseDisplayCropMode, &Settings::GetDisplayCropModeName, - &Settings::GetDisplayCropModeDisplayName, Settings::DEFAULT_DISPLAY_CROP_MODE, DisplayCropMode::Count); + &Settings::GetDisplayCropModeDisplayName, Settings::DEFAULT_DISPLAY_CROP_MODE, DisplayCropMode::MaxCount); SettingWidgetBinder::BindMenuToEnumSetting(m_ui.menuLogLevel, "Logging", "LogLevel", &Settings::ParseLogLevelName, &Settings::GetLogLevelName, 
&Settings::GetLogLevelDisplayName, Settings::DEFAULT_LOG_LEVEL, Log::Level::MaxCount); From 982dccb99063f21866d74c6c8ad6918aa367790f Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 16:30:23 +1000 Subject: [PATCH 20/35] D3D12Device: Clang warning fixes --- src/util/d3d12_device.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index f0e770010..998882eab 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -2260,13 +2260,16 @@ void D3D12Device::RenderTextureMipmap(D3D12Texture* texture, u32 dst_level, u32 SubmitCommandList(false, "Allocate SRV for RenderTextureMipmap()"); // Setup views. This will be a partial view for the SRV. - D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {texture->GetDXGIFormat(), D3D12_RTV_DIMENSION_TEXTURE2D}; - rtv_desc.Texture2D = {dst_level, 0u}; + const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {.Format = texture->GetDXGIFormat(), + .ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D, + .Texture2D = {.MipSlice = dst_level, .PlaneSlice = 0}}; m_device->CreateRenderTargetView(texture->GetResource(), &rtv_desc, rtv_handle); - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {texture->GetDXGIFormat(), D3D12_SRV_DIMENSION_TEXTURE2D, - D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; - srv_desc.Texture2D = {src_level, 1u, 0u, 0.0f}; + const D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { + .Format = texture->GetDXGIFormat(), + .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D, + .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Texture2D = {.MostDetailedMip = src_level, .MipLevels = 1, .PlaneSlice = 0, .ResourceMinLODClamp = 0.0f}}; m_device->CreateShaderResourceView(texture->GetResource(), &srv_desc, srv_handle); // *now* we don't have to worry about running out of anything. 
@@ -2282,10 +2285,10 @@ void D3D12Device::RenderTextureMipmap(D3D12Texture* texture, u32 dst_level, u32 D3D12_RESOURCE_STATE_RENDER_TARGET); } - const D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {.cpuDescriptor = rtv_handle, - .BeginningAccess = - D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, - .EndingAccess = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE}; + const D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = { + .cpuDescriptor = rtv_handle, + .BeginningAccess = {.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, .Clear = {}}, + .EndingAccess = {.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, .Resolve = {}}}; cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE); const D3D12_VIEWPORT vp = {0.0f, 0.0f, static_cast(dst_width), static_cast(dst_height), 0.0f, 1.0f}; From 4a650fcce2a8d1df95d06c0df6fcc1975a412a42 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 18:44:33 +1000 Subject: [PATCH 21/35] GPUDevice: Fix VRAM usage calculation for compressed textures --- src/util/gpu_texture.cpp | 52 +++++++++++++++++++++++++++------------- src/util/gpu_texture.h | 2 ++ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/util/gpu_texture.cpp b/src/util/gpu_texture.cpp index c10e6faff..c29e5482c 100644 --- a/src/util/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -68,7 +68,7 @@ u32 GPUTexture::GetBlockSize() const u32 GPUTexture::GetBlockSize(Format format) { if (format >= Format::BC1 && format <= Format::BC7) - return 4; + return COMPRESSED_TEXTURE_BLOCK_SIZE; else return 1; } @@ -77,7 +77,7 @@ u32 GPUTexture::CalcUploadPitch(Format format, u32 width) { // convert to blocks if (format >= Format::BC1 && format <= Format::BC7) - width = Common::AlignUpPow2(width, 4) / 4; + width = Common::AlignUpPow2(width, COMPRESSED_TEXTURE_BLOCK_SIZE) / COMPRESSED_TEXTURE_BLOCK_SIZE; return width * GetPixelSize(format); } @@ -96,7 +96,7 @@ u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch) { const u32 
pixel_size = GetPixelSize(format); if (IsCompressedFormat(format)) - return (Common::AlignUpPow2(pitch, pixel_size) / pixel_size) * 4; + return (Common::AlignUpPow2(pitch, pixel_size) / pixel_size) * COMPRESSED_TEXTURE_BLOCK_SIZE; else return pitch / pixel_size; } @@ -133,8 +133,8 @@ void GPUTexture::CopyTextureDataForUpload(u32 width, u32 height, Format format, { if (IsCompressedFormat(format)) { - const u32 blocks_wide = Common::AlignUpPow2(width, 4) / 4; - const u32 blocks_high = Common::AlignUpPow2(height, 4) / 4; + const u32 blocks_wide = Common::AlignUpPow2(width, COMPRESSED_TEXTURE_BLOCK_SIZE) / COMPRESSED_TEXTURE_BLOCK_SIZE; + const u32 blocks_high = Common::AlignUpPow2(height, COMPRESSED_TEXTURE_BLOCK_SIZE) / COMPRESSED_TEXTURE_BLOCK_SIZE; const u32 block_size = GetPixelSize(format); StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, block_size * blocks_wide, blocks_high); } @@ -208,21 +208,41 @@ std::array GPUTexture::GetUNormClearColor() const size_t GPUTexture::GetVRAMUsage() const { - if (m_levels == 1) [[likely]] - return ((static_cast(m_width * m_height) * GetPixelSize(m_format)) * m_layers * m_samples); - const size_t ps = GetPixelSize(m_format) * m_layers * m_samples; - u32 width = m_width; - u32 height = m_height; - size_t ts = 0; - for (u32 i = 0; i < m_levels; i++) + size_t mem; + + // Max width/height is 65535, 65535*65535 as u32 is okay. + if (IsCompressedFormat()) { - width = (width > 1) ? (width / 2) : width; - height = (height > 1) ? (height / 2) : height; - ts += static_cast(width * height) * ps; +#define COMPRESSED_SIZE(width, height) \ + (static_cast((Common::AlignUpPow2(width, COMPRESSED_TEXTURE_BLOCK_SIZE) / COMPRESSED_TEXTURE_BLOCK_SIZE) * \ + (Common::AlignUpPow2(height, COMPRESSED_TEXTURE_BLOCK_SIZE) / COMPRESSED_TEXTURE_BLOCK_SIZE)) * \ + ps) + + u32 width = m_width, height = m_height; + mem = COMPRESSED_SIZE(width, height); + for (u32 i = 1; i < m_levels; i++) + { + width = (width > 1) ? 
(width / 2) : width; + height = (height > 1) ? (height / 2) : height; + mem += COMPRESSED_SIZE(width, height); + } + +#undef COMPRESSED_SIZE + } + else + { + u32 width = m_width, height = m_height; + mem = static_cast(width * height) * ps; + for (u32 i = 1; i < m_levels; i++) + { + width = (width > 1) ? (width / 2) : width; + height = (height > 1) ? (height / 2) : height; + mem += static_cast(width * height) * ps; + } } - return ts; + return mem; } u32 GPUTexture::GetPixelSize(GPUTexture::Format format) diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index 700d6dca3..c264cb73b 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -188,6 +188,8 @@ public: protected: GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format, Flags flags); + static constexpr u32 COMPRESSED_TEXTURE_BLOCK_SIZE = 4; + u16 m_width = 0; u16 m_height = 0; u8 m_layers = 0; From 57595c47af8139d6e2b8459d65b24230abc263f5 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 25 Nov 2024 19:37:26 +1000 Subject: [PATCH 22/35] GPU/TextureCache: Move replacement cache onto the GPU And enforce a maximum. Should sort out the upload spam. 
--- src/core/gpu_hw.cpp | 37 +----- src/core/gpu_hw.h | 4 +- src/core/gpu_hw_shadergen.cpp | 16 +++ src/core/gpu_hw_shadergen.h | 1 + src/core/gpu_hw_texture_cache.cpp | 186 +++++++++++++++++++++++------- src/core/gpu_hw_texture_cache.h | 2 +- src/core/settings.cpp | 6 + src/core/settings.h | 2 + src/util/gpu_texture.h | 1 + 9 files changed, 177 insertions(+), 78 deletions(-) diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 3d32b949c..81f606e84 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -1545,8 +1545,8 @@ bool GPU_HW::CompilePipelines(Error* error) // VRAM write replacement { - std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateCopyFragmentShader(), error); + std::unique_ptr fs = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateVRAMReplacementBlitFragmentShader(), error); if (!fs) return false; @@ -2924,41 +2924,14 @@ void GPU_HW::LoadVertices() } } -bool GPU_HW::BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacementImage* tex, u32 dst_x, u32 dst_y, - u32 width, u32 height) +bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height) { - if (!m_vram_replacement_texture || m_vram_replacement_texture->GetWidth() < tex->GetWidth() || - m_vram_replacement_texture->GetHeight() < tex->GetHeight() || g_gpu_device->GetFeatures().prefer_unused_textures) - { - g_gpu_device->RecycleTexture(std::move(m_vram_replacement_texture)); - - if (!(m_vram_replacement_texture = g_gpu_device->FetchTexture( - tex->GetWidth(), tex->GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, - GPUTexture::Flags::None, tex->GetPixels(), tex->GetPitch()))) - { - return false; - } - } - else - { - if (!m_vram_replacement_texture->Update(0, 0, tex->GetWidth(), tex->GetHeight(), tex->GetPixels(), tex->GetPitch())) - { - ERROR_LOG("Update {}x{} texture failed.", width, height); - 
return false; - } - } - GL_SCOPE_FMT("BlitVRAMReplacementTexture() {}x{} to {},{} => {},{} ({}x{})", tex->GetWidth(), tex->GetHeight(), dst_x, dst_y, dst_x + width, dst_y + height, width, height); - const float src_rect[4] = { - 0.0f, 0.0f, static_cast(tex->GetWidth()) / static_cast(m_vram_replacement_texture->GetWidth()), - static_cast(tex->GetHeight()) / static_cast(m_vram_replacement_texture->GetHeight())}; - - g_gpu_device->SetTextureSampler(0, m_vram_replacement_texture.get(), g_gpu_device->GetLinearSampler()); + g_gpu_device->SetTextureSampler(0, tex, g_gpu_device->GetLinearSampler()); g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get()); g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height); - g_gpu_device->PushUniformBuffer(src_rect, sizeof(src_rect)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); @@ -3381,7 +3354,7 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b } else { - const GPUTextureCache::TextureReplacementImage* rtex = GPUTextureCache::GetVRAMReplacement(width, height, data); + GPUTexture* rtex = GPUTextureCache::GetVRAMReplacement(width, height, data); if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale)) { diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 53fb6da01..77595b5ba 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -228,8 +228,7 @@ private: void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, bool check_mask, const GSVector4i bounds); - bool BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacementImage* tex, u32 dst_x, u32 dst_y, u32 width, - u32 height); + bool BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); /// Expands a line into two triangles. 
void DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth); @@ -259,7 +258,6 @@ private: std::unique_ptr m_vram_read_texture; std::unique_ptr m_vram_readback_texture; std::unique_ptr m_vram_readback_download_texture; - std::unique_ptr m_vram_replacement_texture; std::unique_ptr m_vram_upload_buffer; std::unique_ptr m_vram_write_texture; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 64cdcb1ef..5179c5ed5 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1284,6 +1284,22 @@ float3 SampleVRAM24(uint2 icoords) return ss.str(); } +std::string GPU_HW_ShaderGen::GenerateVRAMReplacementBlitFragmentShader() const +{ + std::stringstream ss; + WriteHeader(ss); + DeclareTexture(ss, "samp0", 0); + DeclareFragmentEntryPoint(ss, 0, 1); + + ss << R"( +{ + o_col0 = SAMPLE_TEXTURE(samp0, v_tex0); +} +)"; + + return ss.str(); +} + std::string GPU_HW_ShaderGen::GenerateWireframeGeometryShader() const { std::stringstream ss; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 8894e484b..e26228fb3 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -32,6 +32,7 @@ public: std::string GenerateVRAMUpdateDepthFragmentShader(bool msaa) const; std::string GenerateVRAMExtractFragmentShader(u32 resolution_scale, u32 multisamples, bool color_24bit, bool depth_buffer) const; + std::string GenerateVRAMReplacementBlitFragmentShader() const; std::string GenerateAdaptiveDownsampleVertexShader() const; std::string GenerateAdaptiveDownsampleMipFragmentShader() const; diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index 3c06e6f3f..d76e2f9e1 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -16,6 +16,7 @@ #include "common/error.h" #include "common/file_system.h" #include "common/gsvector_formatter.h" +#include "common/heterogeneous_containers.h" #include "common/log.h" #include "common/path.h" #include 
"common/string_util.h" @@ -128,7 +129,7 @@ struct TextureReplacementSubImage { GSVector4i dst_rect; GSVector4i src_rect; - const TextureReplacementImage& image; + GPUTexture* texture; float scale_x; float scale_y; bool invert_alpha; @@ -229,7 +230,8 @@ struct DumpedTextureKeyHash } // namespace using HashCache = std::unordered_map; -using TextureCache = std::unordered_map; +using ReplacementImageCache = PreferUnorderedStringMap; +using GPUReplacementImageCache = PreferUnorderedStringMap, u32>>; using VRAMReplacementMap = std::unordered_map; using TextureReplacementMap = @@ -304,6 +306,8 @@ static void LoadTextureReplacementAliases(const ryml::ConstNodeRef& root, bool l bool load_texture_replacement_aliases); static const TextureReplacementImage* GetTextureReplacementImage(const std::string& path); +static GPUTexture* GetTextureReplacementGPUImage(const std::string& path); +static void CompactTextureReplacementGPUImages(); static void PreloadReplacementTextures(); static void PurgeUnreferencedTexturesFromCache(); @@ -529,7 +533,10 @@ struct GPUTextureCacheState TextureReplacementMap texture_page_texture_replacements; // TODO: Check the size, purge some when it gets too large. 
- TextureCache replacement_image_cache; + ReplacementImageCache replacement_image_cache; + GPUReplacementImageCache gpu_replacement_image_cache; + size_t gpu_replacement_image_cache_vram_usage = 0; + std::vector> gpu_replacement_image_cache_purge_list; std::unordered_set dumped_vram_writes; std::unordered_set dumped_textures; @@ -744,10 +751,18 @@ void GPUTextureCache::Shutdown() ClearHashCache(); DestroyPipelines(); s_state.replacement_texture_render_target.reset(); + s_state.gpu_replacement_image_cache_purge_list = {}; s_state.hash_cache_purge_list = {}; s_state.temp_vram_write_list = {}; s_state.track_vram_writes = false; + for (auto it = s_state.gpu_replacement_image_cache.begin(); it != s_state.gpu_replacement_image_cache.end();) + { + g_gpu_device->RecycleTexture(std::move(it->second.first)); + it = s_state.gpu_replacement_image_cache.erase(it); + } + s_state.gpu_replacement_image_cache_vram_usage = 0; + s_state.replacement_image_cache.clear(); s_state.vram_replacements.clear(); s_state.vram_write_texture_replacements.clear(); @@ -2150,6 +2165,8 @@ void GPUTextureCache::Compact() RemoveFromHashCache(s_state.hash_cache_purge_list[purge_index++].first); } } + + CompactTextureReplacementGPUImages(); } size_t GPUTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& k) const @@ -2463,8 +2480,7 @@ void GPUTextureCache::SetGameID(std::string game_id) ReloadTextureReplacements(false); } -const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetVRAMReplacement(u32 width, u32 height, - const void* pixels) +GPUTexture* GPUTextureCache::GetVRAMReplacement(u32 width, u32 height, const void* pixels) { const VRAMReplacementName hash = GetVRAMWriteHash(width, height, pixels); @@ -2472,7 +2488,7 @@ const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetVRAMReplacem if (it == s_state.vram_replacements.end()) return nullptr; - return GetTextureReplacementImage(it->second); + return GetTextureReplacementGPUImage(it->second); } bool 
GPUTextureCache::ShouldDumpVRAMWrite(u32 width, u32 height) @@ -2675,12 +2691,7 @@ void GPUTextureCache::GetVRAMWriteTextureReplacements(std::vectorsecond.first)) continue; - const TextureReplacementImage* image = GetTextureReplacementImage(it->second.second); - if (!image) - continue; - const TextureReplacementName& name = it->second.first; - const GSVector2 scale = GSVector2(GSVector2i(image->GetWidth(), image->GetHeight())) / GSVector2(name.GetSizeVec()); const GSVector4i rect_in_write_space = name.GetDestRect(); const GSVector4i rect_in_page_space = rect_in_write_space.sub32(offset_to_page_v); @@ -2700,7 +2711,12 @@ void GPUTextureCache::GetVRAMWriteTextureReplacements(std::vector(TEXTURE_PAGE_WIDTH)); DebugAssert(rect_in_page_space.height() <= static_cast(TEXTURE_PAGE_HEIGHT)); - replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), *image, scale.x, scale.y, + GPUTexture* texture = GetTextureReplacementGPUImage(it->second.second); + if (!texture) + continue; + + const GSVector2 scale = GSVector2(texture->GetSizeVec()) / GSVector2(name.GetSizeVec()); + replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), texture, scale.x, scale.y, name.IsSemitransparent()}); } } @@ -2755,12 +2771,12 @@ void GPUTextureCache::GetTexturePageTextureReplacements(std::vectorsecond.second); - if (!image) + GPUTexture* texture = GetTextureReplacementGPUImage(it->second.second); + if (!texture) continue; - const GSVector2 scale = GSVector2(GSVector2i(image->GetWidth(), image->GetHeight())) / GSVector2(name.GetSizeVec()); - replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), *image, scale.x, scale.y, + const GSVector2 scale = GSVector2(texture->GetSizeVec()) / GSVector2(name.GetSizeVec()); + replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), texture, scale.x, scale.y, name.IsSemitransparent()}); } } @@ -2986,6 +3002,95 @@ const 
GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetTextureRepla return &it->second; } +GPUTexture* GPUTextureCache::GetTextureReplacementGPUImage(const std::string& path) +{ + // Already in cache? + const auto git = s_state.gpu_replacement_image_cache.find(path); + if (git != s_state.gpu_replacement_image_cache.end()) + { + git->second.second = System::GetFrameNumber(); + return git->second.first.get(); + } + + // Need to upload it. + Error error; + std::unique_ptr tex; + + // Check CPU cache first. + const auto it = s_state.replacement_image_cache.find(path); + if (it != s_state.replacement_image_cache.end()) + { + tex = g_gpu_device->FetchAndUploadTextureImage(it->second, GPUTexture::Flags::None, &error); + } + else + { + // Need to load it. + Image cpu_image; + if (cpu_image.LoadFromFile(path.c_str(), &error)) + tex = g_gpu_device->FetchAndUploadTextureImage(cpu_image, GPUTexture::Flags::None, &error); + } + + if (!tex) + { + ERROR_LOG("Failed to load/upload '{}': {}", Path::GetFileName(path), error.GetDescription()); + return nullptr; + } + + const size_t vram_usage = tex->GetVRAMUsage(); + s_state.gpu_replacement_image_cache_vram_usage += vram_usage; + + VERBOSE_LOG("Uploaded '{}': {}x{} {} {:.2f} KB", Path::GetFileName(path), tex->GetWidth(), tex->GetHeight(), + GPUTexture::GetFormatName(tex->GetFormat()), static_cast(vram_usage) / 1024.0f); + + return s_state.gpu_replacement_image_cache.emplace(path, std::make_pair(std::move(tex), System::GetFrameNumber())) + .first->second.first.get(); +} + +void GPUTextureCache::CompactTextureReplacementGPUImages() +{ + // Instead of compacting to exactly the maximum, let's go down to the maximum less 16MB. + // That way we can hopefully avoid compacting again for a few frames. 
+ static constexpr size_t EXTRA_COMPACT_SIZE = 16 * 1024 * 1024; + + const size_t max_usage = static_cast(s_state.config.max_replacement_cache_vram_usage_mb) * 1048576; + if (s_state.gpu_replacement_image_cache_vram_usage <= max_usage) + return; + + VERBOSE_LOG("Compacting replacement GPU image cache, count = {}, size = {:.1f} MB", + s_state.gpu_replacement_image_cache.size(), + static_cast(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f); + + const u32 frame_number = System::GetFrameNumber(); + s_state.gpu_replacement_image_cache_purge_list.reserve(s_state.gpu_replacement_image_cache.size()); + for (auto it = s_state.gpu_replacement_image_cache.begin(); it != s_state.gpu_replacement_image_cache.end(); ++it) + s_state.gpu_replacement_image_cache_purge_list.emplace_back(it, frame_number - it->second.second); + + // Reverse sort, put the oldest on the end. + std::sort(s_state.gpu_replacement_image_cache_purge_list.begin(), + s_state.gpu_replacement_image_cache_purge_list.end(), + [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; }); + + // See first comment above. + const size_t target_size = (max_usage < EXTRA_COMPACT_SIZE) ? 
max_usage : (max_usage - EXTRA_COMPACT_SIZE); + while (s_state.gpu_replacement_image_cache_vram_usage > target_size && + !s_state.gpu_replacement_image_cache_purge_list.empty()) + { + GPUReplacementImageCache::iterator iter = s_state.gpu_replacement_image_cache_purge_list.back().first; + s_state.gpu_replacement_image_cache_purge_list.pop_back(); + + std::unique_ptr tex = std::move(iter->second.first); + s_state.gpu_replacement_image_cache.erase(iter); + s_state.gpu_replacement_image_cache_vram_usage -= tex->GetVRAMUsage(); + g_gpu_device->RecycleTexture(std::move(tex)); + } + + s_state.gpu_replacement_image_cache_purge_list.clear(); + + VERBOSE_LOG("Finished compacting replacement GPU image cache, count = {}, size = {:.1f} MB", + s_state.gpu_replacement_image_cache.size(), + static_cast(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f); +} + void GPUTextureCache::PreloadReplacementTextures() { static constexpr float UPDATE_INTERVAL = 1.0f; @@ -3203,31 +3308,35 @@ void GPUTextureCache::ReloadTextureReplacements(bool show_info) void GPUTextureCache::PurgeUnreferencedTexturesFromCache() { - TextureCache old_map = std::move(s_state.replacement_image_cache); - s_state.replacement_image_cache = TextureCache(); + ReplacementImageCache old_map = std::move(s_state.replacement_image_cache); + GPUReplacementImageCache old_gpu_map = std::move(s_state.gpu_replacement_image_cache); + s_state.replacement_image_cache = ReplacementImageCache(); + s_state.gpu_replacement_image_cache = GPUReplacementImageCache(); - for (const auto& it : s_state.vram_replacements) - { - const auto it2 = old_map.find(it.second); + const auto reinsert_texture = [&old_map, &old_gpu_map](const std::string& name) { + const auto it2 = old_map.find(name); if (it2 != old_map.end()) { - s_state.replacement_image_cache.emplace(it.second, std::move(it2->second)); + s_state.replacement_image_cache.emplace(name, std::move(it2->second)); old_map.erase(it2); } - } - for (const auto& map : 
{s_state.vram_write_texture_replacements, s_state.texture_page_texture_replacements}) - { - for (const auto& it : map) + const auto it3 = old_gpu_map.find(name); + if (it3 != old_gpu_map.end()) { - const auto it2 = old_map.find(it.second.second); - if (it2 != old_map.end()) - { - s_state.replacement_image_cache.emplace(it.second.second, std::move(it2->second)); - old_map.erase(it2); - } + s_state.gpu_replacement_image_cache.emplace(name, std::move(it3->second)); + old_gpu_map.erase(it3); } - } + }; + + for (const auto& it : s_state.vram_replacements) + reinsert_texture(it.second); + + for (const auto& it : s_state.vram_write_texture_replacements) + reinsert_texture(it.second.second); + + for (const auto& it : s_state.texture_page_texture_replacements) + reinsert_texture(it.second.second); } void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, HashType pal_hash, @@ -3317,22 +3426,15 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, for (const TextureReplacementSubImage& si : subimages) { - std::unique_ptr temp_texture = - g_gpu_device->FetchAndUploadTextureImage(si.image, GPUTexture::Flags::None); - if (!temp_texture) - continue; - const GSVector4i dst_rect = GSVector4i(GSVector4(si.dst_rect) * max_scale_v); - texture_size = GSVector2(GSVector2i(temp_texture->GetWidth(), temp_texture->GetHeight())); + texture_size = GSVector2(si.texture->GetSizeVec()); GSVector2::store(&uniforms[0], texture_size); GSVector2::store(&uniforms[2], GSVector2::cxpr(1.0f) / texture_size); g_gpu_device->SetViewportAndScissor(dst_rect); - g_gpu_device->SetTextureSampler(0, temp_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(0, si.texture, g_gpu_device->GetNearestSampler()); g_gpu_device->SetPipeline(si.invert_alpha ? 
s_state.replacement_semitransparent_draw_pipeline.get() : s_state.replacement_draw_pipeline.get()); g_gpu_device->Draw(3, 0); - - g_gpu_device->RecycleTexture(std::move(temp_texture)); } g_gpu_device->CopyTextureRegion(replacement_tex.get(), 0, 0, 0, 0, s_state.replacement_texture_render_target.get(), 0, diff --git a/src/core/gpu_hw_texture_cache.h b/src/core/gpu_hw_texture_cache.h index 1c071c920..07482da7b 100644 --- a/src/core/gpu_hw_texture_cache.h +++ b/src/core/gpu_hw_texture_cache.h @@ -128,7 +128,7 @@ void SetGameID(std::string game_id); void ReloadTextureReplacements(bool show_info); // VRAM Write Replacements -const TextureReplacementImage* GetVRAMReplacement(u32 width, u32 height, const void* pixels); +GPUTexture* GetVRAMReplacement(u32 width, u32 height, const void* pixels); void DumpVRAMWrite(u32 width, u32 height, const void* pixels); bool ShouldDumpVRAMWrite(u32 width, u32 height); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index cbecae52e..d13091974 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -454,6 +454,9 @@ void Settings::Load(const SettingsInterface& si, const SettingsInterface& contro texture_replacements.config.replacement_scale_linear_filter = si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", false); + texture_replacements.config.max_replacement_cache_vram_usage_mb = + si.GetUIntValue("TextureReplacements", "MaxReplacementCacheVRAMUsage", 512); + texture_replacements.config.max_vram_write_splits = si.GetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", 0u); texture_replacements.config.max_vram_write_coalesce_width = si.GetUIntValue("TextureReplacements", "MaxVRAMWriteCoalesceWidth", 0u); @@ -714,6 +717,9 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", texture_replacements.config.replacement_scale_linear_filter); + si.SetUIntValue("TextureReplacements", "MaxReplacementCacheVRAMUsage", + 
texture_replacements.config.max_replacement_cache_vram_usage_mb); + si.SetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", texture_replacements.config.max_vram_write_splits); si.SetUIntValue("TextureReplacements", "MaxVRAMWriteCoalesceWidth", texture_replacements.config.max_vram_write_coalesce_width); diff --git a/src/core/settings.h b/src/core/settings.h index 9dbbefbfc..9bdb63f2f 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -245,6 +245,8 @@ struct Settings bool convert_copies_to_writes : 1 = false; bool replacement_scale_linear_filter = false; + u32 max_replacement_cache_vram_usage_mb = 512; + u32 max_vram_write_splits = 0; u32 max_vram_write_coalesce_width = 0; u32 max_vram_write_coalesce_height = 0; diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index c264cb73b..386034ba8 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -122,6 +122,7 @@ public: ALWAYS_INLINE Format GetFormat() const { return m_format; } ALWAYS_INLINE Flags GetFlags() const { return m_flags; } ALWAYS_INLINE bool HasFlag(Flags flag) const { return ((static_cast(m_flags) & static_cast(flag)) != 0); } + ALWAYS_INLINE GSVector2i GetSizeVec() const { return GSVector2i(m_width, m_height); } ALWAYS_INLINE GSVector4i GetRect() const { return GSVector4i(0, 0, static_cast(m_width), static_cast(m_height)); From 5fd79254bd2c023477cee9a24029218510ed082d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 26 Nov 2024 12:58:12 +1000 Subject: [PATCH 23/35] GPU: Fix crash when toggling software renderer --- src/core/gpu.h | 4 ++-- src/core/gpu_hw.cpp | 8 ++------ src/core/gpu_sw.cpp | 8 ++------ src/core/system.cpp | 10 +++++----- 4 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/core/gpu.h b/src/core/gpu.h index 55c3b8d4c..3469a513a 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -186,8 +186,8 @@ public: float ComputeDisplayAspectRatio() const; float ComputeAspectRatioCorrection() const; - static std::unique_ptr 
CreateHardwareRenderer(Error* error); - static std::unique_ptr CreateSoftwareRenderer(Error* error); + static std::unique_ptr CreateHardwareRenderer(); + static std::unique_ptr CreateSoftwareRenderer(); // Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns. void ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 81f606e84..5d940c9cf 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -4265,11 +4265,7 @@ void GPU_HW::DrawRendererStats() } } -std::unique_ptr GPU::CreateHardwareRenderer(Error* error) +std::unique_ptr GPU::CreateHardwareRenderer() { - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize(error)) - gpu.reset(); - - return gpu; + return std::make_unique(); } diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index a29d99860..c8b12fa87 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -824,11 +824,7 @@ void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) m_backend.PushCommand(cmd); } -std::unique_ptr GPU::CreateSoftwareRenderer(Error* error) +std::unique_ptr GPU::CreateSoftwareRenderer() { - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize(error)) - gpu.reset(); - - return gpu; + return std::make_unique(); } diff --git a/src/core/system.cpp b/src/core/system.cpp index ccc70d709..cfc60ba89 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -2432,11 +2432,11 @@ bool System::CreateGPU(GPURenderer renderer, bool is_switching, bool fullscreen, } if (renderer == GPURenderer::Software) - g_gpu = GPU::CreateSoftwareRenderer(error); + g_gpu = GPU::CreateSoftwareRenderer(); else - g_gpu = GPU::CreateHardwareRenderer(error); + g_gpu = GPU::CreateHardwareRenderer(); - if (!g_gpu) + if (!g_gpu->Initialize(error)) { ERROR_LOG("Failed to initialize {} renderer, falling back to software renderer", 
Settings::GetRendererName(renderer)); @@ -2445,8 +2445,8 @@ bool System::CreateGPU(GPURenderer renderer, bool is_switching, bool fullscreen, Settings::GetRendererName(renderer)), Host::OSD_CRITICAL_ERROR_DURATION); g_gpu.reset(); - g_gpu = GPU::CreateSoftwareRenderer(error); - if (!g_gpu) + g_gpu = GPU::CreateSoftwareRenderer(); + if (!g_gpu->Initialize(error)) { ERROR_LOG("Failed to create fallback software renderer."); if (!s_state.keep_gpu_device_on_shutdown) From be7a20fef23b55c6a8a3412f377710ee6a6ec2cb Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 26 Nov 2024 13:08:11 +1000 Subject: [PATCH 24/35] GPU/TextureCache: Map replacement non-255 alpha to fully transparent That way if during the scaling process you end up with interpolated colours, the cutout alpha is preserved. Ideally we'd blend it, but that tends to create more problems than it solves on PSX. --- src/core/gpu_hw_shadergen.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 5179c5ed5..55db9921a 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1842,6 +1842,9 @@ std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitr #else // Leave (0,0,0,0) as 0000 for opaque replacements for cutout alpha. o_col0.a = color.a; + + // Map anything with an alpha below 0.5 to transparent. + o_col0 = lerp(o_col0, float4(0.0, 0.0, 0.0, 0.0), float(o_col0.a < 0.5)); #endif } )"; From 0befbf8021951b7e3a621fd3659622837d8adf59 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 26 Nov 2024 18:00:17 +1000 Subject: [PATCH 25/35] GPU: Add Uncorrected Borders crop mode Should behave the same as the old "All Borders" mode. The pixel aspect ratio WILL BE WRONG. Also fixes the size of screenshots in internal resolution mode. 
--- src/core/gpu.cpp | 108 +++++++++++++++++++++--------------- src/core/gpu.h | 7 +++ src/core/imgui_overlays.cpp | 2 +- src/core/settings.cpp | 5 +- src/core/system.cpp | 16 +++--- src/core/types.h | 1 + 6 files changed, 82 insertions(+), 57 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 4340c2042..c0748bb6e 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -609,9 +609,8 @@ float GPU::ComputeDisplayAspectRatio() const { if (g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow && g_gpu_device->HasMainSwapChain()) { - // Match window has already been corrected. - return static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()) / - static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); + ar = static_cast(g_gpu_device->GetMainSwapChain()->GetWidth()) / + static_cast(g_gpu_device->GetMainSwapChain()->GetHeight()); } else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Custom) { @@ -624,7 +623,19 @@ float GPU::ComputeDisplayAspectRatio() const } } - return ComputeAspectRatioCorrection() * ar; + return ar; +} + +float GPU::ComputeSourceAspectRatio() const +{ + const float source_aspect_ratio = + static_cast(m_crtc_state.display_width) / static_cast(m_crtc_state.display_height); + + // Correction is applied to the GTE for stretch to fit, that way it fills the window. + const float source_aspect_ratio_correction = + (g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow) ? 
1.0f : ComputeAspectRatioCorrection(); + + return source_aspect_ratio / source_aspect_ratio_correction; } float GPU::ComputeAspectRatioCorrection() const @@ -632,8 +643,9 @@ float GPU::ComputeAspectRatioCorrection() const const CRTCState& cs = m_crtc_state; float relative_width = static_cast(cs.horizontal_visible_end - cs.horizontal_visible_start); float relative_height = static_cast(cs.vertical_visible_end - cs.vertical_visible_start); - if (relative_width <= 0 || relative_height <= 0 || - g_settings.display_crop_mode == DisplayCropMode::OverscanUncorrected) + if (relative_width <= 0 || relative_height <= 0 || g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1 || + g_settings.display_crop_mode == DisplayCropMode::OverscanUncorrected || + g_settings.display_crop_mode == DisplayCropMode::BordersUncorrected) { return 1.0f; } @@ -652,6 +664,24 @@ float GPU::ComputeAspectRatioCorrection() const return (relative_width / relative_height); } +void GPU::ApplyPixelAspectRatioToSize(float* width, float* height) const +{ + const float dar = ComputeDisplayAspectRatio(); + const float sar = ComputeSourceAspectRatio(); + const float par = dar / sar; + + if (par < 1.0f) + { + // stretch height, preserve width + *height = std::ceil(*height / par); + } + else + { + // stretch width, preserve height + *width = std::ceil(*width * par); + } +} + void GPU::UpdateCRTCConfig() { static constexpr std::array dot_clock_dividers = {{10, 8, 5, 4, 7, 7, 7, 7}}; @@ -770,6 +800,7 @@ void GPU::UpdateCRTCDisplayParameters() break; case DisplayCropMode::Borders: + case DisplayCropMode::BordersUncorrected: default: cs.horizontal_visible_start = horizontal_display_start; cs.horizontal_visible_end = horizontal_display_end; @@ -808,6 +839,7 @@ void GPU::UpdateCRTCDisplayParameters() break; case DisplayCropMode::Borders: + case DisplayCropMode::BordersUncorrected: default: cs.horizontal_visible_start = horizontal_display_start; cs.horizontal_visible_end = horizontal_display_end; @@ -2341,20 
+2373,20 @@ void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rota const bool integer_scale = (g_settings.display_scaling == DisplayScalingMode::NearestInteger || g_settings.display_scaling == DisplayScalingMode::BilinearInteger); const bool show_vram = g_settings.debugging.show_vram; - const float display_aspect_ratio = ComputeDisplayAspectRatio(); const float window_ratio = static_cast(window_width) / static_cast(window_height); const float crtc_display_width = static_cast(show_vram ? VRAM_WIDTH : m_crtc_state.display_width); const float crtc_display_height = static_cast(show_vram ? VRAM_HEIGHT : m_crtc_state.display_height); - const float x_scale = - apply_aspect_ratio ? - (display_aspect_ratio / (static_cast(crtc_display_width) / static_cast(crtc_display_height))) : - 1.0f; + const float display_aspect_ratio = ComputeDisplayAspectRatio(); + const float source_aspect_ratio = ComputeSourceAspectRatio(); + const float pixel_aspect_ratio = display_aspect_ratio / source_aspect_ratio; + const float x_scale = apply_aspect_ratio ? pixel_aspect_ratio : 1.0f; float display_width = crtc_display_width; float display_height = crtc_display_height; float active_left = static_cast(show_vram ? 0 : m_crtc_state.display_origin_left); float active_top = static_cast(show_vram ? 0 : m_crtc_state.display_origin_top); float active_width = static_cast(show_vram ? VRAM_WIDTH : m_crtc_state.display_vram_width); float active_height = static_cast(show_vram ? VRAM_HEIGHT : m_crtc_state.display_vram_height); + if (!g_settings.display_stretch_vertically) { display_width *= x_scale; @@ -2604,52 +2636,30 @@ bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i displ void GPU::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, GSVector4i* draw_rect) const { - *width = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetWidth() : 1; - *height = g_gpu_device->HasMainSwapChain() ? 
g_gpu_device->GetMainSwapChain()->GetHeight() : 1; - CalculateDrawRect(*width, *height, true, !g_settings.debugging.show_vram, display_rect, draw_rect); - const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_settings.debugging.show_vram); if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) { if (mode == DisplayScreenshotMode::InternalResolution) { - const u32 draw_width = static_cast(display_rect->width()); - const u32 draw_height = static_cast(display_rect->height()); - - // If internal res, scale the computed draw rectangle to the internal res. - // We re-use the draw rect because it's already been AR corrected. - const float sar = - static_cast(m_display_texture_view_width) / static_cast(m_display_texture_view_height); - const float dar = static_cast(draw_width) / static_cast(draw_height); - if (sar >= dar) - { - // stretch height, preserve width - const float scale = static_cast(m_display_texture_view_width) / static_cast(draw_width); - *width = m_display_texture_view_width; - *height = static_cast(std::round(static_cast(draw_height) * scale)); - } - else - { - // stretch width, preserve height - const float scale = static_cast(m_display_texture_view_height) / static_cast(draw_height); - *width = static_cast(std::round(static_cast(draw_width) * scale)); - *height = m_display_texture_view_height; - } + float f_width = static_cast(m_display_texture_view_width); + float f_height = static_cast(m_display_texture_view_height); + ApplyPixelAspectRatioToSize(&f_width, &f_height); // DX11 won't go past 16K texture size. 
- const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); - if (*width > max_texture_size) + const float max_texture_size = static_cast(g_gpu_device->GetMaxTextureSize()); + if (f_width > max_texture_size) { - *height = static_cast(static_cast(*height) / - (static_cast(*width) / static_cast(max_texture_size))); - *width = max_texture_size; + f_height = f_height / (f_width / max_texture_size); + f_width = max_texture_size; } - if (*height > max_texture_size) + if (f_height > max_texture_size) { - *height = max_texture_size; - *width = static_cast(static_cast(*width) / - (static_cast(*height) / static_cast(max_texture_size))); + f_height = max_texture_size; + f_width = f_width / (f_height / max_texture_size); } + + *width = static_cast(std::ceil(f_width)); + *height = static_cast(std::ceil(f_height)); } else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) { @@ -2661,6 +2671,12 @@ void GPU::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* h *draw_rect = GSVector4i(0, 0, static_cast(*width), static_cast(*height)); *display_rect = *draw_rect; } + else + { + *width = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetWidth() : 1; + *height = g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetHeight() : 1; + CalculateDrawRect(*width, *height, true, !g_settings.debugging.show_vram, display_rect, draw_rect); + } } bool GPU::RenderScreenshotToFile(std::string path, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, diff --git a/src/core/gpu.h b/src/core/gpu.h index 3469a513a..cb57fa3d5 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -184,8 +184,15 @@ public: float ComputeHorizontalFrequency() const; float ComputeVerticalFrequency() const; float ComputeDisplayAspectRatio() const; + float ComputeSourceAspectRatio() const; + + /// Computes aspect ratio correction, i.e. 
the scale to apply to the source aspect ratio to preserve + /// the original pixel aspect ratio regardless of how much cropping has been applied. float ComputeAspectRatioCorrection() const; + /// Applies the pixel aspect ratio to a given size, preserving the larger dimension. + void ApplyPixelAspectRatioToSize(float* width, float* height) const; + static std::unique_ptr CreateHardwareRenderer(); static std::unique_ptr CreateSoftwareRenderer(); diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index e21f1cf94..afd09ec3b 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -389,7 +389,7 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float if (g_settings.display_show_resolution) { const u32 resolution_scale = g_gpu->GetResolutionScale(); - const auto [display_width, display_height] = g_gpu->GetFullDisplayResolution(); + const auto [display_width, display_height] = g_gpu->GetFullDisplayResolution();// wrong const bool interlaced = g_gpu->IsInterlacedDisplayEnabled(); const bool pal = g_gpu->IsInPALMode(); text.format("{}x{} {} {} [{}x]", display_width * resolution_scale, display_height * resolution_scale, diff --git a/src/core/settings.cpp b/src/core/settings.cpp index d13091974..29ef1facb 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -1620,12 +1620,15 @@ const char* Settings::GetDisplayDeinterlacingModeDisplayName(DisplayDeinterlacin "DisplayDeinterlacingMode"); } -static constexpr const std::array s_display_crop_mode_names = {"None", "Overscan", "OverscanUncorrected", "Borders"}; +static constexpr const std::array s_display_crop_mode_names = { + "None", "Overscan", "OverscanUncorrected", "Borders", "BordersUncorrected", +}; static constexpr const std::array s_display_crop_mode_display_names = { TRANSLATE_DISAMBIG_NOOP("Settings", "None", "DisplayCropMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "Only Overscan Area", "DisplayCropMode"), TRANSLATE_DISAMBIG_NOOP("Settings", 
"Only Overscan Area (Aspect Uncorrected)", "DisplayCropMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "All Borders", "DisplayCropMode"), + TRANSLATE_DISAMBIG_NOOP("Settings", "All Borders (Aspect Uncorrected)", "DisplayCropMode"), }; std::optional Settings::ParseDisplayCropMode(const char* str) diff --git a/src/core/system.cpp b/src/core/system.cpp index cfc60ba89..4b425f1c8 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -5678,20 +5678,18 @@ void System::RequestDisplaySize(float scale /*= 0.0f*/) if (scale == 0.0f) scale = g_gpu->IsHardwareRenderer() ? static_cast(g_settings.gpu_resolution_scale) : 1.0f; - const float y_scale = - (static_cast(g_gpu->GetCRTCDisplayWidth()) / static_cast(g_gpu->GetCRTCDisplayHeight())) / - g_gpu->ComputeDisplayAspectRatio(); - - u32 requested_width = - std::max(static_cast(std::ceil(static_cast(g_gpu->GetCRTCDisplayWidth()) * scale)), 1); - u32 requested_height = - std::max(static_cast(std::ceil(static_cast(g_gpu->GetCRTCDisplayHeight()) * y_scale * scale)), 1); + float requested_width = static_cast(g_gpu->GetCRTCDisplayWidth()) * scale; + float requested_height = static_cast(g_gpu->GetCRTCDisplayHeight()) * scale; + g_gpu->ApplyPixelAspectRatioToSize(&requested_width, &requested_height); if (g_settings.display_rotation == DisplayRotation::Rotate90 || g_settings.display_rotation == DisplayRotation::Rotate270) + { std::swap(requested_width, requested_height); + } - Host::RequestResizeHostDisplay(static_cast(requested_width), static_cast(requested_height)); + Host::RequestResizeHostDisplay(static_cast(std::ceil(requested_width)), + static_cast(std::ceil(requested_height))); } void System::DisplayWindowResized() diff --git a/src/core/types.h b/src/core/types.h index 4ebcc2dfb..3c532b73b 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -143,6 +143,7 @@ enum class DisplayCropMode : u8 Overscan, OverscanUncorrected, Borders, + BordersUncorrected, MaxCount }; From d1b904a1da41d4d6caed7bf45fa353b055547c28 Mon Sep 17 
00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 12:43:15 +1000 Subject: [PATCH 26/35] Qt: Fix game properties crash opening non-DB game --- src/duckstation-qt/gamesummarywidget.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/duckstation-qt/gamesummarywidget.cpp b/src/duckstation-qt/gamesummarywidget.cpp index aa72ccbba..31f005b42 100644 --- a/src/duckstation-qt/gamesummarywidget.cpp +++ b/src/duckstation-qt/gamesummarywidget.cpp @@ -53,8 +53,9 @@ GameSummaryWidget::GameSummaryWidget(const std::string& path, const std::string& } // I hate this so much. - m_ui.customLanguage->addItem(QtUtils::GetIconForLanguage(entry->GetLanguageFlagName(region)), - tr("Show Default Flag")); + const std::string_view default_language = + entry ? entry->GetLanguageFlagName(region) : Settings::GetDiscRegionName(region); + m_ui.customLanguage->addItem(QtUtils::GetIconForLanguage(default_language), tr("Show Default Flag")); for (u32 i = 0; i < static_cast(GameDatabase::Language::MaxCount); i++) { const char* language_name = GameDatabase::GetLanguageName(static_cast(i)); From 4edae3cdd09e7d6b163b0d468fe0cea6342785f1 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 12:56:01 +1000 Subject: [PATCH 27/35] GPU/HW: Depth buffer needs to be a RT in ROV mode --- src/core/gpu_hw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 5d940c9cf..3a81f53d5 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -862,7 +862,7 @@ bool GPU_HW::CreateBuffers(Error* error) const GPUTexture::Flags vram_texture_flags = m_use_rov_for_shader_blend ? GPUTexture::Flags::AllowBindAsImage : GPUTexture::Flags::None; const GPUTexture::Type depth_texture_type = - m_use_rov_for_shader_blend ? GPUTexture::Type::Texture : GPUTexture::Type::DepthStencil; + m_use_rov_for_shader_blend ? 
GPUTexture::Type::RenderTarget : GPUTexture::Type::DepthStencil; if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, From b97788a35a4b785008d062cd0ad5e96869bea996 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 13:18:11 +1000 Subject: [PATCH 28/35] PostProcessing/FX: Force native paths for includes --- src/util/postprocessing_shader_fx.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/postprocessing_shader_fx.cpp b/src/util/postprocessing_shader_fx.cpp index 1fdb70490..4f3445a7b 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -50,7 +50,7 @@ static RenderAPI GetRenderAPI() static bool PreprocessorFileExistsCallback(const std::string& path) { if (Path::IsAbsolute(path)) - return FileSystem::FileExists(path.c_str()); + return FileSystem::FileExists(Path::ToNativePath(path).c_str()); return Host::ResourceFileExists(path.c_str(), true); } @@ -59,7 +59,7 @@ static bool PreprocessorReadFileCallback(const std::string& path, std::string& d { std::optional rdata; if (Path::IsAbsolute(path)) - rdata = FileSystem::ReadFileToString(path.c_str()); + rdata = FileSystem::ReadFileToString(Path::ToNativePath(path).c_str()); else rdata = Host::ReadResourceFileToString(path.c_str(), true); if (!rdata.has_value()) From 1434507b41202a3e6b15a55731bee3ba4d747d25 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 17:19:49 +1000 Subject: [PATCH 29/35] FileSystem: Add span overload for WriteBinaryFile() And normalize filename -> path parameter names. 
--- src/common/file_system.cpp | 153 ++++++++++++++++++++----------------- src/common/file_system.h | 39 +++++----- 2 files changed, 102 insertions(+), 90 deletions(-) diff --git a/src/common/file_system.cpp b/src/common/file_system.cpp index 279fdcdee..48ae9d7e0 100644 --- a/src/common/file_system.cpp +++ b/src/common/file_system.cpp @@ -608,14 +608,14 @@ std::string Path::ReplaceExtension(std::string_view path, std::string_view new_e return ret; } -static std::string_view::size_type GetLastSeperatorPosition(std::string_view filename, bool include_separator) +static std::string_view::size_type GetLastSeperatorPosition(std::string_view path, bool include_separator) { - std::string_view::size_type last_separator = filename.rfind('/'); + std::string_view::size_type last_separator = path.rfind('/'); if (include_separator && last_separator != std::string_view::npos) last_separator++; #if defined(_WIN32) - std::string_view::size_type other_last_separator = filename.rfind('\\'); + std::string_view::size_type other_last_separator = path.rfind('\\'); if (other_last_separator != std::string_view::npos) { if (include_separator) @@ -845,13 +845,13 @@ std::vector FileSystem::GetRootDirectoryList() return results; } -std::string Path::BuildRelativePath(std::string_view filename, std::string_view new_filename) +std::string Path::BuildRelativePath(std::string_view path, std::string_view new_filename) { std::string new_string; - std::string_view::size_type pos = GetLastSeperatorPosition(filename, true); + std::string_view::size_type pos = GetLastSeperatorPosition(path, true); if (pos != std::string_view::npos) - new_string.assign(filename, 0, pos); + new_string.assign(path, 0, pos); new_string.append(new_filename); return new_string; } @@ -873,10 +873,10 @@ std::string Path::Combine(std::string_view base, std::string_view next) return ret; } -std::FILE* FileSystem::OpenCFile(const char* filename, const char* mode, Error* error) +std::FILE* FileSystem::OpenCFile(const char* path, 
const char* mode, Error* error) { #ifdef _WIN32 - const std::wstring wfilename = GetWin32Path(filename); + const std::wstring wfilename = GetWin32Path(path); const std::wstring wmode = StringUtil::UTF8StringToWideString(mode); if (!wfilename.empty() && !wmode.empty()) { @@ -892,7 +892,7 @@ std::FILE* FileSystem::OpenCFile(const char* filename, const char* mode, Error* } std::FILE* fp; - const errno_t err = fopen_s(&fp, filename, mode); + const errno_t err = fopen_s(&fp, path, mode); if (err != 0) { Error::SetErrno(error, err); @@ -901,24 +901,24 @@ std::FILE* FileSystem::OpenCFile(const char* filename, const char* mode, Error* return fp; #else - std::FILE* fp = std::fopen(filename, mode); + std::FILE* fp = std::fopen(path, mode); if (!fp) Error::SetErrno(error, errno); return fp; #endif } -std::FILE* FileSystem::OpenExistingOrCreateCFile(const char* filename, s32 retry_ms, Error* error /*= nullptr*/) +std::FILE* FileSystem::OpenExistingOrCreateCFile(const char* path, s32 retry_ms, Error* error /*= nullptr*/) { #ifdef _WIN32 - const std::wstring wfilename = GetWin32Path(filename); - if (wfilename.empty()) + const std::wstring wpath = GetWin32Path(path); + if (wpath.empty()) { Error::SetStringView(error, "Invalid path."); return nullptr; } - HANDLE file = CreateFileW(wfilename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, OPEN_EXISTING, 0, NULL); + HANDLE file = CreateFileW(wpath.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, OPEN_EXISTING, 0, NULL); // if there's a sharing violation, keep retrying if (file == INVALID_HANDLE_VALUE && GetLastError() == ERROR_SHARING_VIOLATION && retry_ms >= 0) @@ -927,7 +927,7 @@ std::FILE* FileSystem::OpenExistingOrCreateCFile(const char* filename, s32 retry while (retry_ms == 0 || timer.GetTimeMilliseconds() <= retry_ms) { Sleep(1); - file = CreateFileW(wfilename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, OPEN_EXISTING, 0, NULL); + file = CreateFileW(wpath.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, 
OPEN_EXISTING, 0, NULL); if (file != INVALID_HANDLE_VALUE || GetLastError() != ERROR_SHARING_VIOLATION) break; } @@ -936,11 +936,11 @@ std::FILE* FileSystem::OpenExistingOrCreateCFile(const char* filename, s32 retry if (file == INVALID_HANDLE_VALUE && GetLastError() == ERROR_FILE_NOT_FOUND) { // try creating it - file = CreateFileW(wfilename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, CREATE_NEW, 0, NULL); + file = CreateFileW(wpath.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, CREATE_NEW, 0, NULL); if (file == INVALID_HANDLE_VALUE && GetLastError() == ERROR_FILE_EXISTS) { // someone else beat us in the race, try again with existing. - file = CreateFileW(wfilename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, OPEN_EXISTING, 0, NULL); + file = CreateFileW(wpath.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, OPEN_EXISTING, 0, NULL); } } @@ -970,7 +970,7 @@ std::FILE* FileSystem::OpenExistingOrCreateCFile(const char* filename, s32 retry return cfile; #else - std::FILE* fp = std::fopen(filename, "r+b"); + std::FILE* fp = std::fopen(path, "r+b"); if (fp) return fp; @@ -982,13 +982,13 @@ std::FILE* FileSystem::OpenExistingOrCreateCFile(const char* filename, s32 retry } // try again, but create the file. mode "x" exists on all platforms. - fp = std::fopen(filename, "w+bx"); + fp = std::fopen(path, "w+bx"); if (fp) return fp; // if it already exists, someone else beat us in the race. try again with existing. 
if (errno == EEXIST) - fp = std::fopen(filename, "r+b"); + fp = std::fopen(path, "r+b"); if (!fp) { Error::SetErrno(error, errno); @@ -999,28 +999,28 @@ std::FILE* FileSystem::OpenExistingOrCreateCFile(const char* filename, s32 retry #endif } -int FileSystem::OpenFDFile(const char* filename, int flags, int mode, Error* error) +int FileSystem::OpenFDFile(const char* path, int flags, int mode, Error* error) { #ifdef _WIN32 - const std::wstring wfilename(GetWin32Path(filename)); - if (!wfilename.empty()) - return _wopen(wfilename.c_str(), flags, mode); + const std::wstring wpath = GetWin32Path(path); + if (!wpath.empty()) + return _wopen(wpath.c_str(), flags, mode); return -1; #else - const int fd = open(filename, flags, mode); + const int fd = open(path, flags, mode); if (fd < 0) Error::SetErrno(error, errno); return fd; #endif } -std::FILE* FileSystem::OpenSharedCFile(const char* filename, const char* mode, FileShareMode share_mode, Error* error) +std::FILE* FileSystem::OpenSharedCFile(const char* path, const char* mode, FileShareMode share_mode, Error* error) { #ifdef _WIN32 - const std::wstring wfilename = GetWin32Path(filename); + const std::wstring wpath = GetWin32Path(path); const std::wstring wmode = StringUtil::UTF8StringToWideString(mode); - if (wfilename.empty() || wmode.empty()) + if (wpath.empty() || wmode.empty()) return nullptr; int share_flags = 0; @@ -1041,14 +1041,14 @@ std::FILE* FileSystem::OpenSharedCFile(const char* filename, const char* mode, F break; } - std::FILE* fp = _wfsopen(wfilename.c_str(), wmode.c_str(), share_flags); + std::FILE* fp = _wfsopen(wpath.c_str(), wmode.c_str(), share_flags); if (fp) return fp; Error::SetErrno(error, errno); return nullptr; #else - std::FILE* fp = std::fopen(filename, mode); + std::FILE* fp = std::fopen(path, mode); if (!fp) Error::SetErrno(error, errno); return fp; @@ -1165,8 +1165,8 @@ std::string Path::CreateFileURL(std::string_view path) return ret; } 
-FileSystem::AtomicRenamedFileDeleter::AtomicRenamedFileDeleter(std::string temp_filename, std::string final_filename) - : m_temp_filename(std::move(temp_filename)), m_final_filename(std::move(final_filename)) +FileSystem::AtomicRenamedFileDeleter::AtomicRenamedFileDeleter(std::string temp_path, std::string final_path) + : m_temp_path(std::move(temp_path)), m_final_path(std::move(final_path)) { } @@ -1180,11 +1180,11 @@ void FileSystem::AtomicRenamedFileDeleter::operator()(std::FILE* fp) Error error; // final filename empty => discarded. - if (!m_final_filename.empty()) + if (!m_final_path.empty()) { if (!commit(fp, &error)) { - ERROR_LOG("Failed to commit temporary file '{}', discarding. Error was {}.", Path::GetFileName(m_temp_filename), + ERROR_LOG("Failed to commit temporary file '{}', discarding. Error was {}.", Path::GetFileName(m_temp_path), error.GetDescription()); } @@ -1194,8 +1194,8 @@ void FileSystem::AtomicRenamedFileDeleter::operator()(std::FILE* fp) // we're discarding the file, don't care if it fails. std::fclose(fp); - if (!DeleteFile(m_temp_filename.c_str(), &error)) - ERROR_LOG("Failed to delete temporary file '{}': {}", Path::GetFileName(m_temp_filename), error.GetDescription()); + if (!DeleteFile(m_temp_path.c_str(), &error)) + ERROR_LOG("Failed to delete temporary file '{}': {}", Path::GetFileName(m_temp_path), error.GetDescription()); } bool FileSystem::AtomicRenamedFileDeleter::commit(std::FILE* fp, Error* error) @@ -1209,38 +1209,38 @@ bool FileSystem::AtomicRenamedFileDeleter::commit(std::FILE* fp, Error* error) if (std::fclose(fp) != 0) { Error::SetErrno(error, "fclose() failed: ", errno); - m_final_filename.clear(); + m_final_path.clear(); } // Should not have been discarded. 
- if (!m_final_filename.empty()) + if (!m_final_path.empty()) { - return RenamePath(m_temp_filename.c_str(), m_final_filename.c_str(), error); + return RenamePath(m_temp_path.c_str(), m_final_path.c_str(), error); } else { Error::SetStringView(error, "File has already been discarded."); - return DeleteFile(m_temp_filename.c_str(), error); + return DeleteFile(m_temp_path.c_str(), error); } } void FileSystem::AtomicRenamedFileDeleter::discard() { - m_final_filename = {}; + m_final_path = {}; } -FileSystem::AtomicRenamedFile FileSystem::CreateAtomicRenamedFile(std::string filename, Error* error /*= nullptr*/) +FileSystem::AtomicRenamedFile FileSystem::CreateAtomicRenamedFile(std::string path, Error* error /*= nullptr*/) { - std::string temp_filename; + std::string temp_path; std::FILE* fp = nullptr; - if (!filename.empty()) + if (!path.empty()) { // this is disgusting, but we need null termination, and std::string::data() does not guarantee it. - const size_t filename_length = filename.length(); - const size_t name_buf_size = filename_length + 8; + const size_t path_length = path.length(); + const size_t name_buf_size = path_length + 8; std::unique_ptr name_buf = std::make_unique(name_buf_size); - std::memcpy(name_buf.get(), filename.c_str(), filename_length); - StringUtil::Strlcpy(name_buf.get() + filename_length, ".XXXXXX", name_buf_size); + std::memcpy(name_buf.get(), path.c_str(), path_length); + StringUtil::Strlcpy(name_buf.get() + path_length, ".XXXXXX", name_buf_size); #ifdef _WIN32 const errno_t err = _mktemp_s(name_buf.get(), name_buf_size); @@ -1267,18 +1267,18 @@ FileSystem::AtomicRenamedFile FileSystem::CreateAtomicRenamedFile(std::string fi #endif if (fp) - temp_filename.assign(name_buf.get(), name_buf_size - 1); + temp_path.assign(name_buf.get(), name_buf_size - 1); else - filename.clear(); + path.clear(); } - return AtomicRenamedFile(fp, AtomicRenamedFileDeleter(std::move(temp_filename), std::move(filename))); + return AtomicRenamedFile(fp, 
AtomicRenamedFileDeleter(std::move(temp_path), std::move(path))); } -bool FileSystem::WriteAtomicRenamedFile(std::string filename, const void* data, size_t data_length, +bool FileSystem::WriteAtomicRenamedFile(std::string path, const void* data, size_t data_length, Error* error /*= nullptr*/) { - AtomicRenamedFile fp = CreateAtomicRenamedFile(std::move(filename), error); + AtomicRenamedFile fp = CreateAtomicRenamedFile(std::move(path), error); if (!fp) return false; @@ -1292,6 +1292,11 @@ bool FileSystem::WriteAtomicRenamedFile(std::string filename, const void* data, return true; } +bool FileSystem::WriteAtomicRenamedFile(std::string path, const std::span data, Error* error /* = nullptr */) +{ + return WriteAtomicRenamedFile(std::move(path), data.empty() ? nullptr : data.data(), data.size(), error); +} + void FileSystem::DiscardAtomicRenamedFile(AtomicRenamedFile& file) { file.get_deleter().discard(); @@ -1306,21 +1311,20 @@ bool FileSystem::CommitAtomicRenamedFile(AtomicRenamedFile& file, Error* error) return false; } -FileSystem::ManagedCFilePtr FileSystem::OpenManagedCFile(const char* filename, const char* mode, Error* error) +FileSystem::ManagedCFilePtr FileSystem::OpenManagedCFile(const char* path, const char* mode, Error* error) { - return ManagedCFilePtr(OpenCFile(filename, mode, error)); + return ManagedCFilePtr(OpenCFile(path, mode, error)); } -FileSystem::ManagedCFilePtr FileSystem::OpenExistingOrCreateManagedCFile(const char* filename, s32 retry_ms, - Error* error) +FileSystem::ManagedCFilePtr FileSystem::OpenExistingOrCreateManagedCFile(const char* path, s32 retry_ms, Error* error) { - return ManagedCFilePtr(OpenExistingOrCreateCFile(filename, retry_ms, error)); + return ManagedCFilePtr(OpenExistingOrCreateCFile(path, retry_ms, error)); } -FileSystem::ManagedCFilePtr FileSystem::OpenManagedSharedCFile(const char* filename, const char* mode, +FileSystem::ManagedCFilePtr FileSystem::OpenManagedSharedCFile(const char* path, const char* mode, FileShareMode 
share_mode, Error* error) { - return ManagedCFilePtr(OpenSharedCFile(filename, mode, share_mode, error)); + return ManagedCFilePtr(OpenSharedCFile(path, mode, share_mode, error)); } int FileSystem::FSeek64(std::FILE* fp, s64 offset, int whence) @@ -1443,20 +1447,20 @@ bool FileSystem::FTruncate64(std::FILE* fp, s64 size, Error* error) #endif } -s64 FileSystem::GetPathFileSize(const char* Path) +s64 FileSystem::GetPathFileSize(const char* path) { FILESYSTEM_STAT_DATA sd; - if (!StatFile(Path, &sd)) + if (!StatFile(path, &sd)) return -1; return sd.Size; } -std::optional> FileSystem::ReadBinaryFile(const char* filename, Error* error) +std::optional> FileSystem::ReadBinaryFile(const char* path, Error* error) { std::optional> ret; - ManagedCFilePtr fp = OpenManagedCFile(filename, "rb", error); + ManagedCFilePtr fp = OpenManagedCFile(path, "rb", error); if (!fp) return ret; @@ -1506,11 +1510,11 @@ std::optional> FileSystem::ReadBinaryFile(std::FILE* fp, Er return ret; } -std::optional FileSystem::ReadFileToString(const char* filename, Error* error) +std::optional FileSystem::ReadFileToString(const char* path, Error* error) { std::optional ret; - ManagedCFilePtr fp = OpenManagedCFile(filename, "rb", error); + ManagedCFilePtr fp = OpenManagedCFile(path, "rb", error); if (!fp) return ret; @@ -1562,9 +1566,9 @@ std::optional FileSystem::ReadFileToString(std::FILE* fp, Error* er return ret; } -bool FileSystem::WriteBinaryFile(const char* filename, const void* data, size_t data_length, Error* error) +bool FileSystem::WriteBinaryFile(const char* path, const void* data, size_t data_length, Error* error) { - ManagedCFilePtr fp = OpenManagedCFile(filename, "wb", error); + ManagedCFilePtr fp = OpenManagedCFile(path, "wb", error); if (!fp) return false; @@ -1577,9 +1581,14 @@ bool FileSystem::WriteBinaryFile(const char* filename, const void* data, size_t return true; } -bool FileSystem::WriteStringToFile(const char* filename, std::string_view sv, Error* error) +bool 
FileSystem::WriteBinaryFile(const char* path, const std::span data, Error* error /*= nullptr*/) { - ManagedCFilePtr fp = OpenManagedCFile(filename, "wb", error); + return WriteBinaryFile(path, data.empty() ? nullptr : data.data(), data.size(), error); +} + +bool FileSystem::WriteStringToFile(const char* path, std::string_view sv, Error* error) +{ + ManagedCFilePtr fp = OpenManagedCFile(path, "wb", error); if (!fp) return false; @@ -2636,13 +2645,13 @@ bool FileSystem::DeleteDirectory(const char* path) std::string FileSystem::GetProgramPath() { #if defined(__linux__) - static const char* exeFileName = "/proc/self/exe"; + static const char* exe_path = "/proc/self/exe"; int curSize = PATH_MAX; char* buffer = static_cast(std::realloc(nullptr, curSize)); for (;;) { - int len = readlink(exeFileName, buffer, curSize); + int len = readlink(exe_path, buffer, curSize); if (len < 0) { std::free(buffer); diff --git a/src/common/file_system.h b/src/common/file_system.h index 3396f1307..c6e861921 100644 --- a/src/common/file_system.h +++ b/src/common/file_system.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -67,8 +68,8 @@ bool FindFiles(const char* path, const char* pattern, u32 flags, FindResultsArra /// Stat file bool StatFile(const char* path, struct stat* st); bool StatFile(std::FILE* fp, struct stat* st); -bool StatFile(const char* path, FILESYSTEM_STAT_DATA* pStatData); -bool StatFile(std::FILE* fp, FILESYSTEM_STAT_DATA* pStatData); +bool StatFile(const char* path, FILESYSTEM_STAT_DATA* sd); +bool StatFile(std::FILE* fp, FILESYSTEM_STAT_DATA* sd); s64 GetPathFileSize(const char* path); /// File exists? 
@@ -99,14 +100,14 @@ struct FileDeleter /// open files using ManagedCFilePtr = std::unique_ptr; -ManagedCFilePtr OpenManagedCFile(const char* filename, const char* mode, Error* error = nullptr); -std::FILE* OpenCFile(const char* filename, const char* mode, Error* error = nullptr); +ManagedCFilePtr OpenManagedCFile(const char* path, const char* mode, Error* error = nullptr); +std::FILE* OpenCFile(const char* path, const char* mode, Error* error = nullptr); /// Atomically opens a file in read/write mode, and if the file does not exist, creates it. /// On Windows, if retry_ms is positive, this function will retry opening the file for this /// number of milliseconds. NOTE: The file is opened in binary mode. -std::FILE* OpenExistingOrCreateCFile(const char* filename, s32 retry_ms = -1, Error* error = nullptr); -ManagedCFilePtr OpenExistingOrCreateManagedCFile(const char* filename, s32 retry_ms = -1, Error* error = nullptr); +std::FILE* OpenExistingOrCreateCFile(const char* path, s32 retry_ms = -1, Error* error = nullptr); +ManagedCFilePtr OpenExistingOrCreateManagedCFile(const char* path, s32 retry_ms = -1, Error* error = nullptr); int FSeek64(std::FILE* fp, s64 offset, int whence); bool FSeek64(std::FILE* fp, s64 offset, int whence, Error* error); @@ -114,7 +115,7 @@ s64 FTell64(std::FILE* fp); s64 FSize64(std::FILE* fp, Error* error = nullptr); bool FTruncate64(std::FILE* fp, s64 size, Error* error = nullptr); -int OpenFDFile(const char* filename, int flags, int mode, Error* error = nullptr); +int OpenFDFile(const char* path, int flags, int mode, Error* error = nullptr); /// Sharing modes for OpenSharedCFile(). enum class FileShareMode @@ -127,15 +128,15 @@ enum class FileShareMode /// Opens a file in shareable mode (where other processes can access it concurrently). /// Only has an effect on Windows systems. 
-ManagedCFilePtr OpenManagedSharedCFile(const char* filename, const char* mode, FileShareMode share_mode, +ManagedCFilePtr OpenManagedSharedCFile(const char* path, const char* mode, FileShareMode share_mode, Error* error = nullptr); -std::FILE* OpenSharedCFile(const char* filename, const char* mode, FileShareMode share_mode, Error* error = nullptr); +std::FILE* OpenSharedCFile(const char* path, const char* mode, FileShareMode share_mode, Error* error = nullptr); /// Atomically-updated file creation. class AtomicRenamedFileDeleter { public: - AtomicRenamedFileDeleter(std::string temp_filename, std::string final_filename); + AtomicRenamedFileDeleter(std::string temp_path, std::string final_path); ~AtomicRenamedFileDeleter(); void operator()(std::FILE* fp); @@ -143,12 +144,13 @@ public: void discard(); private: - std::string m_temp_filename; - std::string m_final_filename; + std::string m_temp_path; + std::string m_final_path; }; using AtomicRenamedFile = std::unique_ptr; -AtomicRenamedFile CreateAtomicRenamedFile(std::string filename, Error* error = nullptr); -bool WriteAtomicRenamedFile(std::string filename, const void* data, size_t data_length, Error* error = nullptr); +AtomicRenamedFile CreateAtomicRenamedFile(std::string path, Error* error = nullptr); +bool WriteAtomicRenamedFile(std::string path, const void* data, size_t data_length, Error* error = nullptr); +bool WriteAtomicRenamedFile(std::string path, const std::span data, Error* error = nullptr); bool CommitAtomicRenamedFile(AtomicRenamedFile& file, Error* error); void DiscardAtomicRenamedFile(AtomicRenamedFile& file); @@ -166,12 +168,13 @@ private: }; #endif -std::optional> ReadBinaryFile(const char* filename, Error* error = nullptr); +std::optional> ReadBinaryFile(const char* path, Error* error = nullptr); std::optional> ReadBinaryFile(std::FILE* fp, Error* error = nullptr); -std::optional ReadFileToString(const char* filename, Error* error = nullptr); +std::optional ReadFileToString(const char* path, 
Error* error = nullptr); std::optional ReadFileToString(std::FILE* fp, Error* error = nullptr); -bool WriteBinaryFile(const char* filename, const void* data, size_t data_length, Error* error = nullptr); -bool WriteStringToFile(const char* filename, std::string_view sv, Error* error = nullptr); +bool WriteBinaryFile(const char* path, const void* data, size_t data_length, Error* error = nullptr); +bool WriteBinaryFile(const char* path, const std::span data, Error* error = nullptr); +bool WriteStringToFile(const char* path, std::string_view sv, Error* error = nullptr); /// creates a directory in the local filesystem /// if the directory already exists, the return value will be true. From 57d3aa485019b66aa6261021dfef4161f9032ca4 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 17:36:15 +1000 Subject: [PATCH 30/35] FileSystem: Add Error parameter to StatFile() --- src/common/file_system.cpp | 75 +++++++++++++++++++++++++++++++------- src/common/file_system.h | 8 ++-- 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/src/common/file_system.cpp b/src/common/file_system.cpp index 48ae9d7e0..d195cbca4 100644 --- a/src/common/file_system.cpp +++ b/src/common/file_system.cpp @@ -1874,12 +1874,15 @@ static void TranslateStat64(struct stat* st, const struct _stat64& st64) st->st_ctime = static_cast(st64.st_ctime); } -bool FileSystem::StatFile(const char* path, struct stat* st) +bool FileSystem::StatFile(const char* path, struct stat* st, Error* error) { // convert to wide string const std::wstring wpath = GetWin32Path(path); - if (wpath.empty()) + if (wpath.empty()) [[unlikely]] + { + Error::SetStringView(error, "Path is empty."); return false; + } struct _stat64 st64; if (_wstati64(wpath.c_str(), &st64) != 0) @@ -1889,31 +1892,43 @@ bool FileSystem::StatFile(const char* path, struct stat* st) return true; } -bool FileSystem::StatFile(std::FILE* fp, struct stat* st) +bool FileSystem::StatFile(std::FILE* fp, struct stat* st, Error* error) { const int 
fd = _fileno(fp); if (fd < 0) + { + Error::SetErrno(error, "_fileno() failed: ", errno); return false; + } struct _stat64 st64; if (_fstati64(fd, &st64) != 0) + { + Error::SetErrno(error, "_fstati64() failed: ", errno); return false; + } TranslateStat64(st, st64); return true; } -bool FileSystem::StatFile(const char* path, FILESYSTEM_STAT_DATA* sd) +bool FileSystem::StatFile(const char* path, FILESYSTEM_STAT_DATA* sd, Error* error) { // convert to wide string const std::wstring wpath = GetWin32Path(path); - if (wpath.empty()) + if (wpath.empty()) [[unlikely]] + { + Error::SetStringView(error, "Path is empty."); return false; + } // determine attributes for the path. if it's a directory, things have to be handled differently.. DWORD fileAttributes = GetFileAttributesW(wpath.c_str()); if (fileAttributes == INVALID_FILE_ATTRIBUTES) + { + Error::SetWin32(error, "GetFileAttributesW() failed: ", GetLastError()); return false; + } // test if it is a directory HANDLE hFile; @@ -1930,12 +1945,16 @@ bool FileSystem::StatFile(const char* path, FILESYSTEM_STAT_DATA* sd) // createfile succeded? 
if (hFile == INVALID_HANDLE_VALUE) + { + Error::SetWin32(error, "CreateFileW() failed: ", GetLastError()); return false; + } // use GetFileInformationByHandle BY_HANDLE_FILE_INFORMATION bhfi; if (GetFileInformationByHandle(hFile, &bhfi) == FALSE) { + Error::SetWin32(error, "GetFileInformationByHandle() failed: ", GetLastError()); CloseHandle(hFile); return false; } @@ -1951,15 +1970,21 @@ bool FileSystem::StatFile(const char* path, FILESYSTEM_STAT_DATA* sd) return true; } -bool FileSystem::StatFile(std::FILE* fp, FILESYSTEM_STAT_DATA* sd) +bool FileSystem::StatFile(std::FILE* fp, FILESYSTEM_STAT_DATA* sd, Error* error) { const int fd = _fileno(fp); if (fd < 0) + { + Error::SetErrno(error, "_fileno() failed: ", errno); return false; + } struct _stat64 st; if (_fstati64(fd, &st) != 0) + { + Error::SetErrno(error, "_fstati64() failed: ", errno); return false; + } // parse attributes sd->CreationTime = st.st_ctime; @@ -2420,26 +2445,44 @@ bool FileSystem::FindFiles(const char* path, const char* pattern, u32 flags, Fin return true; } -bool FileSystem::StatFile(const char* path, struct stat* st) +bool FileSystem::StatFile(const char* path, struct stat* st, Error* error) { - return stat(path, st) == 0; + if (stat(path, st) != 0) + { + Error::SetErrno(error, "stat() failed: ", errno); + return false; + } + + return true; } -bool FileSystem::StatFile(std::FILE* fp, struct stat* st) +bool FileSystem::StatFile(std::FILE* fp, struct stat* st, Error* error) { const int fd = fileno(fp); if (fd < 0) + { + Error::SetErrno(error, "fileno() failed: ", errno); return false; + } - return fstat(fd, st) == 0; + if (fstat(fd, st) != 0) + { + Error::SetErrno(error, "fstat() failed: ", errno); + return false; + } + + return true; } -bool FileSystem::StatFile(const char* path, FILESYSTEM_STAT_DATA* sd) +bool FileSystem::StatFile(const char* path, FILESYSTEM_STAT_DATA* sd, Error* error) { // stat file struct stat sysStatData; if (stat(path, &sysStatData) < 0) + { + Error::SetErrno(error, 
"stat() failed: ", errno); return false; + } // parse attributes sd->CreationTime = sysStatData.st_ctime; @@ -2451,16 +2494,22 @@ bool FileSystem::StatFile(const char* path, FILESYSTEM_STAT_DATA* sd) return true; } -bool FileSystem::StatFile(std::FILE* fp, FILESYSTEM_STAT_DATA* sd) +bool FileSystem::StatFile(std::FILE* fp, FILESYSTEM_STAT_DATA* sd, Error* error) { const int fd = fileno(fp); if (fd < 0) + { + Error::SetErrno(error, "fileno() failed: ", errno); return false; + } // stat file struct stat sysStatData; - if (fstat(fd, &sysStatData) < 0) + if (fstat(fd, &sysStatData) != 0) + { + Error::SetErrno(error, "stat() failed: ", errno); return false; + } // parse attributes sd->CreationTime = sysStatData.st_ctime; diff --git a/src/common/file_system.h b/src/common/file_system.h index c6e861921..7003001ea 100644 --- a/src/common/file_system.h +++ b/src/common/file_system.h @@ -66,10 +66,10 @@ std::vector GetRootDirectoryList(); bool FindFiles(const char* path, const char* pattern, u32 flags, FindResultsArray* results); /// Stat file -bool StatFile(const char* path, struct stat* st); -bool StatFile(std::FILE* fp, struct stat* st); -bool StatFile(const char* path, FILESYSTEM_STAT_DATA* sd); -bool StatFile(std::FILE* fp, FILESYSTEM_STAT_DATA* sd); +bool StatFile(const char* path, struct stat* st, Error* error = nullptr); +bool StatFile(std::FILE* fp, struct stat* st, Error* error = nullptr); +bool StatFile(const char* path, FILESYSTEM_STAT_DATA* sd, Error* error = nullptr); +bool StatFile(std::FILE* fp, FILESYSTEM_STAT_DATA* sd, Error* error = nullptr); s64 GetPathFileSize(const char* path); /// File exists? From b28ca2b78a5b9506b038152c5aa01fa2ab2964ad Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 17:48:05 +1000 Subject: [PATCH 31/35] Qt: Improve updater error reporting And swap from Qt file functions to our own. 
--- src/duckstation-qt/autoupdaterdialog.cpp | 145 ++++++++++++++--------- 1 file changed, 91 insertions(+), 54 deletions(-) diff --git a/src/duckstation-qt/autoupdaterdialog.cpp b/src/duckstation-qt/autoupdaterdialog.cpp index 30aa12443..7667c83f9 100644 --- a/src/duckstation-qt/autoupdaterdialog.cpp +++ b/src/duckstation-qt/autoupdaterdialog.cpp @@ -22,7 +22,7 @@ #include "fmt/format.h" #include -#include +#include #include #include #include @@ -44,6 +44,8 @@ static constexpr u32 HTTP_POLL_INTERVAL = 10; #include #elif defined(__APPLE__) #include "common/cocoa_tools.h" +#else +#include #endif // Logic to detect whether we can use the auto updater. @@ -602,8 +604,7 @@ static constexpr char UPDATER_ARCHIVE_NAME[] = "update.zip"; bool AutoUpdaterDialog::doesUpdaterNeedElevation(const std::string& application_dir) const { - // Try to create a dummy text file in the PCSX2 updater directory. If it fails, we probably won't have write - // permission. + // Try to create a dummy text file in the updater directory. If it fails, we probably won't have write permission. 
const std::string dummy_path = Path::Combine(application_dir, "update.txt"); auto fp = FileSystem::OpenManagedCFile(dummy_path.c_str(), "wb"); if (!fp) @@ -620,15 +621,16 @@ bool AutoUpdaterDialog::processUpdate(const std::vector& update_data) const std::string update_zip_path = Path::Combine(EmuFolders::DataRoot, UPDATER_ARCHIVE_NAME); const std::string updater_path = Path::Combine(EmuFolders::DataRoot, UPDATER_EXECUTABLE); - if ((FileSystem::FileExists(update_zip_path.c_str()) && !FileSystem::DeleteFile(update_zip_path.c_str()))) + Error error; + if ((FileSystem::FileExists(update_zip_path.c_str()) && !FileSystem::DeleteFile(update_zip_path.c_str(), &error))) { - reportError("Removing existing update zip failed"); + reportError(fmt::format("Removing existing update zip failed:\n{}", error.GetDescription())); return false; } - if (!FileSystem::WriteBinaryFile(update_zip_path.c_str(), update_data.data(), update_data.size())) + if (!FileSystem::WriteAtomicRenamedFile(update_zip_path.c_str(), update_data, &error)) { - reportError(fmt::format("Writing update zip to '{}' failed", update_zip_path)); + reportError(fmt::format("Writing update zip to '{}' failed:\n{}", update_zip_path, error.GetDescription())); return false; } @@ -686,7 +688,7 @@ bool AutoUpdaterDialog::extractUpdater(const std::string& zip_path, const std::s if (std::fwrite(chunk, size, 1, fp.get()) != 1) { - Error::SetString(error, "Failed to write updater exe"); + Error::SetErrno(error, "Failed to write updater exe: fwrite() failed: ", errno); unzClose(zf); fp.reset(); FileSystem::DeleteFile(destination_path.c_str()); @@ -742,9 +744,12 @@ void AutoUpdaterDialog::cleanupAfterUpdate() if (!FileSystem::FileExists(updater_path.c_str())) return; - if (!FileSystem::DeleteFile(updater_path.c_str())) + Error error; + if (!FileSystem::DeleteFile(updater_path.c_str(), &error)) { - QMessageBox::critical(nullptr, tr("Updater Error"), tr("Failed to remove updater exe after update.")); + QMessageBox::critical( + 
nullptr, tr("Updater Error"), + tr("Failed to remove updater exe after update:\n%1").arg(QString::fromStdString(error.GetDescription()))); return; } } @@ -784,23 +789,18 @@ bool AutoUpdaterDialog::processUpdate(const std::vector& update_data) // We use the user data directory to temporarily store the update zip. const std::string zip_path = Path::Combine(EmuFolders::DataRoot, "update.zip"); const std::string staging_directory = Path::Combine(EmuFolders::DataRoot, "UPDATE_STAGING"); - if (FileSystem::FileExists(zip_path.c_str()) && !FileSystem::DeleteFile(zip_path.c_str())) + Error error; + if (FileSystem::FileExists(zip_path.c_str()) && !FileSystem::DeleteFile(zip_path.c_str(), &error)) { - reportError("Failed to remove old update zip."); + reportError(fmt::format("Failed to remove old update zip:\n{}", error.GetDescription())); return false; } // Save update. + if (!FileSystem::WriteAtomicRenamedFile(zip_path.c_str(), update_data, &error)) { - QFile zip_file(QString::fromStdString(zip_path)); - if (!zip_file.open(QIODevice::WriteOnly) || - zip_file.write(reinterpret_cast(update_data.data()), static_cast(update_data.size())) != - static_cast(update_data.size())) - { - reportError(fmt::format("Writing update zip to '{}' failed", zip_path)); - return false; - } - zip_file.close(); + reportError(fmt::format("Writing update zip to '{}' failed:\n{}", zip_path, error.GetDescription())); + return false; } INFO_LOG("Beginning update:\nUpdater path: {}\nZip path: {}\nStaging directory: {}\nOutput directory: {}", @@ -832,66 +832,105 @@ bool AutoUpdaterDialog::processUpdate(const std::vector& update_data) return false; } - const QString qappimage_path(QString::fromUtf8(appimage_path)); - if (!QFile::exists(qappimage_path)) + if (!FileSystem::FileExists(appimage_path)) { reportError(fmt::format("Current AppImage does not exist: {}", appimage_path)); return false; } - const QString new_appimage_path(qappimage_path + QStringLiteral(".new")); - const QString 
backup_appimage_path(qappimage_path + QStringLiteral(".backup")); + const std::string new_appimage_path = fmt::format("{}.new", appimage_path); + const std::string backup_appimage_path = fmt::format("{}.backup", appimage_path); INFO_LOG("APPIMAGE = {}", appimage_path); - INFO_LOG("Backup AppImage path = {}", backup_appimage_path.toStdString()); - INFO_LOG("New AppImage path = {}", new_appimage_path.toStdString()); + INFO_LOG("Backup AppImage path = {}", backup_appimage_path); + INFO_LOG("New AppImage path = {}", new_appimage_path); // Remove old "new" appimage and existing backup appimage. - if (QFile::exists(new_appimage_path) && !QFile::remove(new_appimage_path)) + Error error; + if (FileSystem::FileExists(new_appimage_path.c_str()) && !FileSystem::DeleteFile(new_appimage_path.c_str(), &error)) { - reportError(fmt::format("Failed to remove old destination AppImage: {}", new_appimage_path.toStdString())); + reportError( + fmt::format("Failed to remove old destination AppImage: {}:\n{}", new_appimage_path, error.GetDescription())); return false; } - if (QFile::exists(backup_appimage_path) && !QFile::remove(backup_appimage_path)) + if (FileSystem::FileExists(backup_appimage_path.c_str()) && + !FileSystem::DeleteFile(backup_appimage_path.c_str(), &error)) { - reportError(fmt::format("Failed to remove old backup AppImage: {}", new_appimage_path.toStdString())); + reportError( + fmt::format("Failed to remove old backup AppImage: {}:\n{}", backup_appimage_path, error.GetDescription())); return false; } // Write "new" appimage. { // We want to copy the permissions from the old appimage to the new one. 
- QFile old_file(qappimage_path); - const QFileDevice::Permissions old_permissions = old_file.permissions(); - QFile new_file(new_appimage_path); - if (!new_file.open(QIODevice::WriteOnly) || - new_file.write(reinterpret_cast(update_data.data()), static_cast(update_data.size())) != - static_cast(update_data.size()) || - !new_file.setPermissions(old_permissions)) + static constexpr int permission_mask = S_IRWXU | S_IRWXG | S_IRWXO; + struct stat old_stat; + if (!FileSystem::StatFile(appimage_path, &old_stat, &error)) { - QFile::remove(new_appimage_path); - reportError(fmt::format("Failed to write new destination AppImage: {}", new_appimage_path.toStdString())); + reportError(fmt::format("Failed to get old AppImage {} permissions:\n{}", appimage_path, error.GetDescription())); + return false; + } + + // We do this as a manual write here, rather than using WriteAtomicUpdatedFile(), because we want to write the file + // and set the permissions as one atomic operation. + FileSystem::ManagedCFilePtr fp = FileSystem::OpenManagedCFile(new_appimage_path.c_str(), "wb", &error); + bool success = static_cast(fp); + if (fp) + { + if (std::fwrite(update_data.data(), update_data.size(), 1, fp.get()) == 1 && std::fflush(fp.get()) == 0) + { + const int fd = fileno(fp.get()); + if (fd >= 0) + { + if (fchmod(fd, old_stat.st_mode & permission_mask) != 0) + { + error.SetErrno("fchmod() failed: ", errno); + success = false; + } + } + else + { + error.SetErrno("fileno() failed: ", errno); + success = false; + } + } + else + { + error.SetErrno("fwrite() failed: ", errno); + success = false; + } + + fp.reset(); + if (!success) + FileSystem::DeleteFile(new_appimage_path.c_str()); + } + + if (!success) + { + reportError( + fmt::format("Failed to write new destination AppImage: {}:\n{}", new_appimage_path, error.GetDescription())); return false; } } // Rename "old" appimage. 
- if (!QFile::rename(qappimage_path, backup_appimage_path)) + if (!FileSystem::RenamePath(appimage_path, backup_appimage_path.c_str(), &error)) { - reportError(fmt::format("Failed to rename old AppImage to {}", backup_appimage_path.toStdString())); - QFile::remove(new_appimage_path); + reportError(fmt::format("Failed to rename old AppImage to {}:\n{}", backup_appimage_path, error.GetDescription())); + FileSystem::DeleteFile(new_appimage_path.c_str()); return false; } // Rename "new" appimage. - if (!QFile::rename(new_appimage_path, qappimage_path)) + if (!FileSystem::RenamePath(new_appimage_path.c_str(), appimage_path, &error)) { - reportError(fmt::format("Failed to rename new AppImage to {}", qappimage_path.toStdString())); + reportError(fmt::format("Failed to rename new AppImage to {}:\n{}", appimage_path, error.GetDescription())); return false; } // Execute new appimage. QProcess* new_process = new QProcess(); - new_process->setProgram(qappimage_path); + new_process->setProgram(QString::fromUtf8(appimage_path)); new_process->setArguments(QStringList{QStringLiteral("-updatecleanup")}); if (!new_process->startDetached()) { @@ -910,16 +949,14 @@ void AutoUpdaterDialog::cleanupAfterUpdate() if (!appimage_path) return; - const QString qappimage_path(QString::fromUtf8(appimage_path)); - const QString backup_appimage_path(qappimage_path + QStringLiteral(".backup")); - if (!QFile::exists(backup_appimage_path)) + const std::string backup_appimage_path = fmt::format("{}.backup", appimage_path); + if (!FileSystem::FileExists(backup_appimage_path.c_str())) return; - INFO_LOG(QStringLiteral("Removing backup AppImage %1").arg(backup_appimage_path).toStdString().c_str()); - if (!QFile::remove(backup_appimage_path)) - { - ERROR_LOG(QStringLiteral("Failed to remove backup AppImage %1").arg(backup_appimage_path).toStdString().c_str()); - } + Error error; + INFO_LOG("Removing backup AppImage: {}", backup_appimage_path); + if (!FileSystem::DeleteFile(backup_appimage_path.c_str(), 
&error)) + ERROR_LOG("Failed to remove backup AppImage {}: {}", backup_appimage_path, error.GetDescription()); } #else From eb390a9b5d0a1fc348b6ff630773c4f23d86ec0b Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 16:58:48 +1000 Subject: [PATCH 32/35] GPU/TextureCache: Specify max hash cache size/memory in config --- src/core/gpu_hw_texture_cache.cpp | 45 +++++++++++++++++++------------ src/core/settings.cpp | 30 ++++++++++++++++++++- src/core/settings.h | 8 +++++- 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index d76e2f9e1..5c2484914 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -509,7 +509,6 @@ struct GPUTextureCacheState { Settings::TextureReplacementSettings::Configuration config; size_t hash_cache_memory_usage = 0; - size_t max_hash_cache_memory_usage = 1ULL * 1024ULL * 1024ULL * 1024ULL; // 2GB VRAMWrite* last_vram_write = nullptr; bool track_vram_writes = false; @@ -2114,10 +2113,11 @@ void GPUTextureCache::Compact() static constexpr u32 MAX_HASH_CACHE_AGE = 600; // Maximum number of textures which are permitted in the hash cache at the end of the frame. - static constexpr u32 MAX_HASH_CACHE_SIZE = 500; + const u32 max_hash_cache_size = s_state.config.max_hash_cache_entries; + const size_t max_hash_cache_memory = static_cast(s_state.config.max_hash_cache_vram_usage_mb) * 1048576; - bool might_need_cache_purge = (s_state.hash_cache.size() > MAX_HASH_CACHE_SIZE || - s_state.hash_cache_memory_usage >= s_state.max_hash_cache_memory_usage); + bool might_need_cache_purge = + (s_state.hash_cache.size() > max_hash_cache_size || s_state.hash_cache_memory_usage >= max_hash_cache_memory); if (might_need_cache_purge) s_state.hash_cache_purge_list.clear(); @@ -2136,8 +2136,8 @@ void GPUTextureCache::Compact() // We might free up enough just with "normal" removals above. 
if (might_need_cache_purge) { - might_need_cache_purge = (s_state.hash_cache.size() > MAX_HASH_CACHE_SIZE || - s_state.hash_cache_memory_usage >= s_state.max_hash_cache_memory_usage); + might_need_cache_purge = + (s_state.hash_cache.size() > max_hash_cache_size || s_state.hash_cache_memory_usage >= max_hash_cache_memory); if (might_need_cache_purge) s_state.hash_cache_purge_list.emplace_back(it, static_cast(e.last_used_frame)); } @@ -2148,12 +2148,14 @@ void GPUTextureCache::Compact() // Pushing to a list, sorting, and removing ends up faster than re-iterating the map. if (might_need_cache_purge) { + DEV_LOG("Force compacting hash cache, count = {}, size = {:.1f} MB", s_state.hash_cache.size(), + static_cast(s_state.hash_cache_memory_usage) / 1048576.0f); + std::sort(s_state.hash_cache_purge_list.begin(), s_state.hash_cache_purge_list.end(), [](const auto& lhs, const auto& rhs) { return lhs.second < rhs.second; }); size_t purge_index = 0; - while (s_state.hash_cache.size() > MAX_HASH_CACHE_SIZE || - s_state.hash_cache_memory_usage >= s_state.max_hash_cache_memory_usage) + while (s_state.hash_cache.size() > max_hash_cache_size || s_state.hash_cache_memory_usage >= max_hash_cache_memory) { if (purge_index == s_state.hash_cache_purge_list.size()) { @@ -2164,6 +2166,9 @@ void GPUTextureCache::Compact() RemoveFromHashCache(s_state.hash_cache_purge_list[purge_index++].first); } + + DEV_LOG("Finished compacting hash cache, count = {}, size = {:.1f} MB", s_state.hash_cache.size(), + static_cast(s_state.hash_cache_memory_usage) / 1048576.0f); } CompactTextureReplacementGPUImages(); @@ -3056,9 +3061,9 @@ void GPUTextureCache::CompactTextureReplacementGPUImages() if (s_state.gpu_replacement_image_cache_vram_usage <= max_usage) return; - VERBOSE_LOG("Compacting replacement GPU image cache, count = {}, size = {:.1f} MB", - s_state.gpu_replacement_image_cache.size(), - static_cast(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f); + DEV_LOG("Compacting replacement 
GPU image cache, count = {}, size = {:.1f} MB", + s_state.gpu_replacement_image_cache.size(), + static_cast(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f); const u32 frame_number = System::GetFrameNumber(); s_state.gpu_replacement_image_cache_purge_list.reserve(s_state.gpu_replacement_image_cache.size()); @@ -3086,9 +3091,9 @@ void GPUTextureCache::CompactTextureReplacementGPUImages() s_state.gpu_replacement_image_cache_purge_list.clear(); - VERBOSE_LOG("Finished compacting replacement GPU image cache, count = {}, size = {:.1f} MB", - s_state.gpu_replacement_image_cache.size(), - static_cast(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f); + DEV_LOG("Finished compacting replacement GPU image cache, count = {}, size = {:.1f} MB", + s_state.gpu_replacement_image_cache.size(), + static_cast(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f); } void GPUTextureCache::PreloadReplacementTextures() @@ -3241,9 +3246,6 @@ bool GPUTextureCache::LoadLocalConfiguration(bool load_vram_write_replacement_al .value_or(static_cast(s_state.config.reduce_palette_range)); s_state.config.convert_copies_to_writes = GetOptionalTFromObject(root, "ConvertCopiesToWrites") .value_or(static_cast(s_state.config.convert_copies_to_writes)); - s_state.config.replacement_scale_linear_filter = - GetOptionalTFromObject(root, "ReplacementScaleLinearFilter") - .value_or(static_cast(s_state.config.replacement_scale_linear_filter)); s_state.config.max_vram_write_splits = GetOptionalTFromObject(root, "MaxVRAMWriteSplits").value_or(s_state.config.max_vram_write_splits); s_state.config.max_vram_write_coalesce_width = GetOptionalTFromObject(root, "MaxVRAMWriteCoalesceWidth") @@ -3258,6 +3260,15 @@ bool GPUTextureCache::LoadLocalConfiguration(bool load_vram_write_replacement_al .value_or(s_state.config.vram_write_dump_width_threshold); s_state.config.vram_write_dump_height_threshold = GetOptionalTFromObject(root, "DumpVRAMWriteHeightThreshold") 
.value_or(s_state.config.vram_write_dump_height_threshold); + s_state.config.max_hash_cache_entries = + GetOptionalTFromObject(root, "MaxHashCacheEntries").value_or(s_state.config.max_hash_cache_entries); + s_state.config.max_hash_cache_vram_usage_mb = + GetOptionalTFromObject(root, "MaxHashCacheVRAMUsageMB").value_or(s_state.config.max_hash_cache_vram_usage_mb); + s_state.config.max_replacement_cache_vram_usage_mb = GetOptionalTFromObject(root, "MaxReplacementCacheVRAMUsage") + .value_or(s_state.config.max_replacement_cache_vram_usage_mb); + s_state.config.replacement_scale_linear_filter = + GetOptionalTFromObject(root, "ReplacementScaleLinearFilter") + .value_or(static_cast(s_state.config.replacement_scale_linear_filter)); if (load_vram_write_replacement_aliases || load_texture_replacement_aliases) { diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 29ef1facb..c089466e3 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -454,8 +454,15 @@ void Settings::Load(const SettingsInterface& si, const SettingsInterface& contro texture_replacements.config.replacement_scale_linear_filter = si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", false); + texture_replacements.config.max_hash_cache_entries = + si.GetUIntValue("TextureReplacements", "MaxHashCacheEntries", + TextureReplacementSettings::Configuration::DEFAULT_MAX_HASH_CACHE_ENTRIES); + texture_replacements.config.max_hash_cache_vram_usage_mb = + si.GetUIntValue("TextureReplacements", "MaxHashCacheVRAMUsageMB", + TextureReplacementSettings::Configuration::DEFAULT_MAX_HASH_CACHE_VRAM_USAGE_MB); texture_replacements.config.max_replacement_cache_vram_usage_mb = - si.GetUIntValue("TextureReplacements", "MaxReplacementCacheVRAMUsage", 512); + si.GetUIntValue("TextureReplacements", "MaxReplacementCacheVRAMUsage", + TextureReplacementSettings::Configuration::DEFAULT_MAX_REPLACEMENT_CACHE_VRAM_USAGE_MB); texture_replacements.config.max_vram_write_splits = 
si.GetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", 0u); texture_replacements.config.max_vram_write_coalesce_width = @@ -717,6 +724,9 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", texture_replacements.config.replacement_scale_linear_filter); + si.SetUIntValue("TextureReplacements", "MaxHashCacheEntries", texture_replacements.config.max_hash_cache_entries); + si.SetUIntValue("TextureReplacements", "MaxHashCacheVRAMUsageMB", + texture_replacements.config.max_hash_cache_vram_usage_mb); si.SetUIntValue("TextureReplacements", "MaxReplacementCacheVRAMUsage", texture_replacements.config.max_replacement_cache_vram_usage_mb); @@ -870,6 +880,21 @@ std::string Settings::TextureReplacementSettings::Configuration::ExportToYAML(bo {}DumpVRAMWriteWidthThreshold: {} {}DumpVRAMWriteHeightThreshold: {} +# Sets the maximum size of the hash cache that manages texture replacements. +# Generally the default is sufficient, but some games may require increasing the +# size. Do not set too high, otherwise mobile drivers will break. +{}MaxHashCacheEntries: {} + +# Sets the maximum amount of VRAM in megabytes that the hash cache can utilize. +# Keep in mind your target system requirements, using too much VRAM will result +# in swapping and significantly decreased performance. +{}MaxHashCacheVRAMUsageMB: {} + +# Sets the maximum amount of VRAM in megabytes that are reserved for the cache of +# replacement textures. The cache usage for any given texture is approximately the +# same size as the uncompressed source image on disk. +{}MaxReplacementCacheVRAMUsage: {} + # Enables the use of a bilinear filter when scaling replacement textures. 
# If more than one replacement texture in a 256x256 texture page has a different # scaling over the native resolution, or the texture page is not covered, a @@ -901,6 +926,9 @@ std::string Settings::TextureReplacementSettings::Configuration::ExportToYAML(bo comment_str, texture_dump_height_threshold, // DumpTextureHeightThreshold comment_str, vram_write_dump_width_threshold, // DumpVRAMWriteWidthThreshold comment_str, vram_write_dump_height_threshold, // DumpVRAMWriteHeightThreshold + comment_str, max_hash_cache_entries, // MaxHashCacheEntries + comment_str, max_hash_cache_vram_usage_mb, // MaxHashCacheVRAMUsageMB + comment_str, max_replacement_cache_vram_usage_mb, // MaxReplacementCacheVRAMUsage comment_str, replacement_scale_linear_filter); // ReplacementScaleLinearFilter } diff --git a/src/core/settings.h b/src/core/settings.h index 9bdb63f2f..70f054cf0 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -234,6 +234,10 @@ struct Settings { struct Configuration { + static constexpr u32 DEFAULT_MAX_HASH_CACHE_ENTRIES = 500; + static constexpr u32 DEFAULT_MAX_HASH_CACHE_VRAM_USAGE_MB = 2048; + static constexpr u32 DEFAULT_MAX_REPLACEMENT_CACHE_VRAM_USAGE_MB = 512; + constexpr Configuration() = default; bool dump_texture_pages : 1 = false; @@ -245,7 +249,9 @@ struct Settings bool convert_copies_to_writes : 1 = false; bool replacement_scale_linear_filter = false; - u32 max_replacement_cache_vram_usage_mb = 512; + u32 max_hash_cache_entries = DEFAULT_MAX_HASH_CACHE_ENTRIES; + u32 max_hash_cache_vram_usage_mb = DEFAULT_MAX_HASH_CACHE_VRAM_USAGE_MB; + u32 max_replacement_cache_vram_usage_mb = DEFAULT_MAX_REPLACEMENT_CACHE_VRAM_USAGE_MB; u32 max_vram_write_splits = 0; u32 max_vram_write_coalesce_width = 0; From 97700b85deeafe17be14947630a8defd154447a6 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 17:10:53 +1000 Subject: [PATCH 33/35] GPU/HW: Clear ROV depth on fill/copy/write It wasn't being specified before, whoops. 
--- src/core/gpu_hw.cpp | 10 +++++----- src/core/gpu_hw_shadergen.cpp | 33 +++++++++++++++++++++++++-------- src/core/gpu_hw_shadergen.h | 8 +++++--- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 3a81f53d5..8540f4328 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -1467,7 +1467,7 @@ bool GPU_HW::CompilePipelines(Error* error) std::unique_ptr fs = g_gpu_device->CreateShader( GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced), - m_write_mask_as_depth), + m_write_mask_as_depth, needs_rov_depth), error); if (!fs) return false; @@ -1485,9 +1485,9 @@ bool GPU_HW::CompilePipelines(Error* error) // VRAM copy { - std::unique_ptr fs = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateVRAMCopyFragmentShader(m_write_mask_as_depth), error); + std::unique_ptr fs = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateVRAMCopyFragmentShader(m_write_mask_as_depth, needs_rov_depth), error); if (!fs) return false; @@ -1516,7 +1516,7 @@ bool GPU_HW::CompilePipelines(Error* error) const bool use_ssbo = features.texture_buffers_emulated_with_ssbo; std::unique_ptr fs = g_gpu_device->CreateShader( GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateVRAMWriteFragmentShader(use_buffer, use_ssbo, m_write_mask_as_depth), error); + shadergen.GenerateVRAMWriteFragmentShader(use_buffer, use_ssbo, m_write_mask_as_depth, needs_rov_depth), error); if (!fs) return false; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 55db9921a..e13cf4dbd 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1453,14 +1453,17 @@ uint SampleVRAM(uint2 coords) return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, 
bool use_ssbo, - bool write_mask_as_depth) const +std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, bool use_ssbo, bool write_mask_as_depth, + bool write_depth_as_rt) const { + Assert(!write_mask_as_depth || (write_mask_as_depth != write_depth_as_rt)); + std::stringstream ss; WriteHeader(ss); WriteColorConversionFunctions(ss); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", write_mask_as_depth); + DefineMacro(ss, "WRITE_DEPTH_AS_RT", write_depth_as_rt); DefineMacro(ss, "USE_BUFFER", use_buffer); ss << "CONSTANT float2 VRAM_SIZE = float2(" << VRAM_WIDTH << ".0, " << VRAM_HEIGHT << ".0);\n"; @@ -1496,7 +1499,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n"; } - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, write_mask_as_depth); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1 + BoolToUInt32(write_depth_as_rt), false, write_mask_as_depth); ss << R"( { float2 coords = floor(v_pos.xy / u_resolution_scale); @@ -1523,20 +1526,25 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b o_col0 = RGBA5551ToRGBA8(value); #if WRITE_MASK_AS_DEPTH o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0; +#elif WRITE_DEPTH_AS_RT + o_col1 = float4(1.0f, 0.0f, 0.0f, 0.0f); #endif })"; return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(bool write_mask_as_depth) const +std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(bool write_mask_as_depth, bool write_depth_as_rt) const { + Assert(!write_mask_as_depth || (write_mask_as_depth != write_depth_as_rt)); + // TODO: This won't currently work because we can't bind the texture to both the shader and framebuffer. 
const bool msaa = false; std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", write_mask_as_depth); + DefineMacro(ss, "WRITE_DEPTH_AS_RT", write_depth_as_rt); DefineMacro(ss, "MSAA_COPY", msaa); DeclareUniformBuffer(ss, @@ -1545,7 +1553,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(bool write_mask_as_ true); DeclareTexture(ss, "samp0", 0, msaa); - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, write_mask_as_depth, false, false, msaa); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1 + BoolToUInt32(write_depth_as_rt), false, write_mask_as_depth, false, + false, msaa); ss << R"( { float2 dst_coords = floor(v_pos.xy); @@ -1575,25 +1584,31 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(bool write_mask_as_ o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a); #if WRITE_MASK_AS_DEPTH o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0)); +#elif WRITE_DEPTH_AS_RT + o_col1 = float4(1.0f, 0.0f, 0.0f, 0.0f); #endif })"; return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced, - bool write_mask_as_depth) const +std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced, bool write_mask_as_depth, + bool write_depth_as_rt) const { + Assert(!write_mask_as_depth || (write_mask_as_depth != write_depth_as_rt)); + std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", write_mask_as_depth); + DefineMacro(ss, "WRITE_DEPTH_AS_RT", write_depth_as_rt); DefineMacro(ss, "WRAPPED", wrapped); DefineMacro(ss, "INTERLACED", interlaced); DeclareUniformBuffer( ss, {"uint2 u_dst_coords", "uint2 u_end_coords", "float4 u_fill_color", "uint u_interlaced_displayed_field"}, true); - DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1, false, write_mask_as_depth, false, false, false); + DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1 + 
BoolToUInt32(write_depth_as_rt), false, + write_mask_as_depth, false, false, false); ss << R"( { #if INTERLACED || WRAPPED @@ -1617,6 +1632,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool o_col0 = u_fill_color; #if WRITE_MASK_AS_DEPTH o_depth = u_fill_color.a; +#elif WRITE_DEPTH_AS_RT + o_col1 = float4(1.0f, 0.0f, 0.0f, 0.0f); #endif })"; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index e26228fb3..e68323564 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -26,9 +26,11 @@ public: std::string GenerateWireframeGeometryShader() const; std::string GenerateWireframeFragmentShader() const; std::string GenerateVRAMReadFragmentShader(u32 resolution_scale, u32 multisamples) const; - std::string GenerateVRAMWriteFragmentShader(bool use_buffer, bool use_ssbo, bool write_mask_as_depth) const; - std::string GenerateVRAMCopyFragmentShader(bool write_mask_as_depth) const; - std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced, bool write_mask_as_depth) const; + std::string GenerateVRAMWriteFragmentShader(bool use_buffer, bool use_ssbo, bool write_mask_as_depth, + bool write_depth_as_rt) const; + std::string GenerateVRAMCopyFragmentShader(bool write_mask_as_depth, bool write_depth_as_rt) const; + std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced, bool write_mask_as_depth, + bool write_depth_as_rt) const; std::string GenerateVRAMUpdateDepthFragmentShader(bool msaa) const; std::string GenerateVRAMExtractFragmentShader(u32 resolution_scale, u32 multisamples, bool color_24bit, bool depth_buffer) const; From a879c11c34224712fa406210532c3094c17210f9 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 27 Nov 2024 18:17:16 +1000 Subject: [PATCH 34/35] Qt: Prevent multiple update download button clicks --- src/duckstation-qt/autoupdaterdialog.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/duckstation-qt/autoupdaterdialog.cpp 
b/src/duckstation-qt/autoupdaterdialog.cpp index 7667c83f9..8dcc37951 100644 --- a/src/duckstation-qt/autoupdaterdialog.cpp +++ b/src/duckstation-qt/autoupdaterdialog.cpp @@ -511,6 +511,11 @@ void AutoUpdaterDialog::getChangesComplete(s32 status_code, std::vector<u8> resp void AutoUpdaterDialog::downloadUpdateClicked() { #ifdef AUTO_UPDATER_SUPPORTED + // Prevent multiple clicks of the button. + if (!m_ui.downloadAndInstall->isEnabled()) + return; + m_ui.downloadAndInstall->setEnabled(false); + m_display_messages = true; std::optional<bool> download_result; From 4e43b1ec8cc0a10310d1407b453c67051da8b89b Mon Sep 17 00:00:00 2001 From: dreamsyntax Date: Wed, 27 Nov 2024 01:30:02 -0700 Subject: [PATCH 35/35] Debugger/MemoryScanner: Add 'Freeze Selected' (#3334) --- src/core/memory_scanner.cpp | 8 ++++++++ src/core/memory_scanner.h | 2 ++ src/duckstation-qt/memoryscannerwindow.cpp | 20 ++++++++++++++++++++ src/duckstation-qt/memoryscannerwindow.h | 1 + src/duckstation-qt/memoryscannerwindow.ui | 12 +++++++++++- 5 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/core/memory_scanner.cpp b/src/core/memory_scanner.cpp index 4a75150fd..30b1a7f70 100644 --- a/src/core/memory_scanner.cpp +++ b/src/core/memory_scanner.cpp @@ -340,6 +340,14 @@ bool MemoryWatchList::AddEntry(std::string description, u32 address, MemoryAcces return true; } +bool MemoryWatchList::GetEntryFreeze(u32 index) const +{ + if (index >= m_entries.size()) + return false; + + return m_entries[index].freeze; +} + void MemoryWatchList::RemoveEntry(u32 index) { if (index >= m_entries.size()) diff --git a/src/core/memory_scanner.h b/src/core/memory_scanner.h index 659dc6d62..fefb46052 100644 --- a/src/core/memory_scanner.h +++ b/src/core/memory_scanner.h @@ -112,6 +112,8 @@ public: u32 GetEntryCount() const { return static_cast<u32>(m_entries.size()); } bool AddEntry(std::string description, u32 address, MemoryAccessSize size, bool is_signed, bool freeze); + bool GetEntryFreeze(u32 index) const; + void
RemoveEntry(u32 index); bool RemoveEntryByDescription(const char* description); bool RemoveEntryByAddress(u32 address); diff --git a/src/duckstation-qt/memoryscannerwindow.cpp b/src/duckstation-qt/memoryscannerwindow.cpp index ff56d6c46..30d0501a3 100644 --- a/src/duckstation-qt/memoryscannerwindow.cpp +++ b/src/duckstation-qt/memoryscannerwindow.cpp @@ -171,6 +171,7 @@ void MemoryScannerWindow::connectUi() }); connect(m_ui.scanAddWatch, &QPushButton::clicked, this, &MemoryScannerWindow::addToWatchClicked); connect(m_ui.scanAddManualAddress, &QPushButton::clicked, this, &MemoryScannerWindow::addManualWatchAddressClicked); + connect(m_ui.scanFreezeWatch, &QPushButton::clicked, this, &MemoryScannerWindow::freezeWatchClicked); connect(m_ui.scanRemoveWatch, &QPushButton::clicked, this, &MemoryScannerWindow::removeWatchClicked); connect(m_ui.scanTable, &QTableWidget::currentItemChanged, this, &MemoryScannerWindow::scanCurrentItemChanged); connect(m_ui.watchTable, &QTableWidget::currentItemChanged, this, &MemoryScannerWindow::watchCurrentItemChanged); @@ -208,6 +209,7 @@ void MemoryScannerWindow::enableUi(bool enabled) m_ui.scanAddWatch->setEnabled(enabled && !m_ui.scanTable->selectedItems().empty()); m_ui.watchTable->setEnabled(enabled); m_ui.scanAddManualAddress->setEnabled(enabled); + m_ui.scanFreezeWatch->setEnabled(enabled && !m_ui.watchTable->selectedItems().empty()); m_ui.scanRemoveWatch->setEnabled(enabled && !m_ui.watchTable->selectedItems().empty()); } @@ -330,6 +332,22 @@ void MemoryScannerWindow::addManualWatchAddressClicked() updateWatch(); } +void MemoryScannerWindow::freezeWatchClicked() +{ + const int indexFirst = getSelectedWatchIndexFirst(); + const int indexLast = getSelectedWatchIndexLast(); + if (indexFirst < 0) + return; + + const bool freeze = m_watch.GetEntryFreeze(indexFirst); + + for (int index = indexLast; index >= indexFirst; index--) + { + m_watch.SetEntryFreeze(static_cast<u32>(index), !freeze); + updateWatch(); + } +} + void
MemoryScannerWindow::removeWatchClicked() { const int indexFirst = getSelectedWatchIndexFirst(); @@ -351,6 +369,7 @@ void MemoryScannerWindow::scanCurrentItemChanged(QTableWidgetItem* current, QTab void MemoryScannerWindow::watchCurrentItemChanged(QTableWidgetItem* current, QTableWidgetItem* previous) { + m_ui.scanFreezeWatch->setEnabled((current != nullptr)); m_ui.scanRemoveWatch->setEnabled((current != nullptr)); } @@ -569,6 +588,7 @@ void MemoryScannerWindow::updateWatch() } m_ui.scanSaveWatch->setEnabled(!entries.empty()); + m_ui.scanFreezeWatch->setEnabled(false); m_ui.scanRemoveWatch->setEnabled(false); } diff --git a/src/duckstation-qt/memoryscannerwindow.h b/src/duckstation-qt/memoryscannerwindow.h index 0b47d7e02..cb7af539d 100644 --- a/src/duckstation-qt/memoryscannerwindow.h +++ b/src/duckstation-qt/memoryscannerwindow.h @@ -37,6 +37,7 @@ private Q_SLOTS: void addToWatchClicked(); void addManualWatchAddressClicked(); + void freezeWatchClicked(); void removeWatchClicked(); void scanCurrentItemChanged(QTableWidgetItem* current, QTableWidgetItem* previous); void watchCurrentItemChanged(QTableWidgetItem* current, QTableWidgetItem* previous); diff --git a/src/duckstation-qt/memoryscannerwindow.ui b/src/duckstation-qt/memoryscannerwindow.ui index 8b3baea37..73299a152 100644 --- a/src/duckstation-qt/memoryscannerwindow.ui +++ b/src/duckstation-qt/memoryscannerwindow.ui @@ -436,13 +436,23 @@ + + + + false + + + Freeze Selected Entries + + + false - Remove Selected Entries from Watch List + Remove Selected Entries