diff --git a/Source/Core/Common/GL/GLUtil.h b/Source/Core/Common/GL/GLUtil.h index 1ec63ebad0..2d595e5286 100644 --- a/Source/Core/Common/GL/GLUtil.h +++ b/Source/Core/Common/GL/GLUtil.h @@ -10,6 +10,9 @@ class GLContext; +// Texture which we use to not disturb the other bindings. +constexpr GLenum GL_MUTABLE_TEXTURE_INDEX = GL_TEXTURE10; + namespace GLUtil { GLuint CompileProgram(const std::string& vertexShader, const std::string& fragmentShader); diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index a59b646618..3418e97629 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -137,8 +137,6 @@ const ConfigInfo GFX_STEREO_DEPTH_PERCENTAGE{ const ConfigInfo GFX_HACK_EFB_ACCESS_ENABLE{{System::GFX, "Hacks", "EFBAccessEnable"}, true}; const ConfigInfo GFX_HACK_BBOX_ENABLE{{System::GFX, "Hacks", "BBoxEnable"}, false}; -const ConfigInfo GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION{ - {System::GFX, "Hacks", "BBoxPreferStencilImplementation"}, false}; const ConfigInfo GFX_HACK_FORCE_PROGRESSIVE{{System::GFX, "Hacks", "ForceProgressive"}, true}; const ConfigInfo GFX_HACK_SKIP_EFB_COPY_TO_RAM{{System::GFX, "Hacks", "EFBToTextureEnable"}, true}; diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 73edbe8cf4..8e29c13a77 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -102,7 +102,6 @@ extern const ConfigInfo GFX_STEREO_DEPTH_PERCENTAGE; extern const ConfigInfo GFX_HACK_EFB_ACCESS_ENABLE; extern const ConfigInfo GFX_HACK_BBOX_ENABLE; -extern const ConfigInfo GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION; extern const ConfigInfo GFX_HACK_FORCE_PROGRESSIVE; extern const ConfigInfo GFX_HACK_SKIP_EFB_COPY_TO_RAM; extern const ConfigInfo GFX_HACK_SKIP_XFB_COPY_TO_RAM; diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp 
b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index 146fdd1120..b1021f7827 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -114,7 +114,6 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location) Config::GFX_HACK_EFB_ACCESS_ENABLE.location, Config::GFX_HACK_BBOX_ENABLE.location, - Config::GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION.location, Config::GFX_HACK_FORCE_PROGRESSIVE.location, Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM.location, Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM.location, diff --git a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp index 9909dad3ae..950d647f3c 100644 --- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp @@ -152,10 +152,9 @@ void EnhancementsWidget::ConnectWidgets() void EnhancementsWidget::LoadPPShaders() { const bool anaglyph = g_Config.stereo_mode == StereoMode::Anaglyph; - std::vector shaders = - anaglyph ? PostProcessingShaderImplementation::GetAnaglyphShaderList( - g_Config.backend_info.api_type) : - PostProcessingShaderImplementation::GetShaderList(g_Config.backend_info.api_type); + std::vector shaders = anaglyph ? 
+ VideoCommon::PostProcessing::GetAnaglyphShaderList() : + VideoCommon::PostProcessing::GetShaderList(); m_pp_effect->clear(); @@ -187,7 +186,7 @@ void EnhancementsWidget::LoadPPShaders() tr("%1 doesn't support this feature.") .arg(tr(g_video_backend->GetDisplayName().c_str()))); - PostProcessingShaderConfiguration pp_shader; + VideoCommon::PostProcessingConfiguration pp_shader; if (selected_shader != "(off)" && supports_postprocessing) { pp_shader.LoadShader(selected_shader); @@ -266,7 +265,7 @@ void EnhancementsWidget::SaveSettings() "(off)" : m_pp_effect->currentText().toStdString()); - PostProcessingShaderConfiguration pp_shader; + VideoCommon::PostProcessingConfiguration pp_shader; if (Config::Get(Config::GFX_ENHANCE_POST_SHADER) != "(off)") { pp_shader.LoadShader(Config::Get(Config::GFX_ENHANCE_POST_SHADER)); diff --git a/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.cpp b/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.cpp index 8de48e7cf3..3a5bfcad28 100644 --- a/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.cpp @@ -25,7 +25,7 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/VideoConfig.h" -using ConfigurationOption = PostProcessingShaderConfiguration::ConfigurationOption; +using ConfigurationOption = VideoCommon::PostProcessingConfiguration::ConfigurationOption; using OptionType = ConfigurationOption::OptionType; PostProcessingConfigWindow::PostProcessingConfigWindow(EnhancementsWidget* parent, @@ -38,7 +38,7 @@ PostProcessingConfigWindow::PostProcessingConfigWindow(EnhancementsWidget* paren } else { - m_post_processor = new PostProcessingShaderConfiguration(); + m_post_processor = new VideoCommon::PostProcessingConfiguration(); m_post_processor->LoadShader(m_shader); } @@ -61,7 +61,8 @@ PostProcessingConfigWindow::~PostProcessingConfigWindow() void PostProcessingConfigWindow::PopulateGroups() { - const 
PostProcessingShaderConfiguration::ConfigMap& config_map = m_post_processor->GetOptions(); + const VideoCommon::PostProcessingConfiguration::ConfigMap& config_map = + m_post_processor->GetOptions(); auto config_groups = std::vector>(); for (const auto& it : config_map) diff --git a/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.h b/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.h index 4d78404981..1e54620401 100644 --- a/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.h +++ b/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.h @@ -35,7 +35,7 @@ private: { public: explicit ConfigGroup( - const PostProcessingShaderConfiguration::ConfigurationOption* config_option); + const VideoCommon::PostProcessingConfiguration::ConfigurationOption* config_option); const std::string& GetGUIName() const noexcept; const std::string& GetParent() const noexcept; @@ -57,7 +57,7 @@ private: std::vector m_sliders; std::vector m_value_boxes; - const PostProcessingShaderConfiguration::ConfigurationOption* m_config_option; + const VideoCommon::PostProcessingConfiguration::ConfigurationOption* m_config_option; std::vector> m_subgroups; }; void Create(); @@ -72,7 +72,7 @@ private: QDialogButtonBox* m_buttons; const std::string& m_shader; - PostProcessingShaderConfiguration* m_post_processor; + VideoCommon::PostProcessingConfiguration* m_post_processor; std::unordered_map m_config_map; std::vector> m_config_groups; }; diff --git a/Source/Core/VideoBackends/D3D/BoundingBox.cpp b/Source/Core/VideoBackends/D3D/BoundingBox.cpp index e52ebb9c6d..abbd7036a1 100644 --- a/Source/Core/VideoBackends/D3D/BoundingBox.cpp +++ b/Source/Core/VideoBackends/D3D/BoundingBox.cpp @@ -5,6 +5,7 @@ #include "VideoBackends/D3D/BoundingBox.h" #include "Common/CommonTypes.h" #include "Common/MsgHandler.h" +#include "VideoBackends/D3D/D3DState.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -54,6 +55,7 @@ void BBox::Init() hr = 
D3D::device->CreateUnorderedAccessView(s_bbox_buffer, &UAVdesc, &s_bbox_uav); CHECK(SUCCEEDED(hr), "Create BoundingBox UAV."); D3D::SetDebugObjectName(s_bbox_uav, "BoundingBox UAV"); + D3D::stateman->SetOMUAV(s_bbox_uav); } } @@ -83,4 +85,4 @@ int BBox::Get(int index) D3D::context->Unmap(s_bbox_staging_buffer, 0); return data; } -}; +}; // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/CMakeLists.txt b/Source/Core/VideoBackends/D3D/CMakeLists.txt index 33976f93d7..facd2b07e3 100644 --- a/Source/Core/VideoBackends/D3D/CMakeLists.txt +++ b/Source/Core/VideoBackends/D3D/CMakeLists.txt @@ -3,42 +3,22 @@ add_library(videod3d BoundingBox.h D3DBase.cpp D3DBase.h - D3DBlob.cpp - D3DBlob.h - D3DShader.cpp - D3DShader.h D3DState.cpp D3DState.h - D3DTexture.cpp - D3DTexture.h - D3DUtil.cpp - D3DUtil.h DXPipeline.cpp DXPipeline.h DXShader.cpp DXShader.h DXTexture.cpp DXTexture.h - FramebufferManager.cpp - FramebufferManager.h - GeometryShaderCache.cpp - GeometryShaderCache.h main.cpp NativeVertexFormat.cpp PerfQuery.cpp PerfQuery.h - PixelShaderCache.cpp - PixelShaderCache.h - PSTextureEncoder.cpp - PSTextureEncoder.h Render.cpp Render.h - TextureCache.cpp - TextureCache.h VertexManager.cpp VertexManager.h - VertexShaderCache.cpp - VertexShaderCache.h VideoBackend.h ) diff --git a/Source/Core/VideoBackends/D3D/D3D.vcxproj b/Source/Core/VideoBackends/D3D/D3D.vcxproj index 54549d2f00..5721e4d3ce 100644 --- a/Source/Core/VideoBackends/D3D/D3D.vcxproj +++ b/Source/Core/VideoBackends/D3D/D3D.vcxproj @@ -38,46 +38,26 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/Source/Core/VideoBackends/D3D/D3D.vcxproj.filters b/Source/Core/VideoBackends/D3D/D3D.vcxproj.filters index d3f7f7b01d..4ca2cfa179 100644 --- a/Source/Core/VideoBackends/D3D/D3D.vcxproj.filters +++ b/Source/Core/VideoBackends/D3D/D3D.vcxproj.filters @@ -12,51 +12,21 @@ D3D - - D3D - - - D3D - - - D3D - - - D3D - D3D - - Render - - - Render - Render Render - - Render - - - Render - Render - - Render - 
Render - - Render - Render @@ -75,48 +45,18 @@ D3D - - D3D - - - D3D - - - D3D - - - D3D - D3D - - Render - - - Render - Render - - Render - - - Render - Render - - Render - Render - - Render - Render diff --git a/Source/Core/VideoBackends/D3D/D3DBase.cpp b/Source/Core/VideoBackends/D3D/D3DBase.cpp index 654cf17c95..9a86cd3c0c 100644 --- a/Source/Core/VideoBackends/D3D/D3DBase.cpp +++ b/Source/Core/VideoBackends/D3D/D3DBase.cpp @@ -12,7 +12,7 @@ #include "Core/ConfigManager.h" #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DTexture.h" +#include "VideoBackends/D3D/DXTexture.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -42,7 +42,8 @@ IDXGISwapChain1* swapchain = nullptr; static IDXGIFactory2* s_dxgi_factory; static ID3D11Debug* s_debug; static D3D_FEATURE_LEVEL s_featlevel; -static D3DTexture2D* s_backbuf; +static std::unique_ptr s_swap_chain_texture; +static std::unique_ptr s_swap_chain_framebuffer; static std::vector s_aa_modes; // supported AA modes of the current adapter @@ -244,18 +245,40 @@ static bool SupportsBPTCTextures(ID3D11Device* dev) return (bc7_support & D3D11_FORMAT_SUPPORT_TEXTURE2D) != 0; } -static bool CreateSwapChainTextures() +static bool CreateSwapChainFramebuffer() { - ID3D11Texture2D* buf; - HRESULT hr = swapchain->GetBuffer(0, IID_ID3D11Texture2D, (void**)&buf); + ID3D11Texture2D* texture; + HRESULT hr = swapchain->GetBuffer(0, IID_ID3D11Texture2D, (void**)&texture); CHECK(SUCCEEDED(hr), "GetBuffer for swap chain failed with HRESULT %08X", hr); if (FAILED(hr)) return false; - s_backbuf = new D3DTexture2D(buf, D3D11_BIND_RENDER_TARGET); - SAFE_RELEASE(buf); - SetDebugObjectName(s_backbuf->GetTex(), "backbuffer texture"); - SetDebugObjectName(s_backbuf->GetRTV(), "backbuffer render target view"); + D3D11_TEXTURE2D_DESC desc; + texture->GetDesc(&desc); + + s_swap_chain_texture = std::make_unique( + TextureConfig(desc.Width, desc.Height, desc.MipLevels, desc.ArraySize, 
desc.SampleDesc.Count, + AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget), + texture, nullptr, nullptr); + + ID3D11RenderTargetView* rtv; + CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(texture, D3D11_RTV_DIMENSION_TEXTURE2DARRAY, desc.Format, + 0, 0, desc.ArraySize); + hr = device->CreateRenderTargetView(texture, &rtv_desc, &rtv); + CHECK(SUCCEEDED(hr), "Create render target view for swap chain"); + if (FAILED(hr)) + { + s_swap_chain_texture.reset(); + return false; + } + + SetDebugObjectName(texture, "backbuffer texture"); + SetDebugObjectName(rtv, "backbuffer render target view"); + s_swap_chain_framebuffer = std::make_unique( + s_swap_chain_texture.get(), nullptr, AbstractTextureFormat::RGBA8, + AbstractTextureFormat::Undefined, desc.Width, desc.Height, desc.ArraySize, + desc.SampleDesc.Count, rtv, nullptr, nullptr); + return true; } @@ -300,7 +323,7 @@ static bool CreateSwapChain(HWND hWnd) return false; } - if (!CreateSwapChainTextures()) + if (!CreateSwapChainFramebuffer()) { SAFE_RELEASE(swapchain); return false; @@ -451,7 +474,8 @@ void Close() // release all bound resources context->ClearState(); - SAFE_RELEASE(s_backbuf); + s_swap_chain_framebuffer.reset(); + s_swap_chain_texture.reset(); SAFE_RELEASE(swapchain); SAFE_DELETE(stateman); context->Flush(); // immediately destroy device objects @@ -527,9 +551,13 @@ const char* ComputeShaderVersionString() return "cs_4_0"; } -D3DTexture2D* GetBackBuffer() +DXTexture* GetSwapChainTexture() { - return s_backbuf; + return s_swap_chain_texture.get(); +} +DXFramebuffer* GetSwapChainFramebuffer() +{ + return s_swap_chain_framebuffer.get(); } bool BGRATexturesSupported() { @@ -568,7 +596,8 @@ u32 GetMaxTextureSize(D3D_FEATURE_LEVEL feature_level) void Reset(HWND new_wnd) { - SAFE_RELEASE(s_backbuf); + s_swap_chain_framebuffer.reset(); + s_swap_chain_texture.reset(); if (swapchain) { @@ -583,10 +612,11 @@ void Reset(HWND new_wnd) void ResizeSwapChain() { - SAFE_RELEASE(s_backbuf); + 
s_swap_chain_framebuffer.reset(); + s_swap_chain_texture.reset(); const UINT swap_chain_flags = AllowTearingSupported() ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0; swapchain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_R8G8B8A8_UNORM, swap_chain_flags); - if (!CreateSwapChainTextures()) + if (!CreateSwapChainFramebuffer()) { PanicAlert("Failed to get swap chain textures"); SAFE_RELEASE(swapchain); diff --git a/Source/Core/VideoBackends/D3D/D3DBase.h b/Source/Core/VideoBackends/D3D/D3DBase.h index f17ebd54eb..fedb2fe0c4 100644 --- a/Source/Core/VideoBackends/D3D/D3DBase.h +++ b/Source/Core/VideoBackends/D3D/D3DBase.h @@ -38,7 +38,8 @@ namespace DX11 PanicAlert("%s failed in %s at line %d: " Message, __func__, __FILE__, __LINE__, __VA_ARGS__); \ } -class D3DTexture2D; +class DXTexture; +class DXFramebuffer; namespace D3D { @@ -64,7 +65,8 @@ void Reset(HWND new_wnd); void ResizeSwapChain(); void Present(); -D3DTexture2D* GetBackBuffer(); +DXTexture* GetSwapChainTexture(); +DXFramebuffer* GetSwapChainFramebuffer(); const char* PixelShaderVersionString(); const char* GeometryShaderVersionString(); const char* VertexShaderVersionString(); diff --git a/Source/Core/VideoBackends/D3D/D3DBlob.cpp b/Source/Core/VideoBackends/D3D/D3DBlob.cpp deleted file mode 100644 index c0991825b6..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DBlob.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "VideoBackends/D3D/D3DBlob.h" - -namespace DX11 -{ -D3DBlob::D3DBlob(unsigned int blob_size, const u8* init_data) - : ref(1), size(blob_size), blob(nullptr) -{ - data = new u8[blob_size]; - if (init_data) - memcpy(data, init_data, size); -} - -D3DBlob::D3DBlob(ID3D10Blob* d3dblob) : ref(1) -{ - blob = d3dblob; - data = (u8*)blob->GetBufferPointer(); - size = (unsigned int)blob->GetBufferSize(); - d3dblob->AddRef(); -} - -D3DBlob::~D3DBlob() -{ - if (blob) - blob->Release(); - else - delete[] data; -} - -void D3DBlob::AddRef() -{ - ++ref; -} - -unsigned int D3DBlob::Release() -{ - if (--ref == 0) - { - delete this; - return 0; - } - return ref; -} - -unsigned int D3DBlob::Size() const -{ - return size; -} - -u8* D3DBlob::Data() -{ - return data; -} - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DBlob.h b/Source/Core/VideoBackends/D3D/D3DBlob.h deleted file mode 100644 index c332b0c517..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DBlob.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include "Common/CommonTypes.h" - -struct ID3D10Blob; - -namespace DX11 -{ -// use this class instead ID3D10Blob or ID3D11Blob whenever possible -class D3DBlob -{ -public: - // memory will be copied into an own buffer - D3DBlob(unsigned int blob_size, const u8* init_data = nullptr); - - // d3dblob will be AddRef'd - D3DBlob(ID3D10Blob* d3dblob); - - void AddRef(); - unsigned int Release(); - - unsigned int Size() const; - u8* Data(); - -private: - ~D3DBlob(); - - unsigned int ref; - unsigned int size; - - u8* data; - ID3D10Blob* blob; -}; - -} // namespace diff --git a/Source/Core/VideoBackends/D3D/D3DShader.cpp b/Source/Core/VideoBackends/D3D/D3DShader.cpp deleted file mode 100644 index f47b428904..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DShader.cpp +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include -#include - -#include "Common/FileUtil.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -namespace D3D -{ -// bytecode->shader -ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, size_t len) -{ - ID3D11VertexShader* v_shader; - HRESULT hr = D3D::device->CreateVertexShader(bytecode, len, nullptr, &v_shader); - if (FAILED(hr)) - return nullptr; - - return v_shader; -} - -// code->bytecode -bool CompileVertexShader(const std::string& code, D3DBlob** blob) -{ - ID3D10Blob* shaderBuffer = nullptr; - ID3D10Blob* errorBuffer = nullptr; - -#if defined(_DEBUG) || defined(DEBUGFAST) - UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY | D3D10_SHADER_DEBUG; -#else - UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY | D3D10_SHADER_OPTIMIZATION_LEVEL3 | - D3D10_SHADER_SKIP_VALIDATION; -#endif - HRESULT hr = 
PD3DCompile(code.c_str(), code.length(), nullptr, nullptr, nullptr, "main", - D3D::VertexShaderVersionString(), flags, 0, &shaderBuffer, &errorBuffer); - if (errorBuffer) - { - INFO_LOG(VIDEO, "Vertex shader compiler messages:\n%s", - (const char*)errorBuffer->GetBufferPointer()); - } - - if (FAILED(hr)) - { - static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_vs_%04i.txt", - File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << code; - file.close(); - - PanicAlert("Failed to compile vertex shader: %s\nDebug info (%s):\n%s", filename.c_str(), - D3D::VertexShaderVersionString(), (const char*)errorBuffer->GetBufferPointer()); - - *blob = nullptr; - errorBuffer->Release(); - } - else - { - *blob = new D3DBlob(shaderBuffer); - shaderBuffer->Release(); - } - return SUCCEEDED(hr); -} - -// bytecode->shader -ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, size_t len) -{ - ID3D11GeometryShader* g_shader; - HRESULT hr = D3D::device->CreateGeometryShader(bytecode, len, nullptr, &g_shader); - if (FAILED(hr)) - return nullptr; - - return g_shader; -} - -// code->bytecode -bool CompileGeometryShader(const std::string& code, D3DBlob** blob, - const D3D_SHADER_MACRO* pDefines) -{ - ID3D10Blob* shaderBuffer = nullptr; - ID3D10Blob* errorBuffer = nullptr; - -#if defined(_DEBUG) || defined(DEBUGFAST) - UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY | D3D10_SHADER_DEBUG; -#else - UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY | D3D10_SHADER_OPTIMIZATION_LEVEL3 | - D3D10_SHADER_SKIP_VALIDATION; -#endif - HRESULT hr = - PD3DCompile(code.c_str(), code.length(), nullptr, pDefines, nullptr, "main", - D3D::GeometryShaderVersionString(), flags, 0, &shaderBuffer, &errorBuffer); - - if (errorBuffer) - { - INFO_LOG(VIDEO, "Geometry shader compiler messages:\n%s", - (const char*)errorBuffer->GetBufferPointer()); - } - - if 
(FAILED(hr)) - { - static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_gs_%04i.txt", - File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << code; - file.close(); - - PanicAlert("Failed to compile geometry shader: %s\nDebug info (%s):\n%s", filename.c_str(), - D3D::GeometryShaderVersionString(), (const char*)errorBuffer->GetBufferPointer()); - - *blob = nullptr; - errorBuffer->Release(); - } - else - { - *blob = new D3DBlob(shaderBuffer); - shaderBuffer->Release(); - } - return SUCCEEDED(hr); -} - -// bytecode->shader -ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, size_t len) -{ - ID3D11PixelShader* p_shader; - HRESULT hr = D3D::device->CreatePixelShader(bytecode, len, nullptr, &p_shader); - if (FAILED(hr)) - { - PanicAlert("CreatePixelShaderFromByteCode failed at %s %d\n", __FILE__, __LINE__); - p_shader = nullptr; - } - return p_shader; -} - -// code->bytecode -bool CompilePixelShader(const std::string& code, D3DBlob** blob, const D3D_SHADER_MACRO* pDefines) -{ - ID3D10Blob* shaderBuffer = nullptr; - ID3D10Blob* errorBuffer = nullptr; - -#if defined(_DEBUG) || defined(DEBUGFAST) - UINT flags = D3D10_SHADER_DEBUG; -#else - UINT flags = D3D10_SHADER_OPTIMIZATION_LEVEL3; -#endif - HRESULT hr = PD3DCompile(code.c_str(), code.length(), nullptr, pDefines, nullptr, "main", - D3D::PixelShaderVersionString(), flags, 0, &shaderBuffer, &errorBuffer); - - if (errorBuffer) - { - INFO_LOG(VIDEO, "Pixel shader compiler messages:\n%s", - (const char*)errorBuffer->GetBufferPointer()); - } - - if (FAILED(hr)) - { - static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_ps_%04i.txt", - File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << code; - file.close(); - - PanicAlert("Failed to compile pixel shader: %s\nDebug info 
(%s):\n%s", filename.c_str(), - D3D::PixelShaderVersionString(), (const char*)errorBuffer->GetBufferPointer()); - - *blob = nullptr; - errorBuffer->Release(); - } - else - { - *blob = new D3DBlob(shaderBuffer); - shaderBuffer->Release(); - } - - return SUCCEEDED(hr); -} - -// bytecode->shader -ID3D11ComputeShader* CreateComputeShaderFromByteCode(const void* bytecode, size_t len) -{ - ID3D11ComputeShader* shader; - HRESULT hr = D3D::device->CreateComputeShader(bytecode, len, nullptr, &shader); - if (FAILED(hr)) - { - PanicAlert("CreateComputeShaderFromByteCode failed at %s %d\n", __FILE__, __LINE__); - return nullptr; - } - return shader; -} - -// code->bytecode -bool CompileComputeShader(const std::string& code, D3DBlob** blob, const D3D_SHADER_MACRO* pDefines) -{ - ID3D10Blob* shaderBuffer = nullptr; - ID3D10Blob* errorBuffer = nullptr; - -#if defined(_DEBUG) || defined(DEBUGFAST) - UINT flags = D3D10_SHADER_DEBUG; -#else - UINT flags = D3D10_SHADER_OPTIMIZATION_LEVEL3; -#endif - HRESULT hr = - PD3DCompile(code.c_str(), code.length(), nullptr, pDefines, nullptr, "main", - D3D::ComputeShaderVersionString(), flags, 0, &shaderBuffer, &errorBuffer); - - if (errorBuffer) - { - INFO_LOG(VIDEO, "Compute shader compiler messages:\n%s", - (const char*)errorBuffer->GetBufferPointer()); - } - - if (FAILED(hr)) - { - static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_cs_%04i.txt", - File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << code; - file.close(); - - PanicAlert("Failed to compile compute shader: %s\nDebug info (%s):\n%s", filename.c_str(), - D3D::ComputeShaderVersionString(), - reinterpret_cast(errorBuffer->GetBufferPointer())); - - *blob = nullptr; - errorBuffer->Release(); - } - else - { - *blob = new D3DBlob(shaderBuffer); - shaderBuffer->Release(); - } - - return SUCCEEDED(hr); -} - -ID3D11VertexShader* CompileAndCreateVertexShader(const 
std::string& code) -{ - D3DBlob* blob = nullptr; - if (CompileVertexShader(code, &blob)) - { - ID3D11VertexShader* v_shader = CreateVertexShaderFromByteCode(blob); - blob->Release(); - return v_shader; - } - return nullptr; -} - -ID3D11GeometryShader* CompileAndCreateGeometryShader(const std::string& code, - const D3D_SHADER_MACRO* pDefines) -{ - D3DBlob* blob = nullptr; - if (CompileGeometryShader(code, &blob, pDefines)) - { - ID3D11GeometryShader* g_shader = CreateGeometryShaderFromByteCode(blob); - blob->Release(); - return g_shader; - } - return nullptr; -} - -ID3D11PixelShader* CompileAndCreatePixelShader(const std::string& code) -{ - D3DBlob* blob = nullptr; - CompilePixelShader(code, &blob); - if (blob) - { - ID3D11PixelShader* p_shader = CreatePixelShaderFromByteCode(blob); - blob->Release(); - return p_shader; - } - return nullptr; -} - -ID3D11ComputeShader* CompileAndCreateComputeShader(const std::string& code) -{ - D3DBlob* blob = nullptr; - CompileComputeShader(code, &blob); - if (blob) - { - ID3D11ComputeShader* shader = CreateComputeShaderFromByteCode(blob); - blob->Release(); - return shader; - } - return nullptr; -} - -} // namespace - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DShader.h b/Source/Core/VideoBackends/D3D/D3DShader.h deleted file mode 100644 index 4212847e63..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DShader.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DBlob.h" - -struct ID3D11PixelShader; -struct ID3D11VertexShader; - -namespace DX11 -{ -namespace D3D -{ -ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, size_t len); -ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, size_t len); -ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, size_t len); -ID3D11ComputeShader* CreateComputeShaderFromByteCode(const void* bytecode, size_t len); - -// The returned bytecode buffers should be Release()d. -bool CompileVertexShader(const std::string& code, D3DBlob** blob); -bool CompileGeometryShader(const std::string& code, D3DBlob** blob, - const D3D_SHADER_MACRO* pDefines = nullptr); -bool CompilePixelShader(const std::string& code, D3DBlob** blob, - const D3D_SHADER_MACRO* pDefines = nullptr); -bool CompileComputeShader(const std::string& code, D3DBlob** blob, - const D3D_SHADER_MACRO* pDefines = nullptr); - -// Utility functions -ID3D11VertexShader* CompileAndCreateVertexShader(const std::string& code); -ID3D11GeometryShader* CompileAndCreateGeometryShader(const std::string& code, - const D3D_SHADER_MACRO* pDefines = nullptr); -ID3D11PixelShader* CompileAndCreatePixelShader(const std::string& code); -ID3D11ComputeShader* CompileAndCreateComputeShader(const std::string& code); - -inline ID3D11VertexShader* CreateVertexShaderFromByteCode(D3DBlob* bytecode) -{ - return CreateVertexShaderFromByteCode(bytecode->Data(), bytecode->Size()); -} -inline ID3D11GeometryShader* CreateGeometryShaderFromByteCode(D3DBlob* bytecode) -{ - return CreateGeometryShaderFromByteCode(bytecode->Data(), bytecode->Size()); -} -inline ID3D11PixelShader* CreatePixelShaderFromByteCode(D3DBlob* bytecode) -{ - return CreatePixelShaderFromByteCode(bytecode->Data(), bytecode->Size()); -} -inline ID3D11ComputeShader* CreateComputeShaderFromByteCode(D3DBlob* bytecode) -{ - return 
CreateComputeShaderFromByteCode(bytecode->Data(), bytecode->Size()); -} - -inline ID3D11VertexShader* CompileAndCreateVertexShader(D3DBlob* code) -{ - return CompileAndCreateVertexShader(reinterpret_cast(code->Data())); -} - -inline ID3D11GeometryShader* -CompileAndCreateGeometryShader(D3DBlob* code, const D3D_SHADER_MACRO* pDefines = nullptr) -{ - return CompileAndCreateGeometryShader(reinterpret_cast(code->Data()), pDefines); -} - -inline ID3D11PixelShader* CompileAndCreatePixelShader(D3DBlob* code) -{ - return CompileAndCreatePixelShader(reinterpret_cast(code->Data())); -} -inline ID3D11ComputeShader* CompileAndCreateComputeShader(D3DBlob* code) -{ - return CompileAndCreateComputeShader(reinterpret_cast(code->Data())); -} -} - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DState.cpp b/Source/Core/VideoBackends/D3D/D3DState.cpp index 25742d2a65..72528e51d6 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D/D3DState.cpp @@ -12,6 +12,7 @@ #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" +#include "VideoBackends/D3D/DXTexture.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -28,19 +29,31 @@ void StateManager::Apply() if (!m_dirtyFlags) return; - const int textureMaskShift = Common::LeastSignificantSetBit((u32)DirtyFlag_Texture0); - const int samplerMaskShift = Common::LeastSignificantSetBit((u32)DirtyFlag_Sampler0); + // Framebuffer changes must occur before texture changes, otherwise the D3D runtime messes with + // our bindings and sets them to null to prevent hazards. + if (m_dirtyFlags & DirtyFlag_Framebuffer) + { + if (g_ActiveConfig.backend_info.bSupportsBBox) + { + D3D::context->OMSetRenderTargetsAndUnorderedAccessViews( + m_pending.framebuffer->GetNumRTVs(), + m_pending.use_integer_rtv ? 
m_pending.framebuffer->GetIntegerRTVArray() : + m_pending.framebuffer->GetRTVArray(), + m_pending.framebuffer->GetDSV(), 2, 1, &m_pending.uav, nullptr); + } + else + { + D3D::context->OMSetRenderTargets(m_pending.framebuffer->GetNumRTVs(), + m_pending.use_integer_rtv ? + m_pending.framebuffer->GetIntegerRTVArray() : + m_pending.framebuffer->GetRTVArray(), + m_pending.framebuffer->GetDSV()); + } + m_current.framebuffer = m_pending.framebuffer; + m_current.uav = m_pending.uav; + m_current.use_integer_rtv = m_pending.use_integer_rtv; + } - u32 dirtyTextures = - (m_dirtyFlags & - (DirtyFlag_Texture0 | DirtyFlag_Texture1 | DirtyFlag_Texture2 | DirtyFlag_Texture3 | - DirtyFlag_Texture4 | DirtyFlag_Texture5 | DirtyFlag_Texture6 | DirtyFlag_Texture7)) >> - textureMaskShift; - u32 dirtySamplers = - (m_dirtyFlags & - (DirtyFlag_Sampler0 | DirtyFlag_Sampler1 | DirtyFlag_Sampler2 | DirtyFlag_Sampler3 | - DirtyFlag_Sampler4 | DirtyFlag_Sampler5 | DirtyFlag_Sampler6 | DirtyFlag_Sampler7)) >> - samplerMaskShift; u32 dirtyConstants = m_dirtyFlags & (DirtyFlag_PixelConstants | DirtyFlag_VertexConstants | DirtyFlag_GeometryConstants); u32 dirtyShaders = @@ -103,30 +116,6 @@ void StateManager::Apply() } } - while (dirtyTextures) - { - const int index = Common::LeastSignificantSetBit(dirtyTextures); - if (m_current.textures[index] != m_pending.textures[index]) - { - D3D::context->PSSetShaderResources(index, 1, &m_pending.textures[index]); - m_current.textures[index] = m_pending.textures[index]; - } - - dirtyTextures &= ~(1 << index); - } - - while (dirtySamplers) - { - const int index = Common::LeastSignificantSetBit(dirtySamplers); - if (m_current.samplers[index] != m_pending.samplers[index]) - { - D3D::context->PSSetSamplers(index, 1, &m_pending.samplers[index]); - m_current.samplers[index] = m_pending.samplers[index]; - } - - dirtySamplers &= ~(1 << index); - } - if (dirtyShaders) { if (m_current.pixelShader != m_pending.pixelShader) @@ -164,9 +153,51 @@ void StateManager::Apply() 
m_current.rasterizerState = m_pending.rasterizerState; } + ApplyTextures(); + m_dirtyFlags = 0; } +void StateManager::ApplyTextures() +{ + const int textureMaskShift = Common::LeastSignificantSetBit((u32)DirtyFlag_Texture0); + const int samplerMaskShift = Common::LeastSignificantSetBit((u32)DirtyFlag_Sampler0); + + u32 dirtyTextures = + (m_dirtyFlags & + (DirtyFlag_Texture0 | DirtyFlag_Texture1 | DirtyFlag_Texture2 | DirtyFlag_Texture3 | + DirtyFlag_Texture4 | DirtyFlag_Texture5 | DirtyFlag_Texture6 | DirtyFlag_Texture7)) >> + textureMaskShift; + u32 dirtySamplers = + (m_dirtyFlags & + (DirtyFlag_Sampler0 | DirtyFlag_Sampler1 | DirtyFlag_Sampler2 | DirtyFlag_Sampler3 | + DirtyFlag_Sampler4 | DirtyFlag_Sampler5 | DirtyFlag_Sampler6 | DirtyFlag_Sampler7)) >> + samplerMaskShift; + while (dirtyTextures) + { + const int index = Common::LeastSignificantSetBit(dirtyTextures); + if (m_current.textures[index] != m_pending.textures[index]) + { + D3D::context->PSSetShaderResources(index, 1, &m_pending.textures[index]); + m_current.textures[index] = m_pending.textures[index]; + } + + dirtyTextures &= ~(1 << index); + } + + while (dirtySamplers) + { + const int index = Common::LeastSignificantSetBit(dirtySamplers); + if (m_current.samplers[index] != m_pending.samplers[index]) + { + D3D::context->PSSetSamplers(index, 1, &m_pending.samplers[index]); + m_current.samplers[index] = m_pending.samplers[index]; + } + + dirtySamplers &= ~(1 << index); + } +} + u32 StateManager::UnsetTexture(ID3D11ShaderResourceView* srv) { u32 mask = 0; @@ -193,6 +224,78 @@ void StateManager::SetTextureByMask(u32 textureSlotMask, ID3D11ShaderResourceVie } } +void StateManager::SetComputeUAV(ID3D11UnorderedAccessView* uav) +{ + if (m_compute_image == uav) + return; + + m_compute_image = uav; + D3D::context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); +} + +void StateManager::SetComputeShader(ID3D11ComputeShader* shader) +{ + if (m_compute_shader == shader) + return; + + m_compute_shader = shader; + 
D3D::context->CSSetShader(shader, nullptr, 0); +} + +void StateManager::SyncComputeBindings() +{ + if (m_compute_constants != m_pending.pixelConstants[0]) + { + m_compute_constants = m_pending.pixelConstants[0]; + D3D::context->CSSetConstantBuffers(0, 1, &m_compute_constants); + } + + for (u32 start = 0; start < static_cast(m_compute_textures.size());) + { + if (m_compute_textures[start] == m_pending.textures[start]) + { + start++; + continue; + } + + m_compute_textures[start] = m_pending.textures[start]; + + u32 end = start + 1; + for (; end < static_cast(m_compute_textures.size()); end++) + { + if (m_compute_textures[end] == m_pending.textures[end]) + break; + + m_compute_textures[end] = m_pending.textures[end]; + } + + D3D::context->CSSetShaderResources(start, end - start, &m_compute_textures[start]); + start = end; + } + + for (u32 start = 0; start < static_cast(m_compute_samplers.size());) + { + if (m_compute_samplers[start] == m_pending.samplers[start]) + { + start++; + continue; + } + + m_compute_samplers[start] = m_pending.samplers[start]; + + u32 end = start + 1; + for (; end < static_cast(m_compute_samplers.size()); end++) + { + if (m_compute_samplers[end] == m_pending.samplers[end]) + break; + + m_compute_samplers[end] = m_pending.samplers[end]; + } + + D3D::context->CSSetSamplers(start, end - start, &m_compute_samplers[start]); + start = end; + } +} } // namespace D3D StateCache::~StateCache() diff --git a/Source/Core/VideoBackends/D3D/D3DState.h b/Source/Core/VideoBackends/D3D/D3DState.h index db48e7f18b..66542c6250 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.h +++ b/Source/Core/VideoBackends/D3D/D3DState.h @@ -16,6 +16,8 @@ namespace DX11 { +class DXFramebuffer; + class StateCache { public: @@ -112,14 +114,6 @@ public: m_pending.geometryConstants = buffer; } - void SetComputeConstants(ID3D11Buffer* buffer) - { - if (m_current.computeConstants != buffer) - m_dirtyFlags |= DirtyFlag_ComputeConstants; - - m_pending.computeConstants = buffer; - } 
- void SetVertexBuffer(ID3D11Buffer* buffer, u32 stride, u32 offset) { if (m_current.vertexBuffer != buffer || m_current.vertexBufferStride != stride || @@ -187,22 +181,45 @@ public: m_pending.geometryShader = shader; } - void SetComputeShader(ID3D11ComputeShader* shader) + void SetFramebuffer(DXFramebuffer* fb) { - if (m_current.computeShader != shader) - m_dirtyFlags |= DirtyFlag_ComputeShader; + if (m_current.framebuffer != fb) + m_dirtyFlags |= DirtyFlag_Framebuffer; - m_pending.computeShader = shader; + m_pending.framebuffer = fb; + } + + void SetOMUAV(ID3D11UnorderedAccessView* uav) + { + if (m_current.uav != uav) + m_dirtyFlags |= DirtyFlag_Framebuffer; + + m_pending.uav = uav; + } + + void SetIntegerRTV(bool enable) + { + if (m_current.use_integer_rtv != enable) + m_dirtyFlags |= DirtyFlag_Framebuffer; + + m_pending.use_integer_rtv = enable; } // removes currently set texture from all slots, returns mask of previously bound slots u32 UnsetTexture(ID3D11ShaderResourceView* srv); void SetTextureByMask(u32 textureSlotMask, ID3D11ShaderResourceView* srv); + void ApplyTextures(); // call this immediately before any drawing operation or to explicitly apply pending resource // state changes void Apply(); + // Binds constant buffers/textures/samplers to the compute shader stage. + // We don't track these explicitly because it's not often-used. 
+ void SetComputeUAV(ID3D11UnorderedAccessView* uav); + void SetComputeShader(ID3D11ComputeShader* shader); + void SyncComputeBindings(); + private: enum DirtyFlags { @@ -227,20 +244,19 @@ private: DirtyFlag_PixelConstants = 1 << 16, DirtyFlag_VertexConstants = 1 << 17, DirtyFlag_GeometryConstants = 1 << 18, - DirtyFlag_ComputeConstants = 1 << 19, - DirtyFlag_VertexBuffer = 1 << 20, - DirtyFlag_IndexBuffer = 1 << 21, + DirtyFlag_VertexBuffer = 1 << 19, + DirtyFlag_IndexBuffer = 1 << 20, - DirtyFlag_PixelShader = 1 << 22, - DirtyFlag_VertexShader = 1 << 23, - DirtyFlag_GeometryShader = 1 << 24, - DirtyFlag_ComputeShader = 1 << 25, + DirtyFlag_PixelShader = 1 << 21, + DirtyFlag_VertexShader = 1 << 22, + DirtyFlag_GeometryShader = 1 << 23, - DirtyFlag_InputAssembler = 1 << 26, - DirtyFlag_BlendState = 1 << 27, - DirtyFlag_DepthState = 1 << 28, - DirtyFlag_RasterizerState = 1 << 29, + DirtyFlag_InputAssembler = 1 << 24, + DirtyFlag_BlendState = 1 << 25, + DirtyFlag_DepthState = 1 << 26, + DirtyFlag_RasterizerState = 1 << 27, + DirtyFlag_Framebuffer = 1 << 28 }; u32 m_dirtyFlags = ~0u; @@ -252,7 +268,6 @@ private: std::array pixelConstants; ID3D11Buffer* vertexConstants; ID3D11Buffer* geometryConstants; - ID3D11Buffer* computeConstants; ID3D11Buffer* vertexBuffer; ID3D11Buffer* indexBuffer; u32 vertexBufferStride; @@ -262,18 +277,27 @@ private: ID3D11PixelShader* pixelShader; ID3D11VertexShader* vertexShader; ID3D11GeometryShader* geometryShader; - ID3D11ComputeShader* computeShader; ID3D11BlendState* blendState; ID3D11DepthStencilState* depthState; ID3D11RasterizerState* rasterizerState; + DXFramebuffer* framebuffer; + ID3D11UnorderedAccessView* uav; + bool use_integer_rtv; }; Resources m_pending = {}; Resources m_current = {}; + + // Compute resources are synced with the graphics resources when we need them. 
+ ID3D11Buffer* m_compute_constants = nullptr; + std::array m_compute_textures{}; + std::array m_compute_samplers{}; + ID3D11UnorderedAccessView* m_compute_image = nullptr; + ID3D11ComputeShader* m_compute_shader = nullptr; }; extern StateManager* stateman; -} // namespace +} // namespace D3D } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DTexture.cpp b/Source/Core/VideoBackends/D3D/D3DTexture.cpp deleted file mode 100644 index 6ca2247a47..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DTexture.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include - -#include "Common/CommonTypes.h" -#include "Common/MsgHandler.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DTexture.h" - -namespace DX11 -{ -D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, - D3D11_USAGE usage, DXGI_FORMAT fmt, unsigned int levels, - unsigned int slices, D3D11_SUBRESOURCE_DATA* data) -{ - ID3D11Texture2D* pTexture = nullptr; - HRESULT hr; - - D3D11_CPU_ACCESS_FLAG cpuflags; - if (usage == D3D11_USAGE_STAGING) - cpuflags = (D3D11_CPU_ACCESS_FLAG)((int)D3D11_CPU_ACCESS_WRITE | (int)D3D11_CPU_ACCESS_READ); - else if (usage == D3D11_USAGE_DYNAMIC) - cpuflags = D3D11_CPU_ACCESS_WRITE; - else - cpuflags = (D3D11_CPU_ACCESS_FLAG)0; - D3D11_TEXTURE2D_DESC texdesc = - CD3D11_TEXTURE2D_DESC(fmt, width, height, slices, levels, bind, usage, cpuflags); - hr = D3D::device->CreateTexture2D(&texdesc, data, &pTexture); - if (FAILED(hr)) - { - PanicAlert("Failed to create texture at %s, line %d: hr=%#x\n", __FILE__, __LINE__, hr); - return nullptr; - } - - D3DTexture2D* ret = new D3DTexture2D(pTexture, bind); - SAFE_RELEASE(pTexture); - return ret; -} - -void D3DTexture2D::AddRef() -{ - ++ref; -} - -UINT D3DTexture2D::Release() -{ - --ref; - if (ref == 0) - { - delete this; - return 0; - } - return ref; -} - 
-ID3D11Texture2D*& D3DTexture2D::GetTex() -{ - return tex; -} -ID3D11ShaderResourceView*& D3DTexture2D::GetSRV() -{ - return srv; -} -ID3D11RenderTargetView*& D3DTexture2D::GetRTV() -{ - return rtv; -} -ID3D11DepthStencilView*& D3DTexture2D::GetDSV() -{ - return dsv; -} - -D3DTexture2D::D3DTexture2D(ID3D11Texture2D* texptr, D3D11_BIND_FLAG bind, DXGI_FORMAT srv_format, - DXGI_FORMAT dsv_format, DXGI_FORMAT rtv_format, bool multisampled) - : tex{texptr} -{ - D3D11_SRV_DIMENSION srv_dim = - multisampled ? D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY : D3D11_SRV_DIMENSION_TEXTURE2DARRAY; - D3D11_DSV_DIMENSION dsv_dim = - multisampled ? D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY : D3D11_DSV_DIMENSION_TEXTURE2DARRAY; - D3D11_RTV_DIMENSION rtv_dim = - multisampled ? D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY : D3D11_RTV_DIMENSION_TEXTURE2DARRAY; - D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc = CD3D11_SHADER_RESOURCE_VIEW_DESC(srv_dim, srv_format); - D3D11_DEPTH_STENCIL_VIEW_DESC dsv_desc = CD3D11_DEPTH_STENCIL_VIEW_DESC(dsv_dim, dsv_format); - D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = CD3D11_RENDER_TARGET_VIEW_DESC(rtv_dim, rtv_format); - if (bind & D3D11_BIND_SHADER_RESOURCE) - D3D::device->CreateShaderResourceView(tex, &srv_desc, &srv); - if (bind & D3D11_BIND_RENDER_TARGET) - D3D::device->CreateRenderTargetView(tex, &rtv_desc, &rtv); - if (bind & D3D11_BIND_DEPTH_STENCIL) - D3D::device->CreateDepthStencilView(tex, &dsv_desc, &dsv); - tex->AddRef(); -} - -D3DTexture2D::~D3DTexture2D() -{ - SAFE_RELEASE(srv); - SAFE_RELEASE(rtv); - SAFE_RELEASE(dsv); - SAFE_RELEASE(tex); -} - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DTexture.h b/Source/Core/VideoBackends/D3D/D3DTexture.h deleted file mode 100644 index 609ba877c8..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DTexture.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include "Common/CommonTypes.h" - -namespace DX11 -{ -class D3DTexture2D -{ -public: - // there are two ways to create a D3DTexture2D object: - // either create an ID3D11Texture2D object, pass it to the constructor and specify what views - // to create - // or let the texture automatically be created by D3DTexture2D::Create - - D3DTexture2D(ID3D11Texture2D* texptr, D3D11_BIND_FLAG bind, - DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT dsv_format = DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT rtv_format = DXGI_FORMAT_UNKNOWN, bool multisampled = false); - static D3DTexture2D* Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, - D3D11_USAGE usage, DXGI_FORMAT, unsigned int levels = 1, - unsigned int slices = 1, D3D11_SUBRESOURCE_DATA* data = nullptr); - - // reference counting, use AddRef() when creating a new reference and Release() it when you don't - // need it anymore - void AddRef(); - UINT Release(); - - ID3D11Texture2D*& GetTex(); - ID3D11ShaderResourceView*& GetSRV(); - ID3D11RenderTargetView*& GetRTV(); - ID3D11DepthStencilView*& GetDSV(); - -private: - ~D3DTexture2D(); - - ID3D11Texture2D* tex; - ID3D11ShaderResourceView* srv = nullptr; - ID3D11RenderTargetView* rtv = nullptr; - ID3D11DepthStencilView* dsv = nullptr; - UINT ref = 1; -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DUtil.cpp b/Source/Core/VideoBackends/D3D/D3DUtil.cpp deleted file mode 100644 index 0ad02c1ca4..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DUtil.cpp +++ /dev/null @@ -1,407 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/D3D/D3DUtil.h" - -#include -#include -#include - -#include "Common/Align.h" -#include "Common/Assert.h" -#include "Common/Logging/Log.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" -#include "VideoCommon/VideoBackendBase.h" - -namespace DX11 -{ -namespace D3D -{ -// Ring buffer class, shared between the draw* functions -class UtilVertexBuffer -{ -public: - UtilVertexBuffer(unsigned int size) : max_size(size) - { - D3D11_BUFFER_DESC desc = CD3D11_BUFFER_DESC(max_size, D3D11_BIND_VERTEX_BUFFER, - D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); - device->CreateBuffer(&desc, nullptr, &buf); - } - ~UtilVertexBuffer() { buf->Release(); } - int GetSize() const { return max_size; } - // returns vertex offset to the new data - int AppendData(void* data, unsigned int size, unsigned int vertex_size) - { - D3D11_MAPPED_SUBRESOURCE map; - if (offset + size >= max_size) - { - // wrap buffer around and notify observers - offset = 0; - context->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - - for (bool* observer : observers) - *observer = true; - } - else - { - context->Map(buf, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &map); - } - offset = Common::AlignUp(offset, vertex_size); - memcpy((u8*)map.pData + offset, data, size); - context->Unmap(buf, 0); - - offset += size; - return (offset - size) / vertex_size; - } - - int BeginAppendData(void** write_ptr, unsigned int size, unsigned int vertex_size) - { - DEBUG_ASSERT(size < max_size); - - D3D11_MAPPED_SUBRESOURCE map; - unsigned int aligned_offset = Common::AlignUp(offset, vertex_size); - if (aligned_offset + size > max_size) - { - // wrap buffer around and notify observers - offset = 0; - aligned_offset = 0; - context->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - - for (bool* 
observer : observers) - *observer = true; - } - else - { - context->Map(buf, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &map); - } - - *write_ptr = reinterpret_cast(map.pData) + aligned_offset; - offset = aligned_offset + size; - return aligned_offset / vertex_size; - } - - void EndAppendData() { context->Unmap(buf, 0); } - void AddWrapObserver(bool* observer) { observers.push_back(observer); } - inline ID3D11Buffer*& GetBuffer() { return buf; } - -private: - ID3D11Buffer* buf = nullptr; - unsigned int offset = 0; - unsigned int max_size; - - std::list observers; -}; - -static UtilVertexBuffer* util_vbuf = nullptr; -static ID3D11SamplerState* linear_copy_sampler = nullptr; -static ID3D11SamplerState* point_copy_sampler = nullptr; - -struct STQVertex -{ - float x, y, z, u, v, w; -}; -struct ClearVertex -{ - float x, y, z; - u32 col; -}; -struct ColVertex -{ - float x, y, z; - u32 col; -}; - -struct TexQuadData -{ - float u1, v1, u2, v2, S, G; -}; -static TexQuadData tex_quad_data; - -struct DrawQuadData -{ - float x1, y1, x2, y2, z; - u32 col; -}; -static DrawQuadData draw_quad_data; - -struct ClearQuadData -{ - u32 col; - float z; -}; -static ClearQuadData clear_quad_data; - -// ring buffer offsets -static int stq_offset, cq_offset, clearq_offset; - -// observer variables for ring buffer wraps -static bool stq_observer, cq_observer, clearq_observer; - -void InitUtils() -{ - util_vbuf = new UtilVertexBuffer(65536); // 64KiB - - float border[4] = {0.f, 0.f, 0.f, 0.f}; - D3D11_SAMPLER_DESC samDesc = CD3D11_SAMPLER_DESC( - D3D11_FILTER_MIN_MAG_MIP_POINT, D3D11_TEXTURE_ADDRESS_BORDER, D3D11_TEXTURE_ADDRESS_BORDER, - D3D11_TEXTURE_ADDRESS_BORDER, 0.f, 1, D3D11_COMPARISON_ALWAYS, border, 0.f, 0.f); - HRESULT hr = D3D::device->CreateSamplerState(&samDesc, &point_copy_sampler); - if (FAILED(hr)) - PanicAlert("Failed to create sampler state at %s %d\n", __FILE__, __LINE__); - else - SetDebugObjectName(point_copy_sampler, "point copy sampler state"); - - samDesc = 
CD3D11_SAMPLER_DESC(D3D11_FILTER_MIN_MAG_MIP_LINEAR, D3D11_TEXTURE_ADDRESS_BORDER, - D3D11_TEXTURE_ADDRESS_BORDER, D3D11_TEXTURE_ADDRESS_BORDER, 0.f, 1, - D3D11_COMPARISON_ALWAYS, border, 0.f, 0.f); - hr = D3D::device->CreateSamplerState(&samDesc, &linear_copy_sampler); - if (FAILED(hr)) - PanicAlert("Failed to create sampler state at %s %d\n", __FILE__, __LINE__); - else - SetDebugObjectName(linear_copy_sampler, "linear copy sampler state"); - - // cached data used to avoid unnecessarily reloading the vertex buffers - memset(&tex_quad_data, 0, sizeof(tex_quad_data)); - memset(&draw_quad_data, 0, sizeof(draw_quad_data)); - memset(&clear_quad_data, 0, sizeof(clear_quad_data)); - - // make sure to properly load the vertex data whenever the corresponding functions get called the - // first time - stq_observer = cq_observer = clearq_observer = true; - util_vbuf->AddWrapObserver(&stq_observer); - util_vbuf->AddWrapObserver(&cq_observer); - util_vbuf->AddWrapObserver(&clearq_observer); -} - -void ShutdownUtils() -{ - SAFE_RELEASE(point_copy_sampler); - SAFE_RELEASE(linear_copy_sampler); - SAFE_DELETE(util_vbuf); -} - -void SetPointCopySampler() -{ - D3D::stateman->SetSampler(0, point_copy_sampler); -} - -void SetLinearCopySampler() -{ - D3D::stateman->SetSampler(0, linear_copy_sampler); -} - -void drawShadedTexQuad(ID3D11ShaderResourceView* texture, const D3D11_RECT* rSource, - int SourceWidth, int SourceHeight, ID3D11PixelShader* PShader, - ID3D11VertexShader* VShader, ID3D11InputLayout* layout, - ID3D11GeometryShader* GShader, u32 slice) -{ - float sw = 1.0f / (float)SourceWidth; - float sh = 1.0f / (float)SourceHeight; - float u1 = ((float)rSource->left) * sw; - float u2 = ((float)rSource->right) * sw; - float v1 = ((float)rSource->top) * sh; - float v2 = ((float)rSource->bottom) * sh; - float S = (float)slice; - - STQVertex coords[4] = { - {-1.0f, 1.0f, 0.0f, u1, v1, S}, - {1.0f, 1.0f, 0.0f, u2, v1, S}, - {-1.0f, -1.0f, 0.0f, u1, v2, S}, - {1.0f, -1.0f, 0.0f, u2, v2, 
S}, - }; - - // only upload the data to VRAM if it changed - if (stq_observer || tex_quad_data.u1 != u1 || tex_quad_data.v1 != v1 || tex_quad_data.u2 != u2 || - tex_quad_data.v2 != v2 || tex_quad_data.S != S) - { - stq_offset = util_vbuf->AppendData(coords, sizeof(coords), sizeof(STQVertex)); - stq_observer = false; - - tex_quad_data.u1 = u1; - tex_quad_data.v1 = v1; - tex_quad_data.u2 = u2; - tex_quad_data.v2 = v2; - tex_quad_data.S = S; - } - UINT stride = sizeof(STQVertex); - UINT offset = 0; - - D3D::stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - D3D::stateman->SetInputLayout(layout); - D3D::stateman->SetVertexBuffer(util_vbuf->GetBuffer(), stride, offset); - D3D::stateman->SetPixelShader(PShader); - D3D::stateman->SetTexture(0, texture); - D3D::stateman->SetVertexShader(VShader); - D3D::stateman->SetGeometryShader(GShader); - - D3D::stateman->Apply(); - D3D::context->Draw(4, stq_offset); - - D3D::stateman->SetTexture(0, nullptr); // immediately unbind the texture - D3D::stateman->Apply(); - - D3D::stateman->SetGeometryShader(nullptr); -} - -// Fills a certain area of the current render target with the specified color -// destination coordinates normalized to (-1;1) -void drawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2) -{ - ColVertex coords[4] = { - {x1, y1, z, Color}, - {x2, y1, z, Color}, - {x1, y2, z, Color}, - {x2, y2, z, Color}, - }; - - if (cq_observer || draw_quad_data.x1 != x1 || draw_quad_data.y1 != y1 || - draw_quad_data.x2 != x2 || draw_quad_data.y2 != y2 || draw_quad_data.col != Color || - draw_quad_data.z != z) - { - cq_offset = util_vbuf->AppendData(coords, sizeof(coords), sizeof(ColVertex)); - cq_observer = false; - - draw_quad_data.x1 = x1; - draw_quad_data.y1 = y1; - draw_quad_data.x2 = x2; - draw_quad_data.y2 = y2; - draw_quad_data.col = Color; - draw_quad_data.z = z; - } - - stateman->SetVertexShader(VertexShaderCache::GetClearVertexShader()); - 
stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); - stateman->SetPixelShader(PixelShaderCache::GetClearProgram()); - stateman->SetInputLayout(VertexShaderCache::GetClearInputLayout()); - - UINT stride = sizeof(ColVertex); - UINT offset = 0; - stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - stateman->SetVertexBuffer(util_vbuf->GetBuffer(), stride, offset); - - stateman->Apply(); - context->Draw(4, cq_offset); - - stateman->SetGeometryShader(nullptr); -} - -void drawClearQuad(u32 Color, float z) -{ - ClearVertex coords[4] = { - {-1.0f, 1.0f, z, Color}, - {1.0f, 1.0f, z, Color}, - {-1.0f, -1.0f, z, Color}, - {1.0f, -1.0f, z, Color}, - }; - - if (clearq_observer || clear_quad_data.col != Color || clear_quad_data.z != z) - { - clearq_offset = util_vbuf->AppendData(coords, sizeof(coords), sizeof(ClearVertex)); - clearq_observer = false; - - clear_quad_data.col = Color; - clear_quad_data.z = z; - } - - stateman->SetVertexShader(VertexShaderCache::GetClearVertexShader()); - stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); - stateman->SetPixelShader(PixelShaderCache::GetClearProgram()); - stateman->SetInputLayout(VertexShaderCache::GetClearInputLayout()); - - UINT stride = sizeof(ClearVertex); - UINT offset = 0; - stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - stateman->SetVertexBuffer(util_vbuf->GetBuffer(), stride, offset); - - stateman->Apply(); - context->Draw(4, clearq_offset); - - stateman->SetGeometryShader(nullptr); -} - -static void InitColVertex(ColVertex* vert, float x, float y, float z, u32 col) -{ - vert->x = x; - vert->y = y; - vert->z = z; - vert->col = col; -} - -void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - const size_t COL_QUAD_SIZE = sizeof(ColVertex) * 6; - - // Set common state - stateman->SetVertexShader(VertexShaderCache::GetClearVertexShader()); - 
stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); - stateman->SetPixelShader(PixelShaderCache::GetClearProgram()); - stateman->SetInputLayout(VertexShaderCache::GetClearInputLayout()); - stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - stateman->SetVertexBuffer(util_vbuf->GetBuffer(), sizeof(ColVertex), 0); - stateman->Apply(); - - // if drawing a large number of points at once, this will have to be split into multiple passes. - size_t points_per_draw = util_vbuf->GetSize() / COL_QUAD_SIZE; - size_t current_point_index = 0; - while (current_point_index < num_points) - { - size_t points_to_draw = std::min(num_points - current_point_index, points_per_draw); - size_t required_bytes = COL_QUAD_SIZE * points_to_draw; - - // map and reserve enough buffer space for this draw - void* buffer_ptr; - int base_vertex_index = - util_vbuf->BeginAppendData(&buffer_ptr, (int)required_bytes, sizeof(ColVertex)); - - // generate quads for each efb point - ColVertex* base_vertex_ptr = reinterpret_cast(buffer_ptr); - for (size_t i = 0; i < points_to_draw; i++) - { - // generate quad from the single point (clip-space coordinates) - const EfbPokeData* point = &points[current_point_index]; - float x1 = float(point->x) * 2.0f / EFB_WIDTH - 1.0f; - float y1 = -float(point->y) * 2.0f / EFB_HEIGHT + 1.0f; - float x2 = float(point->x + 1) * 2.0f / EFB_WIDTH - 1.0f; - float y2 = -float(point->y + 1) * 2.0f / EFB_HEIGHT + 1.0f; - float z = 0.0f; - u32 col = 0; - - if (type == EFBAccessType::PokeZ) - { - z = 1.0f - static_cast(point->data & 0xFFFFFF) / 16777216.0f; - } - else - { - col = ((point->data & 0xFF00FF00) | ((point->data >> 16) & 0xFF) | - ((point->data << 16) & 0xFF0000)); - } - - current_point_index++; - - // quad -> triangles - ColVertex* vertex = &base_vertex_ptr[i * 6]; - InitColVertex(&vertex[0], x1, y1, z, col); - InitColVertex(&vertex[1], x2, y1, z, col); - InitColVertex(&vertex[2], x1, y2, z, col); - InitColVertex(&vertex[3], 
x1, y2, z, col); - InitColVertex(&vertex[4], x2, y1, z, col); - InitColVertex(&vertex[5], x2, y2, z, col); - } - - // unmap the util buffer, and issue the draw - util_vbuf->EndAppendData(); - context->Draw(6 * (UINT)points_to_draw, base_vertex_index); - } - - stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); -} - -} // namespace D3D - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DUtil.h b/Source/Core/VideoBackends/D3D/D3DUtil.h deleted file mode 100644 index cfbe2e4889..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DUtil.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoCommon/RenderBase.h" - -namespace DX11 -{ -namespace D3D -{ -void InitUtils(); -void ShutdownUtils(); - -void SetPointCopySampler(); -void SetLinearCopySampler(); - -void drawShadedTexQuad(ID3D11ShaderResourceView* texture, const D3D11_RECT* rSource, - int SourceWidth, int SourceHeight, ID3D11PixelShader* PShader, - ID3D11VertexShader* VShader, ID3D11InputLayout* layout, - ID3D11GeometryShader* GShader = nullptr, u32 slice = 0); -void drawClearQuad(u32 Color, float z); -void drawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2); - -void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, size_t num_points); -} -} diff --git a/Source/Core/VideoBackends/D3D/DXPipeline.cpp b/Source/Core/VideoBackends/D3D/DXPipeline.cpp index df827e1fba..9c694d20cd 100644 --- a/Source/Core/VideoBackends/D3D/DXPipeline.cpp +++ b/Source/Core/VideoBackends/D3D/DXPipeline.cpp @@ -22,11 +22,11 @@ DXPipeline::DXPipeline(ID3D11InputLayout* input_layout, ID3D11VertexShader* vert ID3D11GeometryShader* geometry_shader, ID3D11PixelShader* pixel_shader, ID3D11RasterizerState* rasterizer_state, ID3D11DepthStencilState* depth_state, ID3D11BlendState* blend_state, - 
D3D11_PRIMITIVE_TOPOLOGY primitive_topology) + D3D11_PRIMITIVE_TOPOLOGY primitive_topology, bool use_logic_op) : m_input_layout(input_layout), m_vertex_shader(vertex_shader), m_geometry_shader(geometry_shader), m_pixel_shader(pixel_shader), m_rasterizer_state(rasterizer_state), m_depth_state(depth_state), m_blend_state(blend_state), - m_primitive_topology(primitive_topology) + m_primitive_topology(primitive_topology), m_use_logic_op(use_logic_op) { if (m_input_layout) m_input_layout->AddRef(); @@ -84,13 +84,16 @@ std::unique_ptr DXPipeline::Create(const AbstractPipelineConfig& con ASSERT(vertex_shader != nullptr && pixel_shader != nullptr); ID3D11InputLayout* input_layout = - const_cast(static_cast(config.vertex_format)) - ->GetInputLayout(vertex_shader->GetByteCode()); + config.vertex_format ? + const_cast(static_cast(config.vertex_format)) + ->GetInputLayout(vertex_shader->GetByteCode().data(), + vertex_shader->GetByteCode().size()) : + nullptr; - return std::make_unique(input_layout, vertex_shader->GetD3DVertexShader(), - geometry_shader ? geometry_shader->GetD3DGeometryShader() : - nullptr, - pixel_shader->GetD3DPixelShader(), rasterizer_state, - depth_state, blend_state, primitive_topology); + return std::make_unique( + input_layout, vertex_shader->GetD3DVertexShader(), + geometry_shader ? 
geometry_shader->GetD3DGeometryShader() : nullptr, + pixel_shader->GetD3DPixelShader(), rasterizer_state, depth_state, blend_state, + primitive_topology, config.blending_state.logicopenable); } } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXPipeline.h b/Source/Core/VideoBackends/D3D/DXPipeline.h index f7b02a7d47..3a03bf948d 100644 --- a/Source/Core/VideoBackends/D3D/DXPipeline.h +++ b/Source/Core/VideoBackends/D3D/DXPipeline.h @@ -16,7 +16,8 @@ public: DXPipeline(ID3D11InputLayout* input_layout, ID3D11VertexShader* vertex_shader, ID3D11GeometryShader* geometry_shader, ID3D11PixelShader* pixel_shader, ID3D11RasterizerState* rasterizer_state, ID3D11DepthStencilState* depth_state, - ID3D11BlendState* blend_state, D3D11_PRIMITIVE_TOPOLOGY primitive_topology); + ID3D11BlendState* blend_state, D3D11_PRIMITIVE_TOPOLOGY primitive_topology, + bool use_logic_op); ~DXPipeline() override; ID3D11InputLayout* GetInputLayout() const { return m_input_layout; } @@ -28,6 +29,8 @@ public: ID3D11BlendState* GetBlendState() const { return m_blend_state; } D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_primitive_topology; } bool HasGeometryShader() const { return m_geometry_shader != nullptr; } + bool UseLogicOp() const { return m_use_logic_op; } + static std::unique_ptr Create(const AbstractPipelineConfig& config); private: @@ -39,5 +42,6 @@ private: ID3D11DepthStencilState* m_depth_state; ID3D11BlendState* m_blend_state; D3D11_PRIMITIVE_TOPOLOGY m_primitive_topology; + bool m_use_logic_op; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXShader.cpp b/Source/Core/VideoBackends/D3D/DXShader.cpp index 588cd2b631..18f9aa0d3d 100644 --- a/Source/Core/VideoBackends/D3D/DXShader.cpp +++ b/Source/Core/VideoBackends/D3D/DXShader.cpp @@ -2,43 +2,28 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. 
+#include + #include "Common/Assert.h" +#include "Common/FileUtil.h" +#include "Common/Logging/Log.h" +#include "Common/MsgHandler.h" +#include "Common/StringUtil.h" #include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" #include "VideoBackends/D3D/DXShader.h" +#include "VideoCommon/VideoConfig.h" namespace DX11 { -DXShader::DXShader(D3DBlob* bytecode, ID3D11VertexShader* vs) - : AbstractShader(ShaderStage::Vertex), m_bytecode(bytecode), m_shader(vs) -{ -} - -DXShader::DXShader(D3DBlob* bytecode, ID3D11GeometryShader* gs) - : AbstractShader(ShaderStage::Geometry), m_bytecode(bytecode), m_shader(gs) -{ -} - -DXShader::DXShader(D3DBlob* bytecode, ID3D11PixelShader* ps) - : AbstractShader(ShaderStage::Pixel), m_bytecode(bytecode), m_shader(ps) -{ -} - -DXShader::DXShader(D3DBlob* bytecode, ID3D11ComputeShader* cs) - : AbstractShader(ShaderStage::Compute), m_bytecode(bytecode), m_shader(cs) +DXShader::DXShader(ShaderStage stage, BinaryData bytecode, ID3D11DeviceChild* shader) + : AbstractShader(stage), m_bytecode(bytecode), m_shader(shader) { } DXShader::~DXShader() { m_shader->Release(); - m_bytecode->Release(); -} - -D3DBlob* DXShader::GetByteCode() const -{ - return m_bytecode; } ID3D11VertexShader* DXShader::GetD3DVertexShader() const @@ -67,48 +52,62 @@ ID3D11ComputeShader* DXShader::GetD3DComputeShader() const bool DXShader::HasBinary() const { - ASSERT(m_bytecode); return true; } AbstractShader::BinaryData DXShader::GetBinary() const { - return BinaryData(m_bytecode->Data(), m_bytecode->Data() + m_bytecode->Size()); + return m_bytecode; } -std::unique_ptr DXShader::CreateFromBlob(ShaderStage stage, D3DBlob* bytecode) +std::unique_ptr DXShader::CreateFromBytecode(ShaderStage stage, BinaryData bytecode) { switch (stage) { case ShaderStage::Vertex: { - ID3D11VertexShader* vs = D3D::CreateVertexShaderFromByteCode(bytecode); - if (vs) - return std::make_unique(bytecode, vs); + ID3D11VertexShader* vs; + HRESULT hr = 
D3D::device->CreateVertexShader(bytecode.data(), bytecode.size(), nullptr, &vs); + CHECK(SUCCEEDED(hr), "Create vertex shader"); + if (FAILED(hr)) + return nullptr; + + return std::make_unique(ShaderStage::Vertex, std::move(bytecode), vs); } - break; case ShaderStage::Geometry: { - ID3D11GeometryShader* gs = D3D::CreateGeometryShaderFromByteCode(bytecode); - if (gs) - return std::make_unique(bytecode, gs); + ID3D11GeometryShader* gs; + HRESULT hr = D3D::device->CreateGeometryShader(bytecode.data(), bytecode.size(), nullptr, &gs); + CHECK(SUCCEEDED(hr), "Create geometry shader"); + if (FAILED(hr)) + return nullptr; + + return std::make_unique(ShaderStage::Geometry, std::move(bytecode), gs); } break; case ShaderStage::Pixel: { - ID3D11PixelShader* ps = D3D::CreatePixelShaderFromByteCode(bytecode); - if (ps) - return std::make_unique(bytecode, ps); + ID3D11PixelShader* ps; + HRESULT hr = D3D::device->CreatePixelShader(bytecode.data(), bytecode.size(), nullptr, &ps); + CHECK(SUCCEEDED(hr), "Create pixel shader"); + if (FAILED(hr)) + return nullptr; + + return std::make_unique(ShaderStage::Pixel, std::move(bytecode), ps); } break; case ShaderStage::Compute: { - ID3D11ComputeShader* cs = D3D::CreateComputeShaderFromByteCode(bytecode); - if (cs) - return std::make_unique(bytecode, cs); + ID3D11ComputeShader* cs; + HRESULT hr = D3D::device->CreateComputeShader(bytecode.data(), bytecode.size(), nullptr, &cs); + CHECK(SUCCEEDED(hr), "Create compute shader"); + if (FAILED(hr)) + return nullptr; + + return std::make_unique(ShaderStage::Compute, std::move(bytecode), cs); } break; @@ -119,65 +118,85 @@ std::unique_ptr DXShader::CreateFromBlob(ShaderStage stage, D3DBlob* b return nullptr; } -std::unique_ptr DXShader::CreateFromSource(ShaderStage stage, const char* source, - size_t length) +static const char* GetCompileTarget(ShaderStage stage) { - D3DBlob* bytecode; switch (stage) { case ShaderStage::Vertex: - { - if (!D3D::CompileVertexShader(std::string(source, length), 
&bytecode)) - return nullptr; - } - break; - + return D3D::VertexShaderVersionString(); case ShaderStage::Geometry: - { - if (!D3D::CompileGeometryShader(std::string(source, length), &bytecode)) - return nullptr; - } - break; - + return D3D::GeometryShaderVersionString(); case ShaderStage::Pixel: - { - if (!D3D::CompilePixelShader(std::string(source, length), &bytecode)) - return nullptr; - } - break; - + return D3D::PixelShaderVersionString(); case ShaderStage::Compute: - { - if (!D3D::CompileComputeShader(std::string(source, length), &bytecode)) - return nullptr; - } - + return D3D::ComputeShaderVersionString(); default: - return nullptr; + return ""; } +} - std::unique_ptr shader = CreateFromBlob(stage, bytecode); - if (!shader) +bool DXShader::CompileShader(BinaryData* out_bytecode, ShaderStage stage, const char* source, + size_t length) +{ + static constexpr D3D_SHADER_MACRO macros[] = {{"API_D3D", "1"}, {nullptr, nullptr}}; + const UINT flags = g_ActiveConfig.bEnableValidationLayer ? 
+ (D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION) : + (D3DCOMPILE_OPTIMIZATION_LEVEL3 | D3DCOMPILE_SKIP_VALIDATION); + const char* target = GetCompileTarget(stage); + + ID3DBlob* code = nullptr; + ID3DBlob* errors = nullptr; + HRESULT hr = PD3DCompile(source, length, nullptr, macros, nullptr, "main", target, flags, 0, + &code, &errors); + if (FAILED(hr)) { - bytecode->Release(); - return nullptr; + static int num_failures = 0; + std::string filename = StringFromFormat( + "%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), target, num_failures++); + std::ofstream file; + File::OpenFStream(file, filename, std::ios_base::out); + file.write(source, length); + file << "\n"; + file.write(static_cast(errors->GetBufferPointer()), errors->GetBufferSize()); + file.close(); + + PanicAlert("Failed to compile %s:\nDebug info (%s):\n%s", filename.c_str(), target, + static_cast(errors->GetBufferPointer())); + errors->Release(); + return false; } - return shader; + if (errors && errors->GetBufferSize() > 0) + { + WARN_LOG(VIDEO, "%s compilation succeeded with warnings:\n%s", target, + static_cast(errors->GetBufferPointer())); + } + SAFE_RELEASE(errors); + + out_bytecode->resize(code->GetBufferSize()); + std::memcpy(out_bytecode->data(), code->GetBufferPointer(), code->GetBufferSize()); + code->Release(); + return true; +} + +std::unique_ptr DXShader::CreateFromSource(ShaderStage stage, const char* source, + size_t length) +{ + BinaryData bytecode; + if (!CompileShader(&bytecode, stage, source, length)) + return nullptr; + + return CreateFromBytecode(stage, std::move(bytecode)); } std::unique_ptr DXShader::CreateFromBinary(ShaderStage stage, const void* data, size_t length) { - D3DBlob* bytecode = new D3DBlob(static_cast(length), static_cast(data)); - std::unique_ptr shader = CreateFromBlob(stage, bytecode); - if (!shader) - { - bytecode->Release(); + if (length == 0) return nullptr; - } - return shader; + BinaryData bytecode(length); + std::memcpy(bytecode.data(), data, 
length); + return CreateFromBytecode(stage, std::move(bytecode)); } - } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXShader.h b/Source/Core/VideoBackends/D3D/DXShader.h index d39e638dac..a86a993b80 100644 --- a/Source/Core/VideoBackends/D3D/DXShader.h +++ b/Source/Core/VideoBackends/D3D/DXShader.h @@ -3,13 +3,9 @@ // Refer to the license.txt file included. #pragma once - -#include #include #include -#include "Common/CommonTypes.h" -#include "VideoBackends/D3D/D3DBlob.h" #include "VideoCommon/AbstractShader.h" namespace DX11 @@ -17,14 +13,11 @@ namespace DX11 class DXShader final : public AbstractShader { public: - // Note: vs/gs/ps/cs references are transferred. - DXShader(D3DBlob* bytecode, ID3D11VertexShader* vs); - DXShader(D3DBlob* bytecode, ID3D11GeometryShader* gs); - DXShader(D3DBlob* bytecode, ID3D11PixelShader* ps); - DXShader(D3DBlob* bytecode, ID3D11ComputeShader* cs); + DXShader(ShaderStage stage, BinaryData bytecode, ID3D11DeviceChild* shader); ~DXShader() override; - D3DBlob* GetByteCode() const; + const BinaryData& GetByteCode() const { return m_bytecode; } + ID3D11VertexShader* GetD3DVertexShader() const; ID3D11GeometryShader* GetD3DGeometryShader() const; ID3D11PixelShader* GetD3DPixelShader() const; @@ -33,8 +26,11 @@ public: bool HasBinary() const override; BinaryData GetBinary() const override; - // Creates a new shader object. The reference to bytecode is not transfered upon failure. - static std::unique_ptr CreateFromBlob(ShaderStage stage, D3DBlob* bytecode); + // Creates a new shader object. 
+ static std::unique_ptr CreateFromBytecode(ShaderStage stage, BinaryData bytecode); + static bool CompileShader(BinaryData* out_bytecode, ShaderStage stage, const char* source, + size_t length); + static std::unique_ptr CreateFromBinary(ShaderStage stage, const void* data, size_t length); static std::unique_ptr CreateFromSource(ShaderStage stage, const char* source, @@ -42,7 +38,7 @@ public: private: ID3D11DeviceChild* m_shader; - D3DBlob* m_bytecode; + BinaryData m_bytecode; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXTexture.cpp b/Source/Core/VideoBackends/D3D/DXTexture.cpp index 127a922146..dc1c04991a 100644 --- a/Source/Core/VideoBackends/D3D/DXTexture.cpp +++ b/Source/Core/VideoBackends/D3D/DXTexture.cpp @@ -9,25 +9,47 @@ #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" -#include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DTexture.h" -#include "VideoBackends/D3D/D3DUtil.h" #include "VideoBackends/D3D/DXTexture.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/TextureCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConfig.h" namespace DX11 { namespace { -DXGI_FORMAT GetDXGIFormatForHostFormat(AbstractTextureFormat format) +DXGI_FORMAT GetDXGIFormatForHostFormat(AbstractTextureFormat format, bool typeless) +{ + switch (format) + { + case AbstractTextureFormat::DXT1: + return DXGI_FORMAT_BC1_UNORM; + case AbstractTextureFormat::DXT3: + return DXGI_FORMAT_BC2_UNORM; + case AbstractTextureFormat::DXT5: + return DXGI_FORMAT_BC3_UNORM; + case AbstractTextureFormat::BPTC: + return DXGI_FORMAT_BC7_UNORM; + case AbstractTextureFormat::RGBA8: + return typeless ? 
DXGI_FORMAT_R8G8B8A8_TYPELESS : DXGI_FORMAT_R8G8B8A8_UNORM; + case AbstractTextureFormat::BGRA8: + return typeless ? DXGI_FORMAT_B8G8R8A8_TYPELESS : DXGI_FORMAT_B8G8R8A8_UNORM; + case AbstractTextureFormat::R16: + return typeless ? DXGI_FORMAT_R16_TYPELESS : DXGI_FORMAT_R16_UNORM; + case AbstractTextureFormat::R32F: + return typeless ? DXGI_FORMAT_R32_TYPELESS : DXGI_FORMAT_R32_FLOAT; + case AbstractTextureFormat::D16: + return DXGI_FORMAT_R16_TYPELESS; + case AbstractTextureFormat::D24_S8: + return DXGI_FORMAT_R24G8_TYPELESS; + case AbstractTextureFormat::D32F: + return DXGI_FORMAT_R32_TYPELESS; + case AbstractTextureFormat::D32F_S8: + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + default: + PanicAlert("Unhandled texture format."); + return DXGI_FORMAT_R8G8B8A8_UNORM; + } +} +DXGI_FORMAT GetSRVFormatForHostFormat(AbstractTextureFormat format) { switch (format) { @@ -47,23 +69,6 @@ DXGI_FORMAT GetDXGIFormatForHostFormat(AbstractTextureFormat format) return DXGI_FORMAT_R16_UNORM; case AbstractTextureFormat::R32F: return DXGI_FORMAT_R32_FLOAT; - case AbstractTextureFormat::D16: - return DXGI_FORMAT_R16_TYPELESS; - case AbstractTextureFormat::D24_S8: - return DXGI_FORMAT_R24G8_TYPELESS; - case AbstractTextureFormat::D32F: - return DXGI_FORMAT_R32_TYPELESS; - case AbstractTextureFormat::D32F_S8: - return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; - default: - PanicAlert("Unhandled texture format."); - return DXGI_FORMAT_R8G8B8A8_UNORM; - } -} -DXGI_FORMAT GetSRVFormatForHostFormat(AbstractTextureFormat format) -{ - switch (format) - { case AbstractTextureFormat::D16: return DXGI_FORMAT_R16_UNORM; case AbstractTextureFormat::D24_S8: @@ -73,7 +78,25 @@ DXGI_FORMAT GetSRVFormatForHostFormat(AbstractTextureFormat format) case AbstractTextureFormat::D32F_S8: return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; default: - return GetDXGIFormatForHostFormat(format); + PanicAlert("Unhandled SRV format"); + return DXGI_FORMAT_UNKNOWN; + } +} +DXGI_FORMAT 
GetRTVFormatForHostFormat(AbstractTextureFormat format, bool integer) +{ + switch (format) + { + case AbstractTextureFormat::RGBA8: + return integer ? DXGI_FORMAT_R8G8B8A8_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; + case AbstractTextureFormat::BGRA8: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case AbstractTextureFormat::R16: + return integer ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R16_UNORM; + case AbstractTextureFormat::R32F: + return DXGI_FORMAT_R32_FLOAT; + default: + PanicAlert("Unhandled RTV format"); + return DXGI_FORMAT_UNKNOWN; } } DXGI_FORMAT GetDSVFormatForHostFormat(AbstractTextureFormat format) @@ -89,55 +112,87 @@ DXGI_FORMAT GetDSVFormatForHostFormat(AbstractTextureFormat format) case AbstractTextureFormat::D32F_S8: return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; default: - return GetDXGIFormatForHostFormat(format); + PanicAlert("Unhandled DSV format"); + return DXGI_FORMAT_UNKNOWN; } } } // Anonymous namespace -DXTexture::DXTexture(const TextureConfig& tex_config) : AbstractTexture(tex_config) +DXTexture::DXTexture(const TextureConfig& tex_config, ID3D11Texture2D* d3d_texture, + ID3D11ShaderResourceView* d3d_srv, ID3D11UnorderedAccessView* d3d_uav) + : AbstractTexture(tex_config), m_d3d_texture(d3d_texture), m_d3d_srv(d3d_srv), + m_d3d_uav(d3d_uav) { - DXGI_FORMAT tex_format = GetDXGIFormatForHostFormat(m_config.format); - DXGI_FORMAT srv_format = GetSRVFormatForHostFormat(m_config.format); - DXGI_FORMAT rtv_format = DXGI_FORMAT_UNKNOWN; - DXGI_FORMAT dsv_format = DXGI_FORMAT_UNKNOWN; - UINT bind_flags = D3D11_BIND_SHADER_RESOURCE; - if (tex_config.rendertarget) - { - if (IsDepthFormat(tex_config.format)) - { - bind_flags |= D3D11_BIND_DEPTH_STENCIL; - dsv_format = GetDSVFormatForHostFormat(m_config.format); - } - else - { - bind_flags |= D3D11_BIND_RENDER_TARGET; - rtv_format = tex_format; - } - } - - CD3D11_TEXTURE2D_DESC texdesc(tex_format, tex_config.width, tex_config.height, tex_config.layers, - tex_config.levels, bind_flags, D3D11_USAGE_DEFAULT, 0, - 
tex_config.samples, 0, 0); - - ID3D11Texture2D* pTexture; - HRESULT hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &pTexture); - CHECK(SUCCEEDED(hr), "Create backing DXTexture"); - - m_texture = new D3DTexture2D(pTexture, static_cast(bind_flags), srv_format, - dsv_format, rtv_format, tex_config.samples > 1); - - SAFE_RELEASE(pTexture); } DXTexture::~DXTexture() { - g_renderer->UnbindTexture(this); - m_texture->Release(); + if (m_d3d_uav) + m_d3d_uav->Release(); + + if (m_d3d_srv) + { + if (D3D::stateman->UnsetTexture(m_d3d_srv) != 0) + D3D::stateman->ApplyTextures(); + + m_d3d_srv->Release(); + } + m_d3d_texture->Release(); } -D3DTexture2D* DXTexture::GetRawTexIdentifier() const +std::unique_ptr DXTexture::Create(const TextureConfig& config) { - return m_texture; + // Use typeless to create the texture when it's a render target, so we can alias it with an + // integer format (for EFB). + const DXGI_FORMAT tex_format = GetDXGIFormatForHostFormat(config.format, config.IsRenderTarget()); + const DXGI_FORMAT srv_format = GetSRVFormatForHostFormat(config.format); + UINT bindflags = D3D11_BIND_SHADER_RESOURCE; + if (config.IsRenderTarget()) + bindflags |= IsDepthFormat(config.format) ? D3D11_BIND_DEPTH_STENCIL : D3D11_BIND_RENDER_TARGET; + if (config.IsComputeImage()) + bindflags |= D3D11_BIND_UNORDERED_ACCESS; + + CD3D11_TEXTURE2D_DESC desc(tex_format, config.width, config.height, config.layers, config.levels, + bindflags, D3D11_USAGE_DEFAULT, 0, config.samples, 0, 0); + ID3D11Texture2D* d3d_texture; + HRESULT hr = D3D::device->CreateTexture2D(&desc, nullptr, &d3d_texture); + if (FAILED(hr)) + { + PanicAlert("Failed to create %ux%ux%u D3D backing texture", config.width, config.height, + config.layers); + return nullptr; + } + + ID3D11ShaderResourceView* d3d_srv; + const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(d3d_texture, + config.IsMultisampled() ? 
+ D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY : + D3D11_SRV_DIMENSION_TEXTURE2DARRAY, + srv_format, 0, config.levels, 0, config.layers); + hr = D3D::device->CreateShaderResourceView(d3d_texture, &srv_desc, &d3d_srv); + if (FAILED(hr)) + { + PanicAlert("Failed to create %ux%ux%u D3D SRV", config.width, config.height, config.layers); + d3d_texture->Release(); + return nullptr; + } + + ID3D11UnorderedAccessView* d3d_uav = nullptr; + if (config.IsComputeImage()) + { + const CD3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc( + d3d_texture, D3D11_UAV_DIMENSION_TEXTURE2DARRAY, srv_format, 0, 0, config.layers); + hr = D3D::device->CreateUnorderedAccessView(d3d_texture, &uav_desc, &d3d_uav); + if (FAILED(hr)) + { + PanicAlert("Failed to create %ux%ux%u D3D UAV", config.width, config.height, config.layers); + d3d_uav->Release(); + d3d_texture->Release(); + return nullptr; + } + } + + return std::make_unique(config, d3d_texture, d3d_srv, d3d_uav); } void DXTexture::CopyRectangleFromTexture(const AbstractTexture* src, @@ -158,42 +213,11 @@ void DXTexture::CopyRectangleFromTexture(const AbstractTexture* src, src_box.back = 1; D3D::context->CopySubresourceRegion( - m_texture->GetTex(), D3D11CalcSubresource(dst_level, dst_layer, m_config.levels), - dst_rect.left, dst_rect.top, 0, srcentry->m_texture->GetTex(), + m_d3d_texture, D3D11CalcSubresource(dst_level, dst_layer, m_config.levels), dst_rect.left, + dst_rect.top, 0, srcentry->m_d3d_texture, D3D11CalcSubresource(src_level, src_layer, srcentry->m_config.levels), &src_box); } -void DXTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) -{ - const DXTexture* srcentry = static_cast(source); - ASSERT(m_config.rendertarget); - - g_renderer->ResetAPIState(); // reset any game specific settings - - const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(float(dstrect.left), float(dstrect.top), - float(dstrect.GetWidth()), float(dstrect.GetHeight())); - - 
D3D::stateman->UnsetTexture(m_texture->GetSRV()); - D3D::stateman->Apply(); - - D3D::context->OMSetRenderTargets(1, &m_texture->GetRTV(), nullptr); - D3D::context->RSSetViewports(1, &vp); - D3D::SetLinearCopySampler(); - D3D11_RECT srcRC; - srcRC.left = srcrect.left; - srcRC.right = srcrect.right; - srcRC.top = srcrect.top; - srcRC.bottom = srcrect.bottom; - D3D::drawShadedTexQuad( - srcentry->m_texture->GetSRV(), &srcRC, srcentry->m_config.width, srcentry->m_config.height, - PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader(), 0); - - g_renderer->RestoreAPIState(); -} - void DXTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) { @@ -204,16 +228,16 @@ void DXTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::R rect.top + rect.GetHeight() <= static_cast(srcentry->m_config.height)); D3D::context->ResolveSubresource( - m_texture->GetTex(), D3D11CalcSubresource(level, layer, m_config.levels), - srcentry->m_texture->GetTex(), D3D11CalcSubresource(level, layer, srcentry->m_config.levels), - GetDXGIFormatForHostFormat(m_config.format)); + m_d3d_texture, D3D11CalcSubresource(level, layer, m_config.levels), srcentry->m_d3d_texture, + D3D11CalcSubresource(level, layer, srcentry->m_config.levels), + GetDXGIFormatForHostFormat(m_config.format, false)); } void DXTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) { size_t src_pitch = CalculateStrideForFormat(m_config.format, row_length); - D3D::context->UpdateSubresource(m_texture->GetTex(), level, nullptr, buffer, + D3D::context->UpdateSubresource(m_d3d_texture, level, nullptr, buffer, static_cast(src_pitch), 0); } @@ -251,8 +275,8 @@ std::unique_ptr DXStagingTexture::Create(StagingTextureType ty cpu_flags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; } - 
CD3D11_TEXTURE2D_DESC desc(GetDXGIFormatForHostFormat(config.format), config.width, config.height, - 1, 1, 0, usage, cpu_flags); + CD3D11_TEXTURE2D_DESC desc(GetDXGIFormatForHostFormat(config.format, false), config.width, + config.height, 1, 1, 0, usage, cpu_flags); ID3D11Texture2D* texture; HRESULT hr = D3D::device->CreateTexture2D(&desc, nullptr, &texture); @@ -267,22 +291,33 @@ void DXStagingTexture::CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect) { - ASSERT(m_type == StagingTextureType::Readback); + ASSERT(m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); - ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetConfig().width && - src_rect.top >= 0 && static_cast(src_rect.bottom) <= src->GetConfig().height); + ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetWidth() && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= src->GetHeight()); ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); if (IsMapped()) DXStagingTexture::Unmap(); - CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); - D3D::context->CopySubresourceRegion( - m_tex, 0, static_cast(dst_rect.left), static_cast(dst_rect.top), 0, - static_cast(src)->GetRawTexIdentifier()->GetTex(), - D3D11CalcSubresource(src_level, src_layer, src->GetConfig().levels), &src_box); + if (static_cast(src_rect.GetWidth()) == GetWidth() && + static_cast(src_rect.GetHeight()) == GetHeight()) + { + // Copy whole resource, needed for depth textures. 
+ D3D::context->CopySubresourceRegion( + m_tex, 0, 0, 0, 0, static_cast(src)->GetD3DTexture(), + D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), nullptr); + } + else + { + CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); + D3D::context->CopySubresourceRegion( + m_tex, 0, static_cast(dst_rect.left), static_cast(dst_rect.top), 0, + static_cast(src)->GetD3DTexture(), + D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), &src_box); + } m_needs_flush = true; } @@ -294,19 +329,29 @@ void DXStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, A ASSERT(m_type == StagingTextureType::Upload); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); - ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= m_config.width && - src_rect.top >= 0 && static_cast(src_rect.bottom) <= m_config.height); - ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetConfig().width && - dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetConfig().height); + ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= GetWidth() && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= GetHeight()); + ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetWidth() && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetHeight()); if (IsMapped()) DXStagingTexture::Unmap(); - CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); - D3D::context->CopySubresourceRegion( - static_cast(dst)->GetRawTexIdentifier()->GetTex(), - D3D11CalcSubresource(dst_level, dst_layer, dst->GetConfig().levels), - static_cast(dst_rect.left), static_cast(dst_rect.top), 0, m_tex, 0, &src_box); + if (static_cast(src_rect.GetWidth()) == dst->GetWidth() && + static_cast(src_rect.GetHeight()) == dst->GetHeight()) + { + D3D::context->CopySubresourceRegion( + static_cast(dst)->GetD3DTexture(), + D3D11CalcSubresource(dst_level, 
dst_layer, dst->GetLevels()), 0, 0, 0, m_tex, 0, nullptr); + } + else + { + CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); + D3D::context->CopySubresourceRegion( + static_cast(dst)->GetD3DTexture(), + D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), + static_cast(dst_rect.left), static_cast(dst_rect.top), 0, m_tex, 0, &src_box); + } } bool DXStagingTexture::Map() @@ -348,11 +393,14 @@ void DXStagingTexture::Flush() m_needs_flush = false; } -DXFramebuffer::DXFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, +DXFramebuffer::DXFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples, - ID3D11RenderTargetView* rtv, ID3D11DepthStencilView* dsv) - : AbstractFramebuffer(color_format, depth_format, width, height, layers, samples), m_rtv(rtv), - m_dsv(dsv) + ID3D11RenderTargetView* rtv, ID3D11RenderTargetView* integer_rtv, + ID3D11DepthStencilView* dsv) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples), + m_rtv(rtv), m_integer_rtv(integer_rtv), m_dsv(dsv) { } @@ -360,12 +408,14 @@ DXFramebuffer::~DXFramebuffer() { if (m_rtv) m_rtv->Release(); + if (m_integer_rtv) + m_integer_rtv->Release(); if (m_dsv) m_dsv->Release(); } -std::unique_ptr DXFramebuffer::Create(const DXTexture* color_attachment, - const DXTexture* depth_attachment) +std::unique_ptr DXFramebuffer::Create(DXTexture* color_attachment, + DXTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; @@ -381,55 +431,45 @@ std::unique_ptr DXFramebuffer::Create(const DXTexture* color_atta const u32 samples = either_attachment->GetSamples(); ID3D11RenderTargetView* rtv = nullptr; + ID3D11RenderTargetView* integer_rtv = nullptr; if (color_attachment) { - 
D3D11_RENDER_TARGET_VIEW_DESC desc; - desc.Format = GetDXGIFormatForHostFormat(color_attachment->GetConfig().format); - if (color_attachment->GetConfig().IsMultisampled()) - { - desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY; - desc.Texture2DMSArray.ArraySize = color_attachment->GetConfig().layers; - desc.Texture2DMSArray.FirstArraySlice = 0; - } - else - { - desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DARRAY; - desc.Texture2DArray.ArraySize = color_attachment->GetConfig().layers; - desc.Texture2DArray.FirstArraySlice = 0; - desc.Texture2DArray.MipSlice = 0; - } - - HRESULT hr = D3D::device->CreateRenderTargetView( - color_attachment->GetRawTexIdentifier()->GetTex(), &desc, &rtv); + CD3D11_RENDER_TARGET_VIEW_DESC desc( + color_attachment->IsMultisampled() ? D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY : + D3D11_RTV_DIMENSION_TEXTURE2DARRAY, + GetRTVFormatForHostFormat(color_attachment->GetFormat(), false), 0, 0, + color_attachment->GetLayers()); + HRESULT hr = + D3D::device->CreateRenderTargetView(color_attachment->GetD3DTexture(), &desc, &rtv); CHECK(SUCCEEDED(hr), "Create render target view for framebuffer"); + + // Only create the integer RTV on Win8+. 
+ DXGI_FORMAT integer_format = GetRTVFormatForHostFormat(color_attachment->GetFormat(), true); + if (D3D::device1 && integer_format != desc.Format) + { + desc.Format = integer_format; + hr = D3D::device->CreateRenderTargetView(color_attachment->GetD3DTexture(), &desc, + &integer_rtv); + CHECK(SUCCEEDED(hr), "Create integer render target view for framebuffer"); + } } ID3D11DepthStencilView* dsv = nullptr; if (depth_attachment) { - D3D11_DEPTH_STENCIL_VIEW_DESC desc; - desc.Format = GetDXGIFormatForHostFormat(depth_attachment->GetConfig().format); - if (depth_attachment->GetConfig().IsMultisampled()) - { - desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY; - desc.Texture2DMSArray.ArraySize = depth_attachment->GetConfig().layers; - desc.Texture2DMSArray.FirstArraySlice = 0; - } - else - { - desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DARRAY; - desc.Texture2DArray.ArraySize = depth_attachment->GetConfig().layers; - desc.Texture2DArray.FirstArraySlice = 0; - desc.Texture2DArray.MipSlice = 0; - } - - HRESULT hr = D3D::device->CreateDepthStencilView( - depth_attachment->GetRawTexIdentifier()->GetTex(), &desc, &dsv); + const CD3D11_DEPTH_STENCIL_VIEW_DESC desc( + depth_attachment->GetConfig().IsMultisampled() ? 
D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY : + D3D11_DSV_DIMENSION_TEXTURE2DARRAY, + GetDSVFormatForHostFormat(depth_attachment->GetFormat()), 0, 0, + depth_attachment->GetLayers(), 0); + HRESULT hr = + D3D::device->CreateDepthStencilView(depth_attachment->GetD3DTexture(), &desc, &dsv); CHECK(SUCCEEDED(hr), "Create depth stencil view for framebuffer"); } - return std::make_unique(color_format, depth_format, width, height, layers, samples, - rtv, dsv); + return std::make_unique(color_attachment, depth_attachment, color_format, + depth_format, width, height, layers, samples, rtv, + integer_rtv, dsv); } } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXTexture.h b/Source/Core/VideoBackends/D3D/DXTexture.h index 96d8f13919..0a4e0ace48 100644 --- a/Source/Core/VideoBackends/D3D/DXTexture.h +++ b/Source/Core/VideoBackends/D3D/DXTexture.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "Common/CommonTypes.h" @@ -11,32 +12,34 @@ #include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" -class D3DTexture2D; - namespace DX11 { class DXTexture final : public AbstractTexture { public: - explicit DXTexture(const TextureConfig& tex_config); + explicit DXTexture(const TextureConfig& tex_config, ID3D11Texture2D* d3d_texture, + ID3D11ShaderResourceView* d3d_srv, ID3D11UnorderedAccessView* d3d_uav); ~DXTexture(); + static std::unique_ptr Create(const TextureConfig& config); + void CopyRectangleFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) 
override; - D3DTexture2D* GetRawTexIdentifier() const; + ID3D11Texture2D* GetD3DTexture() const { return m_d3d_texture; } + ID3D11ShaderResourceView* GetD3DSRV() const { return m_d3d_srv; } + ID3D11UnorderedAccessView* GetD3DUAV() const { return m_d3d_uav; } private: - D3DTexture2D* m_texture; + ID3D11Texture2D* m_d3d_texture; + ID3D11ShaderResourceView* m_d3d_srv; + ID3D11UnorderedAccessView* m_d3d_uav; }; class DXStagingTexture final : public AbstractStagingTexture @@ -68,19 +71,22 @@ private: class DXFramebuffer final : public AbstractFramebuffer { public: - DXFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, + DXFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples, ID3D11RenderTargetView* rtv, - ID3D11DepthStencilView* dsv); + ID3D11RenderTargetView* integer_rtv, ID3D11DepthStencilView* dsv); ~DXFramebuffer() override; ID3D11RenderTargetView* const* GetRTVArray() const { return &m_rtv; } + ID3D11RenderTargetView* const* GetIntegerRTVArray() const { return &m_integer_rtv; } UINT GetNumRTVs() const { return m_rtv ? 1 : 0; } ID3D11DepthStencilView* GetDSV() const { return m_dsv; } - static std::unique_ptr Create(const DXTexture* color_attachment, - const DXTexture* depth_attachment); + static std::unique_ptr Create(DXTexture* color_attachment, + DXTexture* depth_attachment); protected: ID3D11RenderTargetView* m_rtv; + ID3D11RenderTargetView* m_integer_rtv; ID3D11DepthStencilView* m_dsv; }; diff --git a/Source/Core/VideoBackends/D3D/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D/FramebufferManager.cpp deleted file mode 100644 index 5a2088f40d..0000000000 --- a/Source/Core/VideoBackends/D3D/FramebufferManager.cpp +++ /dev/null @@ -1,303 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/D3D/FramebufferManager.h" - -#include -#include - -#include "Common/CommonTypes.h" -#include "Core/HW/Memmap.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DUtil.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/Render.h" -#include "VideoBackends/D3D/VertexShaderCache.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -static bool s_integer_efb_render_target = false; - -FramebufferManager::Efb FramebufferManager::m_efb; -unsigned int FramebufferManager::m_target_width; -unsigned int FramebufferManager::m_target_height; - -D3DTexture2D*& FramebufferManager::GetEFBColorTexture() -{ - return m_efb.color_tex; -} - -D3DTexture2D*& FramebufferManager::GetEFBColorReadTexture() -{ - return m_efb.color_read_texture; -} -ID3D11Texture2D*& FramebufferManager::GetEFBColorStagingBuffer() -{ - return m_efb.color_staging_buf; -} - -D3DTexture2D*& FramebufferManager::GetEFBDepthTexture() -{ - return m_efb.depth_tex; -} -D3DTexture2D*& FramebufferManager::GetEFBDepthReadTexture() -{ - return m_efb.depth_read_texture; -} -ID3D11Texture2D*& FramebufferManager::GetEFBDepthStagingBuffer() -{ - return m_efb.depth_staging_buf; -} - -D3DTexture2D*& FramebufferManager::GetResolvedEFBColorTexture() -{ - if (g_ActiveConfig.iMultisamples > 1) - { - for (int i = 0; i < m_efb.slices; i++) - D3D::context->ResolveSubresource(m_efb.resolved_color_tex->GetTex(), - D3D11CalcSubresource(0, i, 1), m_efb.color_tex->GetTex(), - D3D11CalcSubresource(0, i, 1), DXGI_FORMAT_R8G8B8A8_UNORM); - return m_efb.resolved_color_tex; - } - else - { - return m_efb.color_tex; - } -} - -D3DTexture2D*& FramebufferManager::GetResolvedEFBDepthTexture() -{ - if (g_ActiveConfig.iMultisamples > 1) - { - // ResolveSubresource does not work with depth textures. - // Instead, we use a shader that selects the minimum depth from all samples. 
- g_renderer->ResetAPIState(); - - CD3D11_VIEWPORT viewport(0.f, 0.f, (float)m_target_width, (float)m_target_height); - D3D::context->RSSetViewports(1, &viewport); - D3D::context->OMSetRenderTargets(1, &m_efb.resolved_depth_tex->GetRTV(), nullptr); - - const D3D11_RECT source_rect = CD3D11_RECT(0, 0, m_target_width, m_target_height); - D3D::drawShadedTexQuad( - m_efb.depth_tex->GetSRV(), &source_rect, m_target_width, m_target_height, - PixelShaderCache::GetDepthResolveProgram(), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); - - g_renderer->RestoreAPIState(); - return m_efb.resolved_depth_tex; - } - else - { - return m_efb.depth_tex; - } -} - -void FramebufferManager::SwapReinterpretTexture() -{ - std::swap(m_efb.color_tex, m_efb.color_temp_tex); - std::swap(m_efb.color_int_rtv, m_efb.color_temp_int_rtv); -} - -void FramebufferManager::SetIntegerEFBRenderTarget(bool enabled) -{ - if (s_integer_efb_render_target == enabled) - return; - - // We only use UINT render targets for logic ops, which is only supported with D3D11.1. - if (!D3D::device1) - return; - - s_integer_efb_render_target = enabled; - BindEFBRenderTarget(); -} - -void FramebufferManager::BindEFBRenderTarget(bool bind_depth) -{ - ID3D11RenderTargetView* rtv = - s_integer_efb_render_target ? m_efb.color_int_rtv : m_efb.color_tex->GetRTV(); - ID3D11DepthStencilView* dsv = bind_depth ? 
m_efb.depth_tex->GetDSV() : nullptr; - D3D::context->OMSetRenderTargets(1, &rtv, dsv); -} - -FramebufferManager::FramebufferManager(int target_width, int target_height) -{ - static constexpr std::array clear_color = {0.0f, 0.0f, 0.0f, 1.0f}; - m_target_width = static_cast(std::max(target_width, 1)); - m_target_height = static_cast(std::max(target_height, 1)); - DXGI_SAMPLE_DESC sample_desc; - sample_desc.Count = g_ActiveConfig.iMultisamples; - sample_desc.Quality = 0; - - ID3D11Texture2D* buf; - D3D11_TEXTURE2D_DESC texdesc; - HRESULT hr; - - m_EFBLayers = m_efb.slices = (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 2 : 1; - - // EFB color texture - primary render target - texdesc = - CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_TYPELESS, m_target_width, m_target_height, - m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, - D3D11_USAGE_DEFAULT, 0, sample_desc.Count, sample_desc.Quality); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB color texture (size: %dx%d; hr=%#x)", m_target_width, - m_target_height, hr); - m_efb.color_tex = new D3DTexture2D( - buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), - DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, - (sample_desc.Count > 1)); - - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.color_tex->GetTex(), "EFB color texture"); - D3D::SetDebugObjectName(m_efb.color_tex->GetSRV(), "EFB color texture shader resource view"); - D3D::SetDebugObjectName(m_efb.color_tex->GetRTV(), "EFB color texture render target view"); - D3D::context->ClearRenderTargetView(m_efb.color_tex->GetRTV(), clear_color.data()); - - // Temporary EFB color texture - used in ReinterpretPixelData - texdesc = - CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_TYPELESS, m_target_width, m_target_height, - m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, - D3D11_USAGE_DEFAULT, 0, sample_desc.Count, 
sample_desc.Quality); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB color temp texture (size: %dx%d; hr=%#x)", m_target_width, - m_target_height, hr); - m_efb.color_temp_tex = new D3DTexture2D( - buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), - DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, - (sample_desc.Count > 1)); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.color_temp_tex->GetTex(), "EFB color temp texture"); - D3D::SetDebugObjectName(m_efb.color_temp_tex->GetSRV(), - "EFB color temp texture shader resource view"); - D3D::SetDebugObjectName(m_efb.color_temp_tex->GetRTV(), - "EFB color temp texture render target view"); - D3D::context->ClearRenderTargetView(m_efb.color_temp_tex->GetRTV(), clear_color.data()); - - // Integer render targets for EFB, used for logic op - CD3D11_RENDER_TARGET_VIEW_DESC int_rtv_desc(m_efb.color_tex->GetTex(), - g_ActiveConfig.iMultisamples > 1 ? 
- D3D11_RTV_DIMENSION_TEXTURE2DMS : - D3D11_RTV_DIMENSION_TEXTURE2D, - DXGI_FORMAT_R8G8B8A8_UINT); - hr = D3D::device->CreateRenderTargetView(m_efb.color_tex->GetTex(), &int_rtv_desc, - &m_efb.color_int_rtv); - CHECK(hr == S_OK, "create EFB integer RTV(hr=%#x)", hr); - hr = D3D::device->CreateRenderTargetView(m_efb.color_temp_tex->GetTex(), &int_rtv_desc, - &m_efb.color_temp_int_rtv); - CHECK(hr == S_OK, "create EFB integer RTV(hr=%#x)", hr); - - // Render buffer for AccessEFB (color data) - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 1, 1, D3D11_BIND_RENDER_TARGET); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB color read texture (hr=%#x)", hr); - m_efb.color_read_texture = new D3DTexture2D(buf, D3D11_BIND_RENDER_TARGET); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.color_read_texture->GetTex(), - "EFB color read texture (used in Renderer::AccessEFB)"); - D3D::SetDebugObjectName( - m_efb.color_read_texture->GetRTV(), - "EFB color read texture render target view (used in Renderer::AccessEFB)"); - - // AccessEFB - Sysmem buffer used to retrieve the pixel data from depth_read_texture - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 1, 1, 0, D3D11_USAGE_STAGING, - D3D11_CPU_ACCESS_READ); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &m_efb.color_staging_buf); - CHECK(hr == S_OK, "create EFB color staging buffer (hr=%#x)", hr); - D3D::SetDebugObjectName(m_efb.color_staging_buf, - "EFB color staging texture (used for Renderer::AccessEFB)"); - - // EFB depth buffer - primary depth buffer - texdesc = - CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, - 1, D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE, - D3D11_USAGE_DEFAULT, 0, sample_desc.Count, sample_desc.Quality); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB depth texture (size: %dx%d; hr=%#x)", m_target_width, - 
m_target_height, hr); - m_efb.depth_tex = new D3DTexture2D( - buf, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE), - DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, (sample_desc.Count > 1)); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.depth_tex->GetTex(), "EFB depth texture"); - D3D::SetDebugObjectName(m_efb.depth_tex->GetDSV(), "EFB depth texture depth stencil view"); - D3D::SetDebugObjectName(m_efb.depth_tex->GetSRV(), "EFB depth texture shader resource view"); - D3D::context->ClearDepthStencilView(m_efb.depth_tex->GetDSV(), D3D11_CLEAR_DEPTH, 0.0f, 0); - - // Render buffer for AccessEFB (depth data) - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_FLOAT, 1, 1, 1, 1, D3D11_BIND_RENDER_TARGET); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB depth read texture (hr=%#x)", hr); - m_efb.depth_read_texture = new D3DTexture2D(buf, D3D11_BIND_RENDER_TARGET); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.depth_read_texture->GetTex(), - "EFB depth read texture (used in Renderer::AccessEFB)"); - D3D::SetDebugObjectName( - m_efb.depth_read_texture->GetRTV(), - "EFB depth read texture render target view (used in Renderer::AccessEFB)"); - - // AccessEFB - Sysmem buffer used to retrieve the pixel data from depth_read_texture - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_FLOAT, 1, 1, 1, 1, 0, D3D11_USAGE_STAGING, - D3D11_CPU_ACCESS_READ); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &m_efb.depth_staging_buf); - CHECK(hr == S_OK, "create EFB depth staging buffer (hr=%#x)", hr); - D3D::SetDebugObjectName(m_efb.depth_staging_buf, - "EFB depth staging texture (used for Renderer::AccessEFB)"); - - if (g_ActiveConfig.iMultisamples > 1) - { - // Framebuffer resolve textures (color+depth) - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, - m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE, - D3D11_USAGE_DEFAULT, 0, 1); - hr = 
D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB color resolve texture (size: %dx%d; hr=%#x)", m_target_width, - m_target_height, hr); - m_efb.resolved_color_tex = - new D3DTexture2D(buf, D3D11_BIND_SHADER_RESOURCE, DXGI_FORMAT_R8G8B8A8_UNORM); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.resolved_color_tex->GetTex(), "EFB color resolve texture"); - D3D::SetDebugObjectName(m_efb.resolved_color_tex->GetSRV(), - "EFB color resolve texture shader resource view"); - - texdesc = - CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_FLOAT, m_target_width, m_target_height, m_efb.slices, - 1, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width, - m_target_height, hr); - m_efb.resolved_depth_tex = new D3DTexture2D( - buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), - DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32_FLOAT); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.resolved_depth_tex->GetTex(), "EFB depth resolve texture"); - D3D::SetDebugObjectName(m_efb.resolved_depth_tex->GetSRV(), - "EFB depth resolve texture shader resource view"); - } - else - { - m_efb.resolved_color_tex = nullptr; - m_efb.resolved_depth_tex = nullptr; - } - s_integer_efb_render_target = false; -} - -FramebufferManager::~FramebufferManager() -{ - SAFE_RELEASE(m_efb.color_tex); - SAFE_RELEASE(m_efb.color_int_rtv); - SAFE_RELEASE(m_efb.color_temp_tex); - SAFE_RELEASE(m_efb.color_temp_int_rtv); - SAFE_RELEASE(m_efb.color_staging_buf); - SAFE_RELEASE(m_efb.color_read_texture); - SAFE_RELEASE(m_efb.resolved_color_tex); - SAFE_RELEASE(m_efb.depth_tex); - SAFE_RELEASE(m_efb.depth_staging_buf); - SAFE_RELEASE(m_efb.depth_read_texture); - SAFE_RELEASE(m_efb.resolved_depth_tex); -} - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/FramebufferManager.h 
b/Source/Core/VideoBackends/D3D/FramebufferManager.h deleted file mode 100644 index f8767a2c5d..0000000000 --- a/Source/Core/VideoBackends/D3D/FramebufferManager.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/D3D/D3DTexture.h" -#include "VideoCommon/FramebufferManagerBase.h" - -namespace DX11 -{ -// On the GameCube, the game sends a request for the graphics processor to -// transfer its internal EFB (Embedded Framebuffer) to an area in GameCube RAM -// called the XFB (External Framebuffer). The size and location of the XFB is -// decided at the time of the copy, and the format is always YUYV. The video -// interface is given a pointer to the XFB, which will be decoded and -// displayed on the TV. -// -// There are two ways for Dolphin to emulate this: -// -// Real XFB mode: -// -// Dolphin will behave like the GameCube and encode the EFB to -// a portion of GameCube RAM. The emulated video interface will decode the data -// for output to the screen. -// -// Advantages: Behaves exactly like the GameCube. -// Disadvantages: Resolution will be limited. -// -// Virtual XFB mode: -// -// When a request is made to copy the EFB to an XFB, Dolphin -// will remember the RAM location and size of the XFB in a Virtual XFB list. -// The video interface will look up the XFB in the list and use the enhanced -// data stored there, if available. -// -// Advantages: Enables high resolution graphics, better than real hardware. -// Disadvantages: If the GameCube CPU writes directly to the XFB (which is -// possible but uncommon), the Virtual XFB will not capture this information. - -// There may be multiple XFBs in GameCube RAM. This is the maximum number to -// virtualize. 
- -class FramebufferManager : public FramebufferManagerBase -{ -public: - FramebufferManager(int target_width, int target_height); - ~FramebufferManager(); - - static D3DTexture2D*& GetEFBColorTexture(); - static D3DTexture2D*& GetEFBColorReadTexture(); - static ID3D11Texture2D*& GetEFBColorStagingBuffer(); - - static D3DTexture2D*& GetEFBDepthTexture(); - static D3DTexture2D*& GetEFBDepthReadTexture(); - static ID3D11Texture2D*& GetEFBDepthStagingBuffer(); - - static D3DTexture2D*& GetResolvedEFBColorTexture(); - static D3DTexture2D*& GetResolvedEFBDepthTexture(); - - static D3DTexture2D*& GetEFBColorTempTexture() { return m_efb.color_temp_tex; } - static void SwapReinterpretTexture(); - static void SetIntegerEFBRenderTarget(bool enabled); - static void BindEFBRenderTarget(bool bind_depth = true); - -private: - static struct Efb - { - D3DTexture2D* color_tex; - ID3D11RenderTargetView* color_int_rtv; - ID3D11Texture2D* color_staging_buf; - D3DTexture2D* color_read_texture; - - D3DTexture2D* depth_tex; - ID3D11Texture2D* depth_staging_buf; - D3DTexture2D* depth_read_texture; - - D3DTexture2D* color_temp_tex; - ID3D11RenderTargetView* color_temp_int_rtv; - - D3DTexture2D* resolved_color_tex; - D3DTexture2D* resolved_depth_tex; - - int slices; - } m_efb; - - static unsigned int m_target_width; - static unsigned int m_target_height; -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp deleted file mode 100644 index baa444bf47..0000000000 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "Common/FileUtil.h" -#include "Common/StringUtil.h" - -#include "Core/ConfigManager.h" - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" - -#include "VideoCommon/Debugger.h" -#include "VideoCommon/GeometryShaderGen.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -ID3D11GeometryShader* ClearGeometryShader = nullptr; -ID3D11GeometryShader* CopyGeometryShader = nullptr; - -ID3D11GeometryShader* GeometryShaderCache::GetClearGeometryShader() -{ - return (g_ActiveConfig.stereo_mode != StereoMode::Off) ? ClearGeometryShader : nullptr; -} -ID3D11GeometryShader* GeometryShaderCache::GetCopyGeometryShader() -{ - return (g_ActiveConfig.stereo_mode != StereoMode::Off) ? CopyGeometryShader : nullptr; -} - -const char clear_shader_code[] = { - "struct VSOUTPUT\n" - "{\n" - " float4 vPosition : POSITION;\n" - " float4 vColor0 : COLOR0;\n" - "};\n" - "struct GSOUTPUT\n" - "{\n" - " float4 vPosition : POSITION;\n" - " float4 vColor0 : COLOR0;\n" - " uint slice : SV_RenderTargetArrayIndex;\n" - "};\n" - "[maxvertexcount(6)]\n" - "void main(triangle VSOUTPUT o[3], inout TriangleStream Output)\n" - "{\n" - "for(int slice = 0; slice < 2; slice++)\n" - "{\n" - " for(int i = 0; i < 3; i++)\n" - " {\n" - " GSOUTPUT OUT;\n" - " OUT.vPosition = o[i].vPosition;\n" - " OUT.vColor0 = o[i].vColor0;\n" - " OUT.slice = slice;\n" - " Output.Append(OUT);\n" - " }\n" - " Output.RestartStrip();\n" - "}\n" - "}\n"}; - -const char copy_shader_code[] = { - "struct VSOUTPUT\n" - "{\n" - " float4 vPosition : POSITION;\n" - " float3 vTexCoord : TEXCOORD0;\n" - "};\n" - "struct GSOUTPUT\n" - "{\n" - " float4 vPosition : POSITION;\n" - " float3 vTexCoord : TEXCOORD0;\n" - " uint slice : SV_RenderTargetArrayIndex;\n" - "};\n" - "[maxvertexcount(6)]\n" - "void main(triangle VSOUTPUT o[3], inout 
TriangleStream Output)\n" - "{\n" - "for(int slice = 0; slice < 2; slice++)\n" - "{\n" - " for(int i = 0; i < 3; i++)\n" - " {\n" - " GSOUTPUT OUT;\n" - " OUT.vPosition = o[i].vPosition;\n" - " OUT.vTexCoord = o[i].vTexCoord;\n" - " OUT.vTexCoord.z = float(slice);\n" - " OUT.slice = slice;\n" - " Output.Append(OUT);\n" - " }\n" - " Output.RestartStrip();\n" - "}\n" - "}\n"}; - -void GeometryShaderCache::Init() -{ - // used when drawing clear quads - ClearGeometryShader = D3D::CompileAndCreateGeometryShader(clear_shader_code); - CHECK(ClearGeometryShader != nullptr, "Create clear geometry shader"); - D3D::SetDebugObjectName(ClearGeometryShader, "clear geometry shader"); - - // used for buffer copy - CopyGeometryShader = D3D::CompileAndCreateGeometryShader(copy_shader_code); - CHECK(CopyGeometryShader != nullptr, "Create copy geometry shader"); - D3D::SetDebugObjectName(CopyGeometryShader, "copy geometry shader"); -} - -void GeometryShaderCache::Shutdown() -{ - SAFE_RELEASE(ClearGeometryShader); - SAFE_RELEASE(CopyGeometryShader); -} -} // DX11 diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h b/Source/Core/VideoBackends/D3D/GeometryShaderCache.h deleted file mode 100644 index 38ffde1b51..0000000000 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "VideoCommon/GeometryShaderGen.h" - -namespace DX11 -{ -class GeometryShaderCache -{ -public: - static void Init(); - static void Shutdown(); - - static ID3D11GeometryShader* GetClearGeometryShader(); - static ID3D11GeometryShader* GetCopyGeometryShader(); - - static ID3D11Buffer* GetConstantBuffer(); - static void UpdateConstantBuffer(const void* data, u32 data_size); -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp b/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp index 421a1019bd..4d3407ed04 100644 --- a/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp @@ -5,10 +5,10 @@ #include #include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DBlob.h" #include "VideoBackends/D3D/D3DState.h" +#include "VideoBackends/D3D/DXShader.h" +#include "VideoBackends/D3D/Render.h" #include "VideoBackends/D3D/VertexManager.h" -#include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoCommon/NativeVertexFormat.h" namespace DX11 @@ -16,7 +16,7 @@ namespace DX11 std::mutex s_input_layout_lock; std::unique_ptr -VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) { return std::make_unique(vtx_decl); } @@ -77,11 +77,11 @@ DXGI_FORMAT VarToD3D(VarType t, int size, bool integer) return retval; } -D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) -{ - this->vtx_decl = _vtx_decl; +D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl) + : NativeVertexFormat(vtx_decl) - const AttributeFormat* format = &_vtx_decl.position; +{ + const AttributeFormat* format = &vtx_decl.position; if (format->enable) { m_elems[m_num_elems].SemanticName = "POSITION"; @@ -93,7 +93,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) for (int i = 0; i < 
3; i++) { - format = &_vtx_decl.normals[i]; + format = &vtx_decl.normals[i]; if (format->enable) { m_elems[m_num_elems].SemanticName = "NORMAL"; @@ -107,7 +107,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) for (int i = 0; i < 2; i++) { - format = &_vtx_decl.colors[i]; + format = &vtx_decl.colors[i]; if (format->enable) { m_elems[m_num_elems].SemanticName = "COLOR"; @@ -121,7 +121,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) for (int i = 0; i < 8; i++) { - format = &_vtx_decl.texcoords[i]; + format = &vtx_decl.texcoords[i]; if (format->enable) { m_elems[m_num_elems].SemanticName = "TEXCOORD"; @@ -133,7 +133,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) } } - format = &_vtx_decl.posmtx; + format = &vtx_decl.posmtx; if (format->enable) { m_elems[m_num_elems].SemanticName = "BLENDINDICES"; @@ -150,7 +150,7 @@ D3DVertexFormat::~D3DVertexFormat() SAFE_RELEASE(layout); } -ID3D11InputLayout* D3DVertexFormat::GetInputLayout(D3DBlob* vs_bytecode) +ID3D11InputLayout* D3DVertexFormat::GetInputLayout(const void* vs_bytecode, size_t vs_bytecode_size) { // CreateInputLayout requires a shader input, but it only looks at the signature of the shader, // so we don't need to recompute it if the shader changes. 
@@ -158,8 +158,8 @@ ID3D11InputLayout* D3DVertexFormat::GetInputLayout(D3DBlob* vs_bytecode) if (layout) return layout; - HRESULT hr = DX11::D3D::device->CreateInputLayout( - m_elems.data(), m_num_elems, vs_bytecode->Data(), vs_bytecode->Size(), &layout); + HRESULT hr = D3D::device->CreateInputLayout(m_elems.data(), m_num_elems, vs_bytecode, + vs_bytecode_size, &layout); if (FAILED(hr)) PanicAlert("Failed to create input layout, %s %d\n", __FILE__, __LINE__); DX11::D3D::SetDebugObjectName(m_layout, "input layout used to emulate the GX pipeline"); diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp deleted file mode 100644 index 231f317488..0000000000 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/D3D/PSTextureEncoder.h" - -#include "Common/Assert.h" -#include "Common/Logging/Log.h" -#include "Core/HW/Memmap.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DUtil.h" -#include "VideoBackends/D3D/DXTexture.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/Render.h" -#include "VideoBackends/D3D/TextureCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" - -#include "VideoCommon/AbstractStagingTexture.h" -#include "VideoCommon/AbstractTexture.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/VideoCommon.h" - -namespace DX11 -{ -struct EFBEncodeParams -{ - s32 SrcLeft; - s32 SrcTop; - u32 DestWidth; - u32 ScaleFactor; - float y_scale; - float gamma_rcp; - float clamp_top; - float clamp_bottom; - float filter_coefficients[3]; - u32 padding; -}; - -PSTextureEncoder::PSTextureEncoder() -{ -} - -PSTextureEncoder::~PSTextureEncoder() = default; - -void 
PSTextureEncoder::Init() -{ - m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); - ASSERT(m_encoding_render_texture); - - // Create constant buffer for uploading data to shaders - D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), D3D11_BIND_CONSTANT_BUFFER); - HRESULT hr = D3D::device->CreateBuffer(&bd, nullptr, &m_encode_params); - CHECK(SUCCEEDED(hr), "create efb encode params buffer"); - D3D::SetDebugObjectName(m_encode_params, "efb encoder params buffer"); -} - -void PSTextureEncoder::Shutdown() -{ - for (auto& it : m_encoding_shaders) - SAFE_RELEASE(it.second); - m_encoding_shaders.clear(); - - SAFE_RELEASE(m_encode_params); -} - -void PSTextureEncoder::Encode( - AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) -{ - // Resolve MSAA targets before copying. - // FIXME: Instead of resolving EFB, it would be better to pick out a - // single sample from each pixel. The game may break if it isn't - // expecting the blurred edges around multisampled shapes. - ID3D11ShaderResourceView* pEFB = params.depth ? 
- FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : - FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); - - // Reset API - g_renderer->ResetAPIState(); - - // Set up all the state for EFB encoding - { - const u32 words_per_row = bytes_per_row / sizeof(u32); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(words_per_row), FLOAT(num_blocks_y)); - D3D::context->RSSetViewports(1, &vp); - - constexpr EFBRectangle fullSrcRect(0, 0, EFB_WIDTH, EFB_HEIGHT); - TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); - - D3D::context->OMSetRenderTargets( - 1, - &static_cast(m_encoding_render_texture.get())->GetRawTexIdentifier()->GetRTV(), - nullptr); - - EFBEncodeParams encode_params; - encode_params.SrcLeft = src_rect.left; - encode_params.SrcTop = src_rect.top; - encode_params.DestWidth = native_width; - encode_params.ScaleFactor = scale_by_half ? 2 : 1; - encode_params.y_scale = y_scale; - encode_params.gamma_rcp = 1.0f / gamma; - encode_params.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; - encode_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; - for (size_t i = 0; i < filter_coefficients.size(); i++) - encode_params.filter_coefficients[i] = filter_coefficients[i]; - - D3D::context->UpdateSubresource(m_encode_params, 0, nullptr, &encode_params, 0, 0); - D3D::stateman->SetPixelConstants(m_encode_params); - - // We also linear filtering for both box filtering and downsampling higher resolutions to 1x - // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more - // complex down filtering to average all pixels and produce the correct result. 
- // Also, box filtering won't be correct for anything other than 1x IR - if (scale_by_half || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f) - D3D::SetLinearCopySampler(); - else - D3D::SetPointCopySampler(); - - D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), g_renderer->GetTargetWidth(), - g_renderer->GetTargetHeight(), GetEncodingPixelShader(params), - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout()); - - // Copy to staging buffer - MathUtil::Rectangle copy_rect(0, 0, words_per_row, num_blocks_y); - dst->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); - } - - g_renderer->RestoreAPIState(); -} - -ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyParams& params) -{ - auto iter = m_encoding_shaders.find(params); - if (iter != m_encoding_shaders.end()) - return iter->second; - - D3DBlob* bytecode = nullptr; - const char* shader = TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::D3D); - if (!D3D::CompilePixelShader(shader, &bytecode)) - { - PanicAlert("Failed to compile texture encoding shader."); - m_encoding_shaders[params] = nullptr; - return nullptr; - } - - ID3D11PixelShader* newShader; - HRESULT hr = - D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), nullptr, &newShader); - CHECK(SUCCEEDED(hr), "create efb encoder pixel shader"); - - m_encoding_shaders.emplace(params, newShader); - return newShader; -} -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h deleted file mode 100644 index 43f153c4cf..0000000000 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/VideoCommon.h" - -class AbstractTexture; -class AbstractStagingTexture; - -struct ID3D11Texture2D; -struct ID3D11RenderTargetView; -struct ID3D11Buffer; -struct ID3D11InputLayout; -struct ID3D11VertexShader; -struct ID3D11PixelShader; -struct ID3D11ClassLinkage; -struct ID3D11ClassInstance; -struct ID3D11BlendState; -struct ID3D11DepthStencilState; -struct ID3D11RasterizerState; -struct ID3D11SamplerState; - -namespace DX11 -{ -class PSTextureEncoder final -{ -public: - PSTextureEncoder(); - ~PSTextureEncoder(); - - void Init(); - void Shutdown(); - void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); - -private: - ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyParams& params); - - ID3D11Buffer* m_encode_params = nullptr; - std::unique_ptr m_encoding_render_texture; - std::map m_encoding_shaders; -}; -} diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp deleted file mode 100644 index c86086614f..0000000000 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ /dev/null @@ -1,315 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "Common/CommonTypes.h" -#include "Common/FileUtil.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" - -#include "Core/ConfigManager.h" -#include "Core/Host.h" - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/PixelShaderCache.h" - -#include "VideoCommon/Debugger.h" -#include "VideoCommon/PixelShaderGen.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr}; -ID3D11PixelShader* s_ClearProgram = nullptr; -ID3D11PixelShader* s_AnaglyphProgram = nullptr; -ID3D11PixelShader* s_DepthResolveProgram = nullptr; -ID3D11PixelShader* s_rgba6_to_rgb8[2] = {nullptr}; -ID3D11PixelShader* s_rgb8_to_rgba6[2] = {nullptr}; - -const char clear_program_code[] = {"void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float4 incol0 : COLOR0){\n" - "ocol0 = incol0;\n" - "}\n"}; - -// TODO: Find some way to avoid having separate shaders for non-MSAA and MSAA... 
-const char color_copy_program_code[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "ocol0 = Tex0.Sample(samp0,uv0);\n" - "}\n"}; - -// Anaglyph Red-Cyan shader based on Dubois algorithm -// Constants taken from the paper: -// "Conversion of a Stereo Pair to Anaglyph with -// the Least-Squares Projection Method" -// Eric Dubois, March 2009 -const char anaglyph_program_code[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "float4 c0 = Tex0.Sample(samp0, float3(uv0.xy, 0.0));\n" - "float4 c1 = Tex0.Sample(samp0, float3(uv0.xy, 1.0));\n" - "float3x3 l = float3x3( 0.437, 0.449, 0.164,\n" - " -0.062,-0.062,-0.024,\n" - " -0.048,-0.050,-0.017);\n" - "float3x3 r = float3x3(-0.011,-0.032,-0.007,\n" - " 0.377, 0.761, 0.009,\n" - " -0.026,-0.093, 1.234);\n" - "ocol0 = float4(mul(l, c0.rgb) + mul(r, c1.rgb), c0.a);\n" - "}\n"}; - -// TODO: Improve sampling algorithm! 
-const char color_copy_program_code_msaa[] = { - "#define SAMPLES %d\n" - "sampler samp0 : register(s0);\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "int width, height, slices, samples;\n" - "Tex0.GetDimensions(width, height, slices, samples);\n" - "ocol0 = 0;\n" - "for(int i = 0; i < SAMPLES; ++i)\n" - " ocol0 += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" - "ocol0 /= SAMPLES;\n" - "}\n"}; - -const char depth_resolve_program[] = { - "#define SAMPLES %d\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "void main(\n" - " out float ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int width, height, slices, samples;\n" - " Tex0.GetDimensions(width, height, slices, samples);\n" - " ocol0 = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n" - " for(int i = 1; i < SAMPLES; ++i)\n" - " ocol0 = min(ocol0, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n" - "}\n"}; - -const char reint_rgba6_to_rgb8[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - " out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int4 src6 = round(Tex0.Sample(samp0,uv0) * 63.f);\n" - " int4 dst8;\n" - " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" - " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" - " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n" - " dst8.a = 255;\n" - " ocol0 = (float4)dst8 / 255.f;\n" - "}"}; - -const char reint_rgba6_to_rgb8_msaa[] = { - "#define SAMPLES %d\n" - "sampler samp0 : register(s0);\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "void main(\n" - " out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int width, height, slices, samples;\n" - " Tex0.GetDimensions(width, height, 
slices, samples);\n" - " float4 texcol = 0;\n" - " for (int i = 0; i < SAMPLES; ++i)\n" - " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" - " texcol /= SAMPLES;\n" - " int4 src6 = round(texcol * 63.f);\n" - " int4 dst8;\n" - " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" - " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" - " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n" - " dst8.a = 255;\n" - " ocol0 = (float4)dst8 / 255.f;\n" - "}"}; - -const char reint_rgb8_to_rgba6[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - " out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int4 src8 = round(Tex0.Sample(samp0,uv0) * 255.f);\n" - " int4 dst6;\n" - " dst6.r = src8.r >> 2;\n" - " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" - " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n" - " dst6.a = src8.b & 0x3F;\n" - " ocol0 = (float4)dst6 / 63.f;\n" - "}\n"}; - -const char reint_rgb8_to_rgba6_msaa[] = { - "#define SAMPLES %d\n" - "sampler samp0 : register(s0);\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "void main(\n" - " out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int width, height, slices, samples;\n" - " Tex0.GetDimensions(width, height, slices, samples);\n" - " float4 texcol = 0;\n" - " for (int i = 0; i < SAMPLES; ++i)\n" - " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" - " texcol /= SAMPLES;\n" - " int4 src8 = round(texcol * 255.f);\n" - " int4 dst6;\n" - " dst6.r = src8.r >> 2;\n" - " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" - " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n" - " dst6.a = src8.b & 0x3F;\n" - " ocol0 = (float4)dst6 / 63.f;\n" - "}\n"}; - -ID3D11PixelShader* PixelShaderCache::ReinterpRGBA6ToRGB8(bool multisampled) -{ - if (!multisampled || g_ActiveConfig.iMultisamples <= 1) - { - if 
(!s_rgba6_to_rgb8[0]) - { - s_rgba6_to_rgb8[0] = D3D::CompileAndCreatePixelShader(reint_rgba6_to_rgb8); - CHECK(s_rgba6_to_rgb8[0], "Create RGBA6 to RGB8 pixel shader"); - D3D::SetDebugObjectName(s_rgba6_to_rgb8[0], "RGBA6 to RGB8 pixel shader"); - } - return s_rgba6_to_rgb8[0]; - } - else if (!s_rgba6_to_rgb8[1]) - { - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(reint_rgba6_to_rgb8_msaa, g_ActiveConfig.iMultisamples); - s_rgba6_to_rgb8[1] = D3D::CompileAndCreatePixelShader(buf); - - CHECK(s_rgba6_to_rgb8[1], "Create RGBA6 to RGB8 MSAA pixel shader"); - D3D::SetDebugObjectName(s_rgba6_to_rgb8[1], "RGBA6 to RGB8 MSAA pixel shader"); - } - return s_rgba6_to_rgb8[1]; -} - -ID3D11PixelShader* PixelShaderCache::ReinterpRGB8ToRGBA6(bool multisampled) -{ - if (!multisampled || g_ActiveConfig.iMultisamples <= 1) - { - if (!s_rgb8_to_rgba6[0]) - { - s_rgb8_to_rgba6[0] = D3D::CompileAndCreatePixelShader(reint_rgb8_to_rgba6); - CHECK(s_rgb8_to_rgba6[0], "Create RGB8 to RGBA6 pixel shader"); - D3D::SetDebugObjectName(s_rgb8_to_rgba6[0], "RGB8 to RGBA6 pixel shader"); - } - return s_rgb8_to_rgba6[0]; - } - else if (!s_rgb8_to_rgba6[1]) - { - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(reint_rgb8_to_rgba6_msaa, g_ActiveConfig.iMultisamples); - s_rgb8_to_rgba6[1] = D3D::CompileAndCreatePixelShader(buf); - - CHECK(s_rgb8_to_rgba6[1], "Create RGB8 to RGBA6 MSAA pixel shader"); - D3D::SetDebugObjectName(s_rgb8_to_rgba6[1], "RGB8 to RGBA6 MSAA pixel shader"); - } - return s_rgb8_to_rgba6[1]; -} - -ID3D11PixelShader* PixelShaderCache::GetColorCopyProgram(bool multisampled) -{ - if (!multisampled || g_ActiveConfig.iMultisamples <= 1) - { - return s_ColorCopyProgram[0]; - } - else if (s_ColorCopyProgram[1]) - { - return s_ColorCopyProgram[1]; - } - else - { - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(color_copy_program_code_msaa, g_ActiveConfig.iMultisamples); - 
s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(buf); - CHECK(s_ColorCopyProgram[1] != nullptr, "Create color copy MSAA pixel shader"); - D3D::SetDebugObjectName(s_ColorCopyProgram[1], "color copy MSAA pixel shader"); - return s_ColorCopyProgram[1]; - } -} - -ID3D11PixelShader* PixelShaderCache::GetClearProgram() -{ - return s_ClearProgram; -} - -ID3D11PixelShader* PixelShaderCache::GetAnaglyphProgram() -{ - return s_AnaglyphProgram; -} - -ID3D11PixelShader* PixelShaderCache::GetDepthResolveProgram() -{ - if (s_DepthResolveProgram != nullptr) - return s_DepthResolveProgram; - - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(depth_resolve_program, g_ActiveConfig.iMultisamples); - s_DepthResolveProgram = D3D::CompileAndCreatePixelShader(buf); - CHECK(s_DepthResolveProgram != nullptr, "Create depth matrix MSAA pixel shader"); - D3D::SetDebugObjectName(s_DepthResolveProgram, "depth resolve pixel shader"); - return s_DepthResolveProgram; -} - -void PixelShaderCache::Init() -{ - // used when drawing clear quads - s_ClearProgram = D3D::CompileAndCreatePixelShader(clear_program_code); - CHECK(s_ClearProgram != nullptr, "Create clear pixel shader"); - D3D::SetDebugObjectName(s_ClearProgram, "clear pixel shader"); - - // used for anaglyph stereoscopy - s_AnaglyphProgram = D3D::CompileAndCreatePixelShader(anaglyph_program_code); - CHECK(s_AnaglyphProgram != nullptr, "Create anaglyph pixel shader"); - D3D::SetDebugObjectName(s_AnaglyphProgram, "anaglyph pixel shader"); - - // used when copying/resolving the color buffer - s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(color_copy_program_code); - CHECK(s_ColorCopyProgram[0] != nullptr, "Create color copy pixel shader"); - D3D::SetDebugObjectName(s_ColorCopyProgram[0], "color copy pixel shader"); -} - -// Used in Swap() when AA mode has changed -void PixelShaderCache::InvalidateMSAAShaders() -{ - SAFE_RELEASE(s_ColorCopyProgram[1]); - SAFE_RELEASE(s_rgb8_to_rgba6[1]); - 
SAFE_RELEASE(s_rgba6_to_rgb8[1]); - SAFE_RELEASE(s_DepthResolveProgram); -} - -void PixelShaderCache::Shutdown() -{ - SAFE_RELEASE(s_ClearProgram); - SAFE_RELEASE(s_AnaglyphProgram); - SAFE_RELEASE(s_DepthResolveProgram); - for (int i = 0; i < 2; ++i) - { - SAFE_RELEASE(s_ColorCopyProgram[i]); - SAFE_RELEASE(s_rgba6_to_rgb8[i]); - SAFE_RELEASE(s_rgb8_to_rgba6[i]); - } -} -} // DX11 diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.h b/Source/Core/VideoBackends/D3D/PixelShaderCache.h deleted file mode 100644 index da1b3b720d..0000000000 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "VideoCommon/AsyncShaderCompiler.h" -#include "VideoCommon/PixelShaderGen.h" -#include "VideoCommon/UberShaderPixel.h" - -namespace DX11 -{ -class D3DBlob; - -class PixelShaderCache -{ -public: - static void Init(); - static void Shutdown(); - - static ID3D11PixelShader* GetColorCopyProgram(bool multisampled); - static ID3D11PixelShader* GetClearProgram(); - static ID3D11PixelShader* GetAnaglyphProgram(); - static ID3D11PixelShader* GetDepthResolveProgram(); - static ID3D11PixelShader* ReinterpRGBA6ToRGB8(bool multisampled); - static ID3D11PixelShader* ReinterpRGB8ToRGBA6(bool multisampled); - - static void InvalidateMSAAShaders(); -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 2a82924708..a172425e3f 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -23,33 +23,19 @@ #include "VideoBackends/D3D/BoundingBox.h" #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DUtil.h" #include "VideoBackends/D3D/DXPipeline.h" #include "VideoBackends/D3D/DXShader.h" #include 
"VideoBackends/D3D/DXTexture.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/TextureCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoCommon/BPFunctions.h" -#include "VideoCommon/OnScreenDisplay.h" -#include "VideoCommon/PixelEngine.h" +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/PostProcessing.h" #include "VideoCommon/RenderState.h" -#include "VideoCommon/VideoBackendBase.h" -#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" namespace DX11 { -// Reserve 512KB for vertices, and 64KB for uniforms. -// This should be sufficient for our usages, and if more is required, -// we split it into multiple draws. -constexpr u32 UTILITY_VBO_SIZE = 512 * 1024; -constexpr u32 UTILITY_UBO_SIZE = 64 * 1024; - // Nvidia stereo blitting struct defined in "nvstereo.h" from the Nvidia SDK typedef struct _Nv_Stereo_Image_Header { @@ -67,118 +53,9 @@ Renderer::Renderer(int backbuffer_width, int backbuffer_height, float backbuffer AbstractTextureFormat::RGBA8) { m_last_fullscreen_state = D3D::GetFullscreenState(); - g_framebuffer_manager = std::make_unique(m_target_width, m_target_height); - SetupDeviceObjects(); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)m_target_width, (float)m_target_height); - D3D::context->RSSetViewports(1, &vp); - FramebufferManager::BindEFBRenderTarget(); - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; } -Renderer::~Renderer() -{ - TeardownDeviceObjects(); -} - -void Renderer::SetupDeviceObjects() -{ - HRESULT hr; - - D3D11_DEPTH_STENCIL_DESC ddesc; - ddesc.DepthEnable = FALSE; - ddesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - ddesc.DepthFunc = D3D11_COMPARISON_ALWAYS; - ddesc.StencilEnable = FALSE; - ddesc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK; - 
ddesc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; - hr = D3D::device->CreateDepthStencilState(&ddesc, &m_clear_depth_states[0]); - CHECK(hr == S_OK, "Create depth state for Renderer::ClearScreen"); - ddesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; - ddesc.DepthEnable = TRUE; - hr = D3D::device->CreateDepthStencilState(&ddesc, &m_clear_depth_states[1]); - CHECK(hr == S_OK, "Create depth state for Renderer::ClearScreen"); - ddesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - hr = D3D::device->CreateDepthStencilState(&ddesc, &m_clear_depth_states[2]); - CHECK(hr == S_OK, "Create depth state for Renderer::ClearScreen"); - D3D::SetDebugObjectName(m_clear_depth_states[0], - "depth state for Renderer::ClearScreen (depth buffer disabled)"); - D3D::SetDebugObjectName( - m_clear_depth_states[1], - "depth state for Renderer::ClearScreen (depth buffer enabled, writing enabled)"); - D3D::SetDebugObjectName( - m_clear_depth_states[2], - "depth state for Renderer::ClearScreen (depth buffer enabled, writing disabled)"); - - D3D11_BLEND_DESC blenddesc; - blenddesc.AlphaToCoverageEnable = FALSE; - blenddesc.IndependentBlendEnable = FALSE; - blenddesc.RenderTarget[0].BlendEnable = FALSE; - blenddesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - blenddesc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; - blenddesc.RenderTarget[0].DestBlend = D3D11_BLEND_ZERO; - blenddesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; - blenddesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - blenddesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; - blenddesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; - hr = D3D::device->CreateBlendState(&blenddesc, &m_reset_blend_state); - CHECK(hr == S_OK, "Create blend state for Renderer::ResetAPIState"); - D3D::SetDebugObjectName(m_reset_blend_state, "blend state for Renderer::ResetAPIState"); - - m_clear_blend_states[0] = m_reset_blend_state; - m_reset_blend_state->AddRef(); - - 
blenddesc.RenderTarget[0].RenderTargetWriteMask = - D3D11_COLOR_WRITE_ENABLE_RED | D3D11_COLOR_WRITE_ENABLE_GREEN | D3D11_COLOR_WRITE_ENABLE_BLUE; - hr = D3D::device->CreateBlendState(&blenddesc, &m_clear_blend_states[1]); - CHECK(hr == S_OK, "Create blend state for Renderer::ClearScreen"); - - blenddesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALPHA; - hr = D3D::device->CreateBlendState(&blenddesc, &m_clear_blend_states[2]); - CHECK(hr == S_OK, "Create blend state for Renderer::ClearScreen"); - - blenddesc.RenderTarget[0].RenderTargetWriteMask = 0; - hr = D3D::device->CreateBlendState(&blenddesc, &m_clear_blend_states[3]); - CHECK(hr == S_OK, "Create blend state for Renderer::ClearScreen"); - - ddesc.DepthEnable = FALSE; - ddesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - ddesc.DepthFunc = D3D11_COMPARISON_LESS; - ddesc.StencilEnable = FALSE; - ddesc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK; - ddesc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; - hr = D3D::device->CreateDepthStencilState(&ddesc, &m_reset_depth_state); - CHECK(hr == S_OK, "Create depth state for Renderer::ResetAPIState"); - D3D::SetDebugObjectName(m_reset_depth_state, "depth stencil state for Renderer::ResetAPIState"); - - D3D11_RASTERIZER_DESC rastdesc = CD3D11_RASTERIZER_DESC(D3D11_FILL_SOLID, D3D11_CULL_NONE, false, - 0, 0.f, 0.f, false, false, false, false); - hr = D3D::device->CreateRasterizerState(&rastdesc, &m_reset_rast_state); - CHECK(hr == S_OK, "Create rasterizer state for Renderer::ResetAPIState"); - D3D::SetDebugObjectName(m_reset_rast_state, "rasterizer state for Renderer::ResetAPIState"); - - m_screenshot_texture = nullptr; -} - -// Kill off all device objects -void Renderer::TeardownDeviceObjects() -{ - g_framebuffer_manager.reset(); - - SAFE_RELEASE(m_clear_blend_states[0]); - SAFE_RELEASE(m_clear_blend_states[1]); - SAFE_RELEASE(m_clear_blend_states[2]); - SAFE_RELEASE(m_clear_blend_states[3]); - SAFE_RELEASE(m_clear_depth_states[0]); - 
SAFE_RELEASE(m_clear_depth_states[1]); - SAFE_RELEASE(m_clear_depth_states[2]); - SAFE_RELEASE(m_reset_blend_state); - SAFE_RELEASE(m_reset_depth_state); - SAFE_RELEASE(m_reset_rast_state); - SAFE_RELEASE(m_screenshot_texture); - SAFE_RELEASE(m_3d_vision_texture); -} +Renderer::~Renderer() = default; void Renderer::Create3DVisionTexture(int width, int height) { @@ -200,9 +77,17 @@ void Renderer::Create3DVisionTexture(int width, int height) sys_data.SysMemPitch = pitch; sys_data.pSysMem = memory.get(); - m_3d_vision_texture = - D3DTexture2D::Create(width * 2, height + 1, D3D11_BIND_RENDER_TARGET, D3D11_USAGE_DEFAULT, - DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, &sys_data); + CD3D11_TEXTURE2D_DESC texture_desc(DXGI_FORMAT_R8G8B8A8_UNORM, width * 2, height + 1, 1, 1, + D3D11_BIND_RENDER_TARGET, D3D11_USAGE_DEFAULT, 0, 1, 0, 0); + ID3D11Texture2D* texture; + HRESULT hr = D3D::device->CreateTexture2D(&texture_desc, &sys_data, &texture); + CHECK(SUCCEEDED(hr), "Create 3D Vision Texture"); + m_3d_vision_texture = std::make_unique(TextureConfig(width * 2, height + 1, 1, 1, 1, + AbstractTextureFormat::RGBA8, + AbstractTextureFlag_RenderTarget), + texture, nullptr, nullptr); + m_3d_vision_framebuffer = + DXFramebuffer::Create(static_cast(m_3d_vision_texture.get()), nullptr); } bool Renderer::IsHeadless() const @@ -212,7 +97,7 @@ bool Renderer::IsHeadless() const std::unique_ptr Renderer::CreateTexture(const TextureConfig& config) { - return std::make_unique(config); + return DXTexture::Create(config); } std::unique_ptr Renderer::CreateStagingTexture(StagingTextureType type, @@ -221,12 +106,11 @@ std::unique_ptr Renderer::CreateStagingTexture(StagingTe return DXStagingTexture::Create(type, config); } -std::unique_ptr -Renderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) { - return 
DXFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return DXFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } std::unique_ptr Renderer::CreateShaderFromSource(ShaderStage stage, @@ -249,220 +133,44 @@ std::unique_ptr Renderer::CreatePipeline(const AbstractPipelin void Renderer::SetPipeline(const AbstractPipeline* pipeline) { const DXPipeline* dx_pipeline = static_cast(pipeline); - if (!dx_pipeline) + if (m_current_pipeline == dx_pipeline) return; - D3D::stateman->SetRasterizerState(dx_pipeline->GetRasterizerState()); - D3D::stateman->SetDepthState(dx_pipeline->GetDepthState()); - D3D::stateman->SetBlendState(dx_pipeline->GetBlendState()); - D3D::stateman->SetPrimitiveTopology(dx_pipeline->GetPrimitiveTopology()); - D3D::stateman->SetInputLayout(dx_pipeline->GetInputLayout()); - D3D::stateman->SetVertexShader(dx_pipeline->GetVertexShader()); - D3D::stateman->SetGeometryShader(dx_pipeline->GetGeometryShader()); - D3D::stateman->SetPixelShader(dx_pipeline->GetPixelShader()); -} - -TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) -{ - TargetRectangle result; - result.left = EFBToScaledX(rc.left); - result.top = EFBToScaledY(rc.top); - result.right = EFBToScaledX(rc.right); - result.bottom = EFBToScaledY(rc.bottom); - return result; + if (dx_pipeline) + { + D3D::stateman->SetRasterizerState(dx_pipeline->GetRasterizerState()); + D3D::stateman->SetDepthState(dx_pipeline->GetDepthState()); + D3D::stateman->SetBlendState(dx_pipeline->GetBlendState()); + D3D::stateman->SetPrimitiveTopology(dx_pipeline->GetPrimitiveTopology()); + D3D::stateman->SetInputLayout(dx_pipeline->GetInputLayout()); + D3D::stateman->SetVertexShader(dx_pipeline->GetVertexShader()); + D3D::stateman->SetGeometryShader(dx_pipeline->GetGeometryShader()); + D3D::stateman->SetPixelShader(dx_pipeline->GetPixelShader()); + D3D::stateman->SetIntegerRTV(dx_pipeline->UseLogicOp()); + } + else + { + // These will be 
destroyed at pipeline destruction. + D3D::stateman->SetInputLayout(nullptr); + D3D::stateman->SetVertexShader(nullptr); + D3D::stateman->SetGeometryShader(nullptr); + D3D::stateman->SetPixelShader(nullptr); + } } void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) { - const RECT rect = {rc.left, rc.top, rc.right, rc.bottom}; + // TODO: Move to stateman + const CD3D11_RECT rect(rc.left, rc.top, std::max(rc.right, rc.left + 1), + std::max(rc.bottom, rc.top + 1)); D3D::context->RSSetScissorRects(1, &rect); } -// This function allows the CPU to directly access the EFB. -// There are EFB peeks (which will read the color or depth of a pixel) -// and EFB pokes (which will change the color or depth of a pixel). -// -// The behavior of EFB peeks can only be modified by: -// - GX_PokeAlphaRead -// The behavior of EFB pokes can be modified by: -// - GX_PokeAlphaMode (TODO) -// - GX_PokeAlphaUpdate (TODO) -// - GX_PokeBlendMode (TODO) -// - GX_PokeColorUpdate (TODO) -// - GX_PokeDither (TODO) -// - GX_PokeDstAlpha (TODO) -// - GX_PokeZMode (TODO) -u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) -{ - // Convert EFB dimensions to the ones of our render target - EFBRectangle efbPixelRc; - efbPixelRc.left = x; - efbPixelRc.top = y; - efbPixelRc.right = x + 1; - efbPixelRc.bottom = y + 1; - TargetRectangle targetPixelRc = Renderer::ConvertEFBRectangle(efbPixelRc); - - // Take the mean of the resulting dimensions; TODO: Don't use the center pixel, compute the - // average color instead - D3D11_RECT RectToLock; - if (type == EFBAccessType::PeekColor || type == EFBAccessType::PeekZ) - { - RectToLock.left = (targetPixelRc.left + targetPixelRc.right) / 2; - RectToLock.top = (targetPixelRc.top + targetPixelRc.bottom) / 2; - RectToLock.right = RectToLock.left + 1; - RectToLock.bottom = RectToLock.top + 1; - } - else - { - RectToLock.left = targetPixelRc.left; - RectToLock.right = targetPixelRc.right; - RectToLock.top = targetPixelRc.top; - 
RectToLock.bottom = targetPixelRc.bottom; - } - - // Reset any game specific settings. - ResetAPIState(); - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, 1.f, 1.f); - D3D::context->RSSetViewports(1, &vp); - D3D::SetPointCopySampler(); - - // Select copy and read textures depending on if we are doing a color or depth read (since they - // are different formats). - D3DTexture2D* source_tex; - D3DTexture2D* read_tex; - ID3D11Texture2D* staging_tex; - if (type == EFBAccessType::PeekColor) - { - source_tex = FramebufferManager::GetEFBColorTexture(); - read_tex = FramebufferManager::GetEFBColorReadTexture(); - staging_tex = FramebufferManager::GetEFBColorStagingBuffer(); - } - else - { - source_tex = FramebufferManager::GetEFBDepthTexture(); - read_tex = FramebufferManager::GetEFBDepthReadTexture(); - staging_tex = FramebufferManager::GetEFBDepthStagingBuffer(); - } - - // Select pixel shader (we don't want to average depth samples, instead select the minimum). - ID3D11PixelShader* copy_pixel_shader; - if (type == EFBAccessType::PeekZ && g_ActiveConfig.iMultisamples > 1) - copy_pixel_shader = PixelShaderCache::GetDepthResolveProgram(); - else - copy_pixel_shader = PixelShaderCache::GetColorCopyProgram(true); - - // Draw a quad to grab the texel we want to read. - D3D::context->OMSetRenderTargets(1, &read_tex->GetRTV(), nullptr); - D3D::drawShadedTexQuad(source_tex->GetSRV(), &RectToLock, Renderer::GetTargetWidth(), - Renderer::GetTargetHeight(), copy_pixel_shader, - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout()); - - // Restore expected game state. - RestoreAPIState(); - - // Copy the pixel from the renderable to cpu-readable buffer. 
- D3D11_BOX box = CD3D11_BOX(0, 0, 0, 1, 1, 1); - D3D::context->CopySubresourceRegion(staging_tex, 0, 0, 0, 0, read_tex->GetTex(), 0, &box); - D3D11_MAPPED_SUBRESOURCE map; - CHECK(D3D::context->Map(staging_tex, 0, D3D11_MAP_READ, 0, &map) == S_OK, - "Map staging buffer failed"); - - // Convert the framebuffer data to the format the game is expecting to receive. - u32 ret; - if (type == EFBAccessType::PeekColor) - { - u32 val; - memcpy(&val, map.pData, sizeof(val)); - - // our buffers are RGBA, yet a BGRA value is expected - val = ((val & 0xFF00FF00) | ((val >> 16) & 0xFF) | ((val << 16) & 0xFF0000)); - - // check what to do with the alpha channel (GX_PokeAlphaRead) - PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); - - if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) - { - val = RGBA8ToRGBA6ToRGBA8(val); - } - else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - val = RGBA8ToRGB565ToRGBA8(val); - } - if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) - { - val |= 0xFF000000; - } - - if (alpha_read_mode.ReadMode == 2) - ret = val; // GX_READ_NONE - else if (alpha_read_mode.ReadMode == 1) - ret = (val | 0xFF000000); // GX_READ_FF - else /*if(alpha_read_mode.ReadMode == 0)*/ - ret = (val & 0x00FFFFFF); // GX_READ_00 - } - else // type == EFBAccessType::PeekZ - { - float val; - memcpy(&val, map.pData, sizeof(val)); - - // depth buffer is inverted in the d3d backend - val = 1.0f - val; - - if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - // if Z is in 16 bit format you must return a 16 bit integer - ret = MathUtil::Clamp(static_cast(val * 65536.0f), 0, 0xFFFF); - } - else - { - ret = MathUtil::Clamp(static_cast(val * 16777216.0f), 0, 0xFFFFFF); - } - } - - D3D::context->Unmap(staging_tex, 0); - return ret; -} - -void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - ResetAPIState(); - - if (type == EFBAccessType::PokeColor) - { - D3D11_VIEWPORT vp = - 
CD3D11_VIEWPORT(0.0f, 0.0f, (float)GetTargetWidth(), (float)GetTargetHeight()); - D3D::context->RSSetViewports(1, &vp); - } - else // if (type == EFBAccessType::PokeZ) - { - D3D::stateman->SetBlendState(m_clear_blend_states[3]); - D3D::stateman->SetDepthState(m_clear_depth_states[1]); - - D3D11_VIEWPORT vp = - CD3D11_VIEWPORT(0.0f, 0.0f, (float)GetTargetWidth(), (float)GetTargetHeight()); - - D3D::context->RSSetViewports(1, &vp); - } - - D3D::DrawEFBPokeQuads(type, points, num_points); - - RestoreAPIState(); -} - void Renderer::SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) { - // In D3D, the viewport rectangle must fit within the render target. - D3D11_VIEWPORT vp; - vp.TopLeftX = MathUtil::Clamp(x, 0.0f, static_cast(m_current_framebuffer_width - 1)); - vp.TopLeftY = MathUtil::Clamp(y, 0.0f, static_cast(m_current_framebuffer_height - 1)); - vp.Width = - MathUtil::Clamp(width, 1.0f, static_cast(m_current_framebuffer_width) - vp.TopLeftX); - vp.Height = - MathUtil::Clamp(height, 1.0f, static_cast(m_current_framebuffer_height) - vp.TopLeftY); - vp.MinDepth = near_depth; - vp.MaxDepth = far_depth; + // TODO: Move to stateman + const CD3D11_VIEWPORT vp(x, y, width, height, near_depth, far_depth); D3D::context->RSSetViewports(1, &vp); } @@ -478,89 +186,19 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) D3D::context->DrawIndexed(num_indices, base_index, base_vertex); } -void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, - u32 color, u32 z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) { - ResetAPIState(); - - if (colorEnable && alphaEnable) - D3D::stateman->SetBlendState(m_clear_blend_states[0]); - else if (colorEnable) - D3D::stateman->SetBlendState(m_clear_blend_states[1]); - else if (alphaEnable) - D3D::stateman->SetBlendState(m_clear_blend_states[2]); - else - 
D3D::stateman->SetBlendState(m_clear_blend_states[3]); - - // TODO: Should we enable Z testing here? - // if (!bpmem.zmode.testenable) D3D::stateman->PushDepthState(s_clear_depth_states[0]); - // else - if (zEnable) - D3D::stateman->SetDepthState(m_clear_depth_states[1]); - else /*if (!zEnable)*/ - D3D::stateman->SetDepthState(m_clear_depth_states[2]); - - // Update the view port for clearing the picture - TargetRectangle targetRc = Renderer::ConvertEFBRectangle(rc); - D3D11_VIEWPORT vp = - CD3D11_VIEWPORT((float)targetRc.left, (float)targetRc.top, (float)targetRc.GetWidth(), - (float)targetRc.GetHeight(), 0.f, 1.f); - D3D::context->RSSetViewports(1, &vp); - FramebufferManager::SetIntegerEFBRenderTarget(false); - - // Color is passed in bgra mode so we need to convert it to rgba - u32 rgbaColor = (color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000); - D3D::drawClearQuad(rgbaColor, 1.0f - (z & 0xFFFFFF) / 16777216.0f); - - RestoreAPIState(); -} - -void Renderer::ReinterpretPixelData(unsigned int convtype) -{ - // TODO: MSAA support.. 
- D3D11_RECT source = CD3D11_RECT(0, 0, GetTargetWidth(), GetTargetHeight()); - - ID3D11PixelShader* pixel_shader; - if (convtype == 0) - pixel_shader = PixelShaderCache::ReinterpRGB8ToRGBA6(true); - else if (convtype == 2) - pixel_shader = PixelShaderCache::ReinterpRGBA6ToRGB8(true); - else - { - ERROR_LOG(VIDEO, "Trying to reinterpret pixel data with unsupported conversion type %d", - convtype); - return; - } - - // convert data and set the target texture as our new EFB - ResetAPIState(); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, static_cast(GetTargetWidth()), - static_cast(GetTargetHeight())); - D3D::context->RSSetViewports(1, &vp); - - D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTempTexture()->GetRTV(), - nullptr); - D3D::SetPointCopySampler(); - D3D::drawShadedTexQuad( - FramebufferManager::GetEFBColorTexture()->GetSRV(), &source, GetTargetWidth(), - GetTargetHeight(), pixel_shader, VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); - - FramebufferManager::SwapReinterpretTexture(); - RestoreAPIState(); + D3D::stateman->SetComputeShader(static_cast(shader)->GetD3DComputeShader()); + D3D::stateman->SyncComputeBindings(); + D3D::context->Dispatch(groups_x, groups_y, groups_z); } void Renderer::BindBackbuffer(const ClearColor& clear_color) { CheckForSurfaceChange(); CheckForSurfaceResize(); - - D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), nullptr); - D3D::context->ClearRenderTargetView(D3D::GetBackBuffer()->GetRTV(), clear_color.data()); - m_current_framebuffer = nullptr; - m_current_framebuffer_width = m_backbuffer_width; - m_current_framebuffer_height = m_backbuffer_height; + SetAndClearFramebuffer(D3D::GetSwapChainFramebuffer(), clear_color); } void Renderer::PresentBackbuffer() @@ -570,14 +208,6 @@ void Renderer::PresentBackbuffer() void Renderer::OnConfigChanged(u32 bits) { - // Resize the back buffers NOW to avoid 
flickering - if (bits & (CONFIG_CHANGE_BIT_TARGET_SIZE | CONFIG_CHANGE_BIT_MULTISAMPLES | - CONFIG_CHANGE_BIT_STEREO_MODE)) - { - PixelShaderCache::InvalidateMSAAShaders(); - g_framebuffer_manager.reset(); - g_framebuffer_manager = std::make_unique(m_target_width, m_target_height); - } } void Renderer::CheckForSurfaceChange() @@ -585,8 +215,8 @@ void Renderer::CheckForSurfaceChange() if (!m_surface_changed.TestAndClear()) return; - SAFE_RELEASE(m_screenshot_texture); - SAFE_RELEASE(m_3d_vision_texture); + m_3d_vision_framebuffer.reset(); + m_3d_vision_texture.reset(); D3D::Reset(reinterpret_cast(m_new_surface_handle)); m_new_surface_handle = nullptr; @@ -601,8 +231,9 @@ void Renderer::CheckForSurfaceResize() if (!m_surface_resized.TestAndClear() && !exclusive_fullscreen_changed) return; - SAFE_RELEASE(m_screenshot_texture); - SAFE_RELEASE(m_3d_vision_texture); + m_3d_vision_framebuffer.reset(); + m_3d_vision_texture.reset(); + m_last_fullscreen_state = fullscreen_state; if (D3D::swapchain) D3D::ResizeSwapChain(); @@ -625,43 +256,38 @@ void Renderer::UpdateBackbufferSize() } } -// ALWAYS call RestoreAPIState for each ResetAPIState call you're doing -void Renderer::ResetAPIState() +void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) { - D3D::stateman->SetBlendState(m_reset_blend_state); - D3D::stateman->SetDepthState(m_reset_depth_state); - D3D::stateman->SetRasterizerState(m_reset_rast_state); -} + if (m_current_framebuffer == framebuffer) + return; -void Renderer::RestoreAPIState() -{ - // Gets us back into a more game-like state. - m_current_framebuffer = nullptr; - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; - FramebufferManager::BindEFBRenderTarget(); - BPFunctions::SetViewport(); - BPFunctions::SetScissor(); -} + // We can't leave the framebuffer bound as a texture and a render target. 
+ DXFramebuffer* fb = static_cast(framebuffer); + if ((fb->GetColorAttachment() && + D3D::stateman->UnsetTexture( + static_cast(fb->GetColorAttachment())->GetD3DSRV()) != 0) || + (fb->GetDepthAttachment() && + D3D::stateman->UnsetTexture( + static_cast(fb->GetDepthAttachment())->GetD3DSRV()) != 0)) + { + D3D::stateman->ApplyTextures(); + } -void Renderer::SetFramebuffer(const AbstractFramebuffer* framebuffer) -{ - const DXFramebuffer* fb = static_cast(framebuffer); - D3D::context->OMSetRenderTargets(fb->GetNumRTVs(), fb->GetRTVArray(), fb->GetDSV()); + D3D::stateman->SetFramebuffer(fb); m_current_framebuffer = fb; - m_current_framebuffer_width = fb->GetWidth(); - m_current_framebuffer_height = fb->GetHeight(); } -void Renderer::SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) +void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) { SetFramebuffer(framebuffer); } -void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, +void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value, float depth_value) { SetFramebuffer(framebuffer); + D3D::stateman->Apply(); + if (framebuffer->GetColorFormat() != AbstractTextureFormat::Undefined) { D3D::context->ClearRenderTargetView( @@ -676,9 +302,8 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, void Renderer::SetTexture(u32 index, const AbstractTexture* texture) { - D3D::stateman->SetTexture( - index, - texture ? static_cast(texture)->GetRawTexIdentifier()->GetSRV() : nullptr); + D3D::stateman->SetTexture(index, texture ? 
static_cast(texture)->GetD3DSRV() : + nullptr); } void Renderer::SetSamplerState(u32 index, const SamplerState& state) @@ -686,15 +311,15 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state) D3D::stateman->SetSampler(index, m_state_cache.Get(state)); } -void Renderer::UnbindTexture(const AbstractTexture* texture) +void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) { - D3D::stateman->UnsetTexture( - static_cast(texture)->GetRawTexIdentifier()->GetSRV()); + D3D::stateman->SetComputeUAV(texture ? static_cast(texture)->GetD3DUAV() : nullptr); } -void Renderer::SetInterlacingMode() +void Renderer::UnbindTexture(const AbstractTexture* texture) { - // TODO + if (D3D::stateman->UnsetTexture(static_cast(texture)->GetD3DSRV()) != 0) + D3D::stateman->ApplyTextures(); } u16 Renderer::BBoxRead(int index) @@ -736,93 +361,43 @@ void Renderer::BBoxWrite(int index, u16 _value) BBox::Set(index, value); } +void Renderer::Flush() +{ + D3D::context->Flush(); +} + +void Renderer::WaitForGPUIdle() +{ + // There is no glFinish() equivalent in D3D. 
+ D3D::context->Flush(); +} + void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) { - const CD3D11_RECT source_rc(rc.left, rc.top, rc.right, rc.bottom); - const TargetRectangle target_rc = GetTargetRectangle(); + if (g_ActiveConfig.stereo_mode != StereoMode::Nvidia3DVision) + return ::Renderer::RenderXFBToScreen(texture, rc); - // activate linear filtering for the buffer copies - D3D::SetLinearCopySampler(); + if (!m_3d_vision_texture) + Create3DVisionTexture(m_backbuffer_width, m_backbuffer_height); - if (g_ActiveConfig.stereo_mode == StereoMode::SBS || - g_ActiveConfig.stereo_mode == StereoMode::TAB) - { - TargetRectangle left_rc, right_rc; - std::tie(left_rc, right_rc) = ConvertStereoRectangle(target_rc); + // Render to staging texture which is double the width of the backbuffer + SetAndClearFramebuffer(m_3d_vision_framebuffer.get()); - SetViewport(static_cast(left_rc.left), static_cast(left_rc.top), - static_cast(left_rc.GetWidth()), static_cast(right_rc.GetHeight()), - 0.0f, 1.0f); - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), - PixelShaderCache::GetColorCopyProgram(false), - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, 0); + const auto target_rc = GetTargetRectangle(); + m_post_processor->BlitFromTexture(target_rc, rc, texture, 0); + m_post_processor->BlitFromTexture( + MathUtil::Rectangle(target_rc.left + m_backbuffer_width, target_rc.top, + target_rc.right + m_backbuffer_width, target_rc.bottom), + rc, texture, 1); - SetViewport(static_cast(right_rc.left), static_cast(right_rc.top), - static_cast(right_rc.GetWidth()), static_cast(right_rc.GetHeight()), - 0.0f, 1.0f); - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), - PixelShaderCache::GetColorCopyProgram(false), - 
VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, 1); - } - else if (g_ActiveConfig.stereo_mode == StereoMode::Nvidia3DVision) - { - if (!m_3d_vision_texture) - Create3DVisionTexture(m_backbuffer_width, m_backbuffer_height); + // Copy the left eye to the backbuffer, if Nvidia 3D Vision is enabled it should + // recognize the signature and automatically include the right eye frame. + const CD3D11_BOX box(0, 0, 0, m_backbuffer_width, m_backbuffer_height, 1); + D3D::context->CopySubresourceRegion(D3D::GetSwapChainTexture()->GetD3DTexture(), 0, 0, 0, 0, + m_3d_vision_texture->GetD3DTexture(), 0, &box); - const CD3D11_VIEWPORT left_vp( - static_cast(target_rc.left), static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight())); - const CD3D11_VIEWPORT right_vp( - static_cast(target_rc.left + m_backbuffer_width), static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight())); - - // Render to staging texture which is double the width of the backbuffer - D3D::context->OMSetRenderTargets(1, &m_3d_vision_texture->GetRTV(), nullptr); - - D3D::context->RSSetViewports(1, &left_vp); - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), - PixelShaderCache::GetColorCopyProgram(false), - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, 0); - - D3D::context->RSSetViewports(1, &right_vp); - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), - PixelShaderCache::GetColorCopyProgram(false), - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, 1); - - // Copy the left eye to the backbuffer, if Nvidia 3D Vision is enabled it should - // recognize the signature and automatically include the right eye frame. 
- const CD3D11_BOX box(0, 0, 0, m_backbuffer_width, m_backbuffer_height, 1); - D3D::context->CopySubresourceRegion(D3D::GetBackBuffer()->GetTex(), 0, 0, 0, 0, - m_3d_vision_texture->GetTex(), 0, &box); - - // Restore render target to backbuffer - D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), nullptr); - } - else - { - SetViewport(static_cast(target_rc.left), static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight()), - 0.0f, 1.0f); - - ID3D11PixelShader* pixelShader = (g_Config.stereo_mode == StereoMode::Anaglyph) ? - PixelShaderCache::GetAnaglyphProgram() : - PixelShaderCache::GetColorCopyProgram(false); - ID3D11GeometryShader* geomShader = (g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer) ? - GeometryShaderCache::GetCopyGeometryShader() : - nullptr; - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), pixelShader, - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), geomShader); - } + // Restore render target to backbuffer + SetFramebuffer(D3D::GetSwapChainFramebuffer()); } void Renderer::SetFullscreen(bool enable_fullscreen) diff --git a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h index 399babffad..0f9b38761e 100644 --- a/Source/Core/VideoBackends/D3D/Render.h +++ b/Source/Core/VideoBackends/D3D/Render.h @@ -9,11 +9,10 @@ #include "VideoBackends/D3D/D3DState.h" #include "VideoCommon/RenderBase.h" -enum class EFBAccessType; - namespace DX11 { -class D3DTexture2D; +class DXTexture; +class DXFramebuffer; class Renderer : public ::Renderer { @@ -32,53 +31,43 @@ public: size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const 
AbstractPipelineConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; void SetPipeline(const AbstractPipeline* pipeline) override; - void SetFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, - const ClearColor& color_value = {}, + void SetFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {}, float depth_value = 0.0f) override; void SetScissorRect(const MathUtil::Rectangle& rc) override; void SetTexture(u32 index, const AbstractTexture* texture) override; void SetSamplerState(u32 index, const SamplerState& state) override; + void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override; void UnbindTexture(const AbstractTexture* texture) override; - void SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; void SetFullscreen(bool enable_fullscreen) override; bool IsFullscreen() const override; - u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; - void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; - u16 BBoxRead(int index) 
override; void BBoxWrite(int index, u16 value) override; - void ResetAPIState() override; - void RestoreAPIState() override; - - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; + void Flush() override; + void WaitForGPUIdle() override; void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override; void OnConfigChanged(u32 bits) override; - void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, - u32 color, u32 z) override; - - void ReinterpretPixelData(unsigned int convtype) override; - private: - void SetupDeviceObjects(); - void TeardownDeviceObjects(); void Create3DVisionTexture(int width, int height); void CheckForSurfaceChange(); void CheckForSurfaceResize(); @@ -86,15 +75,9 @@ private: StateCache m_state_cache; - std::array m_clear_blend_states{}; - std::array m_clear_depth_states{}; - ID3D11BlendState* m_reset_blend_state = nullptr; - ID3D11DepthStencilState* m_reset_depth_state = nullptr; - ID3D11RasterizerState* m_reset_rast_state = nullptr; - - ID3D11Texture2D* m_screenshot_texture = nullptr; - D3DTexture2D* m_3d_vision_texture = nullptr; + std::unique_ptr m_3d_vision_texture; + std::unique_ptr m_3d_vision_framebuffer; bool m_last_fullscreen_state = false; }; -} +} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp deleted file mode 100644 index f17d12f4c3..0000000000 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ /dev/null @@ -1,318 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/D3D/TextureCache.h" - -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DTexture.h" -#include "VideoBackends/D3D/D3DUtil.h" -#include "VideoBackends/D3D/DXTexture.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PSTextureEncoder.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/RenderBase.h" -#include "VideoCommon/TextureConfig.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -static std::unique_ptr g_encoder; - -void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, - scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients); -} - -const char palette_shader[] = - R"HLSL( -sampler samp0 : register(s0); -Texture2DArray Tex0 : register(t0); -Buffer Tex1 : register(t1); -uniform float Multiply; - -uint Convert3To8(uint v) -{ - // Swizzle bits: 00000123 -> 12312312 - return (v << 5) | (v << 2) | (v >> 1); -} - -uint Convert4To8(uint v) -{ - // Swizzle bits: 00001234 -> 12341234 - return (v << 4) | v; -} - -uint Convert5To8(uint v) -{ - // Swizzle bits: 00012345 -> 12345123 - return (v << 3) | (v >> 2); -} - -uint Convert6To8(uint v) -{ - // Swizzle bits: 00123456 -> 12345612 - return (v << 2) | (v >> 4); -} - -float4 DecodePixel_RGB5A3(uint val) -{ 
- int r,g,b,a; - if ((val&0x8000)) - { - r=Convert5To8((val>>10) & 0x1f); - g=Convert5To8((val>>5 ) & 0x1f); - b=Convert5To8((val ) & 0x1f); - a=0xFF; - } - else - { - a=Convert3To8((val>>12) & 0x7); - r=Convert4To8((val>>8 ) & 0xf); - g=Convert4To8((val>>4 ) & 0xf); - b=Convert4To8((val ) & 0xf); - } - return float4(r, g, b, a) / 255; -} - -float4 DecodePixel_RGB565(uint val) -{ - int r, g, b, a; - r = Convert5To8((val >> 11) & 0x1f); - g = Convert6To8((val >> 5) & 0x3f); - b = Convert5To8((val) & 0x1f); - a = 0xFF; - return float4(r, g, b, a) / 255; -} - -float4 DecodePixel_IA8(uint val) -{ - int i = val & 0xFF; - int a = val >> 8; - return float4(i, i, i, a) / 255; -} - -void main( - out float4 ocol0 : SV_Target, - in float4 pos : SV_Position, - in float3 uv0 : TEXCOORD0) -{ - uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r; - src = Tex1.Load(src); - src = ((src << 8) & 0xFF00) | (src >> 8); - ocol0 = DECODE(src); -} -)HLSL"; - -void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, - const void* palette, TLUTFormat format) -{ - DXTexture* source_texture = static_cast(source->texture.get()); - DXTexture* destination_texture = static_cast(destination->texture.get()); - g_renderer->ResetAPIState(); - - // stretch picture with increased internal resolution - const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, static_cast(source->GetWidth()), - static_cast(source->GetHeight())); - D3D::context->RSSetViewports(1, &vp); - - D3D11_BOX box{0, 0, 0, 512, 1, 1}; - D3D::context->UpdateSubresource(palette_buf, 0, &box, palette, 0, 0); - - D3D::stateman->SetTexture(1, palette_buf_srv); - - // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.) - float params[8] = {source->format == TextureFormat::I4 ? 
15.f : 255.f}; - D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &params, 0, 0); - D3D::stateman->SetPixelConstants(uniform_buffer); - - const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, source->GetWidth(), source->GetHeight()); - - D3D::SetPointCopySampler(); - - // Make sure we don't draw with the texture set as both a source and target. - // (This can happen because we don't unbind textures when we free them.) - D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV()); - D3D::stateman->Apply(); - - D3D::context->OMSetRenderTargets(1, &destination_texture->GetRawTexIdentifier()->GetRTV(), - nullptr); - - // Create texture copy - D3D::drawShadedTexQuad( - source_texture->GetRawTexIdentifier()->GetSRV(), &sourcerect, source->GetWidth(), - source->GetHeight(), palette_pixel_shader[static_cast(format)], - VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), - GeometryShaderCache::GetCopyGeometryShader()); - - g_renderer->RestoreAPIState(); -} - -ID3D11PixelShader* GetConvertShader(const char* Type) -{ - std::string shader = "#define DECODE DecodePixel_"; - shader.append(Type); - shader.append("\n"); - shader.append(palette_shader); - return D3D::CompileAndCreatePixelShader(shader); -} - -TextureCache::TextureCache() -{ - // FIXME: Is it safe here? 
- g_encoder = std::make_unique(); - g_encoder->Init(); - - palette_buf = nullptr; - palette_buf_srv = nullptr; - uniform_buffer = nullptr; - palette_pixel_shader[static_cast(TLUTFormat::IA8)] = GetConvertShader("IA8"); - palette_pixel_shader[static_cast(TLUTFormat::RGB565)] = GetConvertShader("RGB565"); - palette_pixel_shader[static_cast(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3"); - auto lutBd = CD3D11_BUFFER_DESC(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE); - HRESULT hr = D3D::device->CreateBuffer(&lutBd, nullptr, &palette_buf); - CHECK(SUCCEEDED(hr), "create palette decoder lut buffer"); - D3D::SetDebugObjectName(palette_buf, "texture decoder lut buffer"); - // TODO: C14X2 format. - auto outlutUavDesc = - CD3D11_SHADER_RESOURCE_VIEW_DESC(palette_buf, DXGI_FORMAT_R16_UINT, 0, 256, 0); - hr = D3D::device->CreateShaderResourceView(palette_buf, &outlutUavDesc, &palette_buf_srv); - CHECK(SUCCEEDED(hr), "create palette decoder lut srv"); - D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv"); - const D3D11_BUFFER_DESC cbdesc = - CD3D11_BUFFER_DESC(sizeof(float) * 8, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); - hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &uniform_buffer); - CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer"); - D3D::SetDebugObjectName(uniform_buffer, - "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); -} - -TextureCache::~TextureCache() -{ - g_encoder->Shutdown(); - g_encoder.reset(); - - SAFE_RELEASE(palette_buf); - SAFE_RELEASE(palette_buf_srv); - SAFE_RELEASE(uniform_buffer); - for (auto*& shader : palette_pixel_shader) - SAFE_RELEASE(shader); - for (auto& iter : m_efb_to_tex_pixel_shaders) - SAFE_RELEASE(iter.second); -} - -void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, - const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& 
filter_coefficients) -{ - auto* destination_texture = static_cast(entry->texture.get()); - - bool multisampled = g_ActiveConfig.iMultisamples > 1; - ID3D11ShaderResourceView* efb_tex_srv; - if (multisampled) - { - efb_tex_srv = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : - FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); - } - else - { - efb_tex_srv = is_depth_copy ? FramebufferManager::GetEFBDepthTexture()->GetSRV() : - FramebufferManager::GetEFBColorTexture()->GetSRV(); - } - - auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, - scale_by_half, - NeedsCopyFilterInShader(filter_coefficients)); - ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid); - if (!pixel_shader) - return; - - g_renderer->ResetAPIState(); - - // stretch picture with increased internal resolution - const D3D11_VIEWPORT vp = - CD3D11_VIEWPORT(0.f, 0.f, static_cast(destination_texture->GetConfig().width), - static_cast(destination_texture->GetConfig().height)); - D3D::context->RSSetViewports(1, &vp); - - const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(src_rect); - // TODO: try targetSource.asRECT(); - const D3D11_RECT sourcerect = - CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom); - - // Use linear filtering if (bScaleByHalf), use point filtering otherwise - if (scale_by_half) - D3D::SetLinearCopySampler(); - else - D3D::SetPointCopySampler(); - - struct PixelConstants - { - float filter_coefficients[3]; - float gamma_rcp; - float clamp_top; - float clamp_bottom; - float pixel_height; - u32 padding; - }; - PixelConstants constants; - for (size_t i = 0; i < filter_coefficients.size(); i++) - constants.filter_coefficients[i] = filter_coefficients[i]; - constants.gamma_rcp = 1.0f / gamma; - constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; - constants.clamp_bottom = clamp_bottom ? 
src_rect.bottom / float(EFB_HEIGHT) : 1.0f; - constants.pixel_height = - g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT; - constants.padding = 0; - D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &constants, 0, 0); - D3D::stateman->SetPixelConstants(uniform_buffer); - - // Make sure we don't draw with the texture set as both a source and target. - // (This can happen because we don't unbind textures when we free them.) - D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV()); - D3D::stateman->Apply(); - - D3D::context->OMSetRenderTargets(1, &destination_texture->GetRawTexIdentifier()->GetRTV(), - nullptr); - - // Create texture copy - D3D::drawShadedTexQuad( - efb_tex_srv, &sourcerect, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), - pixel_shader, VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); - - g_renderer->RestoreAPIState(); -} - -ID3D11PixelShader* -TextureCache::GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid) -{ - auto iter = m_efb_to_tex_pixel_shaders.find(uid); - if (iter != m_efb_to_tex_pixel_shaders.end()) - return iter->second; - - ShaderCode code = TextureConversionShaderGen::GenerateShader(APIType::D3D, uid.GetUidData()); - ID3D11PixelShader* shader = D3D::CompileAndCreatePixelShader(code.GetBuffer()); - m_efb_to_tex_pixel_shaders.emplace(uid, shader); - return shader; -} -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h deleted file mode 100644 index 1bfa34c045..0000000000 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "VideoBackends/D3D/D3DTexture.h" -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/TextureConverterShaderGen.h" - -class AbstractTexture; -struct TextureConfig; - -namespace DX11 -{ -class TextureCache : public TextureCacheBase -{ -public: - TextureCache(); - ~TextureCache(); - -private: - void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, - TLUTFormat format) override; - - void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - bool CompileShaders() override { return true; } - void DeleteShaders() override {} - ID3D11PixelShader* GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid); - - ID3D11Buffer* palette_buf; - ID3D11ShaderResourceView* palette_buf_srv; - ID3D11Buffer* uniform_buffer; - ID3D11PixelShader* palette_pixel_shader[3]; - - std::map m_efb_to_tex_pixel_shaders; -}; -} diff --git a/Source/Core/VideoBackends/D3D/VertexManager.cpp b/Source/Core/VideoBackends/D3D/VertexManager.cpp index 8a84ebadc3..5159be74de 100644 --- a/Source/Core/VideoBackends/D3D/VertexManager.cpp +++ b/Source/Core/VideoBackends/D3D/VertexManager.cpp @@ -7,24 +7,19 @@ #include #include "Common/Align.h" +#include "Common/Assert.h" #include "Common/CommonTypes.h" #include "VideoBackends/D3D/BoundingBox.h" #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" -#include 
"VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" #include "VideoBackends/D3D/Render.h" -#include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoCommon/BoundingBox.h" -#include "VideoCommon/Debugger.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/PixelShaderManager.h" -#include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderManager.h" @@ -32,11 +27,6 @@ namespace DX11 { -// TODO: Find sensible values for these two -const u32 MAX_IBUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 8; -const u32 MAX_VBUFFER_SIZE = VertexManager::MAXVBUFFERSIZE; -const u32 MAX_BUFFER_SIZE = MAX_IBUFFER_SIZE + MAX_VBUFFER_SIZE; - static ID3D11Buffer* AllocateConstantBuffer(u32 size) { const u32 cbsize = Common::AlignUp(size, 16u); // must be a multiple of 16 @@ -59,71 +49,172 @@ static void UpdateConstantBuffer(ID3D11Buffer* const buffer, const void* data, u ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); } -void VertexManager::CreateDeviceObjects() +static ID3D11ShaderResourceView* +CreateTexelBufferView(ID3D11Buffer* buffer, TexelBufferFormat format, DXGI_FORMAT srv_format) { - D3D11_BUFFER_DESC bufdesc = - CD3D11_BUFFER_DESC(MAX_BUFFER_SIZE, D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER, - D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); + ID3D11ShaderResourceView* srv; + CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(buffer, srv_format, 0, + VertexManager::TEXEL_STREAM_BUFFER_SIZE / + VertexManager::GetTexelBufferElementSize(format)); + CHECK(SUCCEEDED(D3D::device->CreateShaderResourceView(buffer, &srv_desc, &srv)), + "Create SRV for texel buffer"); + return srv; +} - for (int i = 0; i < MAX_BUFFER_COUNT; i++) +VertexManager::VertexManager() = default; + 
+VertexManager::~VertexManager() +{ + for (auto& srv_ptr : m_texel_buffer_views) + SAFE_RELEASE(srv_ptr); + SAFE_RELEASE(m_texel_buffer); + SAFE_RELEASE(m_pixel_constant_buffer); + SAFE_RELEASE(m_geometry_constant_buffer); + SAFE_RELEASE(m_vertex_constant_buffer); + for (auto& buffer : m_buffers) + SAFE_RELEASE(buffer); +} + +bool VertexManager::Initialize() +{ + if (!VertexManagerBase::Initialize()) + return false; + + CD3D11_BUFFER_DESC bufdesc((VERTEX_STREAM_BUFFER_SIZE + INDEX_STREAM_BUFFER_SIZE) / BUFFER_COUNT, + D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER, + D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); + + for (int i = 0; i < BUFFER_COUNT; i++) { - m_buffers[i] = nullptr; CHECK(SUCCEEDED(D3D::device->CreateBuffer(&bufdesc, nullptr, &m_buffers[i])), "Failed to create buffer."); D3D::SetDebugObjectName(m_buffers[i], "Buffer of VertexManager"); } - m_buffer_cursor = MAX_BUFFER_SIZE; - m_vertex_constant_buffer = AllocateConstantBuffer(sizeof(VertexShaderConstants)); m_geometry_constant_buffer = AllocateConstantBuffer(sizeof(GeometryShaderConstants)); m_pixel_constant_buffer = AllocateConstantBuffer(sizeof(PixelShaderConstants)); -} + if (!m_vertex_constant_buffer || !m_geometry_constant_buffer || !m_pixel_constant_buffer) + return false; -void VertexManager::DestroyDeviceObjects() -{ - SAFE_RELEASE(m_pixel_constant_buffer); - SAFE_RELEASE(m_geometry_constant_buffer); - SAFE_RELEASE(m_vertex_constant_buffer); - for (int i = 0; i < MAX_BUFFER_COUNT; i++) + CD3D11_BUFFER_DESC texel_buf_desc(TEXEL_STREAM_BUFFER_SIZE, D3D11_BIND_SHADER_RESOURCE, + D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); + CHECK(SUCCEEDED(D3D::device->CreateBuffer(&texel_buf_desc, nullptr, &m_texel_buffer)), + "Creating texel buffer failed"); + if (!m_texel_buffer) + return false; + + static constexpr std::array, NUM_TEXEL_BUFFER_FORMATS> + format_mapping = {{ + {TEXEL_BUFFER_FORMAT_R8_UINT, DXGI_FORMAT_R8_UINT}, + {TEXEL_BUFFER_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT}, + 
{TEXEL_BUFFER_FORMAT_RGBA8_UINT, DXGI_FORMAT_R8G8B8A8_UNORM}, + {TEXEL_BUFFER_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_UINT}, + }}; + for (const auto& it : format_mapping) { - SAFE_RELEASE(m_buffers[i]); + m_texel_buffer_views[it.first] = CreateTexelBufferView(m_texel_buffer, it.first, it.second); + if (!m_texel_buffer_views[it.first]) + return false; } -} -VertexManager::VertexManager() -{ - m_staging_vertex_buffer.resize(MAXVBUFFERSIZE); - - m_cur_buffer_pointer = m_base_buffer_pointer = &m_staging_vertex_buffer[0]; - m_end_buffer_pointer = m_base_buffer_pointer + m_staging_vertex_buffer.size(); - - m_staging_index_buffer.resize(MAXIBUFFERSIZE); - - CreateDeviceObjects(); -} - -VertexManager::~VertexManager() -{ - DestroyDeviceObjects(); + return true; } void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) { // Just use the one buffer for all three. + InvalidateConstants(); UpdateConstantBuffer(m_vertex_constant_buffer, uniforms, uniforms_size); D3D::stateman->SetVertexConstants(m_vertex_constant_buffer); D3D::stateman->SetGeometryConstants(m_vertex_constant_buffer); D3D::stateman->SetPixelConstants(m_vertex_constant_buffer); - VertexShaderManager::dirty = true; - GeometryShaderManager::dirty = true; - PixelShaderManager::dirty = true; } -void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) +bool VertexManager::MapTexelBuffer(u32 required_size, D3D11_MAPPED_SUBRESOURCE& sr) { + if ((m_texel_buffer_offset + required_size) > TEXEL_STREAM_BUFFER_SIZE) + { + // Restart buffer. + HRESULT hr = D3D::context->Map(m_texel_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &sr); + CHECK(SUCCEEDED(hr), "Map texel buffer"); + if (FAILED(hr)) + return false; + + m_texel_buffer_offset = 0; + } + else + { + // Don't overwrite the earlier-used space. 
+ HRESULT hr = D3D::context->Map(m_texel_buffer, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &sr); + CHECK(SUCCEEDED(hr), "Map texel buffer"); + if (FAILED(hr)) + return false; + } + + return true; +} + +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) +{ + if (data_size > TEXEL_STREAM_BUFFER_SIZE) + return false; + + const u32 elem_size = GetTexelBufferElementSize(format); + m_texel_buffer_offset = Common::AlignUp(m_texel_buffer_offset, elem_size); + + D3D11_MAPPED_SUBRESOURCE sr; + if (!MapTexelBuffer(data_size, sr)) + return false; + + *out_offset = m_texel_buffer_offset / elem_size; + std::memcpy(static_cast(sr.pData) + m_texel_buffer_offset, data, data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); + m_texel_buffer_offset += data_size; + + D3D::context->Unmap(m_texel_buffer, 0); + D3D::stateman->SetTexture(0, m_texel_buffer_views[static_cast(format)]); + return true; +} + +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) +{ + const u32 elem_size = GetTexelBufferElementSize(format); + const u32 palette_elem_size = GetTexelBufferElementSize(palette_format); + const u32 reserve_size = data_size + palette_size + palette_elem_size; + if (reserve_size > TEXEL_STREAM_BUFFER_SIZE) + return false; + + m_texel_buffer_offset = Common::AlignUp(m_texel_buffer_offset, elem_size); + + D3D11_MAPPED_SUBRESOURCE sr; + if (!MapTexelBuffer(reserve_size, sr)) + return false; + + const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size); + std::memcpy(static_cast(sr.pData) + m_texel_buffer_offset, data, data_size); + std::memcpy(static_cast(sr.pData) + m_texel_buffer_offset + palette_byte_offset, + palette_data, palette_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size); + 
*out_offset = m_texel_buffer_offset / elem_size; + *out_palette_offset = (m_texel_buffer_offset + palette_byte_offset) / palette_elem_size; + m_texel_buffer_offset += palette_byte_offset + palette_size; + + D3D::context->Unmap(m_texel_buffer, 0); + D3D::stateman->SetTexture(0, m_texel_buffer_views[static_cast(format)]); + D3D::stateman->SetTexture(1, m_texel_buffer_views[static_cast(palette_format)]); + return true; +} + +void VertexManager::ResetBuffer(u32 vertex_stride) +{ + m_base_buffer_pointer = m_cpu_vertex_buffer.data(); m_cur_buffer_pointer = m_base_buffer_pointer; - IndexGenerator::Start(m_staging_index_buffer.data()); + m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); + IndexGenerator::Start(m_cpu_index_buffer.data()); } void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, @@ -143,10 +234,10 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in } D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE; - if (cursor + totalBufferSize >= MAX_BUFFER_SIZE) + if (cursor + totalBufferSize >= BUFFER_SIZE) { // Wrap around - m_current_buffer = (m_current_buffer + 1) % MAX_BUFFER_COUNT; + m_current_buffer = (m_current_buffer + 1) % BUFFER_COUNT; cursor = 0; MapType = D3D11_MAP_WRITE_DISCARD; } @@ -159,8 +250,7 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in if (vertexBufferSize > 0) std::memcpy(mappedData + cursor, m_base_buffer_pointer, vertexBufferSize); if (indexBufferSize > 0) - std::memcpy(mappedData + cursor + vertexBufferSize, m_staging_index_buffer.data(), - indexBufferSize); + std::memcpy(mappedData + cursor + vertexBufferSize, m_cpu_index_buffer.data(), indexBufferSize); D3D::context->Unmap(m_buffers[m_current_buffer], 0); m_buffer_cursor = cursor + totalBufferSize; @@ -172,7 +262,7 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in D3D::stateman->SetIndexBuffer(m_buffers[m_current_buffer]); } -void 
VertexManager::UploadConstants() +void VertexManager::UploadUniforms() { if (VertexShaderManager::dirty) { @@ -199,20 +289,4 @@ void VertexManager::UploadConstants() D3D::stateman->SetVertexConstants(m_vertex_constant_buffer); D3D::stateman->SetGeometryConstants(m_geometry_constant_buffer); } - -void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) -{ - FramebufferManager::SetIntegerEFBRenderTarget( - m_current_pipeline_config.blending_state.logicopenable); - - if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active) - { - D3D::context->OMSetRenderTargetsAndUnorderedAccessViews( - D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 2, 1, &BBox::GetUAV(), - nullptr); - } - - D3D::stateman->Apply(); - D3D::context->DrawIndexed(num_indices, base_index, base_vertex); -} } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexManager.h b/Source/Core/VideoBackends/D3D/VertexManager.h index a7eab78ef5..d9cb0a8755 100644 --- a/Source/Core/VideoBackends/D3D/VertexManager.h +++ b/Source/Core/VideoBackends/D3D/VertexManager.h @@ -18,13 +18,12 @@ struct ID3D11Buffer; namespace DX11 { -class D3DBlob; class D3DVertexFormat : public NativeVertexFormat { public: D3DVertexFormat(const PortableVertexDeclaration& vtx_decl); ~D3DVertexFormat(); - ID3D11InputLayout* GetInputLayout(D3DBlob* vs_bytecode); + ID3D11InputLayout* GetInputLayout(const void* vs_bytecode, size_t vs_bytecode_size); private: std::array m_elems{}; @@ -39,35 +38,39 @@ public: VertexManager(); ~VertexManager(); - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + bool Initialize(); void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 
palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) override; protected: - void CreateDeviceObjects() override; - void DestroyDeviceObjects() override; - void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void ResetBuffer(u32 vertex_stride) override; void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, u32* out_base_index) override; - void UploadConstants() override; - void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; + void UploadUniforms() override; private: - enum - { - MAX_BUFFER_COUNT = 2 - }; - ID3D11Buffer* m_buffers[MAX_BUFFER_COUNT] = {}; + static constexpr u32 BUFFER_COUNT = 2; + static constexpr u32 BUFFER_SIZE = + (VERTEX_STREAM_BUFFER_SIZE + INDEX_STREAM_BUFFER_SIZE) / BUFFER_COUNT; + + bool MapTexelBuffer(u32 required_size, D3D11_MAPPED_SUBRESOURCE& sr); + + ID3D11Buffer* m_buffers[BUFFER_COUNT] = {}; u32 m_current_buffer = 0; u32 m_buffer_cursor = 0; - std::vector m_staging_vertex_buffer; - std::vector m_staging_index_buffer; - ID3D11Buffer* m_vertex_constant_buffer = nullptr; ID3D11Buffer* m_geometry_constant_buffer = nullptr; ID3D11Buffer* m_pixel_constant_buffer = nullptr; + + ID3D11Buffer* m_texel_buffer = nullptr; + std::array m_texel_buffer_views; + u32 m_texel_buffer_offset = 0; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp deleted file mode 100644 index b7fc3a2582..0000000000 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "Common/CommonTypes.h" -#include "Common/FileUtil.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" - -#include "Core/ConfigManager.h" -#include "Core/Host.h" - -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/VertexManager.h" -#include "VideoBackends/D3D/VertexShaderCache.h" - -#include "VideoCommon/Debugger.h" -#include "VideoCommon/Statistics.h" -#include "VideoCommon/UberShaderVertex.h" -#include "VideoCommon/VertexLoaderManager.h" -#include "VideoCommon/VertexShaderGen.h" - -namespace DX11 -{ -static ID3D11VertexShader* SimpleVertexShader = nullptr; -static ID3D11VertexShader* ClearVertexShader = nullptr; -static ID3D11InputLayout* SimpleLayout = nullptr; -static ID3D11InputLayout* ClearLayout = nullptr; - -ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader() -{ - return SimpleVertexShader; -} -ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() -{ - return ClearVertexShader; -} -ID3D11InputLayout* VertexShaderCache::GetSimpleInputLayout() -{ - return SimpleLayout; -} -ID3D11InputLayout* VertexShaderCache::GetClearInputLayout() -{ - return ClearLayout; -} - -// this class will load the precompiled shaders into our cache -template -class VertexShaderCacheInserter : public LinearDiskCacheReader -{ -public: - void Read(const UidType& key, const u8* value, u32 value_size) - { - D3DBlob* blob = new D3DBlob(value_size, value); - VertexShaderCache::InsertByteCode(key, blob); - blob->Release(); - } -}; - -const char simple_shader_code[] = { - "struct VSOUTPUT\n" - "{\n" - "float4 vPosition : POSITION;\n" - "float3 vTexCoord : TEXCOORD0;\n" - "};\n" - "VSOUTPUT main(float4 inPosition : POSITION,float3 inTEX0 : TEXCOORD0)\n" - "{\n" - "VSOUTPUT OUT;\n" - "OUT.vPosition = inPosition;\n" - "OUT.vTexCoord = inTEX0;\n" - "return OUT;\n" - "}\n"}; - -const char clear_shader_code[] = { - "struct VSOUTPUT\n" - "{\n" - "float4 vPosition : POSITION;\n" - 
"float4 vColor0 : COLOR0;\n" - "};\n" - "VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n" - "{\n" - "VSOUTPUT OUT;\n" - "OUT.vPosition = inPosition;\n" - "OUT.vColor0 = inColor0;\n" - "return OUT;\n" - "}\n"}; - -void VertexShaderCache::Init() -{ - const D3D11_INPUT_ELEMENT_DESC simpleelems[2] = { - {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, - - }; - const D3D11_INPUT_ELEMENT_DESC clearelems[2] = { - {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, - }; - - D3DBlob* blob; - D3D::CompileVertexShader(simple_shader_code, &blob); - D3D::device->CreateInputLayout(simpleelems, 2, blob->Data(), blob->Size(), &SimpleLayout); - SimpleVertexShader = D3D::CreateVertexShaderFromByteCode(blob); - if (SimpleLayout == nullptr || SimpleVertexShader == nullptr) - PanicAlert("Failed to create simple vertex shader or input layout at %s %d\n", __FILE__, - __LINE__); - blob->Release(); - D3D::SetDebugObjectName(SimpleVertexShader, "simple vertex shader"); - D3D::SetDebugObjectName(SimpleLayout, "simple input layout"); - - D3D::CompileVertexShader(clear_shader_code, &blob); - D3D::device->CreateInputLayout(clearelems, 2, blob->Data(), blob->Size(), &ClearLayout); - ClearVertexShader = D3D::CreateVertexShaderFromByteCode(blob); - if (ClearLayout == nullptr || ClearVertexShader == nullptr) - PanicAlert("Failed to create clear vertex shader or input layout at %s %d\n", __FILE__, - __LINE__); - blob->Release(); - D3D::SetDebugObjectName(ClearVertexShader, "clear vertex shader"); - D3D::SetDebugObjectName(ClearLayout, "clear input layout"); - - SETSTAT(stats.numVertexShadersCreated, 0); - SETSTAT(stats.numVertexShadersAlive, 0); -} - -void VertexShaderCache::Shutdown() -{ - SAFE_RELEASE(SimpleVertexShader); - 
SAFE_RELEASE(ClearVertexShader); - - SAFE_RELEASE(SimpleLayout); - SAFE_RELEASE(ClearLayout); -} -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.h b/Source/Core/VideoBackends/D3D/VertexShaderCache.h deleted file mode 100644 index d7f1958806..0000000000 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DBlob.h" - -#include "VideoCommon/AsyncShaderCompiler.h" -#include "VideoCommon/UberShaderVertex.h" -#include "VideoCommon/VertexShaderGen.h" - -namespace DX11 -{ -class D3DVertexFormat; - -class VertexShaderCache -{ -public: - static void Init(); - static void Shutdown(); - - static ID3D11VertexShader* GetSimpleVertexShader(); - static ID3D11VertexShader* GetClearVertexShader(); - static ID3D11InputLayout* GetSimpleInputLayout(); - static ID3D11InputLayout* GetClearInputLayout(); -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 5795fce3a2..70db72206e 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -12,17 +12,14 @@ #include "VideoBackends/D3D/BoundingBox.h" #include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DUtil.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" #include "VideoBackends/D3D/PerfQuery.h" -#include "VideoBackends/D3D/PixelShaderCache.h" #include "VideoBackends/D3D/Render.h" -#include "VideoBackends/D3D/TextureCache.h" #include "VideoBackends/D3D/VertexManager.h" -#include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoBackends/D3D/VideoBackend.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/ShaderCache.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" #include 
"VideoCommon/VideoConfig.h" @@ -51,6 +48,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.api_type = APIType::D3D; g_Config.backend_info.MaxTextureSize = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + g_Config.backend_info.bUsesLowerLeftOrigin = false; g_Config.backend_info.bSupportsExclusiveFullscreen = true; g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsPrimitiveRestart = true; @@ -58,16 +56,17 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsComputeShaders = false; g_Config.backend_info.bSupports3DVision = true; - g_Config.backend_info.bSupportsPostProcessing = false; + g_Config.backend_info.bSupportsPostProcessing = true; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsClipControl = true; g_Config.backend_info.bSupportsDepthClamp = true; g_Config.backend_info.bSupportsReversedDepthRange = false; g_Config.backend_info.bSupportsLogicOp = true; g_Config.backend_info.bSupportsMultithreading = false; - g_Config.backend_info.bSupportsGPUTextureDecoding = false; + g_Config.backend_info.bSupportsGPUTextureDecoding = true; g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsCopyToVram = true; + g_Config.backend_info.bSupportsLargePoints = false; g_Config.backend_info.bSupportsBitfield = false; g_Config.backend_info.bSupportsDynamicSamplerIndexing = false; g_Config.backend_info.bSupportsBPTCTextures = false; @@ -149,21 +148,20 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) // internal interfaces g_renderer = std::make_unique(backbuffer_width, backbuffer_height, wsi.render_surface_scale); - g_shader_cache = std::make_unique(); - g_texture_cache = std::make_unique(); g_vertex_manager = std::make_unique(); + g_shader_cache = std::make_unique(); + g_framebuffer_manager = std::make_unique(); + g_texture_cache = std::make_unique(); g_perf_query = std::make_unique(); - 
- VertexShaderCache::Init(); - PixelShaderCache::Init(); - GeometryShaderCache::Init(); - - if (!g_renderer->Initialize() || !g_shader_cache->Initialize()) + if (!g_renderer->Initialize() || !g_vertex_manager->Initialize() || + !g_shader_cache->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { return false; + } - D3D::InitUtils(); BBox::Init(); - + g_shader_cache->InitializeShaderCache(); return true; } @@ -172,16 +170,13 @@ void VideoBackend::Shutdown() g_shader_cache->Shutdown(); g_renderer->Shutdown(); - D3D::ShutdownUtils(); - PixelShaderCache::Shutdown(); - VertexShaderCache::Shutdown(); - GeometryShaderCache::Shutdown(); BBox::Shutdown(); g_perf_query.reset(); - g_vertex_manager.reset(); g_texture_cache.reset(); + g_framebuffer_manager.reset(); g_shader_cache.reset(); + g_vertex_manager.reset(); g_renderer.reset(); ShutdownShared(); diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index d790331e36..1e5c4e1193 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -13,7 +13,9 @@ #include "VideoBackends/Null/VertexManager.h" #include "VideoBackends/Null/VideoBackend.h" -#include "VideoCommon/FramebufferManagerBase.h" +#include "Common/MsgHandler.h" + +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -61,10 +63,21 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) g_renderer = std::make_unique(); g_vertex_manager = std::make_unique(); g_perf_query = std::make_unique(); - g_framebuffer_manager = std::make_unique(); + g_framebuffer_manager = std::make_unique(); g_texture_cache = std::make_unique(); g_shader_cache = std::make_unique(); - return g_renderer->Initialize() && g_shader_cache->Initialize(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + 
!g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); + return true; } void VideoBackend::Shutdown() diff --git a/Source/Core/VideoBackends/Null/NullTexture.cpp b/Source/Core/VideoBackends/Null/NullTexture.cpp index 2a21afb4b7..4b1bcba18e 100644 --- a/Source/Core/VideoBackends/Null/NullTexture.cpp +++ b/Source/Core/VideoBackends/Null/NullTexture.cpp @@ -16,11 +16,6 @@ void NullTexture::CopyRectangleFromTexture(const AbstractTexture* src, u32 dst_layer, u32 dst_level) { } -void NullTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) -{ -} void NullTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) { @@ -70,15 +65,18 @@ void NullStagingTexture::Flush() m_needs_flush = false; } -NullFramebuffer::NullFramebuffer(AbstractTextureFormat color_format, +NullFramebuffer::NullFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples) - : AbstractFramebuffer(color_format, depth_format, width, height, layers, samples) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples) { } -std::unique_ptr NullFramebuffer::Create(const NullTexture* color_attachment, - const NullTexture* depth_attachment) +std::unique_ptr NullFramebuffer::Create(NullTexture* color_attachment, + NullTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; @@ -93,8 +91,8 @@ std::unique_ptr NullFramebuffer::Create(const NullTexture* colo const u32 layers = either_attachment->GetLayers(); const u32 samples = 
either_attachment->GetSamples(); - return std::make_unique(color_format, depth_format, width, height, layers, - samples); + return std::make_unique(color_attachment, depth_attachment, color_format, + depth_format, width, height, layers, samples); } } // namespace Null diff --git a/Source/Core/VideoBackends/Null/NullTexture.h b/Source/Core/VideoBackends/Null/NullTexture.h index 5a48ff652c..42cbc7e542 100644 --- a/Source/Core/VideoBackends/Null/NullTexture.h +++ b/Source/Core/VideoBackends/Null/NullTexture.h @@ -25,9 +25,6 @@ public: const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, @@ -58,12 +55,13 @@ private: class NullFramebuffer final : public AbstractFramebuffer { public: - explicit NullFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, + explicit NullFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples); ~NullFramebuffer() override = default; - static std::unique_ptr Create(const NullTexture* color_attachment, - const NullTexture* depth_attachment); + static std::unique_ptr Create(NullTexture* color_attachment, + NullTexture* depth_attachment); }; } // namespace Null diff --git a/Source/Core/VideoBackends/Null/Render.cpp b/Source/Core/VideoBackends/Null/Render.cpp index d076e0f091..ca57a0bdeb 100644 --- a/Source/Core/VideoBackends/Null/Render.cpp +++ b/Source/Core/VideoBackends/Null/Render.cpp @@ -9,6 +9,7 @@ #include "VideoCommon/AbstractPipeline.h" 
#include "VideoCommon/AbstractShader.h" +#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/VideoConfig.h" namespace Null @@ -74,22 +75,16 @@ std::unique_ptr Renderer::CreatePipeline(const AbstractPipelin return std::make_unique(); } -std::unique_ptr -Renderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) { - return NullFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return NullFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } -TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) +std::unique_ptr +Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) { - TargetRectangle result; - result.left = rc.left; - result.top = rc.top; - result.right = rc.right; - result.bottom = rc.bottom; - return result; + return std::make_unique(vtx_decl); } - } // namespace Null diff --git a/Source/Core/VideoBackends/Null/Render.h b/Source/Core/VideoBackends/Null/Render.h index 75c4adfffc..5ad4d8028e 100644 --- a/Source/Core/VideoBackends/Null/Render.h +++ b/Source/Core/VideoBackends/Null/Render.h @@ -20,26 +20,26 @@ public: std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; std::unique_ptr CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const 
AbstractPipelineConfig& config) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override { return 0; } void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override {} u16 BBoxRead(int index) override { return 0; } void BBoxWrite(int index, u16 value) override {} - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override { } - void ReinterpretPixelData(unsigned int convtype) override {} + void ReinterpretPixelData(EFBReinterpretType convtype) override {} }; -} +} // namespace Null diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h index 648871f626..678f7e8451 100644 --- a/Source/Core/VideoBackends/Null/TextureCache.h +++ b/Source/Core/VideoBackends/Null/TextureCache.h @@ -18,26 +18,21 @@ class TextureCache : public TextureCacheBase public: TextureCache() {} ~TextureCache() {} - bool CompileShaders() override { return true; } - void DeleteShaders() override {} - void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, - TLUTFormat format) override - { - } +protected: void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override + const EFBCopyFilterCoefficients& filter_coefficients) override { } void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override + const EFBCopyFilterCoefficients& filter_coefficients) override { } }; -} // Null 
name space +} // namespace Null diff --git a/Source/Core/VideoBackends/Null/VertexManager.cpp b/Source/Core/VideoBackends/Null/VertexManager.cpp index 872e9c3657..bf70743262 100644 --- a/Source/Core/VideoBackends/Null/VertexManager.cpp +++ b/Source/Core/VideoBackends/Null/VertexManager.cpp @@ -3,52 +3,16 @@ // Refer to the license.txt file included. #include "VideoBackends/Null/VertexManager.h" +#include "VideoBackends/Null/Render.h" #include "VideoCommon/IndexGenerator.h" -#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/VertexLoaderManager.h" namespace Null { -class NullNativeVertexFormat : public NativeVertexFormat -{ -public: - NullNativeVertexFormat(const PortableVertexDeclaration& vtx_decl_) { vtx_decl = vtx_decl_; } -}; +VertexManager::VertexManager() = default; -std::unique_ptr -VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) -{ - return std::make_unique(vtx_decl); -} - -void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) -{ -} - -VertexManager::VertexManager() : m_local_v_buffer(MAXVBUFFERSIZE), m_local_i_buffer(MAXIBUFFERSIZE) -{ -} - -VertexManager::~VertexManager() -{ -} - -void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) -{ - m_cur_buffer_pointer = m_base_buffer_pointer = m_local_v_buffer.data(); - m_end_buffer_pointer = m_cur_buffer_pointer + m_local_v_buffer.size(); - IndexGenerator::Start(&m_local_i_buffer[0]); -} - -void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, - u32* out_base_vertex, u32* out_base_index) -{ -} - -void VertexManager::UploadConstants() -{ -} +VertexManager::~VertexManager() = default; void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { diff --git a/Source/Core/VideoBackends/Null/VertexManager.h b/Source/Core/VideoBackends/Null/VertexManager.h index 1d6a706ec3..7ab7557db6 100644 --- a/Source/Core/VideoBackends/Null/VertexManager.h +++ 
b/Source/Core/VideoBackends/Null/VertexManager.h @@ -17,20 +17,7 @@ public: VertexManager(); ~VertexManager(); - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; - - void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; - protected: - void ResetBuffer(u32 vertex_stride, bool cull_all) override; - void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, - u32* out_base_index) override; - void UploadConstants() override; void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; - -private: - std::vector m_local_v_buffer; - std::vector m_local_i_buffer; }; -} +} // namespace Null diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index c50f831524..18ef7802cc 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -2,175 +2,77 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. 
-#include -#include #include #include "Common/GL/GLUtil.h" #include "VideoBackends/OGL/BoundingBox.h" -#include "VideoBackends/OGL/FramebufferManager.h" +#include "VideoBackends/OGL/Render.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" static GLuint s_bbox_buffer_id; -static GLuint s_pbo; - -static std::array s_stencil_bounds; -static bool s_stencil_updated; -static bool s_stencil_cleared; - -static int s_target_width; -static int s_target_height; namespace OGL { -void BoundingBox::SetTargetSizeChanged(int target_width, int target_height) +void BoundingBox::Init() { - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) + if (!g_ActiveConfig.backend_info.bSupportsBBox) return; - s_target_width = target_width; - s_target_height = target_height; - s_stencil_updated = false; - - glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); - glBufferData(GL_PIXEL_PACK_BUFFER, s_target_width * s_target_height, nullptr, GL_STREAM_READ); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); -} - -void BoundingBox::Init(int target_width, int target_height) -{ - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) - { - int initial_values[4] = {0, 0, 0, 0}; - glGenBuffers(1, &s_bbox_buffer_id); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id); - } - else - { - s_stencil_bounds = {{0, 0, 0, 0}}; - glGenBuffers(1, &s_pbo); - SetTargetSizeChanged(target_width, target_height); - } + int initial_values[4] = {0, 0, 0, 0}; + glGenBuffers(1, &s_bbox_buffer_id); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id); } void BoundingBox::Shutdown() { - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) - { - glDeleteBuffers(1, 
&s_bbox_buffer_id); - } - else - { - glDeleteBuffers(1, &s_pbo); - } + if (!g_ActiveConfig.backend_info.bSupportsBBox) + return; + + glDeleteBuffers(1, &s_bbox_buffer_id); } void BoundingBox::Set(int index, int value) { - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) - { - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - } - else - { - s_stencil_bounds[index] = value; + if (!g_ActiveConfig.backend_info.bSupportsBBox) + return; - if (!s_stencil_cleared) - { - // Assumes that the EFB framebuffer is currently bound - glClearStencil(0); - glClear(GL_STENCIL_BUFFER_BIT); - s_stencil_updated = false; - s_stencil_cleared = true; - } - } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); } int BoundingBox::Get(int index) { - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) + if (!g_ActiveConfig.backend_info.bSupportsBBox) + return 0; + + int data = 0; + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) && + !static_cast(g_renderer.get())->IsGLES()) { - int data = 0; - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) && - !static_cast(g_renderer.get())->IsGLES()) - { - // Using glMapBufferRange to read back the contents of the SSBO is extremely slow - // on nVidia drivers. This is more noticeable at higher internal resolutions. - // Using glGetBufferSubData instead does not seem to exhibit this slowdown. - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); - } - else - { - // Using glMapBufferRange is faster on AMD cards by a measurable margin. 
- void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), - GL_MAP_READ_BIT); - if (ptr) - { - memcpy(&data, ptr, sizeof(int)); - glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); - } - } - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - return data; + // Using glMapBufferRange to read back the contents of the SSBO is extremely slow + // on nVidia drivers. This is more noticeable at higher internal resolutions. + // Using glGetBufferSubData instead does not seem to exhibit this slowdown. + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); } else { - if (s_stencil_updated) + // Using glMapBufferRange is faster on AMD cards by a measurable margin. + void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), + GL_MAP_READ_BIT); + if (ptr) { - s_stencil_updated = false; - - FramebufferManager::ResolveEFBStencilTexture(); - glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); - glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); - glPixelStorei(GL_PACK_ALIGNMENT, 1); - glReadPixels(0, 0, s_target_width, s_target_height, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0); - glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer()); - - // Eke every bit of performance out of the compiler that we can - std::array bounds = s_stencil_bounds; - - u8* data = static_cast(glMapBufferRange( - GL_PIXEL_PACK_BUFFER, 0, s_target_height * s_target_width, GL_MAP_READ_BIT)); - - for (int row = 0; row < s_target_height; row++) - { - for (int col = 0; col < s_target_width; col++) - { - if (data[row * s_target_width + col] == 0) - continue; - bounds[0] = std::min(bounds[0], col); - bounds[1] = std::max(bounds[1], col); - bounds[2] = std::min(bounds[2], row); - bounds[3] = std::max(bounds[3], row); - } - } - - s_stencil_bounds = bounds; - - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + memcpy(&data, ptr, sizeof(int)); + 
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); } - - return s_stencil_bounds[index]; } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + return data; } - -void BoundingBox::StencilWasUpdated() -{ - s_stencil_updated = true; - s_stencil_cleared = false; -} - -bool BoundingBox::NeedsStencilBuffer() -{ - return g_ActiveConfig.bBBoxEnable && !g_ActiveConfig.BBoxUseFragmentShaderImplementation(); -} -}; +}; // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.h b/Source/Core/VideoBackends/OGL/BoundingBox.h index 44365c9fbc..cbf54074ab 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.h +++ b/Source/Core/VideoBackends/OGL/BoundingBox.h @@ -9,19 +9,10 @@ namespace OGL class BoundingBox { public: - static void Init(int target_width, int target_height); + static void Init(); static void Shutdown(); - static void SetTargetSizeChanged(int target_width, int target_height); - - // When SSBO isn't available, the bounding box is calculated directly from the - // stencil buffer. - static bool NeedsStencilBuffer(); - // When the stencil buffer is changed, this function needs to be called to - // invalidate the cached bounding box data. 
- static void StencilWasUpdated(); - static void Set(int index, int value); static int Get(int index); }; -}; +}; // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/CMakeLists.txt b/Source/Core/VideoBackends/OGL/CMakeLists.txt index c786ba671f..da091c8e9b 100644 --- a/Source/Core/VideoBackends/OGL/CMakeLists.txt +++ b/Source/Core/VideoBackends/OGL/CMakeLists.txt @@ -1,19 +1,15 @@ add_library(videoogl BoundingBox.cpp - FramebufferManager.cpp main.cpp NativeVertexFormat.cpp OGLPipeline.cpp OGLShader.cpp OGLTexture.cpp PerfQuery.cpp - PostProcessing.cpp ProgramShaderCache.cpp Render.cpp SamplerCache.cpp StreamBuffer.cpp - TextureCache.cpp - TextureConverter.cpp VertexManager.cpp ) diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp deleted file mode 100644 index 12de898ff5..0000000000 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ /dev/null @@ -1,634 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/OGL/FramebufferManager.h" - -#include -#include - -#include "Common/Common.h" -#include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" - -#include "Core/HW/Memmap.h" - -#include "VideoBackends/OGL/Render.h" -#include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/TextureConverter.h" -#include "VideoBackends/OGL/VertexManager.h" - -#include "VideoCommon/OnScreenDisplay.h" -#include "VideoCommon/VertexShaderGen.h" -#include "VideoCommon/VideoBackendBase.h" - -constexpr const char* GLSL_REINTERPRET_PIXELFMT_VS = R"GLSL( -flat out int layer; -void main(void) { - layer = 0; - vec2 rawpos = vec2(gl_VertexID & 1, gl_VertexID & 2); - gl_Position = vec4(rawpos* 2.0 - 1.0, 0.0, 1.0); -})GLSL"; - -constexpr const char* GLSL_SHADER_FS = R"GLSL( -#define MULTILAYER %d -#define MSAA %d - -#if MSAA - -#if MULTILAYER -SAMPLER_BINDING(9) uniform sampler2DMSArray samp9; -#else -SAMPLER_BINDING(9) uniform sampler2DMS samp9; -#endif - -#else -SAMPLER_BINDING(9) uniform sampler2DArray samp9; -#endif - -vec4 sampleEFB(ivec3 pos) { -#if MSAA - -#if MULTILAYER - return texelFetch(samp9, pos, gl_SampleID); -#else - return texelFetch(samp9, pos.xy, gl_SampleID); -#endif - -#else - return texelFetch(samp9, pos, 0); -#endif -})GLSL"; - -constexpr const char* GLSL_SAMPLE_EFB_FS = R"GLSL( -#define MULTILAYER %d - -#if MULTILAYER -SAMPLER_BINDING(9) uniform sampler2DMSArray samp9; -#else -SAMPLER_BINDING(9) uniform sampler2DMS samp9; -#endif -vec4 sampleEFB(ivec3 pos) { - vec4 color = vec4(0.0, 0.0, 0.0, 0.0); - for (int i = 0; i < %d; i++) -#if MULTILAYER - color += texelFetch(samp9, pos, i); -#else - color += texelFetch(samp9, pos.xy, i); -#endif - - return color / %d; -})GLSL"; - -constexpr const char* GLSL_RGBA6_TO_RGB8_FS = R"GLSL( -flat in int layer; -out vec4 ocol0; -void main() { - ivec4 src6 = ivec4(round(sampleEFB(ivec3(gl_FragCoord.xy, layer)) * 63.f)); - ivec4 dst8; - - dst8.r = (src6.r << 2) | 
(src6.g >> 4); - dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2); - dst8.b = ((src6.b & 0x3) << 6) | src6.a; - dst8.a = 255; - - ocol0 = float4(dst8) / 255.f; -})GLSL"; - -constexpr const char* GLSL_RGB8_TO_RGBA6_FS = R"GLSL( -flat in int layer; -out vec4 ocol0; -void main() { - ivec4 src8 = ivec4(round(sampleEFB(ivec3(gl_FragCoord.xy, layer)) * 255.f)); - ivec4 dst6; - - dst6.r = src8.r >> 2; - dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4); - dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6); - dst6.a = src8.b & 0x3F; - ocol0 = float4(dst6) / 63.f; -})GLSL"; - -constexpr const char* GLSL_GS = R"GLSL( -layout(triangles) in; -layout(triangle_strip, max_vertices = %d) out; -flat out int layer; -void main() { - for (int j = 0; j < %d; ++j) { - for (int i = 0; i < 3; ++i) { - layer = j; - gl_Layer = j; - gl_Position = gl_in[i].gl_Position; - EmitVertex(); - } - EndPrimitive(); - } -})GLSL"; - -constexpr const char* GLSL_EFB_POKE_VERTEX_VS = R"GLSL( -in vec2 rawpos; -in vec4 rawcolor0; // color -in int rawcolor1; // depth -out vec4 v_c; -out float v_z; -void main(void) { - gl_Position = vec4(((rawpos + 0.5) / vec2(640.0, 528.0) * 2.0 - 1.0) * vec2(1.0, -1.0), 0.0, 1.0); - gl_PointSize = %d.0 / 640.0; - - v_c = rawcolor0.bgra; - v_z = float(rawcolor1 & 0xFFFFFF) / 16777216.0; -})GLSL"; - -constexpr const char* GLSL_EFB_POKE_PIXEL_FS = R"GLSL( -in vec4 %s_c; -in float %s_z; -out vec4 ocol0; -void main(void) { - ocol0 = %s_c; - gl_FragDepth = %s_z; -})GLSL"; - -constexpr const char* GLSL_EFB_POKE_GEOMETRY_GS = R"GLSL( -layout(points) in; -layout(points, max_vertices = %d) out; -in vec4 v_c[1]; -in float v_z[1]; -out vec4 g_c; -out float g_z; -void main() { - for (int j = 0; j < %d; ++j) { - gl_Layer = j; - gl_Position = gl_in[0].gl_Position; - gl_PointSize = %d.0 / 640.0; - g_c = v_c[0]; - g_z = v_z[0]; - - EmitVertex(); - EndPrimitive(); - } -})GLSL"; - -namespace OGL -{ -int FramebufferManager::m_targetWidth; -int FramebufferManager::m_targetHeight; -int 
FramebufferManager::m_msaaSamples; -bool FramebufferManager::m_enable_stencil_buffer; - -GLenum FramebufferManager::m_textureType; -std::vector FramebufferManager::m_efbFramebuffer; -GLuint FramebufferManager::m_efbColor; -GLuint FramebufferManager::m_efbDepth; -GLuint FramebufferManager::m_efbColorSwap; // for hot swap when reinterpreting EFB pixel formats - -// Only used in MSAA mode. -std::vector FramebufferManager::m_resolvedFramebuffer; -GLuint FramebufferManager::m_resolvedColorTexture; -GLuint FramebufferManager::m_resolvedDepthTexture; - -// reinterpret pixel format -SHADER FramebufferManager::m_pixel_format_shaders[2]; - -// EFB pokes -GLuint FramebufferManager::m_EfbPokes_VBO; -GLuint FramebufferManager::m_EfbPokes_VAO; -SHADER FramebufferManager::m_EfbPokes; - -GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_format, - GLenum pixel_format, GLenum data_type) -{ - GLuint texture; - glActiveTexture(GL_TEXTURE9); - glGenTextures(1, &texture); - glBindTexture(texture_type, texture); - if (texture_type == GL_TEXTURE_2D_ARRAY) - { - glTexParameteri(texture_type, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(texture_type, 0, internal_format, m_targetWidth, m_targetHeight, m_EFBLayers, 0, - pixel_format, data_type, nullptr); - } - else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) - { - if (g_ogl_config.bSupports3DTextureStorageMultisample) - glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - else - glTexImage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - } - else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE) - { - if (g_ogl_config.bSupports2DTextureStorageMultisample) - glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, - m_targetHeight, false); - else - glTexImage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, - 
m_targetHeight, false); - } - else - { - PanicAlert("Unhandled texture type %d", texture_type); - } - glBindTexture(texture_type, 0); - return texture; -} - -void FramebufferManager::BindLayeredTexture(GLuint texture, const std::vector& framebuffers, - GLenum attachment, GLenum texture_type) -{ - glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[0]); - FramebufferTexture(GL_FRAMEBUFFER, attachment, texture_type, texture, 0); - // Bind all the other layers as separate FBOs for blitting. - for (unsigned int i = 1; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[i]); - glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, texture, 0, i); - } -} - -bool FramebufferManager::HasStencilBuffer() -{ - return m_enable_stencil_buffer; -} - -FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, - bool enable_stencil_buffer) -{ - m_efbColor = 0; - m_efbDepth = 0; - m_efbColorSwap = 0; - m_resolvedColorTexture = 0; - m_resolvedDepthTexture = 0; - - m_targetWidth = targetWidth; - m_targetHeight = targetHeight; - m_msaaSamples = msaaSamples; - m_enable_stencil_buffer = enable_stencil_buffer; - - // The EFB can be set to different pixel formats by the game through the - // BPMEM_ZCOMPARE register (which should probably have a different name). - // They are: - // - 24-bit RGB (8-bit components) with 24-bit Z - // - 24-bit RGBA (6-bit components) with 24-bit Z - // - Multisampled 16-bit RGB (5-6-5 format) with 16-bit Z - // We only use one EFB format here: 32-bit ARGB with 24-bit Z. - // Multisampling depends on user settings. - // The distinction becomes important for certain operations, i.e. the - // alpha channel should be ignored if the EFB does not have one. - - glActiveTexture(GL_TEXTURE9); - - m_EFBLayers = (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 
2 : 1; - m_efbFramebuffer.resize(m_EFBLayers); - m_resolvedFramebuffer.resize(m_EFBLayers); - - GLenum depth_internal_format = GL_DEPTH_COMPONENT32F; - GLenum depth_pixel_format = GL_DEPTH_COMPONENT; - GLenum depth_data_type = GL_FLOAT; - if (m_enable_stencil_buffer) - { - depth_internal_format = GL_DEPTH32F_STENCIL8; - depth_pixel_format = GL_DEPTH_STENCIL; - depth_data_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; - } - - const bool multilayer = m_EFBLayers > 1; - - if (m_msaaSamples <= 1) - { - m_textureType = GL_TEXTURE_2D_ARRAY; - } - else - { - // Only use a layered multisample texture if needed. Some drivers - // slow down significantly with single-layered multisample textures. - m_textureType = multilayer ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_MULTISAMPLE; - - // Although we are able to access the multisampled texture directly, we don't do it - // everywhere. The old way is to "resolve" this multisampled texture by copying it into a - // non-sampled texture. This would lead to an unneeded copy of the EFB, so we are going to - // avoid it. But as this job isn't done right now, we do need that texture for resolving: - GLenum resolvedType = GL_TEXTURE_2D_ARRAY; - - m_resolvedColorTexture = CreateTexture(resolvedType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); - m_resolvedDepthTexture = - CreateTexture(resolvedType, depth_internal_format, depth_pixel_format, depth_data_type); - - // Bind resolved textures to resolved framebuffer. 
- glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); - BindLayeredTexture(m_resolvedColorTexture, m_resolvedFramebuffer, GL_COLOR_ATTACHMENT0, - resolvedType); - BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_DEPTH_ATTACHMENT, - resolvedType); - if (m_enable_stencil_buffer) - BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_STENCIL_ATTACHMENT, - resolvedType); - } - - m_efbColor = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); - m_efbDepth = - CreateTexture(m_textureType, depth_internal_format, depth_pixel_format, depth_data_type); - m_efbColorSwap = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); - - // Bind target textures to EFB framebuffer. - glGenFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); - BindLayeredTexture(m_efbColor, m_efbFramebuffer, GL_COLOR_ATTACHMENT0, m_textureType); - BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_DEPTH_ATTACHMENT, m_textureType); - if (m_enable_stencil_buffer) - BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_STENCIL_ATTACHMENT, m_textureType); - - // EFB framebuffer is currently bound, make sure to clear it before use. - glViewport(0, 0, m_targetWidth, m_targetHeight); - glScissor(0, 0, m_targetWidth, m_targetHeight); - glClearColor(0.f, 0.f, 0.f, 0.f); - glClearDepthf(1.0f); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - if (m_enable_stencil_buffer) - { - glClearStencil(0); - glClear(GL_STENCIL_BUFFER_BIT); - } - - // reinterpret pixel format - std::string vs = GLSL_REINTERPRET_PIXELFMT_VS; - - // The way to sample the EFB is based on the on the current configuration. 
- // As we use the same sampling way for both interpreting shaders, the sampling - // shader are generated first: - std::string sampler; - - if (m_msaaSamples <= 1) - { - // non-msaa, so just fetch the pixel - sampler = StringFromFormat(GLSL_SHADER_FS, multilayer, false); - } - else if (g_ActiveConfig.backend_info.bSupportsSSAA) - { - // msaa + sample shading available, so just fetch the sample - // This will lead to sample shading, but it's the only way to not loose - // the values of each sample. - sampler = StringFromFormat(GLSL_SHADER_FS, multilayer, true); - } - else - { - // msaa without sample shading: calculate the mean value of the pixel - sampler = StringFromFormat(GLSL_SAMPLE_EFB_FS, multilayer, m_msaaSamples, m_msaaSamples); - } - - std::string ps_rgba6_to_rgb8 = sampler + GLSL_RGBA6_TO_RGB8_FS; - - std::string ps_rgb8_to_rgba6 = sampler + GLSL_RGB8_TO_RGBA6_FS; - - std::string gs = StringFromFormat(GLSL_GS, m_EFBLayers * 3, m_EFBLayers); - - ProgramShaderCache::CompileShader(m_pixel_format_shaders[0], vs, ps_rgb8_to_rgba6, - multilayer ? gs : ""); - ProgramShaderCache::CompileShader(m_pixel_format_shaders[1], vs, ps_rgba6_to_rgb8, - multilayer ? gs : ""); - - const auto prefix = multilayer ? "g" : "v"; - - ProgramShaderCache::CompileShader( - m_EfbPokes, StringFromFormat(GLSL_EFB_POKE_VERTEX_VS, m_targetWidth), - - StringFromFormat(GLSL_EFB_POKE_PIXEL_FS, prefix, prefix, prefix, prefix), - - multilayer ? 
- StringFromFormat(GLSL_EFB_POKE_GEOMETRY_GS, m_EFBLayers, m_EFBLayers, m_targetWidth) : - ""); - glGenBuffers(1, &m_EfbPokes_VBO); - glGenVertexArrays(1, &m_EfbPokes_VAO); - glBindBuffer(GL_ARRAY_BUFFER, m_EfbPokes_VBO); - glBindVertexArray(m_EfbPokes_VAO); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_UNSIGNED_SHORT, 0, sizeof(EfbPokeData), - (void*)offsetof(EfbPokeData, x)); - glEnableVertexAttribArray(SHADER_COLOR0_ATTRIB); - glVertexAttribPointer(SHADER_COLOR0_ATTRIB, 4, GL_UNSIGNED_BYTE, 1, sizeof(EfbPokeData), - (void*)offsetof(EfbPokeData, data)); - glEnableVertexAttribArray(SHADER_COLOR1_ATTRIB); - glVertexAttribIPointer(SHADER_COLOR1_ATTRIB, 1, GL_INT, sizeof(EfbPokeData), - (void*)offsetof(EfbPokeData, data)); - glBindBuffer(GL_ARRAY_BUFFER, - static_cast(g_vertex_manager.get())->GetVertexBufferHandle()); - - if (!static_cast(g_renderer.get())->IsGLES()) - glEnable(GL_PROGRAM_POINT_SIZE); -} - -FramebufferManager::~FramebufferManager() -{ - glBindFramebuffer(GL_FRAMEBUFFER, 0); - - GLuint glObj[3]; - - // Note: OpenGL deletion functions silently ignore parameters of "0". 
- - glDeleteFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); - glDeleteFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); - - // Required, as these are static class members - m_efbFramebuffer.clear(); - m_resolvedFramebuffer.clear(); - - glObj[0] = m_resolvedColorTexture; - glObj[1] = m_resolvedDepthTexture; - glDeleteTextures(2, glObj); - m_resolvedColorTexture = 0; - m_resolvedDepthTexture = 0; - - glObj[0] = m_efbColor; - glObj[1] = m_efbDepth; - glObj[2] = m_efbColorSwap; - glDeleteTextures(3, glObj); - m_efbColor = 0; - m_efbDepth = 0; - m_efbColorSwap = 0; - - // reinterpret pixel format - m_pixel_format_shaders[0].Destroy(); - m_pixel_format_shaders[1].Destroy(); - - // EFB pokes - glDeleteBuffers(1, &m_EfbPokes_VBO); - glDeleteVertexArrays(1, &m_EfbPokes_VAO); - m_EfbPokes_VBO = 0; - m_EfbPokes_VAO = 0; - m_EfbPokes.Destroy(); -} - -GLuint FramebufferManager::GetEFBColorTexture(const EFBRectangle& sourceRc) -{ - if (m_msaaSamples <= 1) - { - return m_efbColor; - } - else - { - // Transfer the EFB to a resolved texture. EXT_framebuffer_blit is - // required. - - TargetRectangle targetRc = g_renderer->ConvertEFBRectangle(sourceRc); - targetRc.ClampUL(0, 0, m_targetWidth, m_targetHeight); - - // Resolve. - for (unsigned int i = 0; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); - glBlitFramebuffer(targetRc.left, targetRc.top, targetRc.right, targetRc.bottom, targetRc.left, - targetRc.top, targetRc.right, targetRc.bottom, GL_COLOR_BUFFER_BIT, - GL_NEAREST); - } - - // Return to EFB. - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); - - return m_resolvedColorTexture; - } -} - -GLuint FramebufferManager::GetEFBDepthTexture(const EFBRectangle& sourceRc) -{ - if (m_msaaSamples <= 1) - { - return m_efbDepth; - } - else - { - // Transfer the EFB to a resolved texture. 
- - TargetRectangle targetRc = g_renderer->ConvertEFBRectangle(sourceRc); - targetRc.ClampUL(0, 0, m_targetWidth, m_targetHeight); - - // Resolve. - for (unsigned int i = 0; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); - glBlitFramebuffer(targetRc.left, targetRc.top, targetRc.right, targetRc.bottom, targetRc.left, - targetRc.top, targetRc.right, targetRc.bottom, GL_DEPTH_BUFFER_BIT, - GL_NEAREST); - } - - // Return to EFB. - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); - - return m_resolvedDepthTexture; - } -} - -void FramebufferManager::ResolveEFBStencilTexture() -{ - if (m_msaaSamples <= 1) - return; - - // Resolve. - for (unsigned int i = 0; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); - glBlitFramebuffer(0, 0, m_targetWidth, m_targetHeight, 0, 0, m_targetWidth, m_targetHeight, - GL_STENCIL_BUFFER_BIT, GL_NEAREST); - } - - // Return to EFB. - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); -} - -GLuint FramebufferManager::GetResolvedFramebuffer() -{ - if (m_msaaSamples <= 1) - return m_efbFramebuffer[0]; - return m_resolvedFramebuffer[0]; -} - -void FramebufferManager::SetFramebuffer(GLuint fb) -{ - glBindFramebuffer(GL_FRAMEBUFFER, fb != 0 ? 
fb : GetEFBFramebuffer()); -} - -void FramebufferManager::FramebufferTexture(GLenum target, GLenum attachment, GLenum textarget, - GLuint texture, GLint level) -{ - if (textarget == GL_TEXTURE_2D_ARRAY || textarget == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) - { - if (m_EFBLayers > 1) - glFramebufferTexture(target, attachment, texture, level); - else - glFramebufferTextureLayer(target, attachment, texture, level, 0); - } - else - { - glFramebufferTexture2D(target, attachment, textarget, texture, level); - } -} - -// Apply AA if enabled -GLuint FramebufferManager::ResolveAndGetRenderTarget(const EFBRectangle& source_rect) -{ - return GetEFBColorTexture(source_rect); -} - -GLuint FramebufferManager::ResolveAndGetDepthTarget(const EFBRectangle& source_rect) -{ - return GetEFBDepthTexture(source_rect); -} - -void FramebufferManager::ReinterpretPixelData(unsigned int convtype) -{ - g_renderer->ResetAPIState(); - - GLuint src_texture = 0; - - // We aren't allowed to render and sample the same texture in one draw call, - // so we have to create a new texture and overwrite it completely. - // To not allocate one big texture every time, we've allocated two on - // initialization and just swap them here: - src_texture = m_efbColor; - m_efbColor = m_efbColorSwap; - m_efbColorSwap = src_texture; - FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textureType, m_efbColor, 0); - - glViewport(0, 0, m_targetWidth, m_targetHeight); - glActiveTexture(GL_TEXTURE9); - glBindTexture(m_textureType, src_texture); - g_sampler_cache->BindNearestSampler(9); - - m_pixel_format_shaders[convtype ? 
1 : 0].Bind(); - ProgramShaderCache::BindVertexFormat(nullptr); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - glBindTexture(m_textureType, 0); - - g_renderer->RestoreAPIState(); -} - -void FramebufferManager::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - g_renderer->ResetAPIState(); - - if (type == EFBAccessType::PokeZ) - { - glDepthMask(GL_TRUE); - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - glEnable(GL_DEPTH_TEST); - glDepthFunc(GL_ALWAYS); - } - - glBindVertexArray(m_EfbPokes_VAO); - glBindBuffer(GL_ARRAY_BUFFER, m_EfbPokes_VBO); - glBufferData(GL_ARRAY_BUFFER, sizeof(EfbPokeData) * num_points, points, GL_STREAM_DRAW); - m_EfbPokes.Bind(); - glViewport(0, 0, m_targetWidth, m_targetHeight); - glDrawArrays(GL_POINTS, 0, (GLsizei)num_points); - - glBindBuffer(GL_ARRAY_BUFFER, - static_cast(g_vertex_manager.get())->GetVertexBufferHandle()); - g_renderer->RestoreAPIState(); - - // TODO: Could just update the EFB cache with the new value - ClearEFBCache(); -} - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h deleted file mode 100644 index f68556bf90..0000000000 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/GL/GLUtil.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" -#include "VideoBackends/OGL/Render.h" -#include "VideoCommon/FramebufferManagerBase.h" - -// On the GameCube, the game sends a request for the graphics processor to -// transfer its internal EFB (Embedded Framebuffer) to an area in GameCube RAM -// called the XFB (External Framebuffer). The size and location of the XFB is -// decided at the time of the copy, and the format is always YUYV. 
The video -// interface is given a pointer to the XFB, which will be decoded and -// displayed on the TV. -// -// There are two ways for Dolphin to emulate this: -// -// Real XFB mode: -// -// Dolphin will behave like the GameCube and encode the EFB to -// a portion of GameCube RAM. The emulated video interface will decode the data -// for output to the screen. -// -// Advantages: Behaves exactly like the GameCube. -// Disadvantages: Resolution will be limited. -// -// Virtual XFB mode: -// -// When a request is made to copy the EFB to an XFB, Dolphin -// will remember the RAM location and size of the XFB in a Virtual XFB list. -// The video interface will look up the XFB in the list and use the enhanced -// data stored there, if available. -// -// Advantages: Enables high resolution graphics, better than real hardware. -// Disadvantages: If the GameCube CPU writes directly to the XFB (which is -// possible but uncommon), the Virtual XFB will not capture this information. - -// There may be multiple XFBs in GameCube RAM. This is the maximum number to -// virtualize. - -namespace OGL -{ -class FramebufferManager : public FramebufferManagerBase -{ -public: - FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, - bool enable_stencil_buffer); - ~FramebufferManager(); - - // To get the EFB in texture form, these functions may have to transfer - // the EFB to a resolved texture first. - static GLuint GetEFBColorTexture(const EFBRectangle& sourceRc); - static GLuint GetEFBDepthTexture(const EFBRectangle& sourceRc); - static void ResolveEFBStencilTexture(); - - static GLuint GetEFBFramebuffer(unsigned int layer = 0) - { - return (layer < m_EFBLayers) ? m_efbFramebuffer[layer] : m_efbFramebuffer.back(); - } - // Resolved framebuffer is only used in MSAA mode. 
- static GLuint GetResolvedFramebuffer(); - static void SetFramebuffer(GLuint fb); - static void FramebufferTexture(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, - GLint level); - - // If in MSAA mode, this will perform a resolve of the specified rectangle, and return the resolve - // target as a texture ID. - // Thus, this call may be expensive. Don't repeat it unnecessarily. - // If not in MSAA mode, will just return the render target texture ID. - // After calling this, before you render anything else, you MUST bind the framebuffer you want to - // draw to. - static GLuint ResolveAndGetRenderTarget(const EFBRectangle& rect); - - // Same as above but for the depth Target. - // After calling this, before you render anything else, you MUST bind the framebuffer you want to - // draw to. - static GLuint ResolveAndGetDepthTarget(const EFBRectangle& rect); - - // Convert EFB content on pixel format change. - // convtype=0 -> rgb8->rgba6, convtype=2 -> rgba6->rgb8 - static void ReinterpretPixelData(unsigned int convtype); - - static void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points); - static bool HasStencilBuffer(); - -private: - GLuint CreateTexture(GLenum texture_type, GLenum internal_format, GLenum pixel_format, - GLenum data_type); - void BindLayeredTexture(GLuint texture, const std::vector& framebuffers, - GLenum attachment, GLenum texture_type); - - static int m_targetWidth; - static int m_targetHeight; - static int m_msaaSamples; - - static GLenum m_textureType; - static std::vector m_efbFramebuffer; - static GLuint m_efbColor; - static GLuint m_efbDepth; - static GLuint - m_efbColorSwap; // will be hot swapped with m_efbColor when reinterpreting EFB pixel formats - - static bool m_enable_stencil_buffer; - - // Only used in MSAA mode, TODO: try to avoid them - static std::vector m_resolvedFramebuffer; - static GLuint m_resolvedColorTexture; - static GLuint m_resolvedDepthTexture; - - // For pixel format draw - 
static SHADER m_pixel_format_shaders[2]; - - // For EFB pokes - static GLuint m_EfbPokes_VBO; - static GLuint m_EfbPokes_VAO; - static SHADER m_EfbPokes; -}; - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp b/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp index df24809ae0..50a6276c72 100644 --- a/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp @@ -7,6 +7,7 @@ #include "Common/MsgHandler.h" #include "VideoBackends/OGL/ProgramShaderCache.h" +#include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/VertexManager.h" #include "VideoCommon/NativeVertexFormat.h" @@ -18,7 +19,7 @@ namespace OGL { std::unique_ptr -VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) { return std::make_unique(vtx_decl); } @@ -44,10 +45,10 @@ static void SetPointer(u32 attrib, u32 stride, const AttributeFormat& format) (u8*)nullptr + format.offset); } -GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl) +GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& vtx_decl) + : NativeVertexFormat(vtx_decl) { - this->vtx_decl = _vtx_decl; - u32 vertex_stride = _vtx_decl.stride; + u32 vertex_stride = vtx_decl.stride; // We will not allow vertex components causing uneven strides. 
if (vertex_stride & 3) @@ -63,22 +64,22 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl) glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vm->GetIndexBufferHandle()); glBindBuffer(GL_ARRAY_BUFFER, vm->GetVertexBufferHandle()); - SetPointer(SHADER_POSITION_ATTRIB, vertex_stride, _vtx_decl.position); + SetPointer(SHADER_POSITION_ATTRIB, vertex_stride, vtx_decl.position); for (int i = 0; i < 3; i++) - SetPointer(SHADER_NORM0_ATTRIB + i, vertex_stride, _vtx_decl.normals[i]); + SetPointer(SHADER_NORM0_ATTRIB + i, vertex_stride, vtx_decl.normals[i]); for (int i = 0; i < 2; i++) - SetPointer(SHADER_COLOR0_ATTRIB + i, vertex_stride, _vtx_decl.colors[i]); + SetPointer(SHADER_COLOR0_ATTRIB + i, vertex_stride, vtx_decl.colors[i]); for (int i = 0; i < 8; i++) - SetPointer(SHADER_TEXTURE0_ATTRIB + i, vertex_stride, _vtx_decl.texcoords[i]); + SetPointer(SHADER_TEXTURE0_ATTRIB + i, vertex_stride, vtx_decl.texcoords[i]); - SetPointer(SHADER_POSMTX_ATTRIB, vertex_stride, _vtx_decl.posmtx); + SetPointer(SHADER_POSMTX_ATTRIB, vertex_stride, vtx_decl.posmtx); } GLVertexFormat::~GLVertexFormat() { glDeleteVertexArrays(1, &VAO); } -} +} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/OGL.vcxproj b/Source/Core/VideoBackends/OGL/OGL.vcxproj index 38ea16b42e..7395b6ef2b 100644 --- a/Source/Core/VideoBackends/OGL/OGL.vcxproj +++ b/Source/Core/VideoBackends/OGL/OGL.vcxproj @@ -40,17 +40,13 @@ - - - - @@ -58,16 +54,12 @@ - - - - diff --git a/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters b/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters index fbede5caf6..4077f05a37 100644 --- a/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters +++ b/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters @@ -18,21 +18,12 @@ Decoder - - GLUtil - Render - - Render - Render - - Render - Render @@ -42,9 +33,6 @@ Render - - Render - @@ -61,21 +49,12 @@ Decoder - - GLUtil - Render - - Render - Render - - Render - Render @@ -85,9 +64,6 @@ Render - - Render - diff --git 
a/Source/Core/VideoBackends/OGL/OGLShader.cpp b/Source/Core/VideoBackends/OGL/OGLShader.cpp index e881269524..340d79f244 100644 --- a/Source/Core/VideoBackends/OGL/OGLShader.cpp +++ b/Source/Core/VideoBackends/OGL/OGLShader.cpp @@ -24,23 +24,24 @@ static GLenum GetGLShaderTypeForStage(ShaderStage stage) } } -OGLShader::OGLShader(ShaderStage stage, GLenum gl_type, GLuint shader_id) - : AbstractShader(stage), m_type(gl_type), m_id(shader_id) +OGLShader::OGLShader(ShaderStage stage, GLenum gl_type, GLuint gl_id) + : AbstractShader(stage), m_id(ProgramShaderCache::GenerateShaderID()), m_type(gl_type), + m_gl_id(gl_id) { } -OGLShader::OGLShader(GLuint compute_program_id) - : AbstractShader(ShaderStage::Compute), m_type(GL_COMPUTE_SHADER), - m_compute_program_id(compute_program_id) +OGLShader::OGLShader(GLuint gl_compute_program_id) + : AbstractShader(ShaderStage::Compute), m_id(ProgramShaderCache::GenerateShaderID()), + m_type(GL_COMPUTE_SHADER), m_gl_compute_program_id(gl_compute_program_id) { } OGLShader::~OGLShader() { if (m_stage != ShaderStage::Compute) - glDeleteShader(m_id); + glDeleteShader(m_gl_id); else - glDeleteProgram(m_compute_program_id); + glDeleteProgram(m_gl_compute_program_id); } bool OGLShader::HasBinary() const diff --git a/Source/Core/VideoBackends/OGL/OGLShader.h b/Source/Core/VideoBackends/OGL/OGLShader.h index e3036044f6..a703d60696 100644 --- a/Source/Core/VideoBackends/OGL/OGLShader.h +++ b/Source/Core/VideoBackends/OGL/OGLShader.h @@ -16,13 +16,14 @@ namespace OGL class OGLShader final : public AbstractShader { public: - explicit OGLShader(ShaderStage stage, GLenum gl_type, GLuint shader_id); - explicit OGLShader(GLuint compute_program_id); + explicit OGLShader(ShaderStage stage, GLenum gl_type, GLuint gl_id); + explicit OGLShader(GLuint gl_compute_program_id); ~OGLShader() override; + u64 GetID() const { return m_id; } GLenum GetGLShaderType() const { return m_type; } - GLuint GetGLShaderID() const { return m_id; } - GLuint 
GetGLComputeProgramID() const { return m_compute_program_id; } + GLuint GetGLShaderID() const { return m_gl_id; } + GLuint GetGLComputeProgramID() const { return m_gl_compute_program_id; } bool HasBinary() const override; BinaryData GetBinary() const override; @@ -30,9 +31,10 @@ public: size_t length); private: + u64 m_id; GLenum m_type; - GLuint m_id = 0; - GLuint m_compute_program_id = 0; + GLuint m_gl_id = 0; + GLuint m_gl_compute_program_id = 0; }; } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/OGLTexture.cpp b/Source/Core/VideoBackends/OGL/OGLTexture.cpp index fbe7f576c7..3cdbaca301 100644 --- a/Source/Core/VideoBackends/OGL/OGLTexture.cpp +++ b/Source/Core/VideoBackends/OGL/OGLTexture.cpp @@ -6,13 +6,8 @@ #include "Common/CommonTypes.h" #include "Common/MsgHandler.h" -#include "VideoBackends/OGL/FramebufferManager.h" #include "VideoBackends/OGL/OGLTexture.h" #include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/TextureCache.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConfig.h" namespace OGL { @@ -115,10 +110,9 @@ OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_co DEBUG_ASSERT_MSG(VIDEO, !tex_config.IsMultisampled() || tex_config.levels == 1, "OpenGL does not support multisampled textures with mip levels"); - GLenum target = - tex_config.IsMultisampled() ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; + const GLenum target = GetGLTarget(); glGenTextures(1, &m_texId); - glActiveTexture(GL_TEXTURE9); + glActiveTexture(GL_MUTABLE_TEXTURE_INDEX); glBindTexture(target, m_texId); glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, m_config.levels - 1); @@ -139,7 +133,7 @@ OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_co m_config.layers); } - if (m_config.rendertarget) + if (m_config.IsRenderTarget()) { // We can't render to compressed formats. 
ASSERT(!IsCompressedFormat(m_config.format)); @@ -147,40 +141,19 @@ OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_co { for (u32 level = 0; level < m_config.levels; level++) { - glTexImage3D(target, level, GL_RGBA, std::max(m_config.width >> level, 1u), - std::max(m_config.height >> level, 1u), m_config.layers, 0, GL_RGBA, - GL_UNSIGNED_BYTE, nullptr); + glTexImage3D(target, level, gl_internal_format, std::max(m_config.width >> level, 1u), + std::max(m_config.height >> level, 1u), m_config.layers, 0, + GetGLFormatForTextureFormat(m_config.format), + GetGLTypeForTextureFormat(m_config.format), nullptr); } } - glGenFramebuffers(1, &m_framebuffer); - FramebufferManager::SetFramebuffer(m_framebuffer); - FramebufferManager::FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, m_texId, - 0); - - // We broke the framebuffer binding here, and need to restore it, as the CreateTexture - // method is in the base renderer class and can be called by VideoCommon. 
- FramebufferManager::SetFramebuffer(0); } } OGLTexture::~OGLTexture() { - g_renderer->UnbindTexture(this); - if (m_texId) - glDeleteTextures(1, &m_texId); - - if (m_framebuffer) - glDeleteFramebuffers(1, &m_framebuffer); -} - -GLuint OGLTexture::GetRawTexIdentifier() const -{ - return m_texId; -} - -GLuint OGLTexture::GetFramebuffer() const -{ - return m_framebuffer; + Renderer::GetInstance()->UnbindTexture(this); + glDeleteTextures(1, &m_texId); } void OGLTexture::CopyRectangleFromTexture(const AbstractTexture* src, @@ -188,19 +161,18 @@ void OGLTexture::CopyRectangleFromTexture(const AbstractTexture* src, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) { - const OGLTexture* srcentry = static_cast(src); + const OGLTexture* src_gltex = static_cast(src); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); if (g_ogl_config.bSupportsCopySubImage) { - glCopyImageSubData(srcentry->m_texId, GL_TEXTURE_2D_ARRAY, src_level, src_rect.left, - src_rect.top, src_layer, m_texId, GL_TEXTURE_2D_ARRAY, dst_level, - dst_rect.left, dst_rect.top, dst_layer, dst_rect.GetWidth(), - dst_rect.GetHeight(), 1); + glCopyImageSubData(src_gltex->m_texId, src_gltex->GetGLTarget(), src_level, src_rect.left, + src_rect.top, src_layer, m_texId, GetGLTarget(), dst_level, dst_rect.left, + dst_rect.top, dst_layer, dst_rect.GetWidth(), dst_rect.GetHeight(), 1); } else { - BlitFramebuffer(const_cast(srcentry), src_rect, src_layer, src_level, dst_rect, + BlitFramebuffer(const_cast(src_gltex), src_rect, src_layer, src_level, dst_rect, dst_layer, dst_level); } } @@ -210,28 +182,12 @@ void OGLTexture::BlitFramebuffer(OGLTexture* srcentry, const MathUtil::Rectangle const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) { - // If it isn't a single leveled/layered texture, we need to update the framebuffer. 
- bool update_src_framebuffer = - srcentry->m_framebuffer == 0 || srcentry->m_config.layers != 0 || src_level != 0; - bool update_dst_framebuffer = m_framebuffer == 0 || m_config.layers != 0 || dst_level != 0; - if (!m_framebuffer) - glGenFramebuffers(1, &m_framebuffer); - if (!srcentry->m_framebuffer) - glGenFramebuffers(1, &const_cast(srcentry)->m_framebuffer); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, srcentry->m_framebuffer); - if (update_src_framebuffer) - { - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcentry->m_texId, - src_level, src_layer); - } - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_framebuffer); - if (update_dst_framebuffer) - { - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_texId, dst_level, - dst_layer); - } + Renderer::GetInstance()->BindSharedReadFramebuffer(); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcentry->m_texId, src_level, + src_layer); + Renderer::GetInstance()->BindSharedDrawFramebuffer(); + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_texId, dst_level, + dst_layer); // glBlitFramebuffer is still affected by the scissor test, which is enabled by default. glDisable(GL_SCISSOR_TEST); @@ -239,50 +195,10 @@ void OGLTexture::BlitFramebuffer(OGLTexture* srcentry, const MathUtil::Rectangle glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, GL_NEAREST); - if (update_src_framebuffer) - { - FramebufferManager::FramebufferTexture( - GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - srcentry->m_config.IsMultisampled() ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY, - srcentry->m_texId, 0); - } - if (update_dst_framebuffer) - { - FramebufferManager::FramebufferTexture( - GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - m_config.IsMultisampled() ? 
GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY, m_texId, - 0); - } - // The default state for the scissor test is enabled. We don't need to do a full state // restore, as the framebuffer and scissor test are the only things we changed. glEnable(GL_SCISSOR_TEST); - FramebufferManager::SetFramebuffer(0); -} - -void OGLTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) -{ - const OGLTexture* srcentry = static_cast(source); - if (!m_framebuffer) - { - glGenFramebuffers(1, &m_framebuffer); - FramebufferManager::SetFramebuffer(m_framebuffer); - FramebufferManager::FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D_ARRAY, m_texId, 0); - } - g_renderer->ResetAPIState(); - FramebufferManager::SetFramebuffer(m_framebuffer); - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, srcentry->m_texId); - g_sampler_cache->BindLinearSampler(9); - glViewport(dstrect.left, dstrect.top, dstrect.GetWidth(), dstrect.GetHeight()); - TextureCache::GetInstance()->GetColorCopyProgram().Bind(); - glUniform4f(TextureCache::GetInstance()->GetColorCopyPositionUniform(), float(srcrect.left), - float(srcrect.top), float(srcrect.GetWidth()), float(srcrect.GetHeight())); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - g_renderer->RestoreAPIState(); + Renderer::GetInstance()->RestoreFramebufferBinding(); } void OGLTexture::ResolveFromTexture(const AbstractTexture* src, @@ -307,8 +223,9 @@ void OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8 std::max(1u, m_config.width >> level), std::max(1u, m_config.height >> level), width, height); - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, m_texId); + const GLenum target = GetGLTarget(); + glActiveTexture(GL_MUTABLE_TEXTURE_INDEX); + glBindTexture(target, m_texId); if (row_length != width) glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length); @@ -318,12 +235,12 @@ void 
OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8 { if (g_ogl_config.bSupportsTextureStorage) { - glCompressedTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, - gl_internal_format, static_cast(buffer_size), buffer); + glCompressedTexSubImage3D(target, level, 0, 0, 0, width, height, 1, gl_internal_format, + static_cast(buffer_size), buffer); } else { - glCompressedTexImage3D(GL_TEXTURE_2D_ARRAY, level, gl_internal_format, width, height, 1, 0, + glCompressedTexImage3D(target, level, gl_internal_format, width, height, 1, 0, static_cast(buffer_size), buffer); } } @@ -333,13 +250,12 @@ void OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8 GLenum gl_type = GetGLTypeForTextureFormat(m_config.format); if (g_ogl_config.bSupportsTextureStorage) { - glTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, gl_format, gl_type, - buffer); + glTexSubImage3D(target, level, 0, 0, 0, width, height, 1, gl_format, gl_type, buffer); } else { - glTexImage3D(GL_TEXTURE_2D_ARRAY, level, gl_internal_format, width, height, 1, 0, gl_format, - gl_type, buffer); + glTexImage3D(target, level, gl_internal_format, width, height, 1, 0, gl_format, gl_type, + buffer); } } @@ -347,6 +263,11 @@ void OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } +GLenum OGLTexture::GetGLFormatForImageTexture() const +{ + return GetGLInternalFormatForTextureFormat(m_config.format, true); +} + OGLStagingTexture::OGLStagingTexture(StagingTextureType type, const TextureConfig& config, GLenum target, GLuint buffer_name, size_t buffer_size, char* map_ptr, size_t map_stride) @@ -405,8 +326,7 @@ std::unique_ptr OGLStagingTexture::Create(StagingTextureType } glBufferStorage(target, buffer_size, nullptr, buffer_flags); - buffer_ptr = - reinterpret_cast(glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, buffer_size, map_flags)); + buffer_ptr = 
reinterpret_cast(glMapBufferRange(target, 0, buffer_size, map_flags)); ASSERT(buffer_ptr != nullptr); } else @@ -426,7 +346,7 @@ void OGLStagingTexture::CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect) { - ASSERT(m_type == StagingTextureType::Readback); + ASSERT(m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetConfig().width && @@ -443,40 +363,37 @@ void OGLStagingTexture::CopyFromTexture(const AbstractTexture* src, glPixelStorei(GL_PACK_ROW_LENGTH, m_config.width); const OGLTexture* gltex = static_cast(src); - size_t dst_offset = dst_rect.top * m_config.GetStride() + dst_rect.left * m_texel_size; + const size_t dst_offset = dst_rect.top * m_config.GetStride() + dst_rect.left * m_texel_size; - // If we don't have a FBO associated with this texture, we need to use a slow path. - if (gltex->GetFramebuffer() != 0 && src_layer == 0 && src_level == 0) + // Prefer glGetTextureSubImage(), when available. + if (g_ogl_config.bSupportsTextureSubImage) { - // This texture has a framebuffer, so we can use glReadPixels(). - glBindFramebuffer(GL_READ_FRAMEBUFFER, gltex->GetFramebuffer()); - glReadPixels(src_rect.left, src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(), - GetGLFormatForTextureFormat(m_config.format), - GetGLTypeForTextureFormat(m_config.format), reinterpret_cast(dst_offset)); - - // Reset both read/draw framebuffers. 
- glBindFramebuffer(GL_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer()); + glGetTextureSubImage( + gltex->GetGLTextureId(), src_level, src_rect.left, src_rect.top, src_layer, + src_rect.GetWidth(), src_rect.GetHeight(), 1, GetGLFormatForTextureFormat(src->GetFormat()), + GetGLTypeForTextureFormat(src->GetFormat()), + static_cast(m_buffer_size - dst_offset), reinterpret_cast(dst_offset)); } else { - if (g_ogl_config.bSupportsTextureSubImage) + // Mutate the shared framebuffer. + Renderer::GetInstance()->BindSharedReadFramebuffer(); + if (AbstractTexture::IsDepthFormat(gltex->GetFormat())) { - glGetTextureSubImage( - gltex->GetRawTexIdentifier(), src_level, src_rect.left, src_rect.top, src_layer, - src_rect.GetWidth(), src_rect.GetHeight(), 1, - GetGLFormatForTextureFormat(m_config.format), GetGLTypeForTextureFormat(m_config.format), - static_cast(m_buffer_size - dst_offset), reinterpret_cast(dst_offset)); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0, 0, 0); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, gltex->GetGLTextureId(), + src_level, src_layer); } else { - // TODO: Investigate whether it's faster to use glReadPixels() with a framebuffer, since we're - // copying the whole texture, which may waste bandwidth. So we're trading CPU work in creating - // the framebuffer for GPU work in copying potentially redundant texels. 
- glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, gltex->GetRawTexIdentifier()); - glGetTexImage(GL_TEXTURE_2D_ARRAY, src_level, GetGLFormatForTextureFormat(m_config.format), - GetGLTypeForTextureFormat(m_config.format), nullptr); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gltex->GetGLTextureId(), + src_level, src_layer); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, 0, 0, 0); } + glReadPixels(src_rect.left, src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(), + GetGLFormatForTextureFormat(src->GetFormat()), + GetGLTypeForTextureFormat(src->GetFormat()), reinterpret_cast(dst_offset)); + Renderer::GetInstance()->RestoreFramebufferBinding(); } glPixelStorei(GL_PACK_ROW_LENGTH, 0); @@ -501,7 +418,7 @@ void OGLStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) { - ASSERT(m_type == StagingTextureType::Upload); + ASSERT(m_type == StagingTextureType::Upload || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= m_config.width && @@ -509,8 +426,9 @@ void OGLStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetConfig().width && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetConfig().height); - size_t src_offset = src_rect.top * m_config.GetStride() + src_rect.left * m_texel_size; - size_t copy_size = src_rect.GetHeight() * m_config.GetStride(); + const OGLTexture* gltex = static_cast(dst); + const size_t src_offset = src_rect.top * m_config.GetStride() + src_rect.left * m_texel_size; + const size_t copy_size = src_rect.GetHeight() * m_config.GetStride(); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer_name); glPixelStorei(GL_UNPACK_ROW_LENGTH, m_config.width); @@ -533,12 +451,12 @@ 
void OGLStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, } // Copy from the staging buffer to the texture object. - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, static_cast(dst)->GetRawTexIdentifier()); - glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0, dst_rect.left, dst_rect.top, dst_layer, - dst_rect.GetWidth(), dst_rect.GetHeight(), 1, - GetGLFormatForTextureFormat(m_config.format), - GetGLTypeForTextureFormat(m_config.format), reinterpret_cast(src_offset)); + const GLenum target = gltex->GetGLTarget(); + glActiveTexture(GL_MUTABLE_TEXTURE_INDEX); + glBindTexture(target, gltex->GetGLTextureId()); + glTexSubImage3D(target, 0, dst_rect.left, dst_rect.top, dst_layer, dst_rect.GetWidth(), + dst_rect.GetHeight(), 1, GetGLFormatForTextureFormat(dst->GetFormat()), + GetGLTypeForTextureFormat(dst->GetFormat()), reinterpret_cast(src_offset)); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); @@ -602,10 +520,13 @@ void OGLStagingTexture::Unmap() m_map_pointer = nullptr; } -OGLFramebuffer::OGLFramebuffer(AbstractTextureFormat color_format, +OGLFramebuffer::OGLFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples, GLuint fbo) - : AbstractFramebuffer(color_format, depth_format, width, height, layers, samples), m_fbo(fbo) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples), + m_fbo(fbo) { } @@ -614,8 +535,8 @@ OGLFramebuffer::~OGLFramebuffer() glDeleteFramebuffers(1, &m_fbo); } -std::unique_ptr OGLFramebuffer::Create(const OGLTexture* color_attachment, - const OGLTexture* depth_attachment) +std::unique_ptr OGLFramebuffer::Create(OGLTexture* color_attachment, + OGLTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; @@ -638,13 +559,13 @@ std::unique_ptr OGLFramebuffer::Create(const 
OGLTexture* color_a { if (color_attachment->GetConfig().layers > 1) { - glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - color_attachment->GetRawTexIdentifier(), 0); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, color_attachment->GetGLTextureId(), + 0); } else { glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - color_attachment->GetRawTexIdentifier(), 0, 0); + color_attachment->GetGLTextureId(), 0, 0); } } @@ -655,19 +576,26 @@ std::unique_ptr OGLFramebuffer::Create(const OGLTexture* color_a GL_DEPTH_ATTACHMENT; if (depth_attachment->GetConfig().layers > 1) { - glFramebufferTexture(GL_FRAMEBUFFER, attachment, depth_attachment->GetRawTexIdentifier(), 0); + glFramebufferTexture(GL_FRAMEBUFFER, attachment, depth_attachment->GetGLTextureId(), 0); } else { - glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, depth_attachment->GetRawTexIdentifier(), - 0, 0); + glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, depth_attachment->GetGLTextureId(), 0, + 0); } } DEBUG_ASSERT(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - FramebufferManager::SetFramebuffer(0); - return std::make_unique(color_format, depth_format, width, height, layers, - samples, fbo); + Renderer::GetInstance()->RestoreFramebufferBinding(); + + return std::make_unique(color_attachment, depth_attachment, color_format, + depth_format, width, height, layers, samples, fbo); +} + +void OGLFramebuffer::UpdateDimensions(u32 width, u32 height) +{ + m_width = width; + m_height = height; } } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/OGLTexture.h b/Source/Core/VideoBackends/OGL/OGLTexture.h index 60c5c932b3..bd6a6918e2 100644 --- a/Source/Core/VideoBackends/OGL/OGLTexture.h +++ b/Source/Core/VideoBackends/OGL/OGLTexture.h @@ -25,16 +25,17 @@ public: const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const 
AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) override; - GLuint GetRawTexIdentifier() const; - GLuint GetFramebuffer() const; + GLuint GetGLTextureId() const { return m_texId; } + GLenum GetGLTarget() const + { + return IsMultisampled() ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; + } + GLenum GetGLFormatForImageTexture() const; private: void BlitFramebuffer(OGLTexture* srcentry, const MathUtil::Rectangle& src_rect, @@ -42,7 +43,6 @@ private: u32 dst_layer, u32 dst_level); GLuint m_texId; - GLuint m_framebuffer = 0; }; class OGLStagingTexture final : public AbstractStagingTexture @@ -79,13 +79,18 @@ private: class OGLFramebuffer final : public AbstractFramebuffer { public: - OGLFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, + OGLFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples, GLuint fbo); ~OGLFramebuffer() override; + static std::unique_ptr Create(OGLTexture* color_attachment, + OGLTexture* depth_attachment); + GLuint GetFBO() const { return m_fbo; } - static std::unique_ptr Create(const OGLTexture* color_attachment, - const OGLTexture* depth_attachment); + + // Used for updating the dimensions of the system/window framebuffer. 
+ void UpdateDimensions(u32 width, u32 height); protected: GLuint m_fbo; diff --git a/Source/Core/VideoBackends/OGL/PostProcessing.cpp b/Source/Core/VideoBackends/OGL/PostProcessing.cpp deleted file mode 100644 index 57d8f7c8f8..0000000000 --- a/Source/Core/VideoBackends/OGL/PostProcessing.cpp +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/OGL/PostProcessing.h" - -#include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" -#include "Common/StringUtil.h" - -#include "Core/Config/GraphicsSettings.h" - -#include "VideoBackends/OGL/FramebufferManager.h" -#include "VideoBackends/OGL/OGLTexture.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" -#include "VideoBackends/OGL/SamplerCache.h" - -#include "VideoCommon/VideoCommon.h" -#include "VideoCommon/VideoConfig.h" - -namespace OGL -{ -static const char s_vertex_shader[] = "out vec2 uv0;\n" - "uniform vec4 src_rect;\n" - "void main(void) {\n" - " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" - " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" - " uv0 = vec2(mix(src_rect.xy, src_rect.zw, rawpos));\n" - "}\n"; - -OpenGLPostProcessing::OpenGLPostProcessing() : m_initialized(false) -{ - CreateHeader(); -} - -OpenGLPostProcessing::~OpenGLPostProcessing() -{ - m_shader.Destroy(); -} - -void OpenGLPostProcessing::BlitFromTexture(TargetRectangle src, TargetRectangle dst, - int src_texture, int src_width, int src_height, - int layer) -{ - ApplyShader(); - - glViewport(dst.left, dst.bottom, dst.GetWidth(), dst.GetHeight()); - - ProgramShaderCache::BindVertexFormat(nullptr); - - m_shader.Bind(); - - glUniform4f(m_uniform_resolution, (float)src_width, (float)src_height, 1.0f / (float)src_width, - 1.0f / (float)src_height); - glUniform4f(m_uniform_src_rect, src.left / (float)src_width, src.top / (float)src_height, - src.right / (float)src_width, src.bottom / (float)src_height); - 
glUniform1ui(m_uniform_time, (GLuint)m_timer.GetTimeElapsed()); - glUniform1i(m_uniform_layer, layer); - - if (m_config.IsDirty()) - { - for (auto& it : m_config.GetOptions()) - { - if (it.second.m_dirty) - { - switch (it.second.m_type) - { - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_BOOL: - glUniform1i(m_uniform_bindings[it.first], it.second.m_bool_value); - break; - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER: - switch (it.second.m_integer_values.size()) - { - case 1: - glUniform1i(m_uniform_bindings[it.first], it.second.m_integer_values[0]); - break; - case 2: - glUniform2i(m_uniform_bindings[it.first], it.second.m_integer_values[0], - it.second.m_integer_values[1]); - break; - case 3: - glUniform3i(m_uniform_bindings[it.first], it.second.m_integer_values[0], - it.second.m_integer_values[1], it.second.m_integer_values[2]); - break; - case 4: - glUniform4i(m_uniform_bindings[it.first], it.second.m_integer_values[0], - it.second.m_integer_values[1], it.second.m_integer_values[2], - it.second.m_integer_values[3]); - break; - } - break; - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT: - switch (it.second.m_float_values.size()) - { - case 1: - glUniform1f(m_uniform_bindings[it.first], it.second.m_float_values[0]); - break; - case 2: - glUniform2f(m_uniform_bindings[it.first], it.second.m_float_values[0], - it.second.m_float_values[1]); - break; - case 3: - glUniform3f(m_uniform_bindings[it.first], it.second.m_float_values[0], - it.second.m_float_values[1], it.second.m_float_values[2]); - break; - case 4: - glUniform4f(m_uniform_bindings[it.first], it.second.m_float_values[0], - it.second.m_float_values[1], it.second.m_float_values[2], - it.second.m_float_values[3]); - break; - } - break; - } - it.second.m_dirty = false; - } - } - m_config.SetDirty(false); - } - - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, src_texture); - 
g_sampler_cache->BindLinearSampler(9); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); -} - -void OpenGLPostProcessing::ApplyShader() -{ - // shader didn't changed - if (m_initialized && m_config.GetShader() == g_ActiveConfig.sPostProcessingShader) - return; - - m_shader.Destroy(); - m_uniform_bindings.clear(); - - // load shader code - std::string main_code = m_config.LoadShader(); - std::string options_code = LoadShaderOptions(); - std::string code = m_glsl_header + options_code + main_code; - - // and compile it - if (!ProgramShaderCache::CompileShader(m_shader, s_vertex_shader, code)) - { - ERROR_LOG(VIDEO, "Failed to compile post-processing shader %s", m_config.GetShader().c_str()); - Config::SetCurrent(Config::GFX_ENHANCE_POST_SHADER, ""); - code = m_config.LoadShader(); - ProgramShaderCache::CompileShader(m_shader, s_vertex_shader, code); - } - - // read uniform locations - m_uniform_resolution = glGetUniformLocation(m_shader.glprogid, "resolution"); - m_uniform_time = glGetUniformLocation(m_shader.glprogid, "time"); - m_uniform_src_rect = glGetUniformLocation(m_shader.glprogid, "src_rect"); - m_uniform_layer = glGetUniformLocation(m_shader.glprogid, "layer"); - - for (const auto& it : m_config.GetOptions()) - { - std::string glsl_name = "options." 
+ it.first; - m_uniform_bindings[it.first] = glGetUniformLocation(m_shader.glprogid, glsl_name.c_str()); - } - m_initialized = true; -} - -void OpenGLPostProcessing::CreateHeader() -{ - m_glsl_header = - // Required variables - // Shouldn't be accessed directly by the PP shader - // Texture sampler - "SAMPLER_BINDING(8) uniform sampler2D samp8;\n" - "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" - - // Output variable - "out float4 ocol0;\n" - // Input coordinates - "in float2 uv0;\n" - // Resolution - "uniform float4 resolution;\n" - // Time - "uniform uint time;\n" - // Layer - "uniform int layer;\n" - - // Interfacing functions - "float4 Sample()\n" - "{\n" - "\treturn texture(samp9, float3(uv0, layer));\n" - "}\n" - - "float4 SampleLocation(float2 location)\n" - "{\n" - "\treturn texture(samp9, float3(location, layer));\n" - "}\n" - - "float4 SampleLayer(int layer)\n" - "{\n" - "\treturn texture(samp9, float3(uv0, layer));\n" - "}\n" - - "#define SampleOffset(offset) textureOffset(samp9, float3(uv0, layer), offset)\n" - - "float2 GetResolution()\n" - "{\n" - "\treturn resolution.xy;\n" - "}\n" - - "float2 GetInvResolution()\n" - "{\n" - "\treturn resolution.zw;\n" - "}\n" - - "float2 GetCoordinates()\n" - "{\n" - "\treturn uv0;\n" - "}\n" - - "uint GetTime()\n" - "{\n" - "\treturn time;\n" - "}\n" - - "void SetOutput(float4 color)\n" - "{\n" - "\tocol0 = color;\n" - "}\n" - - "#define GetOption(x) (options.x)\n" - "#define OptionEnabled(x) (options.x != 0)\n"; -} - -std::string OpenGLPostProcessing::LoadShaderOptions() -{ - m_uniform_bindings.clear(); - if (m_config.GetOptions().empty()) - return ""; - - std::string glsl_options = "struct Options\n{\n"; - - for (const auto& it : m_config.GetOptions()) - { - if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_BOOL) - { - glsl_options += StringFromFormat("int %s;\n", it.first.c_str()); - } - else if (it.second.m_type == - 
PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER) - { - u32 count = static_cast(it.second.m_integer_values.size()); - if (count == 1) - glsl_options += StringFromFormat("int %s;\n", it.first.c_str()); - else - glsl_options += StringFromFormat("int%d %s;\n", count, it.first.c_str()); - } - else if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT) - { - u32 count = static_cast(it.second.m_float_values.size()); - if (count == 1) - glsl_options += StringFromFormat("float %s;\n", it.first.c_str()); - else - glsl_options += StringFromFormat("float%d %s;\n", count, it.first.c_str()); - } - - m_uniform_bindings[it.first] = 0; - } - - glsl_options += "};\n"; - glsl_options += "uniform Options options;\n"; - - return glsl_options; -} - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/PostProcessing.h b/Source/Core/VideoBackends/OGL/PostProcessing.h deleted file mode 100644 index 1c8c8ecf52..0000000000 --- a/Source/Core/VideoBackends/OGL/PostProcessing.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "Common/GL/GLUtil.h" - -#include "VideoBackends/OGL/ProgramShaderCache.h" - -#include "VideoCommon/PostProcessing.h" -#include "VideoCommon/VideoCommon.h" - -namespace OGL -{ -class OpenGLPostProcessing : public PostProcessingShaderImplementation -{ -public: - OpenGLPostProcessing(); - ~OpenGLPostProcessing(); - - void BlitFromTexture(TargetRectangle src, TargetRectangle dst, int src_texture, int src_width, - int src_height, int layer); - void ApplyShader(); - -private: - bool m_initialized; - SHADER m_shader; - GLuint m_uniform_resolution; - GLuint m_uniform_src_rect; - GLuint m_uniform_time; - GLuint m_uniform_layer; - std::string m_glsl_header; - - std::unordered_map m_uniform_bindings; - - void CreateHeader(); - std::string LoadShaderOptions(); -}; - -} // namespace diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index c8d54acd5a..e06491b986 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -4,6 +4,7 @@ #include "VideoBackends/OGL/ProgramShaderCache.h" +#include #include #include #include @@ -27,7 +28,6 @@ #include "VideoBackends/OGL/VertexManager.h" #include "VideoCommon/AsyncShaderCompiler.h" -#include "VideoCommon/Debugger.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/ImageWrite.h" @@ -54,6 +54,7 @@ static GLuint CurrentProgram = 0; ProgramShaderCache::PipelineProgramMap ProgramShaderCache::s_pipeline_programs; std::mutex ProgramShaderCache::s_pipeline_program_lock; static std::string s_glsl_header = ""; +static std::atomic s_shader_counter{0}; static thread_local bool s_is_shared_context = false; static std::string GetGLSLVersionString() @@ -109,13 +110,13 @@ void SHADER::SetProgramVariables() glUniformBlockBinding(glprogid, UBERBlock_id, 4); // Bind Texture Samplers - for (int a = 0; a < 10; 
++a) + for (int a = 0; a < 8; ++a) { - std::string name = StringFromFormat(a < 8 ? "samp[%d]" : "samp%d", a); - // Still need to get sampler locations since we aren't binding them statically in the shaders - int loc = glGetUniformLocation(glprogid, name.c_str()); - if (loc != -1) + int loc = glGetUniformLocation(glprogid, StringFromFormat("samp[%d]", a).c_str()); + if (loc < 0) + loc = glGetUniformLocation(glprogid, StringFromFormat("samp%d", a).c_str()); + if (loc >= 0) glUniform1i(loc, a); } @@ -191,21 +192,22 @@ bool PipelineProgramKey::operator!=(const PipelineProgramKey& rhs) const bool PipelineProgramKey::operator==(const PipelineProgramKey& rhs) const { - return std::tie(vertex_shader, geometry_shader, pixel_shader) == - std::tie(rhs.vertex_shader, rhs.geometry_shader, rhs.pixel_shader); + return std::tie(vertex_shader_id, geometry_shader_id, pixel_shader_id) == + std::tie(rhs.vertex_shader_id, rhs.geometry_shader_id, rhs.pixel_shader_id); } bool PipelineProgramKey::operator<(const PipelineProgramKey& rhs) const { - return std::tie(vertex_shader, geometry_shader, pixel_shader) < - std::tie(rhs.vertex_shader, rhs.geometry_shader, rhs.pixel_shader); + return std::tie(vertex_shader_id, geometry_shader_id, pixel_shader_id) < + std::tie(rhs.vertex_shader_id, rhs.geometry_shader_id, rhs.pixel_shader_id); } std::size_t PipelineProgramKeyHash::operator()(const PipelineProgramKey& key) const { // We would really want std::hash_combine for this.. 
- std::hash hasher; - return hasher(key.vertex_shader) + hasher(key.geometry_shader) + hasher(key.pixel_shader); + std::hash hasher; + return hasher(key.vertex_shader_id) + hasher(key.geometry_shader_id) + + hasher(key.pixel_shader_id); } StreamBuffer* ProgramShaderCache::GetUniformBuffer() @@ -218,13 +220,6 @@ u32 ProgramShaderCache::GetUniformBufferAlignment() return s_ubo_align; } -void ProgramShaderCache::InvalidateConstants() -{ - VertexShaderManager::dirty = true; - GeometryShaderManager::dirty = true; - PixelShaderManager::dirty = true; -} - void ProgramShaderCache::UploadConstants() { if (PixelShaderManager::dirty || VertexShaderManager::dirty || GeometryShaderManager::dirty) @@ -574,7 +569,9 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexForm const OGLShader* geometry_shader, const OGLShader* pixel_shader) { - PipelineProgramKey key = {vertex_shader, geometry_shader, pixel_shader}; + PipelineProgramKey key = {vertex_shader ? vertex_shader->GetID() : 0, + geometry_shader ? geometry_shader->GetID() : 0, + pixel_shader ? 
pixel_shader->GetID() : 0}; { std::lock_guard guard(s_pipeline_program_lock); auto iter = s_pipeline_programs.find(key); @@ -750,6 +747,7 @@ void ProgramShaderCache::CreateHeader() "%s\n" // Silly differences + "#define API_OPENGL 1\n" "#define float2 vec2\n" "#define float3 vec3\n" "#define float4 vec4\n" @@ -759,8 +757,6 @@ void ProgramShaderCache::CreateHeader() "#define int2 ivec2\n" "#define int3 ivec3\n" "#define int4 ivec4\n" - - // hlsl to glsl function translation "#define frac fract\n" "#define lerp mix\n" @@ -782,12 +778,17 @@ void ProgramShaderCache::CreateHeader() "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n" "#define UBO_BINDING(packing, x) layout(packing, binding = x)\n" "#define SAMPLER_BINDING(x) layout(binding = x)\n" - "#define SSBO_BINDING(x) layout(binding = x)\n" : + "#define TEXEL_BUFFER_BINDING(x) layout(binding = x)\n" + "#define SSBO_BINDING(x) layout(binding = x)\n" + "#define IMAGE_BINDING(format, x) layout(format, binding = x)\n" : "#define ATTRIBUTE_LOCATION(x)\n" "#define FRAGMENT_OUTPUT_LOCATION(x)\n" "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n" "#define UBO_BINDING(packing, x) layout(packing)\n" - "#define SAMPLER_BINDING(x)\n", + "#define SAMPLER_BINDING(x)\n" + "#define TEXEL_BUFFER_BINDING(x)\n" + "#define SSBO_BINDING(x)\n" + "#define IMAGE_BINDING(format, x) layout(format)\n", // Input/output blocks are matched by name during program linking "#define VARYING_LOCATION(x)\n", !is_glsles && g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics ? @@ -823,6 +824,11 @@ void ProgramShaderCache::CreateHeader() v >= GlslEs310 ? 
"precision highp image2DArray;" : ""); } +u64 ProgramShaderCache::GenerateShaderID() +{ + return s_shader_counter++; +} + bool SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param) { std::unique_ptr context = diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h index b94e733167..7fc267dc5d 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h @@ -44,9 +44,9 @@ struct SHADER struct PipelineProgramKey { - const OGLShader* vertex_shader; - const OGLShader* geometry_shader; - const OGLShader* pixel_shader; + u64 vertex_shader_id; + u64 geometry_shader_id; + u64 pixel_shader_id; bool operator==(const PipelineProgramKey& rhs) const; bool operator!=(const PipelineProgramKey& rhs) const; @@ -82,7 +82,6 @@ public: const std::string& gcode); static StreamBuffer* GetUniformBuffer(); static u32 GetUniformBufferAlignment(); - static void InvalidateConstants(); static void UploadConstants(); static void UploadConstants(const void* data, u32 data_size); @@ -90,6 +89,14 @@ public: static void Shutdown(); static void CreateHeader(); + // This counter increments with each shader object allocated, in order to give it a unique ID. + // Since the shaders can be destroyed after a pipeline is created, we can't use the shader pointer + // as a key for GL programs. For the same reason, we can't use the GL objects either. This ID is + // guaranteed to be unique for the emulation session, even if the memory allocator or GL driver + // re-uses pointers, therefore we won't have any collisions where the shaders attached to a + // pipeline do not match the pipeline configuration. 
+ static u64 GenerateShaderID(); + static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format, const OGLShader* vertex_shader, const OGLShader* geometry_shader, diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index b72941583e..baeb9f877c 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -27,22 +27,21 @@ #include "Core/Core.h" #include "VideoBackends/OGL/BoundingBox.h" -#include "VideoBackends/OGL/FramebufferManager.h" #include "VideoBackends/OGL/OGLPipeline.h" #include "VideoBackends/OGL/OGLShader.h" #include "VideoBackends/OGL/OGLTexture.h" -#include "VideoBackends/OGL/PostProcessing.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/SamplerCache.h" #include "VideoBackends/OGL/StreamBuffer.h" -#include "VideoBackends/OGL/TextureCache.h" #include "VideoBackends/OGL/VertexManager.h" #include "VideoCommon/BPFunctions.h" #include "VideoCommon/DriverDetails.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/PixelEngine.h" +#include "VideoCommon/PostProcessing.h" #include "VideoCommon/RenderState.h" #include "VideoCommon/ShaderGenCommon.h" #include "VideoCommon/VertexShaderManager.h" @@ -54,22 +53,6 @@ namespace OGL { VideoConfig g_ogl_config; -// Declarations and definitions -// ---------------------------- - -// 1 for no MSAA. Use s_MSAASamples > 1 to check for MSAA. -static int s_MSAASamples = 1; - -// EFB cache related -static const u32 EFB_CACHE_RECT_SIZE = 64; // Cache 64x64 blocks. 
-static const u32 EFB_CACHE_WIDTH = - (EFB_WIDTH + EFB_CACHE_RECT_SIZE - 1) / EFB_CACHE_RECT_SIZE; // round up -static const u32 EFB_CACHE_HEIGHT = (EFB_HEIGHT + EFB_CACHE_RECT_SIZE - 1) / EFB_CACHE_RECT_SIZE; -static bool s_efbCacheValid[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; -static bool s_efbCacheIsCleared = false; -static std::vector - s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PeekZ and PeekColor - static void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const char* message, const void* userParam) { @@ -356,6 +339,16 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ m_current_depth_state(RenderState::GetInvalidDepthState()), m_current_blend_state(RenderState::GetInvalidBlendingState()) { + // Create the window framebuffer. + if (!m_main_gl_context->IsHeadless()) + { + m_system_framebuffer = std::make_unique( + nullptr, nullptr, AbstractTextureFormat::RGBA8, AbstractTextureFormat::Undefined, + std::max(m_main_gl_context->GetBackBufferWidth(), 1u), + std::max(m_main_gl_context->GetBackBufferHeight(), 1u), 1, 1, 0); + m_current_framebuffer = m_system_framebuffer.get(); + } + bool bSuccess = true; g_ogl_config.gl_vendor = (const char*)glGetString(GL_VENDOR); @@ -437,9 +430,9 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ g_Config.backend_info.bSupportsPrimitiveRestart = !DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART) && ((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart")); - g_Config.backend_info.bSupportsBBox = true; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); + g_Config.backend_info.bSupportsBBox = g_Config.backend_info.bSupportsFragmentStoresAndAtomics; g_Config.backend_info.bSupportsGSInstancing = GLExtensions::Supports("GL_ARB_gpu_shader5"); g_Config.backend_info.bSupportsSSAA = 
GLExtensions::Supports("GL_ARB_gpu_shader5") && GLExtensions::Supports("GL_ARB_sample_shading"); @@ -692,9 +685,13 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ glDebugMessageCallbackARB(ErrorCallback, nullptr); } if (LogManager::GetInstance()->IsEnabled(LogTypes::HOST_GPU, LogTypes::LERROR)) + { glEnable(GL_DEBUG_OUTPUT); + } else + { glDisable(GL_DEBUG_OUTPUT); + } } int samples; @@ -753,23 +750,9 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VSYNC)) m_main_gl_context->SwapInterval(g_ActiveConfig.bVSyncActive); - // Because of the fixed framebuffer size we need to disable the resolution - // options while running - - // The stencil is used for bounding box emulation when SSBOs are not available - glDisable(GL_STENCIL_TEST); - glStencilFunc(GL_ALWAYS, 1, 0xFF); - glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); - - // Reset The Current Viewport - glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); if (g_ActiveConfig.backend_info.bSupportsClipControl) glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glClearDepthf(1.0f); - glEnable(GL_DEPTH_TEST); - glDepthFunc(GL_LEQUAL); if (g_ActiveConfig.backend_info.bSupportsDepthClamp) { glEnable(GL_CLIP_DISTANCE0); @@ -779,18 +762,14 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment - glEnable(GL_SCISSOR_TEST); - glScissor(0, 0, GetTargetWidth(), GetTargetHeight()); - glBlendFunc(GL_ONE, GL_ONE); - glBlendColor(0, 0, 0, 0.5f); - glClearDepthf(1.0f); + glGenFramebuffers(1, &m_shared_read_framebuffer); + glGenFramebuffers(1, &m_shared_draw_framebuffer); if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart) GLUtil::EnablePrimitiveRestart(m_main_gl_context.get()); IndexGenerator::Init(); UpdateActiveConfig(); - ClearEFBCache(); } Renderer::~Renderer() = default; @@ -805,24 +784,15 @@ bool 
Renderer::Initialize() if (!::Renderer::Initialize()) return false; - // Initialize the FramebufferManager - g_framebuffer_manager = std::make_unique( - m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; - - m_post_processor = std::make_unique(); return true; } void Renderer::Shutdown() { ::Renderer::Shutdown(); - g_framebuffer_manager.reset(); - UpdateActiveConfig(); - - m_post_processor.reset(); + glDeleteFramebuffers(1, &m_shared_draw_framebuffer); + glDeleteFramebuffers(1, &m_shared_read_framebuffer); } std::unique_ptr Renderer::CreateTexture(const TextureConfig& config) @@ -836,12 +806,11 @@ std::unique_ptr Renderer::CreateStagingTexture(StagingTe return OGLStagingTexture::Create(type, config); } -std::unique_ptr -Renderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) { - return OGLFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return OGLFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } std::unique_ptr Renderer::CreateShaderFromSource(ShaderStage stage, @@ -861,231 +830,9 @@ std::unique_ptr Renderer::CreatePipeline(const AbstractPipelin return OGLPipeline::Create(config); } -TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) -{ - TargetRectangle result; - result.left = EFBToScaledX(rc.left); - result.top = EFBToScaledY(EFB_HEIGHT - rc.top); - result.right = EFBToScaledX(rc.right); - result.bottom = EFBToScaledY(EFB_HEIGHT - rc.bottom); - return result; -} - void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) { - glScissor(rc.left, rc.bottom, rc.GetWidth(), rc.GetHeight()); -} - -void ClearEFBCache() -{ - if (!s_efbCacheIsCleared) - { - s_efbCacheIsCleared = 
true; - memset(s_efbCacheValid, 0, sizeof(s_efbCacheValid)); - } -} - -void Renderer::UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, - const TargetRectangle& targetPixelRc, const void* data) -{ - const u32 cacheType = (type == EFBAccessType::PeekZ ? 0 : 1); - - if (s_efbCache[cacheType][cacheRectIdx].empty()) - s_efbCache[cacheType][cacheRectIdx].resize(EFB_CACHE_RECT_SIZE * EFB_CACHE_RECT_SIZE); - - u32 targetPixelRcWidth = targetPixelRc.right - targetPixelRc.left; - u32 efbPixelRcHeight = efbPixelRc.bottom - efbPixelRc.top; - u32 efbPixelRcWidth = efbPixelRc.right - efbPixelRc.left; - - for (u32 yCache = 0; yCache < efbPixelRcHeight; ++yCache) - { - u32 yEFB = efbPixelRc.top + yCache; - u32 yPixel = (EFBToScaledY(EFB_HEIGHT - yEFB) + EFBToScaledY(EFB_HEIGHT - yEFB - 1)) / 2; - u32 yData = yPixel - targetPixelRc.bottom; - - for (u32 xCache = 0; xCache < efbPixelRcWidth; ++xCache) - { - u32 xEFB = efbPixelRc.left + xCache; - u32 xPixel = (EFBToScaledX(xEFB) + EFBToScaledX(xEFB + 1)) / 2; - u32 xData = xPixel - targetPixelRc.left; - u32 value; - if (type == EFBAccessType::PeekZ) - { - float* ptr = (float*)data; - value = MathUtil::Clamp((u32)(ptr[yData * targetPixelRcWidth + xData] * 16777216.0f), - 0, 0xFFFFFF); - } - else - { - u32* ptr = (u32*)data; - value = ptr[yData * targetPixelRcWidth + xData]; - } - s_efbCache[cacheType][cacheRectIdx][yCache * EFB_CACHE_RECT_SIZE + xCache] = value; - } - } - - s_efbCacheValid[cacheType][cacheRectIdx] = true; - s_efbCacheIsCleared = false; -} - -// This function allows the CPU to directly access the EFB. -// There are EFB peeks (which will read the color or depth of a pixel) -// and EFB pokes (which will change the color or depth of a pixel). 
-// -// The behavior of EFB peeks can only be modified by: -// - GX_PokeAlphaRead -// The behavior of EFB pokes can be modified by: -// - GX_PokeAlphaMode (TODO) -// - GX_PokeAlphaUpdate (TODO) -// - GX_PokeBlendMode (TODO) -// - GX_PokeColorUpdate (TODO) -// - GX_PokeDither (TODO) -// - GX_PokeDstAlpha (TODO) -// - GX_PokeZMode (TODO) -u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) -{ - u32 cacheRectIdx = (y / EFB_CACHE_RECT_SIZE) * EFB_CACHE_WIDTH + (x / EFB_CACHE_RECT_SIZE); - - EFBRectangle efbPixelRc; - - if (type == EFBAccessType::PeekColor || type == EFBAccessType::PeekZ) - { - // Get the rectangular target region containing the EFB pixel - efbPixelRc.left = (x / EFB_CACHE_RECT_SIZE) * EFB_CACHE_RECT_SIZE; - efbPixelRc.top = (y / EFB_CACHE_RECT_SIZE) * EFB_CACHE_RECT_SIZE; - efbPixelRc.right = std::min(efbPixelRc.left + EFB_CACHE_RECT_SIZE, (u32)EFB_WIDTH); - efbPixelRc.bottom = std::min(efbPixelRc.top + EFB_CACHE_RECT_SIZE, (u32)EFB_HEIGHT); - } - else - { - efbPixelRc.left = x; - efbPixelRc.top = y; - efbPixelRc.right = x + 1; - efbPixelRc.bottom = y + 1; - } - - TargetRectangle targetPixelRc = ConvertEFBRectangle(efbPixelRc); - u32 targetPixelRcWidth = targetPixelRc.right - targetPixelRc.left; - u32 targetPixelRcHeight = targetPixelRc.top - targetPixelRc.bottom; - - // TODO (FIX) : currently, AA path is broken/offset and doesn't return the correct pixel - switch (type) - { - case EFBAccessType::PeekZ: - { - if (!s_efbCacheValid[0][cacheRectIdx]) - { - if (s_MSAASamples > 1) - { - ResetAPIState(); - - // Resolve our rectangle. 
- FramebufferManager::GetEFBDepthTexture(efbPixelRc); - glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); - } - - std::unique_ptr depthMap(new float[targetPixelRcWidth * targetPixelRcHeight]); - - glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, - targetPixelRcHeight, GL_DEPTH_COMPONENT, GL_FLOAT, depthMap.get()); - - UpdateEFBCache(type, cacheRectIdx, efbPixelRc, targetPixelRc, depthMap.get()); - - if (s_MSAASamples > 1) - RestoreAPIState(); - } - - u32 xRect = x % EFB_CACHE_RECT_SIZE; - u32 yRect = y % EFB_CACHE_RECT_SIZE; - u32 z = s_efbCache[0][cacheRectIdx][yRect * EFB_CACHE_RECT_SIZE + xRect]; - - // if Z is in 16 bit format you must return a 16 bit integer - if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - z = z >> 8; - - return z; - } - - case EFBAccessType::PeekColor: // GXPeekARGB - { - // Although it may sound strange, this really is A8R8G8B8 and not RGBA or 24-bit... - - // Tested in Killer 7, the first 8bits represent the alpha value which is used to - // determine if we're aiming at an enemy (0x80 / 0x88) or not (0x70) - // Wind Waker is also using it for the pictograph to determine the color of each pixel - if (!s_efbCacheValid[1][cacheRectIdx]) - { - if (s_MSAASamples > 1) - { - ResetAPIState(); - - // Resolve our rectangle. 
- FramebufferManager::GetEFBColorTexture(efbPixelRc); - glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); - } - - std::unique_ptr colorMap(new u32[targetPixelRcWidth * targetPixelRcHeight]); - - if (IsGLES()) - // XXX: Swap colours - glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, - targetPixelRcHeight, GL_RGBA, GL_UNSIGNED_BYTE, colorMap.get()); - else - glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, - targetPixelRcHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, colorMap.get()); - - UpdateEFBCache(type, cacheRectIdx, efbPixelRc, targetPixelRc, colorMap.get()); - - if (s_MSAASamples > 1) - RestoreAPIState(); - } - - u32 xRect = x % EFB_CACHE_RECT_SIZE; - u32 yRect = y % EFB_CACHE_RECT_SIZE; - u32 color = s_efbCache[1][cacheRectIdx][yRect * EFB_CACHE_RECT_SIZE + xRect]; - - // check what to do with the alpha channel (GX_PokeAlphaRead) - PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); - - if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) - { - color = RGBA8ToRGBA6ToRGBA8(color); - } - else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - color = RGBA8ToRGB565ToRGBA8(color); - } - if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) - { - color |= 0xFF000000; - } - if (alpha_read_mode.ReadMode == 2) - { - // GX_READ_NONE - return color; - } - else if (alpha_read_mode.ReadMode == 1) - { - // GX_READ_FF - return (color | 0xFF000000); - } - else /*if(alpha_read_mode.ReadMode == 0)*/ - { - // GX_READ_00 - return (color & 0x00FFFFFF); - } - } - - default: - break; - } - - return 0; -} - -void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - FramebufferManager::PokeEFB(type, points, num_points); + glScissor(rc.left, rc.top, rc.GetWidth(), rc.GetHeight()); } u16 Renderer::BBoxRead(int index) @@ -1138,9 +885,6 @@ void Renderer::BBoxWrite(int index, u16 _value) void Renderer::SetViewport(float 
x, float y, float width, float height, float near_depth, float far_depth) { - // The x/y parameters here assume a upper-left origin. glViewport takes an offset from the - // lower-left of the framebuffer, so we must set y to the distance from the lower-left. - y = static_cast(m_current_framebuffer_height) - y - height; if (g_ogl_config.bSupportViewportFloat) { glViewportIndexedf(0, x, y, width, height); @@ -1156,7 +900,7 @@ void Renderer::SetViewport(float x, float y, float width, float height, float ne void Renderer::Draw(u32 base_vertex, u32 num_vertices) { - glDrawArrays(static_cast(m_graphics_pipeline)->GetGLPrimitive(), base_vertex, + glDrawArrays(static_cast(m_current_pipeline)->GetGLPrimitive(), base_vertex, num_vertices); } @@ -1164,135 +908,112 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) { if (g_ogl_config.bSupportsGLBaseVertex) { - glDrawElementsBaseVertex(static_cast(m_graphics_pipeline)->GetGLPrimitive(), + glDrawElementsBaseVertex(static_cast(m_current_pipeline)->GetGLPrimitive(), num_indices, GL_UNSIGNED_SHORT, static_cast(nullptr) + base_index, base_vertex); } else { - glDrawElements(static_cast(m_graphics_pipeline)->GetGLPrimitive(), + glDrawElements(static_cast(m_current_pipeline)->GetGLPrimitive(), num_indices, GL_UNSIGNED_SHORT, static_cast(nullptr) + base_index); } } +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) +{ + glUseProgram(static_cast(shader)->GetGLComputeProgramID()); + glDispatchCompute(groups_x, groups_y, groups_z); + + // We messed up the program binding, so restore it. + ProgramShaderCache::InvalidateLastProgram(); + if (m_current_pipeline) + static_cast(m_current_pipeline)->GetProgram()->shader.Bind(); + + // Barrier to texture can be used for reads. 
+ if (m_bound_image_texture) + glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); +} + void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) { - ResetAPIState(); + g_framebuffer_manager->FlushEFBPokes(); + g_framebuffer_manager->InvalidatePeekCache(); - // color - GLboolean const color_mask = colorEnable ? GL_TRUE : GL_FALSE, - alpha_mask = alphaEnable ? GL_TRUE : GL_FALSE; - glColorMask(color_mask, color_mask, color_mask, alpha_mask); - - glClearColor(float((color >> 16) & 0xFF) / 255.0f, float((color >> 8) & 0xFF) / 255.0f, - float((color >> 0) & 0xFF) / 255.0f, float((color >> 24) & 0xFF) / 255.0f); - - // depth - glDepthMask(zEnable ? GL_TRUE : GL_FALSE); - - glClearDepthf(float(z & 0xFFFFFF) / 16777216.0f); + u32 clear_mask = 0; + if (colorEnable || alphaEnable) + { + glColorMask(colorEnable, colorEnable, colorEnable, alphaEnable); + glClearColor(float((color >> 16) & 0xFF) / 255.0f, float((color >> 8) & 0xFF) / 255.0f, + float((color >> 0) & 0xFF) / 255.0f, float((color >> 24) & 0xFF) / 255.0f); + clear_mask = GL_COLOR_BUFFER_BIT; + } + if (zEnable) + { + glDepthMask(zEnable ? GL_TRUE : GL_FALSE); + glClearDepthf(float(z & 0xFFFFFF) / 16777216.0f); + clear_mask |= GL_DEPTH_BUFFER_BIT; + } // Update rect for clearing the picture - glEnable(GL_SCISSOR_TEST); - - TargetRectangle const targetRc = ConvertEFBRectangle(rc); - glScissor(targetRc.left, targetRc.bottom, targetRc.GetWidth(), targetRc.GetHeight()); - // glColorMask/glDepthMask/glScissor affect glClear (glViewport does not) - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + const auto converted_target_rc = + ConvertFramebufferRectangle(ConvertEFBRectangle(rc), m_current_framebuffer); + SetScissorRect(converted_target_rc); - RestoreAPIState(); + glClear(clear_mask); - ClearEFBCache(); + // Restore color/depth mask. 
+ if (colorEnable || alphaEnable) + { + glColorMask(m_current_blend_state.colorupdate, m_current_blend_state.colorupdate, + m_current_blend_state.colorupdate, m_current_blend_state.alphaupdate); + } + if (zEnable) + glDepthMask(m_current_depth_state.updateenable); + + // Scissor rect must be restored. + BPFunctions::SetScissor(); } void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) { - TargetRectangle source_rc = rc; - source_rc.top = rc.GetHeight(); - source_rc.bottom = 0; + // Quad-buffered stereo is annoying on GL. + if (g_ActiveConfig.stereo_mode != StereoMode::QuadBuffer) + return ::Renderer::RenderXFBToScreen(texture, rc); - // Check if we need to render to a new surface. - TargetRectangle flipped_trc = GetTargetRectangle(); - std::swap(flipped_trc.top, flipped_trc.bottom); + const auto target_rc = GetTargetRectangle(); - // Copy the framebuffer to screen. - OpenGLPostProcessing* post_processor = static_cast(m_post_processor.get()); - if (g_ActiveConfig.stereo_mode == StereoMode::SBS || - g_ActiveConfig.stereo_mode == StereoMode::TAB) - { - TargetRectangle left_rc, right_rc; + glDrawBuffer(GL_BACK_LEFT); + m_post_processor->BlitFromTexture(target_rc, rc, texture, 0); - // Top-and-Bottom mode needs to compensate for inverted vertical screen coordinates. 
- if (g_ActiveConfig.stereo_mode == StereoMode::TAB) - std::tie(right_rc, left_rc) = ConvertStereoRectangle(flipped_trc); - else - std::tie(left_rc, right_rc) = ConvertStereoRectangle(flipped_trc); + glDrawBuffer(GL_BACK_RIGHT); + m_post_processor->BlitFromTexture(target_rc, rc, texture, 1); - post_processor->BlitFromTexture(source_rc, left_rc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 0); - post_processor->BlitFromTexture(source_rc, right_rc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 1); - } - else if (g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer) - { - glDrawBuffer(GL_BACK_LEFT); - post_processor->BlitFromTexture(source_rc, flipped_trc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 0); - - glDrawBuffer(GL_BACK_RIGHT); - post_processor->BlitFromTexture(source_rc, flipped_trc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 1); - - glDrawBuffer(GL_BACK); - } - else - { - post_processor->BlitFromTexture(source_rc, flipped_trc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 0); - } + glDrawBuffer(GL_BACK); } -void Renderer::ReinterpretPixelData(unsigned int convtype) +void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) { - if (convtype == 0 || convtype == 2) - { - FramebufferManager::ReinterpretPixelData(convtype); - } - else - { - ERROR_LOG(VIDEO, "Trying to reinterpret pixel data with unsupported conversion type %d", - convtype); - } -} + if (m_current_framebuffer == framebuffer) + return; -void Renderer::SetFramebuffer(const AbstractFramebuffer* framebuffer) -{ - glBindFramebuffer(GL_FRAMEBUFFER, static_cast(framebuffer)->GetFBO()); + glBindFramebuffer(GL_FRAMEBUFFER, static_cast(framebuffer)->GetFBO()); m_current_framebuffer = framebuffer; - m_current_framebuffer_width = framebuffer->GetWidth(); - 
m_current_framebuffer_height = framebuffer->GetHeight(); } -void Renderer::SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) +void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) { // EXT_discard_framebuffer could be used here to save bandwidth on tilers. SetFramebuffer(framebuffer); } -void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, +void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value, float depth_value) { SetFramebuffer(framebuffer); - // NOTE: This disturbs the current scissor/mask setting. - // This won't be an issue when we implement proper state tracking. glDisable(GL_SCISSOR_TEST); GLbitfield clear_mask = 0; if (framebuffer->HasColorBuffer()) @@ -1304,15 +1025,162 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, if (framebuffer->HasDepthBuffer()) { glDepthMask(GL_TRUE); - glClearDepth(depth_value); + glClearDepthf(depth_value); clear_mask |= GL_DEPTH_BUFFER_BIT; } glClear(clear_mask); + glEnable(GL_SCISSOR_TEST); + + // Restore color/depth mask. 
+ if (framebuffer->HasColorBuffer()) + { + glColorMask(m_current_blend_state.colorupdate, m_current_blend_state.colorupdate, + m_current_blend_state.colorupdate, m_current_blend_state.alphaupdate); + } + if (framebuffer->HasDepthBuffer()) + glDepthMask(m_current_depth_state.updateenable); } -void Renderer::ApplyBlendingState(const BlendingState state, bool force) +void Renderer::BindBackbuffer(const ClearColor& clear_color) { - if (!force && m_current_blend_state == state) + CheckForSurfaceChange(); + CheckForSurfaceResize(); + SetAndClearFramebuffer(m_system_framebuffer.get(), clear_color); +} + +void Renderer::PresentBackbuffer() +{ + if (g_ogl_config.bSupportsDebug) + { + if (LogManager::GetInstance()->IsEnabled(LogTypes::HOST_GPU, LogTypes::LERROR)) + glEnable(GL_DEBUG_OUTPUT); + else + glDisable(GL_DEBUG_OUTPUT); + } + + // Swap the back and front buffers, presenting the image. + m_main_gl_context->Swap(); +} + +void Renderer::OnConfigChanged(u32 bits) +{ + if (bits & CONFIG_CHANGE_BIT_VSYNC && !DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VSYNC)) + m_main_gl_context->SwapInterval(g_ActiveConfig.bVSyncActive); + + if (bits & CONFIG_CHANGE_BIT_ANISOTROPY) + g_sampler_cache->Clear(); +} + +void Renderer::Flush() +{ + // ensure all commands are sent to the GPU. + // Otherwise the driver could batch several frames together. + glFlush(); +} + +void Renderer::WaitForGPUIdle() +{ + glFinish(); +} + +void Renderer::CheckForSurfaceChange() +{ + if (!m_surface_changed.TestAndClear()) + return; + + m_main_gl_context->UpdateSurface(m_new_surface_handle); + m_new_surface_handle = nullptr; + + // With a surface change, the window likely has new dimensions. 
+ m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); + m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); + m_system_framebuffer->UpdateDimensions(m_backbuffer_width, m_backbuffer_height); +} + +void Renderer::CheckForSurfaceResize() +{ + if (!m_surface_resized.TestAndClear()) + return; + + m_main_gl_context->Update(); + m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); + m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); + m_system_framebuffer->UpdateDimensions(m_backbuffer_width, m_backbuffer_height); +} + +void Renderer::BeginUtilityDrawing() +{ + ::Renderer::BeginUtilityDrawing(); + + glEnable(GL_PROGRAM_POINT_SIZE); + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + glDisable(GL_CLIP_DISTANCE0); + glDisable(GL_CLIP_DISTANCE1); + } +} + +void Renderer::EndUtilityDrawing() +{ + ::Renderer::EndUtilityDrawing(); + + glDisable(GL_PROGRAM_POINT_SIZE); + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + glEnable(GL_CLIP_DISTANCE0); + glEnable(GL_CLIP_DISTANCE1); + } +} + +void Renderer::ApplyRasterizationState(const RasterizationState state) +{ + if (m_current_rasterization_state == state) + return; + + // none, ccw, cw, ccw + if (state.cullmode != GenMode::CULL_NONE) + { + // TODO: GX_CULL_ALL not supported, yet! + glEnable(GL_CULL_FACE); + glFrontFace(state.cullmode == GenMode::CULL_FRONT ? GL_CCW : GL_CW); + } + else + { + glDisable(GL_CULL_FACE); + } + + m_current_rasterization_state = state; +} + +void Renderer::ApplyDepthState(const DepthState state) +{ + if (m_current_depth_state == state) + return; + + const GLenum glCmpFuncs[8] = {GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL, + GL_GREATER, GL_NOTEQUAL, GL_GEQUAL, GL_ALWAYS}; + + if (state.testenable) + { + glEnable(GL_DEPTH_TEST); + glDepthMask(state.updateenable ? 
GL_TRUE : GL_FALSE); + glDepthFunc(glCmpFuncs[state.func]); + } + else + { + // if the test is disabled write is disabled too + // TODO: When PE performance metrics are being emulated via occlusion queries, we should + // (probably?) enable depth test with depth function ALWAYS here + glDisable(GL_DEPTH_TEST); + glDepthMask(GL_FALSE); + } + + m_current_depth_state = state; +} + +void Renderer::ApplyBlendingState(const BlendingState state) +{ + if (m_current_blend_state == state) return; bool useDualSource = @@ -1348,13 +1216,9 @@ void Renderer::ApplyBlendingState(const BlendingState state, bool force) GL_ONE_MINUS_DST_ALPHA}; if (state.blendenable) - { glEnable(GL_BLEND); - } else - { glDisable(GL_BLEND); - } // Always call glBlendEquationSeparate and glBlendFuncSeparate, even when // GL_BLEND is disabled, as a workaround for some bugs (possibly graphics @@ -1372,220 +1236,58 @@ void Renderer::ApplyBlendingState(const BlendingState state, bool force) GL_XOR, GL_OR, GL_NOR, GL_EQUIV, GL_INVERT, GL_OR_REVERSE, GL_COPY_INVERTED, GL_OR_INVERTED, GL_NAND, GL_SET}; - if (IsGLES()) + // Logic ops aren't available in GLES3 + if (!IsGLES()) { - // Logic ops aren't available in GLES3 - } - else if (state.logicopenable) - { - glEnable(GL_COLOR_LOGIC_OP); - glLogicOp(logic_op_codes[state.logicmode]); - } - else - { - glDisable(GL_COLOR_LOGIC_OP); + if (state.logicopenable) + { + glEnable(GL_COLOR_LOGIC_OP); + glLogicOp(logic_op_codes[state.logicmode]); + } + else + { + glDisable(GL_COLOR_LOGIC_OP); + } } glColorMask(state.colorupdate, state.colorupdate, state.colorupdate, state.alphaupdate); m_current_blend_state = state; } -void Renderer::BindBackbuffer(const ClearColor& clear_color) -{ - CheckForSurfaceChange(); - CheckForSurfaceResize(); - - glBindFramebuffer(GL_FRAMEBUFFER, 0); - glClearColor(0, 0, 0, 0); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - m_current_framebuffer = nullptr; - m_current_framebuffer_width = m_backbuffer_width; - 
m_current_framebuffer_height = m_backbuffer_height; -} - -void Renderer::PresentBackbuffer() -{ - if (g_ogl_config.bSupportsDebug) - { - if (LogManager::GetInstance()->IsEnabled(LogTypes::HOST_GPU, LogTypes::LERROR)) - glEnable(GL_DEBUG_OUTPUT); - else - glDisable(GL_DEBUG_OUTPUT); - } - - // Swap the back and front buffers, presenting the image. - m_main_gl_context->Swap(); -} - -void Renderer::OnConfigChanged(u32 bits) -{ - if (bits & (CONFIG_CHANGE_BIT_TARGET_SIZE | CONFIG_CHANGE_BIT_MULTISAMPLES | - CONFIG_CHANGE_BIT_STEREO_MODE | CONFIG_CHANGE_BIT_BBOX)) - { - s_MSAASamples = g_ActiveConfig.iMultisamples; - if (s_MSAASamples > 1 && s_MSAASamples > g_ogl_config.max_samples) - { - s_MSAASamples = g_ogl_config.max_samples; - OSD::AddMessage( - StringFromFormat("%d Anti Aliasing samples selected, but only %d supported by your GPU.", - s_MSAASamples, g_ogl_config.max_samples), - 10000); - } - - g_framebuffer_manager.reset(); - g_framebuffer_manager = std::make_unique( - m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); - BoundingBox::SetTargetSizeChanged(m_target_width, m_target_height); - } - - if (bits & CONFIG_CHANGE_BIT_VSYNC && !DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VSYNC)) - m_main_gl_context->SwapInterval(g_ActiveConfig.bVSyncActive); - - if (bits & CONFIG_CHANGE_BIT_ANISOTROPY) - g_sampler_cache->Clear(); -} - -void Renderer::Flush() -{ - // ensure all commands are sent to the GPU. - // Otherwise the driver could batch several frames togehter. - glFlush(); -} - -void Renderer::CheckForSurfaceChange() -{ - if (!m_surface_changed.TestAndClear()) - return; - - m_main_gl_context->UpdateSurface(m_new_surface_handle); - m_new_surface_handle = nullptr; - - // With a surface change, the window likely has new dimensions. 
- m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); - m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); -} - -void Renderer::CheckForSurfaceResize() -{ - if (!m_surface_resized.TestAndClear()) - return; - - m_main_gl_context->Update(); - m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); - m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); -} - -// ALWAYS call RestoreAPIState for each ResetAPIState call you're doing -void Renderer::ResetAPIState() -{ - // Gets us to a reasonably sane state where it's possible to do things like - // image copies with textured quads, etc. - glDisable(GL_SCISSOR_TEST); - glDisable(GL_DEPTH_TEST); - glDisable(GL_CULL_FACE); - glDisable(GL_BLEND); - if (!IsGLES()) - glDisable(GL_COLOR_LOGIC_OP); - if (g_ActiveConfig.backend_info.bSupportsDepthClamp) - { - glDisable(GL_CLIP_DISTANCE0); - glDisable(GL_CLIP_DISTANCE1); - } - glDepthMask(GL_FALSE); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - m_current_rasterization_state = RenderState::GetInvalidRasterizationState(); - m_current_depth_state = RenderState::GetInvalidDepthState(); - m_current_blend_state = RenderState::GetInvalidBlendingState(); -} - -void Renderer::RestoreAPIState() -{ - m_current_framebuffer = nullptr; - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; - FramebufferManager::SetFramebuffer(0); - - // Gets us back into a more game-like state. - glEnable(GL_SCISSOR_TEST); - if (g_ActiveConfig.backend_info.bSupportsDepthClamp) - { - glEnable(GL_CLIP_DISTANCE0); - glEnable(GL_CLIP_DISTANCE1); - } - BPFunctions::SetScissor(); - BPFunctions::SetViewport(); -} - -void Renderer::ApplyRasterizationState(const RasterizationState state, bool force) -{ - if (!force && m_current_rasterization_state == state) - return; - - // none, ccw, cw, ccw - if (state.cullmode != GenMode::CULL_NONE) - { - // TODO: GX_CULL_ALL not supported, yet! 
- glEnable(GL_CULL_FACE); - glFrontFace(state.cullmode == GenMode::CULL_FRONT ? GL_CCW : GL_CW); - } - else - { - glDisable(GL_CULL_FACE); - } - - m_current_rasterization_state = state; -} - -void Renderer::ApplyDepthState(const DepthState state, bool force) -{ - if (!force && m_current_depth_state == state) - return; - - const GLenum glCmpFuncs[8] = {GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL, - GL_GREATER, GL_NOTEQUAL, GL_GEQUAL, GL_ALWAYS}; - - if (state.testenable) - { - glEnable(GL_DEPTH_TEST); - glDepthMask(state.updateenable ? GL_TRUE : GL_FALSE); - glDepthFunc(glCmpFuncs[state.func]); - } - else - { - // if the test is disabled write is disabled too - // TODO: When PE performance metrics are being emulated via occlusion queries, we should - // (probably?) enable depth test with depth function ALWAYS here - glDisable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); - } - - m_current_depth_state = state; -} - void Renderer::SetPipeline(const AbstractPipeline* pipeline) { - // Not all shader changes currently go through SetPipeline, so we can't - // test if the pipeline hasn't changed and skip these applications. Yet. 
- m_graphics_pipeline = static_cast(pipeline); - if (!m_graphics_pipeline) + if (m_current_pipeline == pipeline) return; - ApplyRasterizationState(m_graphics_pipeline->GetRasterizationState()); - ApplyDepthState(m_graphics_pipeline->GetDepthState()); - ApplyBlendingState(m_graphics_pipeline->GetBlendingState()); - ProgramShaderCache::BindVertexFormat(m_graphics_pipeline->GetVertexFormat()); - m_graphics_pipeline->GetProgram()->shader.Bind(); + if (pipeline) + { + ApplyRasterizationState(static_cast(pipeline)->GetRasterizationState()); + ApplyDepthState(static_cast(pipeline)->GetDepthState()); + ApplyBlendingState(static_cast(pipeline)->GetBlendingState()); + ProgramShaderCache::BindVertexFormat( + static_cast(pipeline)->GetVertexFormat()); + static_cast(pipeline)->GetProgram()->shader.Bind(); + } + else + { + ProgramShaderCache::InvalidateLastProgram(); + glUseProgram(0); + } + m_current_pipeline = pipeline; } void Renderer::SetTexture(u32 index, const AbstractTexture* texture) { - if (m_bound_textures[index] == texture) + const OGLTexture* gl_texture = static_cast(texture); + if (m_bound_textures[index] == gl_texture) return; glActiveTexture(GL_TEXTURE0 + index); - glBindTexture(GL_TEXTURE_2D_ARRAY, - texture ? static_cast(texture)->GetRawTexIdentifier() : 0); - m_bound_textures[index] = texture; + if (gl_texture) + glBindTexture(gl_texture->GetGLTarget(), gl_texture->GetGLTextureId()); + else + glBindTexture(GL_TEXTURE_2D_ARRAY, 0); + m_bound_textures[index] = gl_texture; } void Renderer::SetSamplerState(u32 index, const SamplerState& state) @@ -1593,6 +1295,25 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state) g_sampler_cache->SetSamplerState(index, state); } +void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) +{ + if (m_bound_image_texture == texture) + return; + + if (texture) + { + const GLenum access = read ? (write ? 
GL_READ_WRITE : GL_READ_ONLY) : GL_WRITE_ONLY; + glBindImageTexture(0, static_cast(texture)->GetGLTextureId(), 0, GL_TRUE, 0, + access, static_cast(texture)->GetGLFormatForImageTexture()); + } + else + { + glBindImageTexture(0, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8); + } + + m_bound_image_texture = texture; +} + void Renderer::UnbindTexture(const AbstractTexture* texture) { for (size_t i = 0; i < m_bound_textures.size(); i++) @@ -1604,15 +1325,34 @@ void Renderer::UnbindTexture(const AbstractTexture* texture) glBindTexture(GL_TEXTURE_2D_ARRAY, 0); m_bound_textures[i] = nullptr; } -} -void Renderer::SetInterlacingMode() -{ - // TODO + if (m_bound_image_texture == texture) + { + glBindImageTexture(0, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8); + m_bound_image_texture = nullptr; + } } std::unique_ptr Renderer::CreateAsyncShaderCompiler() { return std::make_unique(); } + +void Renderer::BindSharedReadFramebuffer() +{ + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_shared_read_framebuffer); +} + +void Renderer::BindSharedDrawFramebuffer() +{ + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_shared_draw_framebuffer); +} + +void Renderer::RestoreFramebufferBinding() +{ + glBindFramebuffer( + GL_FRAMEBUFFER, + m_current_framebuffer ? 
static_cast(m_current_framebuffer)->GetFBO() : 0); +} + } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index 398a49377d..442a31d5c0 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -11,12 +11,11 @@ #include "Common/GL/GLExtensions/GLExtensions.h" #include "VideoCommon/RenderBase.h" -struct XFBSourceBase; - namespace OGL { +class OGLFramebuffer; class OGLPipeline; -void ClearEFBCache(); +class OGLTexture; enum GlslVersion { @@ -86,6 +85,8 @@ public: Renderer(std::unique_ptr main_gl_context, float backbuffer_scale); ~Renderer() override; + static Renderer* GetInstance() { return static_cast(g_renderer.get()); } + bool IsHeadless() const override; bool Initialize() override; @@ -98,73 +99,80 @@ public: size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; void SetPipeline(const AbstractPipeline* pipeline) override; - void SetFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, - const ClearColor& color_value = {}, + void SetFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {}, float depth_value = 0.0f) override; void SetScissorRect(const 
MathUtil::Rectangle& rc) override; void SetTexture(u32 index, const AbstractTexture* texture) override; void SetSamplerState(u32 index, const SamplerState& state) override; + void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override; void UnbindTexture(const AbstractTexture* texture) override; - void SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; - u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; - void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; - u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; - void ResetAPIState() override; - void RestoreAPIState() override; - - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; + void BeginUtilityDrawing() override; + void EndUtilityDrawing() override; void Flush() override; + void WaitForGPUIdle() override; void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override; void OnConfigChanged(u32 bits) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; - void ReinterpretPixelData(unsigned int convtype) override; - std::unique_ptr CreateAsyncShaderCompiler() override; // Only call methods from this on the GPU thread. 
GLContext* GetMainGLContext() const { return m_main_gl_context.get(); } bool IsGLES() const { return m_main_gl_context->IsGLES(); } - const OGLPipeline* GetCurrentGraphicsPipeline() const { return m_graphics_pipeline; } + // Invalidates a cached texture binding. Required for texel buffers when they borrow the units. + void InvalidateTextureBinding(u32 index) { m_bound_textures[index] = nullptr; } + + // The shared framebuffer exists for copying textures when extensions are not available. It is + // slower, but the only way to do these things otherwise. + GLuint GetSharedReadFramebuffer() const { return m_shared_read_framebuffer; } + GLuint GetSharedDrawFramebuffer() const { return m_shared_draw_framebuffer; } + void BindSharedReadFramebuffer(); + void BindSharedDrawFramebuffer(); + + // Restores FBO binding after it's been changed. + void RestoreFramebufferBinding(); private: - void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, - const TargetRectangle& targetPixelRc, const void* data); - void CheckForSurfaceChange(); void CheckForSurfaceResize(); - void ApplyBlendingState(const BlendingState state, bool force = false); - void ApplyRasterizationState(const RasterizationState state, bool force = false); - void ApplyDepthState(const DepthState state, bool force = false); + void ApplyRasterizationState(const RasterizationState state); + void ApplyDepthState(const DepthState state); + void ApplyBlendingState(const BlendingState state); std::unique_ptr m_main_gl_context; - std::array m_bound_textures{}; - const OGLPipeline* m_graphics_pipeline = nullptr; + std::unique_ptr m_system_framebuffer; + std::array m_bound_textures{}; + AbstractTexture* m_bound_image_texture = nullptr; RasterizationState m_current_rasterization_state; DepthState m_current_depth_state; BlendingState m_current_blend_state; + GLuint m_shared_read_framebuffer = 0; + GLuint m_shared_draw_framebuffer = 0; }; } // namespace OGL diff --git 
a/Source/Core/VideoBackends/OGL/StreamBuffer.h b/Source/Core/VideoBackends/OGL/StreamBuffer.h index 2a8efba1af..9946f3d33e 100644 --- a/Source/Core/VideoBackends/OGL/StreamBuffer.h +++ b/Source/Core/VideoBackends/OGL/StreamBuffer.h @@ -19,6 +19,8 @@ public: static std::unique_ptr Create(u32 type, u32 size); virtual ~StreamBuffer(); + u32 GetGLBufferId() const { return m_buffer; } + u32 GetSize() const { return m_size; } u32 GetCurrentOffset() const { return m_iterator; } /* This mapping function will return a pair of: @@ -64,4 +66,4 @@ private: std::array m_fences{}; }; -} +} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp deleted file mode 100644 index 9b0699587b..0000000000 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ /dev/null @@ -1,574 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include - -#include "Common/Assert.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" - -#include "VideoBackends/OGL/FramebufferManager.h" -#include "VideoBackends/OGL/GPUTimer.h" -#include "VideoBackends/OGL/OGLTexture.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" -#include "VideoBackends/OGL/Render.h" -#include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/StreamBuffer.h" -#include "VideoBackends/OGL/TextureCache.h" -#include "VideoBackends/OGL/TextureConverter.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/TextureConverterShaderGen.h" -#include "VideoCommon/TextureDecoder.h" -#include "VideoCommon/VideoCommon.h" -#include "VideoCommon/VideoConfig.h" - -namespace OGL -{ -constexpr const char GLSL_PROGRAM_VS[] = R"GLSL( -out vec3 %c_uv0; -SAMPLER_BINDING(9) uniform sampler2DArray samp9; -uniform vec4 copy_position; // left, top, right, bottom - -void main() -{ 
- vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2); - %c_uv0 = vec3(mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0).xy), 0.0); - gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0); -} -)GLSL"; - -constexpr const char GLSL_PROGRAM_GS[] = R"GLSL( -layout(triangles) in; -layout(triangle_strip, max_vertices = 6) out; -in vec3 v_uv0[3]; -out vec3 f_uv0; -SAMPLER_BINDING(9) uniform sampler2DArray samp9; - -void main() -{ - int layers = textureSize(samp9, 0).z; - for (int layer = 0; layer < layers; ++layer) { - for (int i = 0; i < 3; ++i) { - f_uv0 = vec3(v_uv0[i].xy, layer); - gl_Position = gl_in[i].gl_Position; - gl_Layer = layer; - EmitVertex(); - } - } - EndPrimitive(); -} -)GLSL"; - -constexpr const char GLSL_COLOR_COPY_FS[] = R"GLSL( -SAMPLER_BINDING(9) uniform sampler2DArray samp9; -in vec3 f_uv0; -out vec4 ocol0; - -void main() -{ - vec4 texcol = texture(samp9, f_uv0); - ocol0 = texcol; -} -)GLSL"; - -constexpr const char GLSL_PALETTE_FS[] = R"GLSL( -uniform int texture_buffer_offset; -uniform float multiplier; -SAMPLER_BINDING(9) uniform sampler2DArray samp9; -SAMPLER_BINDING(10) uniform usamplerBuffer samp10; - -in vec3 f_uv0; -out vec4 ocol0; - -int Convert3To8(int v) -{ - // Swizzle bits: 00000123 -> 12312312 - return (v << 5) | (v << 2) | (v >> 1); -} - -int Convert4To8(int v) -{ - // Swizzle bits: 00001234 -> 12341234 - return (v << 4) | v; -} - -int Convert5To8(int v) -{ - // Swizzle bits: 00012345 -> 12345123 - return (v << 3) | (v >> 2); -} - -int Convert6To8(int v) -{ - // Swizzle bits: 00123456 -> 12345612 - return (v << 2) | (v >> 4); -} - -float4 DecodePixel_RGB5A3(int val) -{ - int r,g,b,a; - if ((val&0x8000) > 0) - { - r=Convert5To8((val>>10) & 0x1f); - g=Convert5To8((val>>5 ) & 0x1f); - b=Convert5To8((val ) & 0x1f); - a=0xFF; - } - else - { - a=Convert3To8((val>>12) & 0x7); - r=Convert4To8((val>>8 ) & 0xf); - g=Convert4To8((val>>4 ) & 0xf); - b=Convert4To8((val ) & 0xf); - } - return float4(r, g, b, a) / 255.0; -} - -float4 
DecodePixel_RGB565(int val) -{ - int r, g, b, a; - r = Convert5To8((val >> 11) & 0x1f); - g = Convert6To8((val >> 5) & 0x3f); - b = Convert5To8((val) & 0x1f); - a = 0xFF; - return float4(r, g, b, a) / 255.0; -} - -float4 DecodePixel_IA8(int val) -{ - int i = val & 0xFF; - int a = val >> 8; - return float4(i, i, i, a) / 255.0; -} - -void main() -{ - int src = int(round(texture(samp9, f_uv0).r * multiplier)); - src = int(texelFetch(samp10, src + texture_buffer_offset).r); - src = ((src << 8) & 0xFF00) | (src >> 8); - ocol0 = DecodePixel_%s(src); -} -)GLSL"; - -//#define TIME_TEXTURE_DECODING 1 - -void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - // Flip top/bottom due to lower-left coordinate system. - float clamp_top_val = - clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f; - float clamp_bottom_val = - clamp_top ? (1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 1.0f; - TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y, - memory_stride, src_rect, scale_by_half, y_scale, gamma, - clamp_top_val, clamp_bottom_val, filter_coefficients); -} - -TextureCache::TextureCache() -{ - CompileShaders(); - - if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) - { - s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1); - s32 buffer_size = buffer_size_mb * 1024 * 1024; - s32 max_buffer_size = 0; - - // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB - // buffer here. This buffer is also used as storage for undecoded textures when compute shader - // texture decoding is enabled, in which case the requested size is 32MB. 
- glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size); - - // Clamp the buffer size to the maximum size that the driver supports. - buffer_size = std::min(buffer_size, max_buffer_size); - - m_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size); - glGenTextures(1, &m_palette_resolv_texture); - glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); - glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_palette_stream_buffer->m_buffer); - - if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding) - CreateTextureDecodingResources(); - } -} - -TextureCache::~TextureCache() -{ - DeleteShaders(); - if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding) - DestroyTextureDecodingResources(); - - if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) - { - glDeleteTextures(1, &m_palette_resolv_texture); - } -} - -TextureCache* TextureCache::GetInstance() -{ - return static_cast(g_texture_cache.get()); -} - -const SHADER& TextureCache::GetColorCopyProgram() const -{ - return m_colorCopyProgram; -} - -GLuint TextureCache::GetColorCopyPositionUniform() const -{ - return m_colorCopyPositionUniform; -} - -bool TextureCache::CompilePaletteShader(TLUTFormat tlutfmt, const std::string& vcode, - const std::string& pcode, const std::string& gcode) -{ - ASSERT(IsValidTLUTFormat(tlutfmt)); - PaletteShader& shader = m_palette_shaders[static_cast(tlutfmt)]; - - if (!ProgramShaderCache::CompileShader(shader.shader, vcode, pcode, gcode)) - return false; - - shader.buffer_offset_uniform = - glGetUniformLocation(shader.shader.glprogid, "texture_buffer_offset"); - shader.multiplier_uniform = glGetUniformLocation(shader.shader.glprogid, "multiplier"); - shader.copy_position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position"); - - return true; -} - -bool TextureCache::CompileShaders() -{ - std::string geo_program = ""; - char prefix = 'f'; - if (g_ActiveConfig.stereo_mode != StereoMode::Off) - { - geo_program = GLSL_PROGRAM_GS; - 
prefix = 'v'; - } - - if (!ProgramShaderCache::CompileShader(m_colorCopyProgram, - StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - GLSL_COLOR_COPY_FS, geo_program)) - { - return false; - } - - m_colorCopyPositionUniform = glGetUniformLocation(m_colorCopyProgram.glprogid, "copy_position"); - - if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) - { - if (!CompilePaletteShader(TLUTFormat::IA8, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - StringFromFormat(GLSL_PALETTE_FS, "IA8"), geo_program)) - return false; - - if (!CompilePaletteShader(TLUTFormat::RGB565, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - StringFromFormat(GLSL_PALETTE_FS, "RGB565"), geo_program)) - return false; - - if (!CompilePaletteShader(TLUTFormat::RGB5A3, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - StringFromFormat(GLSL_PALETTE_FS, "RGB5A3"), geo_program)) - return false; - } - - return true; -} - -void TextureCache::DeleteShaders() -{ - for (auto& it : m_efb_copy_programs) - it.second.shader.Destroy(); - m_efb_copy_programs.clear(); - - if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) - for (auto& shader : m_palette_shaders) - shader.shader.Destroy(); -} - -void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, - const void* palette, TLUTFormat tlutfmt) -{ - if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion) - return; - - ASSERT(IsValidTLUTFormat(tlutfmt)); - const PaletteShader& palette_shader = m_palette_shaders[static_cast(tlutfmt)]; - - g_renderer->ResetAPIState(); - - OGLTexture* source_texture = static_cast(source->texture.get()); - OGLTexture* destination_texture = static_cast(destination->texture.get()); - - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, source_texture->GetRawTexIdentifier()); - g_sampler_cache->BindNearestSampler(9); - - FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer()); - glViewport(0, 0, destination->GetWidth(), destination->GetHeight()); - 
palette_shader.shader.Bind(); - - // C14 textures are currently unsupported - int size = source->format == TextureFormat::I4 ? 32 : 512; - auto buffer = m_palette_stream_buffer->Map(size); - memcpy(buffer.first, palette, size); - m_palette_stream_buffer->Unmap(size); - glUniform1i(palette_shader.buffer_offset_uniform, buffer.second / 2); - glUniform1f(palette_shader.multiplier_uniform, - source->format == TextureFormat::I4 ? 15.0f : 255.0f); - glUniform4f(palette_shader.copy_position_uniform, 0.0f, 0.0f, - static_cast(source->GetWidth()), static_cast(source->GetHeight())); - - glActiveTexture(GL_TEXTURE10); - glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); - g_sampler_cache->BindNearestSampler(10); - - ProgramShaderCache::BindVertexFormat(nullptr); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - g_renderer->RestoreAPIState(); -} - -static const std::string decoding_vertex_shader = R"( -void main() -{ - vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2); - gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0); -} -)"; - -void TextureCache::CreateTextureDecodingResources() -{ - static const GLenum gl_view_types[TextureConversionShaderTiled::BUFFER_FORMAT_COUNT] = { - GL_R8UI, // BUFFER_FORMAT_R8_UINT - GL_R16UI, // BUFFER_FORMAT_R16_UINT - GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT - GL_RGBA8UI, // BUFFER_FORMAT_RGBA8_UINT - }; - - glGenTextures(TextureConversionShaderTiled::BUFFER_FORMAT_COUNT, - m_texture_decoding_buffer_views.data()); - for (size_t i = 0; i < TextureConversionShaderTiled::BUFFER_FORMAT_COUNT; i++) - { - glBindTexture(GL_TEXTURE_BUFFER, m_texture_decoding_buffer_views[i]); - glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], m_palette_stream_buffer->m_buffer); - } -} - -void TextureCache::DestroyTextureDecodingResources() -{ - glDeleteTextures(TextureConversionShaderTiled::BUFFER_FORMAT_COUNT, - m_texture_decoding_buffer_views.data()); - m_texture_decoding_buffer_views.fill(0); - m_texture_decoding_program_info.clear(); -} - -bool 
TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) -{ - auto key = std::make_pair(static_cast(format), static_cast(palette_format)); - auto iter = m_texture_decoding_program_info.find(key); - if (iter != m_texture_decoding_program_info.end()) - return iter->second.valid; - - TextureDecodingProgramInfo info; - info.base_info = TextureConversionShaderTiled::GetDecodingShaderInfo(format); - if (!info.base_info) - { - m_texture_decoding_program_info.emplace(key, info); - return false; - } - - std::string shader_source = - TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::OpenGL); - if (shader_source.empty()) - { - m_texture_decoding_program_info.emplace(key, info); - return false; - } - - if (!ProgramShaderCache::CompileComputeShader(info.program, shader_source)) - { - m_texture_decoding_program_info.emplace(key, info); - return false; - } - - info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size"); - info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size"); - info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset"); - info.uniform_src_row_stride = glGetUniformLocation(info.program.glprogid, "u_src_row_stride"); - info.uniform_palette_offset = glGetUniformLocation(info.program.glprogid, "u_palette_offset"); - info.valid = true; - m_texture_decoding_program_info.emplace(key, info); - return true; -} - -void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, - size_t data_size, TextureFormat format, u32 width, u32 height, - u32 aligned_width, u32 aligned_height, u32 row_stride, - const u8* palette, TLUTFormat palette_format) -{ - auto key = std::make_pair(static_cast(format), static_cast(palette_format)); - auto iter = m_texture_decoding_program_info.find(key); - if (iter == m_texture_decoding_program_info.end()) - return; - -#ifdef TIME_TEXTURE_DECODING - GPUTimer timer; -#endif - - // 
Copy to GPU-visible buffer, aligned to the data type. - auto info = iter->second; - u32 bytes_per_buffer_elem = - TextureConversionShaderTiled::GetBytesPerBufferElement(info.base_info->buffer_format); - - // Only copy palette if it is required. - bool has_palette = info.base_info->palette_size > 0; - u32 total_upload_size = static_cast(data_size); - u32 palette_offset = total_upload_size; - if (has_palette) - { - // Align to u16. - if ((total_upload_size % sizeof(u16)) != 0) - { - total_upload_size++; - palette_offset++; - } - - total_upload_size += info.base_info->palette_size; - } - - // Allocate space in stream buffer, and copy texture + palette across. - auto buffer = m_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem); - memcpy(buffer.first, data, data_size); - if (has_palette) - memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size); - m_palette_stream_buffer->Unmap(total_upload_size); - - info.program.Bind(); - - // Calculate stride in buffer elements - u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem; - u32 offset_in_elements = buffer.second / bytes_per_buffer_elem; - u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16); - if (info.uniform_dst_size >= 0) - glUniform2ui(info.uniform_dst_size, width, height); - if (info.uniform_src_size >= 0) - glUniform2ui(info.uniform_src_size, aligned_width, aligned_height); - if (info.uniform_src_offset >= 0) - glUniform1ui(info.uniform_src_offset, offset_in_elements); - if (info.uniform_src_row_stride >= 0) - glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements); - if (info.uniform_palette_offset >= 0) - glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements); - - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_BUFFER, m_texture_decoding_buffer_views[info.base_info->buffer_format]); - - if (has_palette) - { - // Use an R16UI view for the palette. 
- glActiveTexture(GL_TEXTURE10); - glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); - } - - auto dispatch_groups = - TextureConversionShaderTiled::GetDispatchCount(info.base_info, aligned_width, aligned_height); - glBindImageTexture(0, static_cast(entry->texture.get())->GetRawTexIdentifier(), - dst_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8); - glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1); - glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); - -#ifdef TIME_TEXTURE_DECODING - WARN_LOG(VIDEO, "Decode texture format %u size %ux%u took %.4fms", static_cast(format), - width, height, timer.GetTimeMilliseconds()); -#endif -} - -void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, - const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - auto* destination_texture = static_cast(entry->texture.get()); - g_renderer->ResetAPIState(); // reset any game specific settings - - // Make sure to resolve anything we need to read from. - const GLuint read_texture = is_depth_copy ? 
- FramebufferManager::ResolveAndGetDepthTarget(src_rect) : - FramebufferManager::ResolveAndGetRenderTarget(src_rect); - - FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer()); - - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, read_texture); - if (scale_by_half) - g_sampler_cache->BindLinearSampler(9); - else - g_sampler_cache->BindNearestSampler(9); - - glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height); - - auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, - scale_by_half, - NeedsCopyFilterInShader(filter_coefficients)); - - auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader()); - EFBCopyShader& shader = it.first->second; - bool created = it.second; - - if (created) - { - ShaderCode code = TextureConversionShaderGen::GenerateShader(APIType::OpenGL, uid.GetUidData()); - - std::string geo_program = ""; - char prefix = 'f'; - if (g_ActiveConfig.stereo_mode != StereoMode::Off) - { - geo_program = GLSL_PROGRAM_GS; - prefix = 'v'; - } - - ProgramShaderCache::CompileShader(shader.shader, - StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - code.GetBuffer(), geo_program); - - shader.position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position"); - shader.pixel_height_uniform = glGetUniformLocation(shader.shader.glprogid, "pixel_height"); - shader.gamma_rcp_uniform = glGetUniformLocation(shader.shader.glprogid, "gamma_rcp"); - shader.clamp_tb_uniform = glGetUniformLocation(shader.shader.glprogid, "clamp_tb"); - shader.filter_coefficients_uniform = - glGetUniformLocation(shader.shader.glprogid, "filter_coefficients"); - } - - shader.shader.Bind(); - - TargetRectangle R = g_renderer->ConvertEFBRectangle(src_rect); - glUniform4f(shader.position_uniform, static_cast(R.left), static_cast(R.top), - static_cast(R.right), static_cast(R.bottom)); - glUniform1f(shader.pixel_height_uniform, g_ActiveConfig.bCopyEFBScaled ? 
- 1.0f / g_renderer->GetTargetHeight() : - 1.0f / EFB_HEIGHT); - glUniform1f(shader.gamma_rcp_uniform, 1.0f / gamma); - glUniform2f(shader.clamp_tb_uniform, - clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f, - clamp_top ? (1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 1.0f); - glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0], filter_coefficients[1], - filter_coefficients[2]); - - ProgramShaderCache::BindVertexFormat(nullptr); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - g_renderer->RestoreAPIState(); -} -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h deleted file mode 100644 index 89fc82ee9d..0000000000 --- a/Source/Core/VideoBackends/OGL/TextureCache.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/GL/GLUtil.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" - -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/TextureConverterShaderGen.h" -#include "VideoCommon/VideoCommon.h" - -class AbstractTexture; -class StreamBuffer; -struct TextureConfig; - -namespace OGL -{ -class TextureCache : public TextureCacheBase -{ -public: - TextureCache(); - ~TextureCache(); - - static TextureCache* GetInstance(); - - bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override; - void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size, - TextureFormat format, u32 width, u32 height, u32 aligned_width, - u32 aligned_height, u32 row_stride, const u8* palette, - TLUTFormat palette_format) override; - - const SHADER& GetColorCopyProgram() const; - GLuint GetColorCopyPositionUniform() const; - -private: - struct PaletteShader - { - SHADER shader; 
- GLuint buffer_offset_uniform; - GLuint multiplier_uniform; - GLuint copy_position_uniform; - }; - - struct TextureDecodingProgramInfo - { - const TextureConversionShaderTiled::DecodingShaderInfo* base_info = nullptr; - SHADER program; - GLint uniform_dst_size = -1; - GLint uniform_src_size = -1; - GLint uniform_src_row_stride = -1; - GLint uniform_src_offset = -1; - GLint uniform_palette_offset = -1; - bool valid = false; - }; - - void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, - TLUTFormat format) override; - - void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - bool CompileShaders() override; - void DeleteShaders() override; - - bool CompilePaletteShader(TLUTFormat tlutfmt, const std::string& vcode, const std::string& pcode, - const std::string& gcode); - - void CreateTextureDecodingResources(); - void DestroyTextureDecodingResources(); - - struct EFBCopyShader - { - SHADER shader; - GLuint position_uniform; - GLuint pixel_height_uniform; - GLuint gamma_rcp_uniform; - GLuint clamp_tb_uniform; - GLuint filter_coefficients_uniform; - }; - - std::map m_efb_copy_programs; - - SHADER m_colorCopyProgram; - GLuint m_colorCopyPositionUniform; - - std::array m_palette_shaders; - std::unique_ptr m_palette_stream_buffer; - GLuint m_palette_resolv_texture = 0; - - std::map, TextureDecodingProgramInfo> m_texture_decoding_program_info; - std::array - m_texture_decoding_buffer_views; 
-}; -} diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp deleted file mode 100644 index 095365140f..0000000000 --- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// Fast image conversion using OpenGL shaders. - -#include "VideoBackends/OGL/TextureConverter.h" - -#include - -#include "Common/CommonTypes.h" -#include "Common/FileUtil.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" - -#include "Core/HW/Memmap.h" - -#include "VideoBackends/OGL/FramebufferManager.h" -#include "VideoBackends/OGL/OGLTexture.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" -#include "VideoBackends/OGL/Render.h" -#include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/TextureCache.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/VideoCommon.h" -#include "VideoCommon/VideoConfig.h" - -namespace OGL -{ -namespace TextureConverter -{ -namespace -{ -struct EncodingProgram -{ - SHADER program; - GLint copy_position_uniform; - GLint y_scale_uniform; - GLint gamma_rcp_uniform; - GLint clamp_tb_uniform; - GLint filter_coefficients_uniform; -}; - -std::map s_encoding_programs; -std::unique_ptr s_encoding_render_texture; - -const int renderBufferWidth = EFB_WIDTH * 4; -const int renderBufferHeight = 1024; -} // namespace - -static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params) -{ - auto iter = s_encoding_programs.find(params); - if (iter != s_encoding_programs.end()) - return iter->second; - - const char* shader = - TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::OpenGL); - -#if defined(_DEBUG) || defined(DEBUGFAST) - if (g_ActiveConfig.iLog & CONF_SAVESHADERS && shader) - { - static int counter = 
0; - std::string filename = - StringFromFormat("%senc_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - - SaveData(filename, shader); - } -#endif - - const char* VProgram = "void main()\n" - "{\n" - " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" - " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" - "}\n"; - - EncodingProgram program; - if (!ProgramShaderCache::CompileShader(program.program, VProgram, shader)) - PanicAlert("Failed to compile texture encoding shader."); - - program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position"); - program.y_scale_uniform = glGetUniformLocation(program.program.glprogid, "y_scale"); - program.gamma_rcp_uniform = glGetUniformLocation(program.program.glprogid, "gamma_rcp"); - program.clamp_tb_uniform = glGetUniformLocation(program.program.glprogid, "clamp_tb"); - program.filter_coefficients_uniform = - glGetUniformLocation(program.program.glprogid, "filter_coefficients"); - return s_encoding_programs.emplace(params, program).first->second; -} - -void Init() -{ - s_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); -} - -void Shutdown() -{ - s_encoding_render_texture.reset(); - - for (auto& program : s_encoding_programs) - program.second.program.Destroy(); - s_encoding_programs.clear(); -} - -// dst_line_size, writeStride in bytes - -static void EncodeToRamUsingShader(GLuint srcTexture, AbstractStagingTexture* destAddr, - u32 dst_line_size, u32 dstHeight, u32 writeStride, - bool linearFilter, float y_scale) -{ - FramebufferManager::SetFramebuffer( - static_cast(s_encoding_render_texture.get())->GetFramebuffer()); - - // set source texture - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, srcTexture); - - // We also linear filtering for both box filtering and downsampling higher resolutions to 1x - // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more - // complex down filtering to 
average all pixels and produce the correct result. - // Also, box filtering won't be correct for anything other than 1x IR - if (linearFilter || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f) - g_sampler_cache->BindLinearSampler(9); - else - g_sampler_cache->BindNearestSampler(9); - - glViewport(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight); - - ProgramShaderCache::BindVertexFormat(nullptr); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - MathUtil::Rectangle copy_rect(0, 0, dst_line_size / 4, dstHeight); - - destAddr->CopyFromTexture(s_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); -} - -void EncodeToRamFromTexture(AbstractStagingTexture* dest, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, - u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma, float clamp_top, float clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) -{ - g_renderer->ResetAPIState(); - - EncodingProgram& texconv_shader = GetOrCreateEncodingShader(params); - - texconv_shader.program.Bind(); - glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width, - scale_by_half ? 2 : 1); - glUniform1f(texconv_shader.y_scale_uniform, y_scale); - glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma); - glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom); - glUniform3f(texconv_shader.filter_coefficients_uniform, filter_coefficients[0], - filter_coefficients[1], filter_coefficients[2]); - - const GLuint read_texture = params.depth ? 
- FramebufferManager::ResolveAndGetDepthTarget(src_rect) : - FramebufferManager::ResolveAndGetRenderTarget(src_rect); - - EncodeToRamUsingShader(read_texture, dest, bytes_per_row, num_blocks_y, memory_stride, - scale_by_half && !params.depth, y_scale); - - g_renderer->RestoreAPIState(); -} - -} // namespace TextureConverter - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.h b/Source/Core/VideoBackends/OGL/TextureConverter.h deleted file mode 100644 index 575fbf5bd0..0000000000 --- a/Source/Core/VideoBackends/OGL/TextureConverter.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Common/CommonTypes.h" -#include "Common/GL/GLUtil.h" - -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/VideoCommon.h" - -struct EFBCopyParams; -class AbstractStagingTexture; - -namespace OGL -{ -// Converts textures between formats using shaders -// TODO: support multiple texture formats -namespace TextureConverter -{ -void Init(); -void Shutdown(); - -// returns size of the encoded data (in bytes) -void EncodeToRamFromTexture( - AbstractStagingTexture* dest, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma, float clamp_top, float clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); -} - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index 3883a13595..543a85dac8 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -9,17 +9,14 @@ #include #include +#include "Common/Align.h" #include "Common/CommonTypes.h" -#include "Common/FileUtil.h" #include "Common/GL/GLExtensions/GLExtensions.h" -#include 
"Common/StringUtil.h" -#include "VideoBackends/OGL/BoundingBox.h" #include "VideoBackends/OGL/OGLPipeline.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/StreamBuffer.h" -#include "VideoCommon/BoundingBox.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/Statistics.h" @@ -28,38 +25,127 @@ namespace OGL { -// This are the initially requested size for the buffers expressed in bytes -const u32 MAX_IBUFFER_SIZE = 2 * 1024 * 1024; -const u32 MAX_VBUFFER_SIZE = 32 * 1024 * 1024; - -VertexManager::VertexManager() : m_cpu_v_buffer(MAX_VBUFFER_SIZE), m_cpu_i_buffer(MAX_IBUFFER_SIZE) +static void CheckBufferBinding() { - CreateDeviceObjects(); + // The index buffer is part of the VAO state, therefore we need to bind it first. + if (!ProgramShaderCache::IsValidVertexFormatBound()) + { + ProgramShaderCache::BindVertexFormat( + static_cast(VertexLoaderManager::GetCurrentVertexFormat())); + } } +VertexManager::VertexManager() = default; + VertexManager::~VertexManager() { - DestroyDeviceObjects(); -} + if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) + { + glDeleteTextures(static_cast(m_texel_buffer_views.size()), + m_texel_buffer_views.data()); + } -void VertexManager::CreateDeviceObjects() -{ - m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, MAX_VBUFFER_SIZE); - m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE); -} - -void VertexManager::DestroyDeviceObjects() -{ - m_vertex_buffer.reset(); + // VAO must be found when destroying the index buffer. 
+ CheckBufferBinding(); + m_texel_buffer.reset(); m_index_buffer.reset(); + m_vertex_buffer.reset(); +} + +bool VertexManager::Initialize() +{ + if (!VertexManagerBase::Initialize()) + return false; + + m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_STREAM_BUFFER_SIZE); + m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_STREAM_BUFFER_SIZE); + + if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) + { + // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB + // buffer here. This buffer is also used as storage for undecoded textures when compute shader + // texture decoding is enabled, in which case the requested size is 32MB. + GLint max_buffer_size; + glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size); + m_texel_buffer = StreamBuffer::Create( + GL_TEXTURE_BUFFER, std::min(max_buffer_size, static_cast(TEXEL_STREAM_BUFFER_SIZE))); + + // Allocate texture views backed by buffer. + static constexpr std::array, NUM_TEXEL_BUFFER_FORMATS> + format_mapping = {{ + {TEXEL_BUFFER_FORMAT_R8_UINT, GL_R8UI}, + {TEXEL_BUFFER_FORMAT_R16_UINT, GL_R16UI}, + {TEXEL_BUFFER_FORMAT_RGBA8_UINT, GL_RGBA8}, + {TEXEL_BUFFER_FORMAT_R32G32_UINT, GL_RG32UI}, + }}; + glGenTextures(static_cast(m_texel_buffer_views.size()), m_texel_buffer_views.data()); + glActiveTexture(GL_MUTABLE_TEXTURE_INDEX); + for (const auto& it : format_mapping) + { + glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[it.first]); + glTexBuffer(GL_TEXTURE_BUFFER, it.second, m_texel_buffer->GetGLBufferId()); + } + } + + return true; } void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) { - ProgramShaderCache::InvalidateConstants(); + InvalidateConstants(); ProgramShaderCache::UploadConstants(uniforms, uniforms_size); } +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) +{ + if (data_size > m_texel_buffer->GetSize()) + return false; + 
+ const u32 elem_size = GetTexelBufferElementSize(format); + const auto dst = m_texel_buffer->Map(data_size, elem_size); + std::memcpy(dst.first, data, data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); + *out_offset = dst.second / elem_size; + m_texel_buffer->Unmap(data_size); + + // Bind the correct view to the texel buffer slot. + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast(format)]); + Renderer::GetInstance()->InvalidateTextureBinding(0); + return true; +} + +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) +{ + const u32 elem_size = GetTexelBufferElementSize(format); + const u32 palette_elem_size = GetTexelBufferElementSize(palette_format); + const u32 reserve_size = data_size + palette_size + palette_elem_size; + if (reserve_size > m_texel_buffer->GetSize()) + return false; + + const auto dst = m_texel_buffer->Map(reserve_size, elem_size); + const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size); + std::memcpy(dst.first, data, data_size); + std::memcpy(dst.first + palette_byte_offset, palette_data, palette_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size); + *out_offset = dst.second / elem_size; + *out_palette_offset = (dst.second + palette_byte_offset) / palette_elem_size; + m_texel_buffer->Unmap(palette_byte_offset + palette_size); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast(format)]); + Renderer::GetInstance()->InvalidateTextureBinding(0); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast(palette_format)]); + Renderer::GetInstance()->InvalidateTextureBinding(1); + + return true; +} + GLuint VertexManager::GetVertexBufferHandle() const { return 
m_vertex_buffer->m_buffer; @@ -70,37 +156,16 @@ GLuint VertexManager::GetIndexBufferHandle() const return m_index_buffer->m_buffer; } -static void CheckBufferBinding() +void VertexManager::ResetBuffer(u32 vertex_stride) { - // The index buffer is part of the VAO state, therefore we need to bind it first. - if (!ProgramShaderCache::IsValidVertexFormatBound()) - { - ProgramShaderCache::BindVertexFormat( - static_cast(VertexLoaderManager::GetCurrentVertexFormat())); - } -} + CheckBufferBinding(); -void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) -{ - if (cull_all) - { - // This buffer isn't getting sent to the GPU. Just allocate it on the cpu. - m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_v_buffer.data(); - m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_v_buffer.size(); + auto buffer = m_vertex_buffer->Map(MAXVBUFFERSIZE, vertex_stride); + m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first; + m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE; - IndexGenerator::Start((u16*)m_cpu_i_buffer.data()); - } - else - { - CheckBufferBinding(); - - auto buffer = m_vertex_buffer->Map(MAXVBUFFERSIZE, vertex_stride); - m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first; - m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE; - - buffer = m_index_buffer->Map(MAXIBUFFERSIZE * sizeof(u16)); - IndexGenerator::Start((u16*)buffer.first); - } + buffer = m_index_buffer->Map(MAXIBUFFERSIZE * sizeof(u16)); + IndexGenerator::Start((u16*)buffer.first); } void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, @@ -120,31 +185,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size); } -void VertexManager::UploadConstants() +void VertexManager::UploadUniforms() { ProgramShaderCache::UploadConstants(); } - -void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) -{ - if (::BoundingBox::active && 
!g_Config.BBoxUseFragmentShaderImplementation()) - { - glEnable(GL_STENCIL_TEST); - } - - if (m_current_pipeline_object) - { - static_cast(g_renderer.get())->SetPipeline(m_current_pipeline_object); - static_cast(g_renderer.get())->DrawIndexed(base_index, num_indices, base_vertex); - } - - if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) - { - OGL::BoundingBox::StencilWasUpdated(); - glDisable(GL_STENCIL_TEST); - } - - g_Config.iSaveTargetId++; - ClearEFBCache(); -} } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/VertexManager.h b/Source/Core/VideoBackends/OGL/VertexManager.h index 388c559716..b9b410855c 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.h +++ b/Source/Core/VideoBackends/OGL/VertexManager.h @@ -4,8 +4,8 @@ #pragma once +#include #include -#include #include "Common/CommonTypes.h" #include "Common/GL/GLUtil.h" @@ -26,35 +26,34 @@ public: // Handles the OpenGL details of drawing lots of vertices quickly. // Other functionality is moving out. 
-class VertexManager : public VertexManagerBase +class VertexManager final : public VertexManagerBase { public: VertexManager(); - ~VertexManager(); + ~VertexManager() override; - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + bool Initialize() override; void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) override; GLuint GetVertexBufferHandle() const; GLuint GetIndexBufferHandle() const; protected: - void CreateDeviceObjects() override; - void DestroyDeviceObjects() override; - void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void ResetBuffer(u32 vertex_stride) override; void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, u32* out_base_index) override; - void UploadConstants() override; - void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; + void UploadUniforms() override; private: std::unique_ptr m_vertex_buffer; std::unique_ptr m_index_buffer; - - // Alternative buffers in CPU memory for primatives we are going to discard. 
- std::vector m_cpu_v_buffer; - std::vector m_cpu_i_buffer; + std::unique_ptr m_texel_buffer; + std::array m_texel_buffer_views{}; }; } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 3624a7fd7a..76cd7b2ca6 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -50,13 +50,11 @@ Make AA apply instantly during gameplay if possible #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/TextureCache.h" -#include "VideoBackends/OGL/TextureConverter.h" #include "VideoBackends/OGL/VertexManager.h" #include "VideoBackends/OGL/VideoBackend.h" -#include "VideoCommon/OnScreenDisplay.h" -#include "VideoCommon/VideoCommon.h" +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoConfig.h" namespace OGL @@ -78,6 +76,7 @@ void VideoBackend::InitBackendInfo() { g_Config.backend_info.api_type = APIType::OpenGL; g_Config.backend_info.MaxTextureSize = 16384; + g_Config.backend_info.bUsesLowerLeftOrigin = true; g_Config.backend_info.bSupportsExclusiveFullscreen = false; g_Config.backend_info.bSupportsOversizedViewports = true; g_Config.backend_info.bSupportsGeometryShaders = true; @@ -89,6 +88,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsLogicOp = true; g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsCopyToVram = true; + g_Config.backend_info.bSupportsLargePoints = true; // TODO: There is a bug here, if texel buffers are not supported the graphics options // will show the option when it is not supported. 
The only way around this would be @@ -173,17 +173,26 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) return false; g_renderer = std::make_unique(std::move(main_gl_context), wsi.render_surface_scale); - g_vertex_manager = std::make_unique(); - g_perf_query = GetPerfQuery(); ProgramShaderCache::Init(); - g_texture_cache = std::make_unique(); - g_sampler_cache = std::make_unique(); + g_vertex_manager = std::make_unique(); g_shader_cache = std::make_unique(); - if (!g_renderer->Initialize()) + g_framebuffer_manager = std::make_unique(); + g_perf_query = GetPerfQuery(); + g_texture_cache = std::make_unique(); + g_sampler_cache = std::make_unique(); + BoundingBox::Init(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + Shutdown(); return false; - TextureConverter::Init(); - BoundingBox::Init(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight()); - return g_shader_cache->Initialize(); + } + + g_shader_cache->InitializeShaderCache(); + return true; } void VideoBackend::Shutdown() @@ -191,13 +200,13 @@ void VideoBackend::Shutdown() g_shader_cache->Shutdown(); g_renderer->Shutdown(); BoundingBox::Shutdown(); - TextureConverter::Shutdown(); - g_shader_cache.reset(); g_sampler_cache.reset(); g_texture_cache.reset(); - ProgramShaderCache::Shutdown(); g_perf_query.reset(); g_vertex_manager.reset(); + g_framebuffer_manager.reset(); + g_shader_cache.reset(); + ProgramShaderCache::Shutdown(); g_renderer.reset(); ShutdownShared(); } diff --git a/Source/Core/VideoBackends/Software/SWRenderer.cpp b/Source/Core/VideoBackends/Software/SWRenderer.cpp index 6369b35b20..4b57b9b551 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.cpp +++ b/Source/Core/VideoBackends/Software/SWRenderer.cpp @@ -19,11 +19,15 @@ #include "VideoCommon/AbstractPipeline.h" #include 
"VideoCommon/AbstractShader.h" +#include "VideoCommon/AbstractTexture.h" #include "VideoCommon/BoundingBox.h" +#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoConfig.h" +namespace SW +{ SWRenderer::SWRenderer(std::unique_ptr window) : ::Renderer(static_cast(MAX_XFB_WIDTH), static_cast(MAX_XFB_HEIGHT), 1.0f, AbstractTextureFormat::RGBA8), @@ -38,21 +42,20 @@ bool SWRenderer::IsHeadless() const std::unique_ptr SWRenderer::CreateTexture(const TextureConfig& config) { - return std::make_unique(config); + return std::make_unique(config); } std::unique_ptr SWRenderer::CreateStagingTexture(StagingTextureType type, const TextureConfig& config) { - return std::make_unique(type, config); + return std::make_unique(type, config); } std::unique_ptr -SWRenderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +SWRenderer::CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) { - return SW::SWFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return SWFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } class SWShader final : public AbstractShader @@ -132,18 +135,15 @@ void SWRenderer::BBoxWrite(int index, u16 value) BoundingBox::coords[index] = value; } -TargetRectangle SWRenderer::ConvertEFBRectangle(const EFBRectangle& rc) -{ - TargetRectangle result; - result.left = rc.left; - result.top = rc.top; - result.right = rc.right; - result.bottom = rc.bottom; - return result; -} - void SWRenderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) { EfbCopy::ClearEfb(); } + +std::unique_ptr +SWRenderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +{ + return std::make_unique(vtx_decl); +} +} // namespace SW diff --git 
a/Source/Core/VideoBackends/Software/SWRenderer.h b/Source/Core/VideoBackends/Software/SWRenderer.h index 88c5ece360..a06ccaecd2 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.h +++ b/Source/Core/VideoBackends/Software/SWRenderer.h @@ -12,7 +12,9 @@ class SWOGLWindow; -class SWRenderer : public Renderer +namespace SW +{ +class SWRenderer final : public Renderer { public: SWRenderer(std::unique_ptr window); @@ -23,13 +25,14 @@ public: std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; std::unique_ptr CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; @@ -37,15 +40,18 @@ public: u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; - void ReinterpretPixelData(unsigned int convtype) override {} + void ReinterpretPixelData(EFBReinterpretType convtype) override {} + + void ScaleTexture(AbstractFramebuffer* dst_framebuffer, const MathUtil::Rectangle& dst_rect, + const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect) override; private: std::unique_ptr m_window; }; +} // namespace SW 
diff --git a/Source/Core/VideoBackends/Software/SWTexture.cpp b/Source/Core/VideoBackends/Software/SWTexture.cpp index eb450d642f..466da1efd5 100644 --- a/Source/Core/VideoBackends/Software/SWTexture.cpp +++ b/Source/Core/VideoBackends/Software/SWTexture.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "VideoBackends/Software/SWTexture.h" +#include "VideoBackends/Software/SWRenderer.h" #include #include "Common/Assert.h" @@ -45,6 +46,25 @@ void CopyTextureData(const TextureConfig& src_config, const u8* src_ptr, u32 src dst_ptr += dst_stride; } } +} // namespace + +void SWRenderer::ScaleTexture(AbstractFramebuffer* dst_framebuffer, + const MathUtil::Rectangle& dst_rect, + const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect) +{ + const SWTexture* software_source_texture = static_cast(src_texture); + SWTexture* software_dest_texture = static_cast(dst_framebuffer->GetColorAttachment()); + + std::vector source_pixels; + source_pixels.resize(src_rect.GetHeight() * src_rect.GetWidth() * 4); + memcpy(source_pixels.data(), software_source_texture->GetData(), source_pixels.size()); + + std::vector destination_pixels; + destination_pixels.resize(dst_rect.GetHeight() * dst_rect.GetWidth() * 4); + + CopyRegion(source_pixels.data(), src_rect, destination_pixels.data(), dst_rect); + memcpy(software_dest_texture->GetData(), destination_pixels.data(), destination_pixels.size()); } SWTexture::SWTexture(const TextureConfig& tex_config) : AbstractTexture(tex_config) @@ -62,30 +82,6 @@ void SWTexture::CopyRectangleFromTexture(const AbstractTexture* src, src_rect.left, src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(), m_config, m_data.data(), dst_rect.left, dst_rect.top); } -void SWTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) -{ - const SWTexture* software_source_texture = static_cast(source); - - if (srcrect.GetWidth() == 
dstrect.GetWidth() && srcrect.GetHeight() == dstrect.GetHeight()) - { - m_data.assign(software_source_texture->GetData(), - software_source_texture->GetData() + m_data.size()); - } - else - { - std::vector source_pixels; - source_pixels.resize(srcrect.GetHeight() * srcrect.GetWidth() * 4); - memcpy(source_pixels.data(), software_source_texture->GetData(), source_pixels.size()); - - std::vector destination_pixels; - destination_pixels.resize(dstrect.GetHeight() * dstrect.GetWidth() * 4); - - CopyRegion(source_pixels.data(), srcrect, destination_pixels.data(), dstrect); - memcpy(GetData(), destination_pixels.data(), destination_pixels.size()); - } -} void SWTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) { @@ -153,14 +149,16 @@ void SWStagingTexture::Flush() m_needs_flush = false; } -SWFramebuffer::SWFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, +SWFramebuffer::SWFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples) - : AbstractFramebuffer(color_format, depth_format, width, height, layers, samples) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples) { } -std::unique_ptr SWFramebuffer::Create(const SWTexture* color_attachment, - const SWTexture* depth_attachment) +std::unique_ptr SWFramebuffer::Create(SWTexture* color_attachment, + SWTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; @@ -175,8 +173,8 @@ std::unique_ptr SWFramebuffer::Create(const SWTexture* color_atta const u32 layers = either_attachment->GetLayers(); const u32 samples = either_attachment->GetSamples(); - return std::make_unique(color_format, depth_format, width, height, layers, - samples); + return std::make_unique(color_attachment, 
depth_attachment, color_format, + depth_format, width, height, layers, samples); } } // namespace SW diff --git a/Source/Core/VideoBackends/Software/SWTexture.h b/Source/Core/VideoBackends/Software/SWTexture.h index f9d5c50485..7990600e81 100644 --- a/Source/Core/VideoBackends/Software/SWTexture.h +++ b/Source/Core/VideoBackends/Software/SWTexture.h @@ -25,9 +25,6 @@ public: const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, @@ -66,12 +63,13 @@ private: class SWFramebuffer final : public AbstractFramebuffer { public: - explicit SWFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, + explicit SWFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples); ~SWFramebuffer() override = default; - static std::unique_ptr Create(const SWTexture* color_attachment, - const SWTexture* depth_attachment); + static std::unique_ptr Create(SWTexture* color_attachment, + SWTexture* depth_attachment); }; } // namespace SW diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index 4a53b4041c..20c6a1f700 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -14,6 +14,7 @@ #include "VideoBackends/Software/DebugUtil.h" #include "VideoBackends/Software/NativeVertexFormat.h" #include "VideoBackends/Software/Rasterizer.h" +#include 
"VideoBackends/Software/SWRenderer.h" #include "VideoBackends/Software/Tev.h" #include "VideoBackends/Software/TransformUnit.h" @@ -27,48 +28,9 @@ #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" -class NullNativeVertexFormat : public NativeVertexFormat -{ -public: - NullNativeVertexFormat(const PortableVertexDeclaration& _vtx_decl) { vtx_decl = _vtx_decl; } -}; +SWVertexLoader::SWVertexLoader() = default; -std::unique_ptr -SWVertexLoader::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) -{ - return std::make_unique(vtx_decl); -} - -SWVertexLoader::SWVertexLoader() - : m_local_vertex_buffer(MAXVBUFFERSIZE), m_local_index_buffer(MAXIBUFFERSIZE) -{ -} - -SWVertexLoader::~SWVertexLoader() -{ -} - -void SWVertexLoader::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) -{ -} - -void SWVertexLoader::ResetBuffer(u32 vertex_stride, bool cull_all) -{ - m_cur_buffer_pointer = m_base_buffer_pointer = m_local_vertex_buffer.data(); - m_end_buffer_pointer = m_cur_buffer_pointer + m_local_vertex_buffer.size(); - IndexGenerator::Start(m_local_index_buffer.data()); -} - -void SWVertexLoader::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, - u32* out_base_vertex, u32* out_base_index) -{ - *out_base_vertex = 0; - *out_base_index = 0; -} - -void SWVertexLoader::UploadConstants() -{ -} +SWVertexLoader::~SWVertexLoader() = default; void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { @@ -104,7 +66,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ for (u32 i = 0; i < IndexGenerator::GetIndexLen(); i++) { - const u16 index = m_local_index_buffer[i]; + const u16 index = m_cpu_index_buffer[i]; memset(static_cast(&m_vertex), 0, sizeof(m_vertex)); // Super Mario Sunshine requires those to be zero for those debug boxes. 
@@ -224,8 +186,8 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec, int index) { - DataReader src(m_local_vertex_buffer.data(), - m_local_vertex_buffer.data() + m_local_vertex_buffer.size()); + DataReader src(m_cpu_vertex_buffer.data(), + m_cpu_vertex_buffer.data() + m_cpu_vertex_buffer.size()); src.Skip(index * vdec.stride); ReadVertexAttribute(&m_vertex.position[0], src, vdec.position, 0, 3, false); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index dabbdef168..55316e5134 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -20,24 +20,12 @@ public: SWVertexLoader(); ~SWVertexLoader(); - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vdec) override; - - void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; - protected: - void ResetBuffer(u32 vertex_stride, bool cull_all) override; - void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, - u32* out_base_index) override; - void UploadConstants() override; void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; void SetFormat(u8 attributeIndex, u8 primitiveType); void ParseVertex(const PortableVertexDeclaration& vdec, int index); - std::vector m_local_vertex_buffer; - std::vector m_local_index_buffer; - InputVertexData m_vertex; SetupUnit m_setup_unit; diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index 0ab16bdfa8..c47072d4af 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -10,6 +10,7 @@ #include "Common/Common.h" #include "Common/CommonTypes.h" #include "Common/GL/GLContext.h" +#include "Common/MsgHandler.h" #include 
"VideoBackends/Software/Clipper.h" #include "VideoBackends/Software/DebugUtil.h" @@ -22,14 +23,11 @@ #include "VideoBackends/Software/TextureCache.h" #include "VideoBackends/Software/VideoBackend.h" -#include "VideoCommon/FramebufferManagerBase.h" -#include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" -#define VSYNC_ENABLED 0 - namespace SW { class PerfQuery : public PerfQueryBase @@ -59,6 +57,7 @@ void VideoSoftware::InitBackendInfo() { g_Config.backend_info.api_type = APIType::Nothing; g_Config.backend_info.MaxTextureSize = 16384; + g_Config.backend_info.bUsesLowerLeftOrigin = false; g_Config.backend_info.bSupports3DVision = false; g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsEarlyZ = true; @@ -70,6 +69,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsCopyToVram = false; + g_Config.backend_info.bSupportsLargePoints = false; g_Config.backend_info.bSupportsFramebufferFetch = false; g_Config.backend_info.bSupportsBackgroundCompiling = false; g_Config.backend_info.bSupportsLogicOp = true; @@ -92,10 +92,22 @@ bool VideoSoftware::Initialize(const WindowSystemInfo& wsi) g_renderer = std::make_unique(std::move(window)); g_vertex_manager = std::make_unique(); + g_shader_cache = std::make_unique(); + g_framebuffer_manager = std::make_unique(); g_perf_query = std::make_unique(); g_texture_cache = std::make_unique(); - g_shader_cache = std::make_unique(); - return g_renderer->Initialize() && g_shader_cache->Initialize(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + 
Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); + return true; } void VideoSoftware::Shutdown() @@ -107,9 +119,10 @@ void VideoSoftware::Shutdown() g_renderer->Shutdown(); DebugUtil::Shutdown(); - g_framebuffer_manager.reset(); g_texture_cache.reset(); g_perf_query.reset(); + g_framebuffer_manager.reset(); + g_shader_cache.reset(); g_vertex_manager.reset(); g_renderer.reset(); ShutdownShared(); diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h index 06bfbf73fa..8d2e26d508 100644 --- a/Source/Core/VideoBackends/Software/TextureCache.h +++ b/Source/Core/VideoBackends/Software/TextureCache.h @@ -9,27 +9,19 @@ namespace SW { class TextureCache : public TextureCacheBase { -public: - bool CompileShaders() override { return true; } - void DeleteShaders() override {} - void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, - TLUTFormat format) override - { - } +protected: void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override + const EFBCopyFilterCoefficients& filter_coefficients) override { TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, scale_by_half, y_scale, gamma); } - -private: void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override + const EFBCopyFilterCoefficients& filter_coefficients) override { // TODO: If we ever want to "fake" vram textures, we would need to implement this } diff --git 
a/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp b/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp index 169404d7ca..9e7da60249 100644 --- a/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp +++ b/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp @@ -13,7 +13,6 @@ #include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StagingBuffer.h" #include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan @@ -33,7 +32,7 @@ BoundingBox::~BoundingBox() bool BoundingBox::Initialize() { - if (!g_vulkan_context->SupportsBoundingBox()) + if (!g_ActiveConfig.backend_info.bSupportsBBox) { WARN_LOG(VIDEO, "Vulkan: Bounding box is unsupported by your device."); return true; @@ -45,6 +44,8 @@ bool BoundingBox::Initialize() if (!CreateReadbackBuffer()) return false; + // Bind bounding box to state tracker + StateTracker::GetInstance()->SetSSBO(m_gpu_buffer, 0, BUFFER_SIZE); return true; } @@ -79,7 +80,7 @@ void BoundingBox::Flush() StateTracker::GetInstance()->EndRenderPass(); // Ensure GPU buffer is in a state where it can be transferred to. - Util::BufferMemoryBarrier( + StagingBuffer::BufferMemoryBarrier( g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0, BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); @@ -95,7 +96,7 @@ void BoundingBox::Flush() // Restore fragment shader access to the buffer. 
if (updated_buffer) { - Util::BufferMemoryBarrier( + StagingBuffer::BufferMemoryBarrier( g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); @@ -219,7 +220,7 @@ void BoundingBox::Readback() StateTracker::GetInstance()->EndRenderPass(); // Ensure all writes are completed to the GPU buffer prior to the transfer. - Util::BufferMemoryBarrier( + StagingBuffer::BufferMemoryBarrier( g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0, BUFFER_SIZE, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); @@ -233,15 +234,15 @@ void BoundingBox::Readback() m_readback_buffer->GetBuffer(), 1, &region); // Restore GPU buffer access. - Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, - VK_ACCESS_TRANSFER_READ_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + StagingBuffer::BufferMemoryBarrier( + g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); // Wait until these commands complete. - Util::ExecuteCurrentCommandsAndRestoreState(false, true); + Renderer::GetInstance()->ExecuteCommandBuffer(false, true); // Cache is now valid.
m_readback_buffer->InvalidateCPUCache(); diff --git a/Source/Core/VideoBackends/Vulkan/BoundingBox.h b/Source/Core/VideoBackends/Vulkan/BoundingBox.h index 872f49aa31..e5fda1066a 100644 --- a/Source/Core/VideoBackends/Vulkan/BoundingBox.h +++ b/Source/Core/VideoBackends/Vulkan/BoundingBox.h @@ -24,9 +24,6 @@ public: bool Initialize(); - VkBuffer GetGPUBuffer() const { return m_gpu_buffer; } - VkDeviceSize GetGPUBufferOffset() const { return 0; } - VkDeviceSize GetGPUBufferSize() const { return BUFFER_SIZE; } s32 Get(size_t index); void Set(size_t index, s32 value); diff --git a/Source/Core/VideoBackends/Vulkan/CMakeLists.txt b/Source/Core/VideoBackends/Vulkan/CMakeLists.txt index 5746cd1aaa..014a3378c8 100644 --- a/Source/Core/VideoBackends/Vulkan/CMakeLists.txt +++ b/Source/Core/VideoBackends/Vulkan/CMakeLists.txt @@ -1,21 +1,14 @@ add_library(videovulkan BoundingBox.cpp CommandBufferManager.cpp - FramebufferManager.cpp ObjectCache.cpp PerfQuery.cpp - PostProcessing.cpp Renderer.cpp - ShaderCache.cpp ShaderCompiler.cpp StateTracker.cpp StagingBuffer.cpp StreamBuffer.cpp SwapChain.cpp - Texture2D.cpp - TextureCache.cpp - TextureConverter.cpp - Util.cpp VertexFormat.cpp VertexManager.cpp VKPipeline.cpp diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp index ea07d47a53..1f2fcd01c7 100644 --- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp @@ -44,12 +44,16 @@ bool CommandBufferManager::Initialize() bool CommandBufferManager::CreateCommandBuffers() { + static constexpr VkSemaphoreCreateInfo semaphore_create_info = { + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0}; + VkDevice device = g_vulkan_context->GetDevice(); VkResult res; for (FrameResources& resources : m_frame_resources) { resources.init_command_buffer_used = false; + resources.semaphore_used = false; resources.needs_fence_wait = false; 
VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, @@ -83,6 +87,13 @@ bool CommandBufferManager::CreateCommandBuffers() return false; } + res = vkCreateSemaphore(device, &semaphore_create_info, nullptr, &resources.semaphore); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); + return false; + } + // TODO: A better way to choose the number of descriptors. VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000}, @@ -105,9 +116,16 @@ bool CommandBufferManager::CreateCommandBuffers() } } + res = vkCreateSemaphore(device, &semaphore_create_info, nullptr, &m_present_semaphore); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); + return false; + } + // Activate the first command buffer. ActivateCommandBuffer moves forward, so start with the last - m_current_frame = m_frame_resources.size() - 1; - ActivateCommandBuffer(); + m_current_frame = static_cast(m_frame_resources.size()) - 1; + BeginCommandBuffer(); return true; } @@ -122,28 +140,23 @@ void CommandBufferManager::DestroyCommandBuffers() // We destroy the command pool first, to avoid any warnings from the validation layers about // objects which are pending destruction being in-use. if (resources.command_pool != VK_NULL_HANDLE) - { vkDestroyCommandPool(device, resources.command_pool, nullptr); - resources.command_pool = VK_NULL_HANDLE; - } // Destroy any pending objects. 
for (auto& it : resources.cleanup_resources) it(); - resources.cleanup_resources.clear(); + + if (resources.semaphore != VK_NULL_HANDLE) + vkDestroySemaphore(device, resources.semaphore, nullptr); if (resources.fence != VK_NULL_HANDLE) - { vkDestroyFence(device, resources.fence, nullptr); - resources.fence = VK_NULL_HANDLE; - } if (resources.descriptor_pool != VK_NULL_HANDLE) - { vkDestroyDescriptorPool(device, resources.descriptor_pool, nullptr); - resources.descriptor_pool = VK_NULL_HANDLE; - } } + + vkDestroySemaphore(device, m_present_semaphore, nullptr); } VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayout set_layout) @@ -183,22 +196,14 @@ bool CommandBufferManager::CreateSubmitThread() m_pending_submits.pop_front(); } - SubmitCommandBuffer(submit.index, submit.wait_semaphore, submit.signal_semaphore, - submit.present_swap_chain, submit.present_image_index); + SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, + submit.present_image_index); }); }); return true; } -void CommandBufferManager::PrepareToSubmitCommandBuffer() -{ - // Grab the semaphore before submitting command buffer either on-thread or off-thread. - // This prevents a race from occurring where a second command buffer is executed - // before the worker thread has woken and executed the first one yet. - m_submit_semaphore.Wait(); -} - void CommandBufferManager::WaitForWorkerThreadIdle() { // Drain the semaphore, then allow another request in the future. @@ -215,8 +220,8 @@ void CommandBufferManager::WaitForGPUIdle() void CommandBufferManager::WaitForFence(VkFence fence) { // Find the command buffer that this fence corresponds to. 
- size_t command_buffer_index = 0; - for (; command_buffer_index < m_frame_resources.size(); command_buffer_index++) + u32 command_buffer_index = 0; + for (; command_buffer_index < static_cast(m_frame_resources.size()); command_buffer_index++) { if (m_frame_resources[command_buffer_index].fence == fence) break; @@ -227,6 +232,9 @@ void CommandBufferManager::WaitForFence(VkFence fence) if (!m_frame_resources[command_buffer_index].needs_fence_wait) return; + // Ensure this command buffer has been submitted. + WaitForWorkerThreadIdle(); + // Wait for this command buffer to be completed. VkResult res = vkWaitForFences(g_vulkan_context->GetDevice(), 1, @@ -240,19 +248,11 @@ void CommandBufferManager::WaitForFence(VkFence fence) } void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, - VkSemaphore wait_semaphore, - VkSemaphore signal_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index) { - FrameResources& resources = m_frame_resources[m_current_frame]; - - // Fire fence tracking callbacks. This can't happen on the worker thread. - // We invoke these before submitting so that any last-minute commands can be added. - for (const auto& iter : m_fence_point_callbacks) - iter.second.first(resources.command_buffers[1], resources.fence); - // End the current command buffer. + FrameResources& resources = m_frame_resources[m_current_frame]; for (VkCommandBuffer command_buffer : resources.command_buffers) { VkResult res = vkEndCommandBuffer(command_buffer); @@ -266,14 +266,18 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, // This command buffer now has commands, so can't be re-used without waiting. resources.needs_fence_wait = true; + // Grab the semaphore before submitting command buffer either on-thread or off-thread. + // This prevents a race from occurring where a second command buffer is executed + // before the worker thread has woken and executed the first one yet. 
+ m_submit_semaphore.Wait(); + // Submitting off-thread? if (m_use_threaded_submission && submit_on_worker_thread) { // Push to the pending submit queue. { std::lock_guard guard(m_pending_submit_lock); - m_pending_submits.push_back({m_current_frame, wait_semaphore, signal_semaphore, - present_swap_chain, present_image_index}); + m_pending_submits.push_back({present_swap_chain, present_image_index, m_current_frame}); } // Wake up the worker thread for a single iteration. @@ -282,17 +286,18 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, else { // Pass through to normal submission path. - SubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore, present_swap_chain, - present_image_index); + SubmitCommandBuffer(m_current_frame, present_swap_chain, present_image_index); } + + // Switch to next cmdbuffer. + BeginCommandBuffer(); } -void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_semaphore, - VkSemaphore signal_semaphore, +void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain, - uint32_t present_image_index) + u32 present_image_index) { - FrameResources& resources = m_frame_resources[index]; + FrameResources& resources = m_frame_resources[command_buffer_index]; // This may be executed on the worker thread, so don't modify any state of the manager class. uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; @@ -307,22 +312,22 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se nullptr}; // If the init command buffer did not have any commands recorded, don't submit it. 
- if (!m_frame_resources[index].init_command_buffer_used) + if (!resources.init_command_buffer_used) { submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &m_frame_resources[index].command_buffers[1]; + submit_info.pCommandBuffers = &resources.command_buffers[1]; } - if (wait_semaphore != VK_NULL_HANDLE) + if (resources.semaphore_used) { - submit_info.pWaitSemaphores = &wait_semaphore; + submit_info.pWaitSemaphores = &resources.semaphore; submit_info.waitSemaphoreCount = 1; } - if (signal_semaphore != VK_NULL_HANDLE) + if (present_swap_chain != VK_NULL_HANDLE) { submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &signal_semaphore; + submit_info.pSignalSemaphores = &m_present_semaphore; } VkResult res = @@ -337,11 +342,10 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se if (present_swap_chain != VK_NULL_HANDLE) { // Should have a signal semaphore. - ASSERT(signal_semaphore != VK_NULL_HANDLE); VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr, 1, - &signal_semaphore, + &m_present_semaphore, 1, &present_swap_chain, &present_image_index, @@ -361,15 +365,15 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se m_submit_semaphore.Post(); } -void CommandBufferManager::OnCommandBufferExecuted(size_t index) +void CommandBufferManager::OnCommandBufferExecuted(u32 index) { FrameResources& resources = m_frame_resources[index]; // Fire fence tracking callbacks.
- for (auto iter = m_fence_point_callbacks.begin(); iter != m_fence_point_callbacks.end();) + for (auto iter = m_fence_callbacks.begin(); iter != m_fence_callbacks.end();) { auto backup_iter = iter++; - backup_iter->second.second(resources.fence); + backup_iter->second(resources.fence); } // Clean up all objects pending destruction on this command buffer @@ -378,7 +382,7 @@ void CommandBufferManager::OnCommandBufferExecuted(size_t index) resources.cleanup_resources.clear(); } -void CommandBufferManager::ActivateCommandBuffer() +void CommandBufferManager::BeginCommandBuffer() { // Move to the next command buffer. m_current_frame = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; @@ -422,19 +426,7 @@ void CommandBufferManager::ActivateCommandBuffer() // Reset upload command buffer state resources.init_command_buffer_used = false; -} - -void CommandBufferManager::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion) -{ - VkFence pending_fence = GetCurrentCommandBufferFence(); - - // If we're waiting for completion, don't bother waking the worker thread. - PrepareToSubmitCommandBuffer(); - SubmitCommandBuffer((submit_off_thread && wait_for_completion)); - ActivateCommandBuffer(); - - if (wait_for_completion) - WaitForFence(pending_fence); + resources.semaphore_used = false; } void CommandBufferManager::DeferBufferDestruction(VkBuffer object) @@ -479,20 +471,18 @@ void CommandBufferManager::DeferImageViewDestruction(VkImageView object) [object]() { vkDestroyImageView(g_vulkan_context->GetDevice(), object, nullptr); }); } -void CommandBufferManager::AddFencePointCallback( - const void* key, const CommandBufferQueuedCallback& queued_callback, - const CommandBufferExecutedCallback& executed_callback) +void CommandBufferManager::AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback) { // Shouldn't be adding twice. 
- ASSERT(m_fence_point_callbacks.find(key) == m_fence_point_callbacks.end()); - m_fence_point_callbacks.emplace(key, std::make_pair(queued_callback, executed_callback)); + ASSERT(m_fence_callbacks.find(key) == m_fence_callbacks.end()); + m_fence_callbacks.emplace(key, std::move(callback)); } -void CommandBufferManager::RemoveFencePointCallback(const void* key) +void CommandBufferManager::RemoveFenceSignaledCallback(const void* key) { - auto iter = m_fence_point_callbacks.find(key); - ASSERT(iter != m_fence_point_callbacks.end()); - m_fence_point_callbacks.erase(iter); + auto iter = m_fence_callbacks.find(key); + ASSERT(iter != m_fence_callbacks.end()); + m_fence_callbacks.erase(iter); } std::unique_ptr g_command_buffer_mgr; diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h index 9cefe100d7..9cfc50e287 100644 --- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h @@ -22,7 +22,6 @@ #include "VideoCommon/VideoCommon.h" #include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/Util.h" namespace Vulkan { @@ -55,8 +54,14 @@ public: // Gets the fence that will be signaled when the currently executing command buffer is // queued and executed. Do not wait for this fence before the buffer is executed. VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; } - // Ensure the worker thread has submitted the previous frame's command buffer. - void PrepareToSubmitCommandBuffer(); + + // Returns the semaphore for the current command buffer, which can be used to ensure the + // swap chain image is ready before the command buffer executes. 
+ VkSemaphore GetCurrentCommandBufferSemaphore() + { + m_frame_resources[m_current_frame].semaphore_used = true; + return m_frame_resources[m_current_frame].semaphore; + } // Ensure that the worker thread has submitted any previous command buffers and is idle. void WaitForWorkerThreadIdle(); @@ -70,17 +75,12 @@ public: void WaitForFence(VkFence fence); void SubmitCommandBuffer(bool submit_on_worker_thread, - VkSemaphore wait_semaphore = VK_NULL_HANDLE, - VkSemaphore signal_semaphore = VK_NULL_HANDLE, VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE, uint32_t present_image_index = 0xFFFFFFFF); - void ActivateCommandBuffer(); - - void ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion); - // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain. bool CheckLastPresentFail() { return m_present_failed_flag.TestAndClear(); } + // Schedule a vulkan resource for destruction later on. This will occur when the command buffer // is next re-used, and the GPU has finished working with the specified resource. void DeferBufferDestruction(VkBuffer object); @@ -93,13 +93,9 @@ public: // Instruct the manager to fire the specified callback when a fence is flagged to be signaled. // This happens when command buffers are executed, and can be tested if signaled, which means // that all commands up to the point when the callback was fired have completed. 
- using CommandBufferQueuedCallback = std::function; - using CommandBufferExecutedCallback = std::function; - - void AddFencePointCallback(const void* key, const CommandBufferQueuedCallback& queued_callback, - const CommandBufferExecutedCallback& executed_callback); - - void RemoveFencePointCallback(const void* key); + using FenceSignaledCallback = std::function; + void AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback); + void RemoveFenceSignaledCallback(const void* key); private: bool CreateCommandBuffers(); @@ -107,30 +103,32 @@ private: bool CreateSubmitThread(); - void SubmitCommandBuffer(size_t index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, - VkSwapchainKHR present_swap_chain, uint32_t present_image_index); + void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain, + u32 present_image_index); + void BeginCommandBuffer(); - void OnCommandBufferExecuted(size_t index); + void OnCommandBufferExecuted(u32 index); struct FrameResources { // [0] - Init (upload) command buffer, [1] - draw command buffer - VkCommandPool command_pool; - std::array command_buffers; - VkDescriptorPool descriptor_pool; - VkFence fence; - bool init_command_buffer_used; - bool needs_fence_wait; + VkCommandPool command_pool = VK_NULL_HANDLE; + std::array command_buffers = {}; + VkDescriptorPool descriptor_pool = VK_NULL_HANDLE; + VkFence fence = VK_NULL_HANDLE; + VkSemaphore semaphore = VK_NULL_HANDLE; + bool init_command_buffer_used = false; + bool semaphore_used = false; + bool needs_fence_wait = false; std::vector> cleanup_resources; }; - std::array m_frame_resources = {}; - size_t m_current_frame; + std::array m_frame_resources; + u32 m_current_frame; // callbacks when a fence point is set - std::map> - m_fence_point_callbacks; + std::map m_fence_callbacks; // Threaded command buffer execution // Semaphore determines when a command buffer can be queued @@ -139,12 +137,11 @@ private: std::unique_ptr m_submit_loop; 
struct PendingCommandBufferSubmit { - size_t index; - VkSemaphore wait_semaphore; - VkSemaphore signal_semaphore; VkSwapchainKHR present_swap_chain; - uint32_t present_image_index; + u32 present_image_index; + u32 command_buffer_index; }; + VkSemaphore m_present_semaphore = VK_NULL_HANDLE; std::deque m_pending_submits; std::mutex m_pending_submit_lock; Common::Flag m_present_failed_flag; diff --git a/Source/Core/VideoBackends/Vulkan/Constants.h b/Source/Core/VideoBackends/Vulkan/Constants.h index f2af115b2a..816af86bc5 100644 --- a/Source/Core/VideoBackends/Vulkan/Constants.h +++ b/Source/Core/VideoBackends/Vulkan/Constants.h @@ -26,39 +26,29 @@ enum STAGING_BUFFER_TYPE // Descriptor set layouts enum DESCRIPTOR_SET_LAYOUT { - DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER, - DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS, - DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS, - DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS, - DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS, + DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS, + DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS, + DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS, + DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER, + DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS, DESCRIPTOR_SET_LAYOUT_COMPUTE, NUM_DESCRIPTOR_SET_LAYOUTS }; -// Descriptor set bind points -enum DESCRIPTOR_SET_BIND_POINT -{ - DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS, - DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS, - DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER, - NUM_DESCRIPTOR_SET_BIND_POINTS -}; - // We use four pipeline layouts: // - Standard -// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) -// - 8 combined image samplers (accessible from PS) -// - 1 SSBO accessible from PS if supported -// - Push Constant -// - Same as standard, plus 128 bytes of push constants, accessible from all stages. -// - Texture Decoding -// - Same as push constant, plus a single texel buffer accessible from PS. 
+// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) [set=0, binding=0-2] +// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7] +// - 1 SSBO accessible from PS if supported [set=2, binding=0] +// - Utility +// - 1 combined UBO, accessible from VS/GS/PS [set=0, binding=0] +// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7] +// - 1 texel buffer (accessible from PS) [set=1, binding=8] // - Compute // - 1 uniform buffer [set=0, binding=0] -// - 4 combined image samplers [set=0, binding=1-4] -// - 1 texel buffer [set=0, binding=5] -// - 1 storage image [set=0, binding=6] -// - 128 bytes of push constants +// - 2 combined image samplers [set=0, binding=1-2] +// - 2 texel buffers [set=0, binding=3-4] +// - 1 storage image [set=0, binding=5] // // All four pipeline layout share the first two descriptor sets (uniform buffers, PS samplers). // The third descriptor set (see bind points above) is used for storage or texel buffers. @@ -66,8 +56,6 @@ enum DESCRIPTOR_SET_BIND_POINT enum PIPELINE_LAYOUT { PIPELINE_LAYOUT_STANDARD, - PIPELINE_LAYOUT_PUSH_CONSTANT, - PIPELINE_LAYOUT_TEXTURE_CONVERSION, PIPELINE_LAYOUT_UTILITY, PIPELINE_LAYOUT_COMPUTE, NUM_PIPELINE_LAYOUTS @@ -83,53 +71,22 @@ enum UNIFORM_BUFFER_DESCRIPTOR_SET_BINDING }; // Maximum number of attributes per vertex (we don't have any more than this?) 
-constexpr size_t MAX_VERTEX_ATTRIBUTES = 16; +constexpr u32 MAX_VERTEX_ATTRIBUTES = 16; // Number of pixel shader texture slots -constexpr size_t NUM_PIXEL_SHADER_SAMPLERS = 8; +constexpr u32 NUM_PIXEL_SHADER_SAMPLERS = 8; +constexpr u32 NUM_COMPUTE_SHADER_SAMPLERS = 2; -// Total number of binding points in the pipeline layout -constexpr size_t TOTAL_PIPELINE_BINDING_POINTS = - NUM_UBO_DESCRIPTOR_SET_BINDINGS + NUM_PIXEL_SHADER_SAMPLERS + 1; - -// Format of EFB textures -constexpr VkFormat EFB_COLOR_TEXTURE_FORMAT = VK_FORMAT_R8G8B8A8_UNORM; -constexpr VkFormat EFB_DEPTH_TEXTURE_FORMAT = VK_FORMAT_D32_SFLOAT; -constexpr VkFormat EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT = VK_FORMAT_R32_SFLOAT; - -// Format of texturecache textures -constexpr VkFormat TEXTURECACHE_TEXTURE_FORMAT = VK_FORMAT_R8G8B8A8_UNORM; +// Number of texel buffer binding points. +constexpr u32 NUM_COMPUTE_TEXEL_BUFFERS = 2; // Textures that don't fit into this buffer will be uploaded with a separate buffer (see below). -constexpr size_t INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE = 16 * 1024 * 1024; -constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024; +constexpr u32 TEXTURE_UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; // Textures greater than 1024*1024 will be put in staging textures that are released after // execution instead. A 2048x2048 texture is 16MB, and we'd only fit four of these in our // streaming buffer and be blocking frequently. Games are unlikely to have textures this // large anyway, so it's only really an issue for HD texture packs, and memory is not // a limiting factor in these scenarios anyway. -constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 8; - -// Streaming uniform buffer size -constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; -constexpr size_t MAXIMUM_UNIFORM_STREAM_BUFFER_SIZE = 32 * 1024 * 1024; - -// Texel buffer size for palette and texture decoding. 
-constexpr size_t TEXTURE_CONVERSION_TEXEL_BUFFER_SIZE = 8 * 1024 * 1024; - -// Push constant buffer size for utility shaders -constexpr u32 PUSH_CONSTANT_BUFFER_SIZE = 128; - -// Minimum number of draw calls per command buffer when attempting to preempt a readback operation. -constexpr u32 MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK = 10; - -// Multisampling state info that we don't expose in VideoCommon. -union MultisamplingState -{ - BitField<0, 5, u32> samples; // 1-16 - BitField<5, 1, u32> per_sample_shading; // SSAA - u32 hex; -}; - +constexpr u32 STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4; } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp b/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp deleted file mode 100644 index cd1fbd9c82..0000000000 --- a/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp +++ /dev/null @@ -1,1198 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/Vulkan/FramebufferManager.h" - -#include -#include - -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" - -#include "Core/HW/Memmap.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VKTexture.h" -#include "VideoBackends/Vulkan/VertexFormat.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/RenderBase.h" -#include "VideoCommon/VideoConfig.h" - -namespace Vulkan -{ -// Maximum number of pixels poked in one batch * 6 -constexpr size_t MAX_POKE_VERTICES = 8192; -constexpr size_t POKE_VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; - -FramebufferManager::FramebufferManager() -{ -} - -FramebufferManager::~FramebufferManager() -{ - DestroyEFBFramebuffer(); - - DestroyConversionShaders(); - - DestroyReadbackFramebuffer(); - DestroyReadbackTextures(); - DestroyReadbackShaders(); - - DestroyPokeVertexBuffer(); - DestroyPokeShaders(); -} - -FramebufferManager* FramebufferManager::GetInstance() -{ - return static_cast(g_framebuffer_manager.get()); -} - -u32 FramebufferManager::GetEFBWidth() const -{ - return m_efb_color_texture->GetWidth(); -} - -u32 FramebufferManager::GetEFBHeight() const -{ - return m_efb_color_texture->GetHeight(); -} - -u32 FramebufferManager::GetEFBLayers() const -{ - return m_efb_color_texture->GetLayers(); -} - -VkSampleCountFlagBits FramebufferManager::GetEFBSamples() const -{ - return m_efb_color_texture->GetSamples(); -} - -MultisamplingState FramebufferManager::GetEFBMultisamplingState() const -{ - MultisamplingState ms = {}; - ms.per_sample_shading = g_ActiveConfig.MultisamplingEnabled() && g_ActiveConfig.bSSAA; - ms.samples = static_cast(GetEFBSamples()); - return 
ms; -} - -bool FramebufferManager::Initialize() -{ - if (!CreateEFBRenderPasses()) - { - PanicAlert("Failed to create EFB render pass"); - return false; - } - if (!CreateEFBFramebuffer()) - { - PanicAlert("Failed to create EFB textures"); - return false; - } - - if (!CompileConversionShaders()) - { - PanicAlert("Failed to compile EFB shaders"); - return false; - } - - if (!CreateReadbackRenderPasses()) - { - PanicAlert("Failed to create readback render passes"); - return false; - } - if (!CompileReadbackShaders()) - { - PanicAlert("Failed to compile readback shaders"); - return false; - } - if (!CreateReadbackTextures()) - { - PanicAlert("Failed to create readback textures"); - return false; - } - if (!CreateReadbackFramebuffer()) - { - PanicAlert("Failed to create readback framebuffer"); - return false; - } - - CreatePokeVertexFormat(); - if (!CreatePokeVertexBuffer()) - { - PanicAlert("Failed to create poke vertex buffer"); - return false; - } - - if (!CompilePokeShaders()) - { - PanicAlert("Failed to compile poke shaders"); - return false; - } - - return true; -} - -bool FramebufferManager::CreateEFBRenderPasses() -{ - m_efb_load_render_pass = g_object_cache->GetRenderPass( - EFB_COLOR_TEXTURE_FORMAT, Util::GetVkFormatForHostTextureFormat(GetEFBDepthFormat()), - g_ActiveConfig.iMultisamples, VK_ATTACHMENT_LOAD_OP_LOAD); - m_efb_clear_render_pass = g_object_cache->GetRenderPass( - EFB_COLOR_TEXTURE_FORMAT, Util::GetVkFormatForHostTextureFormat(GetEFBDepthFormat()), - g_ActiveConfig.iMultisamples, VK_ATTACHMENT_LOAD_OP_CLEAR); - m_depth_resolve_render_pass = g_object_cache->GetRenderPass( - EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - return m_efb_load_render_pass != VK_NULL_HANDLE && m_efb_clear_render_pass != VK_NULL_HANDLE && - m_depth_resolve_render_pass != VK_NULL_HANDLE; -} - -bool FramebufferManager::CreateEFBFramebuffer() -{ - u32 efb_width = static_cast(std::max(g_renderer->GetTargetWidth(), 1)); - u32 
efb_height = static_cast(std::max(g_renderer->GetTargetHeight(), 1)); - u32 efb_layers = (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 2 : 1; - VkSampleCountFlagBits efb_samples = - static_cast(g_ActiveConfig.iMultisamples); - INFO_LOG(VIDEO, "EFB size: %ux%ux%u", efb_width, efb_height, efb_layers); - - // Update the static variable in the base class. Why does this even exist? - FramebufferManagerBase::m_EFBLayers = efb_layers; - - // Allocate EFB render targets - m_efb_color_texture = - Texture2D::Create(efb_width, efb_height, 1, efb_layers, EFB_COLOR_TEXTURE_FORMAT, efb_samples, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); - - // We need a second texture to swap with for changing pixel formats - m_efb_convert_color_texture = - Texture2D::Create(efb_width, efb_height, 1, efb_layers, EFB_COLOR_TEXTURE_FORMAT, efb_samples, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); - - m_efb_depth_texture = Texture2D::Create( - efb_width, efb_height, 1, efb_layers, - Util::GetVkFormatForHostTextureFormat(GetEFBDepthFormat()), efb_samples, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); - - if (!m_efb_color_texture || !m_efb_convert_color_texture || !m_efb_depth_texture) - return false; - - // Create resolved textures if MSAA is on - if (g_ActiveConfig.MultisamplingEnabled()) - { - m_efb_resolve_color_texture = Texture2D::Create( - efb_width, efb_height, 1, efb_layers, EFB_COLOR_TEXTURE_FORMAT, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | 
VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT); - - m_efb_resolve_depth_texture = Texture2D::Create( - efb_width, efb_height, 1, efb_layers, EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT); - - if (!m_efb_resolve_color_texture || !m_efb_resolve_depth_texture) - return false; - - VkImageView attachment = m_efb_resolve_depth_texture->GetView(); - VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - m_depth_resolve_render_pass, - 1, - &attachment, - efb_width, - efb_height, - efb_layers}; - - VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_depth_resolve_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return false; - } - } - - VkImageView framebuffer_attachments[] = { - m_efb_color_texture->GetView(), - m_efb_depth_texture->GetView(), - }; - - VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - m_efb_load_render_pass, - static_cast(ArraySize(framebuffer_attachments)), - framebuffer_attachments, - efb_width, - efb_height, - efb_layers}; - - VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_efb_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return false; - } - - // Create second framebuffer for format conversions - framebuffer_attachments[0] = m_efb_convert_color_texture->GetView(); - res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_efb_convert_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return false; - } - - // Transition to state that can be used to clear - 
m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - m_efb_convert_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - // Clear the contents of the buffers. - static const VkClearColorValue clear_color = {{0.0f, 0.0f, 0.0f, 0.0f}}; - static const VkClearDepthStencilValue clear_depth = {0.0f, 0}; - VkImageSubresourceRange clear_color_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, efb_layers}; - VkImageSubresourceRange clear_depth_range = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, efb_layers}; - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_efb_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - &clear_color, 1, &clear_color_range); - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_efb_convert_color_texture->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &clear_color_range); - vkCmdClearDepthStencilImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_efb_depth_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - &clear_depth, 1, &clear_depth_range); - - // Transition to color attachment state ready for rendering. 
- m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - - return true; -} - -void FramebufferManager::DestroyEFBFramebuffer() -{ - if (m_efb_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_efb_framebuffer, nullptr); - m_efb_framebuffer = VK_NULL_HANDLE; - } - - if (m_efb_convert_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_efb_convert_framebuffer, nullptr); - m_efb_convert_framebuffer = VK_NULL_HANDLE; - } - - if (m_depth_resolve_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_depth_resolve_framebuffer, nullptr); - m_depth_resolve_framebuffer = VK_NULL_HANDLE; - } - - m_efb_color_texture.reset(); - m_efb_convert_color_texture.reset(); - m_efb_depth_texture.reset(); - m_efb_resolve_color_texture.reset(); - m_efb_resolve_depth_texture.reset(); -} - -void FramebufferManager::RecreateEFBFramebuffer() -{ - DestroyEFBFramebuffer(); - - if (!CreateEFBRenderPasses()) - PanicAlert("Failed to create EFB render pass"); - - if (!CreateEFBFramebuffer()) - PanicAlert("Failed to create EFB textures"); -} - -void FramebufferManager::RecompileShaders() -{ - DestroyConversionShaders(); - - if (!CompileConversionShaders()) - PanicAlert("Failed to compile EFB shaders"); - - DestroyReadbackShaders(); - if (!CompileReadbackShaders()) - PanicAlert("Failed to compile readback shaders"); -} - -void FramebufferManager::ReinterpretPixelData(int convtype) -{ - VkShaderModule pixel_shader = VK_NULL_HANDLE; - if (convtype == 0) - { - pixel_shader = m_ps_rgb8_to_rgba6; - } - else if (convtype == 2) - { - pixel_shader = m_ps_rgba6_to_rgb8; - } - else - { - ERROR_LOG(VIDEO, "Unhandled reinterpret pixel data %d", convtype); - return; - } - - // 
Transition EFB color buffer to shader resource, and the convert buffer to color attachment. - m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_efb_convert_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - m_efb_load_render_pass, g_shader_cache->GetScreenQuadVertexShader(), - g_shader_cache->GetScreenQuadGeometryShader(), pixel_shader); - - VkRect2D region = {{0, 0}, {GetEFBWidth(), GetEFBHeight()}}; - draw.SetMultisamplingState(GetEFBMultisamplingState()); - draw.BeginRenderPass(m_efb_convert_framebuffer, region); - draw.SetPSSampler(0, m_efb_color_texture->GetView(), g_object_cache->GetPointSampler()); - draw.SetViewportAndScissor(0, 0, GetEFBWidth(), GetEFBHeight()); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - // Swap EFB texture pointers - std::swap(m_efb_color_texture, m_efb_convert_color_texture); - std::swap(m_efb_framebuffer, m_efb_convert_framebuffer); -} - -Texture2D* FramebufferManager::ResolveEFBColorTexture(const VkRect2D& region) -{ - // Return the normal EFB texture if multisampling is off. - if (GetEFBSamples() == VK_SAMPLE_COUNT_1_BIT) - return m_efb_color_texture.get(); - - // Can't resolve within a render pass. - StateTracker::GetInstance()->EndRenderPass(); - - // It's not valid to resolve out-of-bounds coordinates. - // Ensuring the region is within the image is the caller's responsibility. - ASSERT(region.offset.x >= 0 && region.offset.y >= 0 && - (static_cast(region.offset.x) + region.extent.width) <= GetEFBWidth() && - (static_cast(region.offset.y) + region.extent.height) <= GetEFBHeight()); - - // Resolving is considered to be a transfer operation. 
- m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_efb_resolve_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - // Resolve to our already-created texture. - VkImageResolve resolve = { - {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, GetEFBLayers()}, // VkImageSubresourceLayers srcSubresource - {region.offset.x, region.offset.y, 0}, // VkOffset3D srcOffset - {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, GetEFBLayers()}, // VkImageSubresourceLayers dstSubresource - {region.offset.x, region.offset.y, 0}, // VkOffset3D dstOffset - {region.extent.width, region.extent.height, GetEFBLayers()} // VkExtent3D extent - }; - vkCmdResolveImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_efb_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - m_efb_resolve_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - 1, &resolve); - - // Restore MSAA texture ready for rendering again - m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - return m_efb_resolve_color_texture.get(); -} - -Texture2D* FramebufferManager::ResolveEFBDepthTexture(const VkRect2D& region) -{ - // Return the normal EFB texture if multisampling is off. - if (GetEFBSamples() == VK_SAMPLE_COUNT_1_BIT) - return m_efb_depth_texture.get(); - - // Can't resolve within a render pass. - StateTracker::GetInstance()->EndRenderPass(); - - m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_efb_resolve_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // Draw using resolve shader to write the minimum depth of all samples to the resolve texture. 
- UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - m_depth_resolve_render_pass, g_shader_cache->GetScreenQuadVertexShader(), - g_shader_cache->GetScreenQuadGeometryShader(), m_ps_depth_resolve); - draw.BeginRenderPass(m_depth_resolve_framebuffer, region); - draw.SetPSSampler(0, m_efb_depth_texture->GetView(), g_object_cache->GetPointSampler()); - draw.SetViewportAndScissor(region.offset.x, region.offset.y, region.extent.width, - region.extent.height); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - // Restore MSAA texture ready for rendering again - m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - - return m_efb_resolve_depth_texture.get(); -} - -bool FramebufferManager::CompileConversionShaders() -{ - static const char RGB8_TO_RGBA6_SHADER_SOURCE[] = R"( - #if MSAA_ENABLED - SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; - #else - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - #endif - layout(location = 0) in vec3 uv0; - layout(location = 0) out vec4 ocol0; - - void main() - { - int layer = 0; - #if EFB_LAYERS > 1 - layer = int(uv0.z); - #endif - - ivec3 coords = ivec3(gl_FragCoord.xy, layer); - - vec4 val; - #if !MSAA_ENABLED - // No MSAA - just load the first (and only) sample - val = texelFetch(samp0, coords, 0); - #elif SSAA_ENABLED - // Sample shading, shader runs once per sample - val = texelFetch(samp0, coords, gl_SampleID); - #else - // MSAA without sample shading, average out all samples. 
- val = vec4(0, 0, 0, 0); - for (int i = 0; i < MSAA_SAMPLES; i++) - val += texelFetch(samp0, coords, i); - val /= float(MSAA_SAMPLES); - #endif - - ivec4 src8 = ivec4(round(val * 255.f)); - ivec4 dst6; - dst6.r = src8.r >> 2; - dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4); - dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6); - dst6.a = src8.b & 0x3F; - - ocol0 = float4(dst6) / 63.f; - } - )"; - - static const char RGBA6_TO_RGB8_SHADER_SOURCE[] = R"( - #if MSAA_ENABLED - SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; - #else - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - #endif - layout(location = 0) in vec3 uv0; - layout(location = 0) out vec4 ocol0; - - void main() - { - int layer = 0; - #if EFB_LAYERS > 1 - layer = int(uv0.z); - #endif - - ivec3 coords = ivec3(gl_FragCoord.xy, layer); - - vec4 val; - #if !MSAA_ENABLED - // No MSAA - just load the first (and only) sample - val = texelFetch(samp0, coords, 0); - #elif SSAA_ENABLED - // Sample shading, shader runs once per sample - val = texelFetch(samp0, coords, gl_SampleID); - #else - // MSAA without sample shading, average out all samples. - val = vec4(0, 0, 0, 0); - for (int i = 0; i < MSAA_SAMPLES; i++) - val += texelFetch(samp0, coords, i); - val /= float(MSAA_SAMPLES); - #endif - - ivec4 src6 = ivec4(round(val * 63.f)); - ivec4 dst8; - dst8.r = (src6.r << 2) | (src6.g >> 4); - dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2); - dst8.b = ((src6.b & 0x3) << 6) | src6.a; - dst8.a = 255; - - ocol0 = float4(dst8) / 255.f; - } - )"; - - static const char DEPTH_RESOLVE_SHADER_SOURCE[] = R"( - SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; - layout(location = 0) in vec3 uv0; - layout(location = 0) out float ocol0; - - void main() - { - int layer = 0; - #if EFB_LAYERS > 1 - layer = int(uv0.z); - #endif - - // gl_FragCoord is in window coordinates, and we're rendering to - // the same rectangle in the resolve texture. 
- ivec3 coords = ivec3(gl_FragCoord.xy, layer); - - // Take the minimum of all depth samples. - ocol0 = texelFetch(samp0, coords, 0).r; - for (int i = 1; i < MSAA_SAMPLES; i++) - ocol0 = min(ocol0, texelFetch(samp0, coords, i).r); - } - )"; - - std::string header = g_shader_cache->GetUtilityShaderHeader(); - DestroyConversionShaders(); - - m_ps_rgb8_to_rgba6 = Util::CompileAndCreateFragmentShader(header + RGB8_TO_RGBA6_SHADER_SOURCE); - m_ps_rgba6_to_rgb8 = Util::CompileAndCreateFragmentShader(header + RGBA6_TO_RGB8_SHADER_SOURCE); - if (GetEFBSamples() != VK_SAMPLE_COUNT_1_BIT) - m_ps_depth_resolve = Util::CompileAndCreateFragmentShader(header + DEPTH_RESOLVE_SHADER_SOURCE); - - return (m_ps_rgba6_to_rgb8 != VK_NULL_HANDLE && m_ps_rgb8_to_rgba6 != VK_NULL_HANDLE && - (GetEFBSamples() == VK_SAMPLE_COUNT_1_BIT || m_ps_depth_resolve != VK_NULL_HANDLE)); -} - -void FramebufferManager::DestroyConversionShaders() -{ - auto DestroyShader = [this](VkShaderModule& shader) { - if (shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); - shader = VK_NULL_HANDLE; - } - }; - - DestroyShader(m_ps_rgb8_to_rgba6); - DestroyShader(m_ps_rgba6_to_rgb8); - DestroyShader(m_ps_depth_resolve); -} - -u32 FramebufferManager::PeekEFBColor(u32 x, u32 y) -{ - if (!m_color_readback_texture_valid && !PopulateColorReadbackTexture()) - return 0; - - u32 value; - m_color_readback_texture->ReadTexel(x, y, &value); - return value; -} - -bool FramebufferManager::PopulateColorReadbackTexture() -{ - // Can't be in our normal render pass. - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->OnCPUEFBAccess(); - - // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. 
- VkRect2D src_region = {{0, 0}, {GetEFBWidth(), GetEFBHeight()}}; - Texture2D* src_texture = m_efb_color_texture.get(); - if (GetEFBSamples() > 1) - src_texture = ResolveEFBColorTexture(src_region); - - if (GetEFBWidth() != EFB_WIDTH || GetEFBHeight() != EFB_HEIGHT) - { - // Transition EFB to shader read before drawing. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_color_copy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - m_copy_color_render_pass, g_shader_cache->GetScreenQuadVertexShader(), - VK_NULL_HANDLE, m_copy_color_shader); - - VkRect2D rect = {{0, 0}, {EFB_WIDTH, EFB_HEIGHT}}; - draw.BeginRenderPass(m_color_copy_framebuffer, rect); - draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetPointSampler()); - draw.SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - // Restore EFB to color attachment, since we're done with it. - if (src_texture == m_efb_color_texture.get()) - { - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - } - - // Use this as a source texture now. - src_texture = m_color_copy_texture.get(); - } - - // Copy from EFB or copy texture to staging texture. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - static_cast(m_color_readback_texture.get()) - ->CopyFromTexture(src_texture, m_color_readback_texture->GetConfig().GetRect(), 0, 0, - m_color_readback_texture->GetConfig().GetRect()); - - // Restore original layout if we used the EFB as a source. 
- if (src_texture == m_efb_color_texture.get()) - { - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - } - - // Wait until the copy is complete. - m_color_readback_texture->Flush(); - m_color_readback_texture_valid = true; - return true; -} - -float FramebufferManager::PeekEFBDepth(u32 x, u32 y) -{ - if (!m_depth_readback_texture_valid && !PopulateDepthReadbackTexture()) - return 0.0f; - - float value; - m_depth_readback_texture->ReadTexel(x, y, &value); - return value; -} - -bool FramebufferManager::PopulateDepthReadbackTexture() -{ - // Can't be in our normal render pass. - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->OnCPUEFBAccess(); - - // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. - VkRect2D src_region = {{0, 0}, {GetEFBWidth(), GetEFBHeight()}}; - Texture2D* src_texture = m_efb_depth_texture.get(); - if (GetEFBSamples() > 1) - { - // EFB depth resolves are written out as color textures - src_texture = ResolveEFBDepthTexture(src_region); - } - if (GetEFBWidth() != EFB_WIDTH || GetEFBHeight() != EFB_HEIGHT) - { - // Transition EFB to shader read before drawing. 
- src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_depth_copy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - m_copy_depth_render_pass, g_shader_cache->GetScreenQuadVertexShader(), - VK_NULL_HANDLE, m_copy_depth_shader); - - VkRect2D rect = {{0, 0}, {EFB_WIDTH, EFB_HEIGHT}}; - draw.BeginRenderPass(m_depth_copy_framebuffer, rect); - draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetPointSampler()); - draw.SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - // Restore EFB to depth attachment, since we're done with it. - if (src_texture == m_efb_depth_texture.get()) - { - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - } - - // Use this as a source texture now. - src_texture = m_depth_copy_texture.get(); - } - - // Copy from EFB or copy texture to staging texture. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - static_cast(m_depth_readback_texture.get()) - ->CopyFromTexture(src_texture, m_depth_readback_texture->GetConfig().GetRect(), 0, 0, - m_depth_readback_texture->GetConfig().GetRect()); - - // Restore original layout if we used the EFB as a source. - if (src_texture == m_efb_depth_texture.get()) - { - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - } - - // Wait until the copy is complete. 
- m_depth_readback_texture->Flush(); - m_depth_readback_texture_valid = true; - return true; -} - -void FramebufferManager::InvalidatePeekCache() -{ - m_color_readback_texture_valid = false; - m_depth_readback_texture_valid = false; -} - -bool FramebufferManager::CreateReadbackRenderPasses() -{ - m_copy_color_render_pass = g_object_cache->GetRenderPass( - EFB_COLOR_TEXTURE_FORMAT, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - m_copy_depth_render_pass = g_object_cache->GetRenderPass( - EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - if (m_copy_color_render_pass == VK_NULL_HANDLE || m_copy_depth_render_pass == VK_NULL_HANDLE) - return false; - - // Some devices don't support point sizes >1 (e.g. Adreno). - // If we can't use a point size above our maximum IR, use triangles instead. - // This means a 6x increase in the size of the vertices, though. - if (!g_vulkan_context->GetDeviceFeatures().largePoints || - g_vulkan_context->GetDeviceLimits().pointSizeGranularity > 1 || - g_vulkan_context->GetDeviceLimits().pointSizeRange[0] > 1 || - g_vulkan_context->GetDeviceLimits().pointSizeRange[1] < 16) - { - m_poke_primitive = PrimitiveType::TriangleStrip; - } - else - { - // Points should be okay. - m_poke_primitive = PrimitiveType::Points; - } - - return true; -} - -bool FramebufferManager::CompileReadbackShaders() -{ - std::string source; - - // TODO: Use input attachment here instead? - // TODO: MSAA resolve in shader. 
- static const char COPY_COLOR_SHADER_SOURCE[] = R"( - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - layout(location = 0) in vec3 uv0; - layout(location = 0) out vec4 ocol0; - void main() - { - ocol0 = texture(samp0, uv0); - } - )"; - - static const char COPY_DEPTH_SHADER_SOURCE[] = R"( - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - layout(location = 0) in vec3 uv0; - layout(location = 0) out float ocol0; - void main() - { - ocol0 = texture(samp0, uv0).r; - } - )"; - - source = g_shader_cache->GetUtilityShaderHeader() + COPY_COLOR_SHADER_SOURCE; - m_copy_color_shader = Util::CompileAndCreateFragmentShader(source); - - source = g_shader_cache->GetUtilityShaderHeader() + COPY_DEPTH_SHADER_SOURCE; - m_copy_depth_shader = Util::CompileAndCreateFragmentShader(source); - - return m_copy_color_shader != VK_NULL_HANDLE && m_copy_depth_shader != VK_NULL_HANDLE; -} - -void FramebufferManager::DestroyReadbackShaders() -{ - if (m_copy_color_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_color_shader, nullptr); - m_copy_color_shader = VK_NULL_HANDLE; - } - if (m_copy_depth_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_depth_shader, nullptr); - m_copy_depth_shader = VK_NULL_HANDLE; - } -} - -bool FramebufferManager::CreateReadbackTextures() -{ - m_color_copy_texture = - Texture2D::Create(EFB_WIDTH, EFB_HEIGHT, 1, 1, EFB_COLOR_TEXTURE_FORMAT, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - m_depth_copy_texture = - Texture2D::Create(EFB_WIDTH, EFB_HEIGHT, 1, 1, EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - if (!m_color_copy_texture || !m_depth_copy_texture) - { - 
ERROR_LOG(VIDEO, "Failed to create EFB copy textures"); - return false; - } - - TextureConfig readback_texture_config(EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, - AbstractTextureFormat::RGBA8, false); - m_color_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Mutable, readback_texture_config); - m_depth_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Mutable, readback_texture_config); - if (!m_color_readback_texture || !m_depth_readback_texture) - { - ERROR_LOG(VIDEO, "Failed to create EFB readback textures"); - return false; - } - - return true; -} - -void FramebufferManager::DestroyReadbackTextures() -{ - m_color_copy_texture.reset(); - m_color_readback_texture.reset(); - m_color_readback_texture_valid = false; - m_depth_copy_texture.reset(); - m_depth_readback_texture.reset(); - m_depth_readback_texture_valid = false; -} - -bool FramebufferManager::CreateReadbackFramebuffer() -{ - VkImageView framebuffer_attachment = m_color_copy_texture->GetView(); - VkFramebufferCreateInfo framebuffer_info = { - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkFramebufferCreateFlags flags - m_copy_color_render_pass, // VkRenderPass renderPass - 1, // uint32_t attachmentCount - &framebuffer_attachment, // const VkImageView* pAttachments - EFB_WIDTH, // uint32_t width - EFB_HEIGHT, // uint32_t height - 1 // uint32_t layers - }; - VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_color_copy_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return false; - } - - // Swap for depth - framebuffer_info.renderPass = m_copy_depth_render_pass; - framebuffer_attachment = m_depth_copy_texture->GetView(); - res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_depth_copy_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer 
failed: "); - return false; - } - - return true; -} - -void FramebufferManager::DestroyReadbackFramebuffer() -{ - if (m_color_copy_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_color_copy_framebuffer, nullptr); - m_color_copy_framebuffer = VK_NULL_HANDLE; - } - if (m_depth_copy_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_depth_copy_framebuffer, nullptr); - m_depth_copy_framebuffer = VK_NULL_HANDLE; - } -} - -void FramebufferManager::PokeEFBColor(u32 x, u32 y, u32 color) -{ - // Flush if we exceeded the number of vertices per batch. - if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) - FlushEFBPokes(); - - CreatePokeVertices(&m_color_poke_vertices, x, y, 0.0f, color); - - // Update the peek cache if it's valid, since we know the color of the pixel now. - if (m_color_readback_texture_valid) - m_color_readback_texture->WriteTexel(x, y, &color); -} - -void FramebufferManager::PokeEFBDepth(u32 x, u32 y, float depth) -{ - // Flush if we exceeded the number of vertices per batch. - if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) - FlushEFBPokes(); - - CreatePokeVertices(&m_depth_poke_vertices, x, y, depth, 0); - - // Update the peek cache if it's valid, since we know the color of the pixel now. - if (m_depth_readback_texture_valid) - m_depth_readback_texture->WriteTexel(x, y, &depth); -} - -void FramebufferManager::CreatePokeVertices(std::vector* destination_list, u32 x, - u32 y, float z, u32 color) -{ - if (m_poke_primitive == PrimitiveType::Points) - { - // GPU will expand the point to a quad. - float cs_x = float(x) * 2.0f / EFB_WIDTH - 1.0f; - float cs_y = float(y) * 2.0f / EFB_HEIGHT - 1.0f; - float point_size = GetEFBWidth() / static_cast(EFB_WIDTH); - destination_list->push_back({{cs_x, cs_y, z, point_size}, color}); - return; - } - - // Some devices don't support point sizes >1 (e.g. Adreno). 
- // Generate quad from the single point (clip-space coordinates). - float x1 = float(x) * 2.0f / EFB_WIDTH - 1.0f; - float y1 = float(y) * 2.0f / EFB_HEIGHT - 1.0f; - float x2 = float(x + 1) * 2.0f / EFB_WIDTH - 1.0f; - float y2 = float(y + 1) * 2.0f / EFB_HEIGHT - 1.0f; - destination_list->push_back({{x1, y1, z, 1.0f}, color}); - destination_list->push_back({{x2, y1, z, 1.0f}, color}); - destination_list->push_back({{x1, y2, z, 1.0f}, color}); - destination_list->push_back({{x1, y2, z, 1.0f}, color}); - destination_list->push_back({{x2, y1, z, 1.0f}, color}); - destination_list->push_back({{x2, y2, z, 1.0f}, color}); -} - -void FramebufferManager::FlushEFBPokes() -{ - if (!m_color_poke_vertices.empty()) - { - DrawPokeVertices(m_color_poke_vertices.data(), m_color_poke_vertices.size(), true, false); - m_color_poke_vertices.clear(); - } - - if (!m_depth_poke_vertices.empty()) - { - DrawPokeVertices(m_depth_poke_vertices.data(), m_depth_poke_vertices.size(), false, true); - m_depth_poke_vertices.clear(); - } -} - -void FramebufferManager::DrawPokeVertices(const EFBPokeVertex* vertices, size_t vertex_count, - bool write_color, bool write_depth) -{ - // Relatively simple since we don't have any bindings. - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - - // We don't use the utility shader in order to keep the vertices compact. - PipelineInfo pipeline_info = {}; - pipeline_info.vertex_format = m_poke_vertex_format.get(); - pipeline_info.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD); - pipeline_info.vs = m_poke_vertex_shader; - pipeline_info.gs = (GetEFBLayers() > 1) ? 
m_poke_geometry_shader : VK_NULL_HANDLE; - pipeline_info.ps = m_poke_fragment_shader; - pipeline_info.render_pass = m_efb_load_render_pass; - pipeline_info.rasterization_state.hex = RenderState::GetNoCullRasterizationState().hex; - pipeline_info.rasterization_state.primitive = m_poke_primitive; - pipeline_info.multisampling_state.hex = GetEFBMultisamplingState().hex; - pipeline_info.depth_state.hex = RenderState::GetNoDepthTestingDepthStencilState().hex; - pipeline_info.blend_state.hex = RenderState::GetNoBlendingBlendState().hex; - pipeline_info.blend_state.colorupdate = write_color; - pipeline_info.blend_state.alphaupdate = write_color; - if (write_depth) - { - pipeline_info.depth_state.testenable = true; - pipeline_info.depth_state.updateenable = true; - pipeline_info.depth_state.func = ZMode::ALWAYS; - } - - VkPipeline pipeline = g_shader_cache->GetPipeline(pipeline_info); - if (pipeline == VK_NULL_HANDLE) - { - PanicAlert("Failed to get pipeline for EFB poke draw"); - return; - } - - // Populate vertex buffer. - size_t vertices_size = sizeof(EFBPokeVertex) * m_color_poke_vertices.size(); - if (!m_poke_vertex_stream_buffer->ReserveMemory(vertices_size, sizeof(EfbPokeData), true, true, - false)) - { - // Kick a command buffer first. - WARN_LOG(VIDEO, "Kicking command buffer due to no EFB poke space."); - Util::ExecuteCurrentCommandsAndRestoreState(false); - command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - - if (!m_poke_vertex_stream_buffer->ReserveMemory(vertices_size, sizeof(EfbPokeData), true, true, - false)) - { - PanicAlert("Failed to get space for EFB poke vertices"); - return; - } - } - VkBuffer vb_buffer = m_poke_vertex_stream_buffer->GetBuffer(); - VkDeviceSize vb_offset = m_poke_vertex_stream_buffer->GetCurrentOffset(); - memcpy(m_poke_vertex_stream_buffer->GetCurrentHostPointer(), vertices, vertices_size); - m_poke_vertex_stream_buffer->CommitMemory(vertices_size); - - // Set up state. 
- StateTracker::GetInstance()->EndClearRenderPass(); - StateTracker::GetInstance()->BeginRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - Util::SetViewportAndScissor(command_buffer, 0, 0, GetEFBWidth(), GetEFBHeight()); - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - vkCmdBindVertexBuffers(command_buffer, 0, 1, &vb_buffer, &vb_offset); - vkCmdDraw(command_buffer, static_cast(vertex_count), 1, 0, 0); -} - -void FramebufferManager::CreatePokeVertexFormat() -{ - PortableVertexDeclaration vtx_decl = {}; - vtx_decl.position.enable = true; - vtx_decl.position.type = VAR_FLOAT; - vtx_decl.position.components = 4; - vtx_decl.position.integer = false; - vtx_decl.position.offset = offsetof(EFBPokeVertex, position); - vtx_decl.colors[0].enable = true; - vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; - vtx_decl.colors[0].components = 4; - vtx_decl.colors[0].integer = false; - vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color); - vtx_decl.stride = sizeof(EFBPokeVertex); - - m_poke_vertex_format = std::make_unique(vtx_decl); -} - -bool FramebufferManager::CreatePokeVertexBuffer() -{ - m_poke_vertex_stream_buffer = StreamBuffer::Create( - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, POKE_VERTEX_BUFFER_SIZE, POKE_VERTEX_BUFFER_SIZE); - if (!m_poke_vertex_stream_buffer) - { - ERROR_LOG(VIDEO, "Failed to create EFB poke vertex buffer"); - return false; - } - - return true; -} - -void FramebufferManager::DestroyPokeVertexBuffer() -{ - m_poke_vertex_stream_buffer.reset(); -} - -bool FramebufferManager::CompilePokeShaders() -{ - static const char POKE_VERTEX_SHADER_SOURCE[] = R"( - layout(location = 0) in vec4 ipos; - layout(location = 5) in vec4 icol0; - - layout(location = 0) out vec4 col0; - - void main() - { - gl_Position = vec4(ipos.xyz, 1.0f); - #if USE_POINT_SIZE - gl_PointSize = ipos.w; - #endif - col0 = icol0; - } - - )"; - - static const char POKE_GEOMETRY_SHADER_SOURCE[] = R"( - layout(triangles) in; - layout(triangle_strip, 
max_vertices = EFB_LAYERS * 3) out; - - VARYING_LOCATION(0) in VertexData - { - vec4 col0; - } in_data[]; - - VARYING_LOCATION(0) out VertexData - { - vec4 col0; - } out_data; - - void main() - { - for (int j = 0; j < EFB_LAYERS; j++) - { - for (int i = 0; i < 3; i++) - { - gl_Layer = j; - gl_Position = gl_in[i].gl_Position; - out_data.col0 = in_data[i].col0; - EmitVertex(); - } - EndPrimitive(); - } - } - )"; - - static const char POKE_PIXEL_SHADER_SOURCE[] = R"( - layout(location = 0) in vec4 col0; - layout(location = 0) out vec4 ocol0; - void main() - { - ocol0 = col0; - } - )"; - - std::string source = g_shader_cache->GetUtilityShaderHeader(); - if (m_poke_primitive == PrimitiveType::Points) - source += "#define USE_POINT_SIZE 1\n"; - source += POKE_VERTEX_SHADER_SOURCE; - m_poke_vertex_shader = Util::CompileAndCreateVertexShader(source); - if (m_poke_vertex_shader == VK_NULL_HANDLE) - return false; - - if (g_vulkan_context->SupportsGeometryShaders()) - { - source = g_shader_cache->GetUtilityShaderHeader() + POKE_GEOMETRY_SHADER_SOURCE; - m_poke_geometry_shader = Util::CompileAndCreateGeometryShader(source); - if (m_poke_geometry_shader == VK_NULL_HANDLE) - return false; - } - - source = g_shader_cache->GetUtilityShaderHeader() + POKE_PIXEL_SHADER_SOURCE; - m_poke_fragment_shader = Util::CompileAndCreateFragmentShader(source); - if (m_poke_fragment_shader == VK_NULL_HANDLE) - return false; - - return true; -} - -void FramebufferManager::DestroyPokeShaders() -{ - if (m_poke_vertex_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_vertex_shader, nullptr); - m_poke_vertex_shader = VK_NULL_HANDLE; - } - if (m_poke_geometry_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_geometry_shader, nullptr); - m_poke_geometry_shader = VK_NULL_HANDLE; - } - if (m_poke_fragment_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_fragment_shader, nullptr); 
- m_poke_vertex_shader = VK_NULL_HANDLE; - } -} - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/FramebufferManager.h b/Source/Core/VideoBackends/Vulkan/FramebufferManager.h deleted file mode 100644 index f037564e1c..0000000000 --- a/Source/Core/VideoBackends/Vulkan/FramebufferManager.h +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/TextureCache.h" -#include "VideoCommon/FramebufferManagerBase.h" -#include "VideoCommon/RenderState.h" - -class AbstractStagingTexture; - -namespace Vulkan -{ -class StateTracker; -class StreamBuffer; -class Texture2D; -class VertexFormat; -class VKTexture; -class XFBSource; - -class FramebufferManager : public FramebufferManagerBase -{ -public: - FramebufferManager(); - ~FramebufferManager(); - - static FramebufferManager* GetInstance(); - - bool Initialize(); - - VkRenderPass GetEFBLoadRenderPass() const { return m_efb_load_render_pass; } - VkRenderPass GetEFBClearRenderPass() const { return m_efb_clear_render_pass; } - Texture2D* GetEFBColorTexture() const { return m_efb_color_texture.get(); } - Texture2D* GetEFBDepthTexture() const { return m_efb_depth_texture.get(); } - VkFramebuffer GetEFBFramebuffer() const { return m_efb_framebuffer; } - u32 GetEFBWidth() const; - u32 GetEFBHeight() const; - u32 GetEFBLayers() const; - VkSampleCountFlagBits GetEFBSamples() const; - MultisamplingState GetEFBMultisamplingState() const; - - void RecreateEFBFramebuffer(); - - // Recompile shaders, use when MSAA mode changes. - void RecompileShaders(); - - // Reinterpret pixel format of EFB color texture. - // Assumes no render pass is currently in progress. - // Swaps EFB framebuffers, so re-bind afterwards. 
- void ReinterpretPixelData(int convtype); - - // This render pass can be used for other readback operations. - VkRenderPass GetColorCopyForReadbackRenderPass() const { return m_copy_color_render_pass; } - // Resolve color/depth textures to a non-msaa texture, and return it. - Texture2D* ResolveEFBColorTexture(const VkRect2D& region); - Texture2D* ResolveEFBDepthTexture(const VkRect2D& region); - - // Returns the texture that the EFB color texture is resolved to when multisampling is enabled. - // Ensure ResolveEFBColorTexture is called before this method. - Texture2D* GetResolvedEFBColorTexture() const { return m_efb_resolve_color_texture.get(); } - // Reads a framebuffer value back from the GPU. This may block if the cache is not current. - u32 PeekEFBColor(u32 x, u32 y); - float PeekEFBDepth(u32 x, u32 y); - void InvalidatePeekCache(); - - // Writes a value to the framebuffer. This will never block, and writes will be batched. - void PokeEFBColor(u32 x, u32 y, u32 color); - void PokeEFBDepth(u32 x, u32 y, float depth); - void FlushEFBPokes(); - -private: - struct EFBPokeVertex - { - float position[4]; - u32 color; - }; - - bool CreateEFBRenderPasses(); - bool CreateEFBFramebuffer(); - void DestroyEFBFramebuffer(); - - bool CompileConversionShaders(); - void DestroyConversionShaders(); - - bool CreateReadbackRenderPasses(); - bool CompileReadbackShaders(); - void DestroyReadbackShaders(); - bool CreateReadbackTextures(); - void DestroyReadbackTextures(); - bool CreateReadbackFramebuffer(); - void DestroyReadbackFramebuffer(); - - void CreatePokeVertexFormat(); - bool CreatePokeVertexBuffer(); - void DestroyPokeVertexBuffer(); - bool CompilePokeShaders(); - void DestroyPokeShaders(); - - bool PopulateColorReadbackTexture(); - bool PopulateDepthReadbackTexture(); - - void CreatePokeVertices(std::vector* destination_list, u32 x, u32 y, float z, - u32 color); - - void DrawPokeVertices(const EFBPokeVertex* vertices, size_t vertex_count, bool write_color, - bool 
write_depth); - - VkRenderPass m_efb_load_render_pass = VK_NULL_HANDLE; - VkRenderPass m_efb_clear_render_pass = VK_NULL_HANDLE; - VkRenderPass m_depth_resolve_render_pass = VK_NULL_HANDLE; - - std::unique_ptr m_efb_color_texture; - std::unique_ptr m_efb_convert_color_texture; - std::unique_ptr m_efb_depth_texture; - std::unique_ptr m_efb_resolve_color_texture; - std::unique_ptr m_efb_resolve_depth_texture; - VkFramebuffer m_efb_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_efb_convert_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_depth_resolve_framebuffer = VK_NULL_HANDLE; - - // Format conversion shaders - VkShaderModule m_ps_rgb8_to_rgba6 = VK_NULL_HANDLE; - VkShaderModule m_ps_rgba6_to_rgb8 = VK_NULL_HANDLE; - VkShaderModule m_ps_depth_resolve = VK_NULL_HANDLE; - - // EFB readback texture - std::unique_ptr m_color_copy_texture; - std::unique_ptr m_depth_copy_texture; - VkFramebuffer m_color_copy_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_depth_copy_framebuffer = VK_NULL_HANDLE; - - // CPU-side EFB readback texture - std::unique_ptr m_color_readback_texture; - std::unique_ptr m_depth_readback_texture; - bool m_color_readback_texture_valid = false; - bool m_depth_readback_texture_valid = false; - - // EFB poke drawing setup - std::unique_ptr m_poke_vertex_format; - std::unique_ptr m_poke_vertex_stream_buffer; - std::vector m_color_poke_vertices; - std::vector m_depth_poke_vertices; - PrimitiveType m_poke_primitive = PrimitiveType::TriangleStrip; - - VkRenderPass m_copy_color_render_pass = VK_NULL_HANDLE; - VkRenderPass m_copy_depth_render_pass = VK_NULL_HANDLE; - VkShaderModule m_copy_color_shader = VK_NULL_HANDLE; - VkShaderModule m_copy_depth_shader = VK_NULL_HANDLE; - - VkShaderModule m_poke_vertex_shader = VK_NULL_HANDLE; - VkShaderModule m_poke_geometry_shader = VK_NULL_HANDLE; - VkShaderModule m_poke_fragment_shader = VK_NULL_HANDLE; -}; - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp 
b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp index 1de68b8349..59690118aa 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp @@ -19,7 +19,7 @@ #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/ShaderCompiler.h" #include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" #include "VideoCommon/Statistics.h" @@ -28,16 +28,16 @@ namespace Vulkan { std::unique_ptr g_object_cache; -ObjectCache::ObjectCache() -{ -} +ObjectCache::ObjectCache() = default; ObjectCache::~ObjectCache() { + DestroyPipelineCache(); DestroySamplers(); DestroyPipelineLayouts(); DestroyDescriptorSetLayouts(); DestroyRenderPassCache(); + m_dummy_texture.reset(); } bool ObjectCache::Initialize() @@ -48,44 +48,37 @@ bool ObjectCache::Initialize() if (!CreatePipelineLayouts()) return false; - if (!CreateUtilityShaderVertexFormat()) - return false; - if (!CreateStaticSamplers()) return false; m_texture_upload_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE, - MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE); + StreamBuffer::Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_UPLOAD_BUFFER_SIZE); if (!m_texture_upload_buffer) { PanicAlert("Failed to create texture upload buffer"); return false; } - m_utility_shader_vertex_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1024 * 1024, 4 * 1024 * 1024); - m_utility_shader_uniform_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 1024, 4 * 1024 * 1024); - if (!m_utility_shader_vertex_buffer || !m_utility_shader_uniform_buffer) - return false; - - m_dummy_texture = Texture2D::Create(1, 1, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - 
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); - m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkClearColorValue clear_color = {}; - VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - m_dummy_texture->GetImage(), m_dummy_texture->GetLayout(), &clear_color, 1, - &clear_range); - m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + if (g_ActiveConfig.bShaderCache) + { + if (!LoadPipelineCache()) + return false; + } + else + { + if (!CreatePipelineCache()) + return false; + } return true; } +void ObjectCache::Shutdown() +{ + if (g_ActiveConfig.bShaderCache && m_pipeline_cache != VK_NULL_HANDLE) + SavePipelineCache(); +} + void ObjectCache::ClearSamplerCache() { for (const auto& it : m_sampler_cache) @@ -115,13 +108,9 @@ void ObjectCache::DestroySamplers() bool ObjectCache::CreateDescriptorSetLayouts() { - static const VkDescriptorSetLayoutBinding single_ubo_set_bindings[] = { - 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_FRAGMENT_BIT}; - // The geometry shader buffer must be last in this binding set, as we don't include it // if geometry shaders are not supported by the device. See the decrement below. 
- static const VkDescriptorSetLayoutBinding per_stage_ubo_set_bindings[] = { + static const VkDescriptorSetLayoutBinding standard_ubo_bindings[] = { {UBO_DESCRIPTOR_SET_BINDING_PS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, {UBO_DESCRIPTOR_SET_BINDING_VS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, @@ -129,45 +118,56 @@ bool ObjectCache::CreateDescriptorSetLayouts() {UBO_DESCRIPTOR_SET_BINDING_GS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_GEOMETRY_BIT}}; - static const VkDescriptorSetLayoutBinding sampler_set_bindings[] = { + static const VkDescriptorSetLayoutBinding standard_sampler_bindings[] = { {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, static_cast(NUM_PIXEL_SHADER_SAMPLERS), VK_SHADER_STAGE_FRAGMENT_BIT}}; - static const VkDescriptorSetLayoutBinding ssbo_set_bindings[] = { + static const VkDescriptorSetLayoutBinding standard_ssbo_bindings[] = { {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}}; - static const VkDescriptorSetLayoutBinding texel_buffer_set_bindings[] = { - {0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + static const VkDescriptorSetLayoutBinding utility_ubo_bindings[] = { + 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_FRAGMENT_BIT}; + + // Utility samplers aren't dynamically indexed. 
+ static const VkDescriptorSetLayoutBinding utility_sampler_bindings[] = { + {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {5, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {6, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {7, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {8, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, }; static const VkDescriptorSetLayoutBinding compute_set_bindings[] = { {0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_COMPUTE_BIT}, {1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, {2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {5, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {6, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT}, + {3, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, + {4, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, + {5, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT}, }; VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = { {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(single_ubo_set_bindings)), single_ubo_set_bindings}, + 
static_cast(ArraySize(standard_ubo_bindings)), standard_ubo_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(per_stage_ubo_set_bindings)), per_stage_ubo_set_bindings}, + static_cast(ArraySize(standard_sampler_bindings)), standard_sampler_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(sampler_set_bindings)), sampler_set_bindings}, + static_cast(ArraySize(standard_ssbo_bindings)), standard_ssbo_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(ssbo_set_bindings)), ssbo_set_bindings}, + static_cast(ArraySize(utility_ubo_bindings)), utility_ubo_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings}, + static_cast(ArraySize(utility_sampler_bindings)), utility_sampler_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(ArraySize(compute_set_bindings)), compute_set_bindings}}; // Don't set the GS bit if geometry shaders aren't available. - if (!g_vulkan_context->SupportsGeometryShaders()) - create_infos[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS].bindingCount--; + if (!g_ActiveConfig.backend_info.bSupportsGeometryShaders) + create_infos[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS].bindingCount--; for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++) { @@ -199,22 +199,15 @@ bool ObjectCache::CreatePipelineLayouts() // Descriptor sets for each pipeline layout. // In the standard set, the SSBO must be the last descriptor, as we do not include it // when fragment stores and atomics are not supported by the device. 
- VkDescriptorSetLayout standard_sets[] = { - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS]}; - VkDescriptorSetLayout texture_conversion_sets[] = { - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS]}; - VkDescriptorSetLayout utility_sets[] = { - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS]}; - VkDescriptorSetLayout compute_sets[] = {m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]}; - VkPushConstantRange push_constant_range = { - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE}; - VkPushConstantRange compute_push_constant_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0, - PUSH_CONSTANT_BUFFER_SIZE}; + const VkDescriptorSetLayout standard_sets[] = { + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS]}; + const VkDescriptorSetLayout utility_sets[] = { + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER], + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS]}; + const VkDescriptorSetLayout compute_sets[] = { + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]}; // Info for each pipeline layout VkPipelineLayoutCreateInfo pipeline_layout_info[NUM_PIPELINE_LAYOUTS] = { @@ -222,25 +215,16 @@ bool ObjectCache::CreatePipelineLayouts() {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(ArraySize(standard_sets)), standard_sets, 0, nullptr}, - // Push Constant - 
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(standard_sets)), standard_sets, 1, &push_constant_range}, - - // Texture Conversion - {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(texture_conversion_sets)), texture_conversion_sets, 1, - &push_constant_range}, - - // Texture Conversion + // Utility {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(ArraySize(utility_sets)), utility_sets, 0, nullptr}, // Compute {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(compute_sets)), compute_sets, 1, &compute_push_constant_range}}; + static_cast(ArraySize(compute_sets)), compute_sets, 0, nullptr}}; // If bounding box is unsupported, don't bother with the SSBO descriptor set. - if (!g_vulkan_context->SupportsBoundingBox()) + if (!g_ActiveConfig.backend_info.bSupportsBBox) pipeline_layout_info[PIPELINE_LAYOUT_STANDARD].setLayoutCount--; for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++) @@ -265,30 +249,6 @@ void ObjectCache::DestroyPipelineLayouts() } } -bool ObjectCache::CreateUtilityShaderVertexFormat() -{ - PortableVertexDeclaration vtx_decl = {}; - vtx_decl.position.enable = true; - vtx_decl.position.type = VAR_FLOAT; - vtx_decl.position.components = 4; - vtx_decl.position.integer = false; - vtx_decl.position.offset = offsetof(UtilityShaderVertex, Position); - vtx_decl.texcoords[0].enable = true; - vtx_decl.texcoords[0].type = VAR_FLOAT; - vtx_decl.texcoords[0].components = 4; - vtx_decl.texcoords[0].integer = false; - vtx_decl.texcoords[0].offset = offsetof(UtilityShaderVertex, TexCoord); - vtx_decl.colors[0].enable = true; - vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; - vtx_decl.colors[0].components = 4; - vtx_decl.colors[0].integer = false; - vtx_decl.colors[0].offset = offsetof(UtilityShaderVertex, Color); - vtx_decl.stride = sizeof(UtilityShaderVertex); - - m_utility_shader_vertex_format = std::make_unique(vtx_decl); - return true; -} - 
bool ObjectCache::CreateStaticSamplers() { VkSamplerCreateInfo create_info = { @@ -472,4 +432,199 @@ void ObjectCache::DestroyRenderPassCache() vkDestroyRenderPass(g_vulkan_context->GetDevice(), it.second, nullptr); m_render_pass_cache.clear(); } + +class PipelineCacheReadCallback : public LinearDiskCacheReader +{ +public: + PipelineCacheReadCallback(std::vector* data) : m_data(data) {} + void Read(const u32& key, const u8* value, u32 value_size) override + { + m_data->resize(value_size); + if (value_size > 0) + memcpy(m_data->data(), value, value_size); + } + +private: + std::vector* m_data; +}; + +class PipelineCacheReadIgnoreCallback : public LinearDiskCacheReader +{ +public: + void Read(const u32& key, const u8* value, u32 value_size) override {} +}; + +bool ObjectCache::CreatePipelineCache() +{ + // Vulkan pipeline caches can be shared between games for shader compile time reduction. + // This assumes that drivers don't create all pipelines in the cache on load time, only + // when a lookup occurs that matches a pipeline (or pipeline data) in the cache. + m_pipeline_cache_filename = GetDiskShaderCacheFileName(APIType::Vulkan, "Pipeline", false, true); + + VkPipelineCacheCreateInfo info = { + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineCacheCreateFlags flags + 0, // size_t initialDataSize + nullptr // const void* pInitialData + }; + + VkResult res = + vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); + if (res == VK_SUCCESS) + return true; + + LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed: "); + return false; } + +bool ObjectCache::LoadPipelineCache() +{ + // We have to keep the pipeline cache file name around since when we save it + // we delete the old one, by which time the game's unique ID is already cleared. 
+ m_pipeline_cache_filename = GetDiskShaderCacheFileName(APIType::Vulkan, "Pipeline", false, true); + + std::vector disk_data; + LinearDiskCache disk_cache; + PipelineCacheReadCallback read_callback(&disk_data); + if (disk_cache.OpenAndRead(m_pipeline_cache_filename, read_callback) != 1) + disk_data.clear(); + + if (!disk_data.empty() && !ValidatePipelineCache(disk_data.data(), disk_data.size())) + { + // Don't use this data. In fact, we should delete it to prevent it from being used next time. + File::Delete(m_pipeline_cache_filename); + return CreatePipelineCache(); + } + + VkPipelineCacheCreateInfo info = { + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineCacheCreateFlags flags + disk_data.size(), // size_t initialDataSize + disk_data.data() // const void* pInitialData + }; + + VkResult res = + vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); + if (res == VK_SUCCESS) + return true; + + // Failed to create pipeline cache, try with it empty. + LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed, trying empty cache: "); + return CreatePipelineCache(); +} + +// Based on Vulkan 1.0 specification, +// Table 9.1. Layout for pipeline cache header version VK_PIPELINE_CACHE_HEADER_VERSION_ONE +// NOTE: This data is assumed to be in little-endian format. 
+#pragma pack(push, 4) +struct VK_PIPELINE_CACHE_HEADER +{ + u32 header_length; + u32 header_version; + u32 vendor_id; + u32 device_id; + u8 uuid[VK_UUID_SIZE]; +}; +#pragma pack(pop) +static_assert(std::is_trivially_copyable::value, + "VK_PIPELINE_CACHE_HEADER must be trivially copyable"); + +bool ObjectCache::ValidatePipelineCache(const u8* data, size_t data_length) +{ + if (data_length < sizeof(VK_PIPELINE_CACHE_HEADER)) + { + ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header"); + return false; + } + + VK_PIPELINE_CACHE_HEADER header; + std::memcpy(&header, data, sizeof(header)); + if (header.header_length < sizeof(VK_PIPELINE_CACHE_HEADER)) + { + ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header length"); + return false; + } + + if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) + { + ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header version"); + return false; + } + + if (header.vendor_id != g_vulkan_context->GetDeviceProperties().vendorID) + { + ERROR_LOG(VIDEO, + "Pipeline cache failed validation: Incorrect vendor ID (file: 0x%X, device: 0x%X)", + header.vendor_id, g_vulkan_context->GetDeviceProperties().vendorID); + return false; + } + + if (header.device_id != g_vulkan_context->GetDeviceProperties().deviceID) + { + ERROR_LOG(VIDEO, + "Pipeline cache failed validation: Incorrect device ID (file: 0x%X, device: 0x%X)", + header.device_id, g_vulkan_context->GetDeviceProperties().deviceID); + return false; + } + + if (std::memcmp(header.uuid, g_vulkan_context->GetDeviceProperties().pipelineCacheUUID, + VK_UUID_SIZE) != 0) + { + ERROR_LOG(VIDEO, "Pipeline cache failed validation: Incorrect UUID"); + return false; + } + + return true; +} + +void ObjectCache::DestroyPipelineCache() +{ + vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr); + m_pipeline_cache = VK_NULL_HANDLE; +} + +void ObjectCache::SavePipelineCache() +{ + size_t data_size; + VkResult res = + 
vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); + return; + } + + std::vector data(data_size); + res = vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, + data.data()); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); + return; + } + + // Delete the old cache and re-create. + File::Delete(m_pipeline_cache_filename); + + // We write a single key of 1, with the entire pipeline cache data. + // Not ideal, but our disk cache class does not support just writing a single blob + // of data without specifying a key. + LinearDiskCache disk_cache; + PipelineCacheReadIgnoreCallback callback; + disk_cache.OpenAndRead(m_pipeline_cache_filename, callback); + disk_cache.Append(1, data.data(), static_cast(data.size())); + disk_cache.Close(); +} + +void ObjectCache::ReloadPipelineCache() +{ + SavePipelineCache(); + + if (g_ActiveConfig.bShaderCache) + LoadPipelineCache(); + else + CreatePipelineCache(); +} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.h b/Source/Core/VideoBackends/Vulkan/ObjectCache.h index c3502f7c66..794c52c0f0 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.h +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.h @@ -16,7 +16,6 @@ #include "Common/LinearDiskCache.h" #include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/Texture2D.h" #include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/PixelShaderGen.h" @@ -27,6 +26,7 @@ namespace Vulkan { class CommandBufferManager; class VertexFormat; +class VKTexture; class StreamBuffer; class ObjectCache @@ -35,29 +35,23 @@ public: ObjectCache(); ~ObjectCache(); + // Perform at startup, create descriptor layouts, compiles all static shaders. + bool Initialize(); + void Shutdown(); + // Descriptor set layout accessor. 
Used for allocating descriptor sets. VkDescriptorSetLayout GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT layout) const { return m_descriptor_set_layouts[layout]; } + // Pipeline layout accessor. Used to fill in required field in PipelineInfo. VkPipelineLayout GetPipelineLayout(PIPELINE_LAYOUT layout) const { return m_pipeline_layouts[layout]; } - // Shared utility shader resources - VertexFormat* GetUtilityShaderVertexFormat() const - { - return m_utility_shader_vertex_format.get(); - } - StreamBuffer* GetUtilityShaderVertexBuffer() const - { - return m_utility_shader_vertex_buffer.get(); - } - StreamBuffer* GetUtilityShaderUniformBuffer() const - { - return m_utility_shader_uniform_buffer.get(); - } + + // Staging buffer for textures. StreamBuffer* GetTextureUploadBuffer() const { return m_texture_upload_buffer.get(); } // Static samplers @@ -65,36 +59,39 @@ public: VkSampler GetLinearSampler() const { return m_linear_sampler; } VkSampler GetSampler(const SamplerState& info); - // Dummy image for samplers that are unbound - Texture2D* GetDummyImage() const { return m_dummy_texture.get(); } - VkImageView GetDummyImageView() const { return m_dummy_texture->GetView(); } // Render pass cache. VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, u32 multisamples, VkAttachmentLoadOp load_op); - // Perform at startup, create descriptor layouts, compiles all static shaders. - bool Initialize(); + // Pipeline cache. Used when creating pipelines for drivers to store compiled programs. + VkPipelineCache GetPipelineCache() const { return m_pipeline_cache; } // Clear sampler cache, use when anisotropy mode changes // WARNING: Ensure none of the objects from here are in use when calling void ClearSamplerCache(); + // Saves the pipeline cache to disk. Call when shutting down. + void SavePipelineCache(); + + // Reload pipeline cache. Call when host config changes. 
+ void ReloadPipelineCache(); + private: bool CreateDescriptorSetLayouts(); void DestroyDescriptorSetLayouts(); bool CreatePipelineLayouts(); void DestroyPipelineLayouts(); - bool CreateUtilityShaderVertexFormat(); bool CreateStaticSamplers(); void DestroySamplers(); void DestroyRenderPassCache(); + bool CreatePipelineCache(); + bool LoadPipelineCache(); + bool ValidatePipelineCache(const u8* data, size_t data_length); + void DestroyPipelineCache(); std::array m_descriptor_set_layouts = {}; std::array m_pipeline_layouts = {}; - std::unique_ptr m_utility_shader_vertex_format; - std::unique_ptr m_utility_shader_vertex_buffer; - std::unique_ptr m_utility_shader_uniform_buffer; std::unique_ptr m_texture_upload_buffer; VkSampler m_point_sampler = VK_NULL_HANDLE; @@ -103,11 +100,15 @@ private: std::map m_sampler_cache; // Dummy image for samplers that are unbound - std::unique_ptr m_dummy_texture; + std::unique_ptr m_dummy_texture; // Render pass cache using RenderPassCacheKey = std::tuple; std::map m_render_pass_cache; + + // pipeline cache + VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; + std::string m_pipeline_cache_filename; }; extern std::unique_ptr g_object_cache; diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp index 25334e3eb4..3a84f2e571 100644 --- a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp @@ -13,20 +13,18 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StagingBuffer.h" #include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan { -PerfQuery::PerfQuery() -{ -} +PerfQuery::PerfQuery() = default; PerfQuery::~PerfQuery() { - g_command_buffer_mgr->RemoveFencePointCallback(this); + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); if 
(m_query_pool != VK_NULL_HANDLE) vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr); @@ -51,11 +49,8 @@ bool PerfQuery::Initialize() return false; } - g_command_buffer_mgr->AddFencePointCallback( - this, - std::bind(&PerfQuery::OnCommandBufferQueued, this, std::placeholders::_1, - std::placeholders::_2), - std::bind(&PerfQuery::OnCommandBufferExecuted, this, std::placeholders::_1)); + g_command_buffer_mgr->AddFenceSignaledCallback( + this, std::bind(&PerfQuery::OnFenceSignaled, this, std::placeholders::_1)); return true; } @@ -92,9 +87,6 @@ void PerfQuery::EnableQuery(PerfQueryGroup type) // TODO: Is this needed? StateTracker::GetInstance()->BeginRenderPass(); vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index, flags); - - // Prevent background command buffer submission while the query is active. - StateTracker::GetInstance()->SetBackgroundCommandBufferExecution(false); } } @@ -105,8 +97,6 @@ void PerfQuery::DisableQuery(PerfQueryGroup type) // DisableQuery should be called for each EnableQuery, so subtract one to get the previous one. u32 index = (m_query_read_pos + m_query_count - 1) % PERF_QUERY_BUFFER_SIZE; vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index); - StateTracker::GetInstance()->SetBackgroundCommandBufferExecution(true); - DEBUG_LOG(VIDEO, "end query %u", index); } } @@ -198,40 +188,42 @@ bool PerfQuery::CreateReadbackBuffer() return true; } -void PerfQuery::QueueCopyQueryResults(VkCommandBuffer command_buffer, VkFence fence, - u32 start_index, u32 query_count) +void PerfQuery::QueueCopyQueryResults(u32 start_index, u32 query_count) { DEBUG_LOG(VIDEO, "queue copy of queries %u-%u", start_index, start_index + query_count - 1); // Transition buffer for GPU write // TODO: Is this needed? 
- m_readback_buffer->PrepareForGPUWrite(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, + m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); // Copy from queries -> buffer - vkCmdCopyQueryPoolResults(command_buffer, m_query_pool, start_index, query_count, - m_readback_buffer->GetBuffer(), start_index * sizeof(PerfQueryDataType), - sizeof(PerfQueryDataType), VK_QUERY_RESULT_WAIT_BIT); + vkCmdCopyQueryPoolResults(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, + start_index, query_count, m_readback_buffer->GetBuffer(), + start_index * sizeof(PerfQueryDataType), sizeof(PerfQueryDataType), + VK_QUERY_RESULT_WAIT_BIT); // Prepare for host readback - m_readback_buffer->FlushGPUCache(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT); + m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); // Reset queries so they're ready to use again - vkCmdResetQueryPool(command_buffer, m_query_pool, start_index, query_count); + vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, start_index, + query_count); // Flag all queries as available, but with a fence that has to be completed first for (u32 i = 0; i < query_count; i++) { u32 index = start_index + i; ActiveQuery& entry = m_query_buffer[index]; - entry.pending_fence = fence; + entry.pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); entry.available = true; entry.active = false; } } -void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence) +void PerfQuery::FlushQueries() { // Flag all pending queries that aren't available as available after execution. 
u32 copy_start_index = 0; @@ -254,7 +246,7 @@ void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fe ASSERT(entry.active); if (index < copy_start_index) { - QueueCopyQueryResults(command_buffer, fence, copy_start_index, copy_count); + QueueCopyQueryResults(copy_start_index, copy_count); copy_start_index = index; copy_count = 0; } @@ -266,10 +258,10 @@ void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fe } if (copy_count > 0) - QueueCopyQueryResults(command_buffer, fence, copy_start_index, copy_count); + QueueCopyQueryResults(copy_start_index, copy_count); } -void PerfQuery::OnCommandBufferExecuted(VkFence fence) +void PerfQuery::OnFenceSignaled(VkFence fence) { // Need to save these since ProcessResults will modify them. u32 query_read_pos = m_query_read_pos; @@ -350,7 +342,7 @@ void PerfQuery::NonBlockingPartialFlush() // Submit a command buffer in the background if the front query is not bound to one. // Ideally this will complete before the buffer fills. if (m_query_buffer[m_query_read_pos].pending_fence == VK_NULL_HANDLE) - Util::ExecuteCurrentCommandsAndRestoreState(true, false); + Renderer::GetInstance()->ExecuteCommandBuffer(true, false); } void PerfQuery::BlockingPartialFlush() @@ -364,7 +356,7 @@ void PerfQuery::BlockingPartialFlush() { // This will callback OnCommandBufferQueued which will set the fence on the entry. // We wait for completion, which will also call OnCommandBufferExecuted, and clear the fence. 
- Util::ExecuteCurrentCommandsAndRestoreState(false, true); + Renderer::GetInstance()->ExecuteCommandBuffer(false, true); } else { @@ -373,4 +365,4 @@ void PerfQuery::BlockingPartialFlush() g_command_buffer_mgr->WaitForFence(entry.pending_fence); } } -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.h b/Source/Core/VideoBackends/Vulkan/PerfQuery.h index c5f5d13e90..47ccf22a66 100644 --- a/Source/Core/VideoBackends/Vulkan/PerfQuery.h +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.h @@ -24,6 +24,7 @@ public: static PerfQuery* GetInstance(); bool Initialize(); + void FlushQueries(); void EnableQuery(PerfQueryGroup type) override; void DisableQuery(PerfQueryGroup type) override; @@ -43,12 +44,11 @@ private: bool CreateQueryPool(); bool CreateReadbackBuffer(); - void QueueCopyQueryResults(VkCommandBuffer command_buffer, VkFence fence, u32 start_index, - u32 query_count); + void QueueCopyQueryResults(u32 start_index, u32 query_count); void ProcessResults(u32 start_index, u32 query_count); void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence); - void OnCommandBufferExecuted(VkFence fence); + void OnFenceSignaled(VkFence fence); void NonBlockingPartialFlush(); void BlockingPartialFlush(); diff --git a/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp b/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp deleted file mode 100644 index 57dbd3e391..0000000000 --- a/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright 2017 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/Vulkan/PostProcessing.h" -#include - -#include "Common/Assert.h" -#include "Common/StringUtil.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/VideoCommon.h" -#include "VideoCommon/VideoConfig.h" - -namespace Vulkan -{ -VulkanPostProcessing::~VulkanPostProcessing() -{ - if (m_default_fragment_shader != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_default_fragment_shader, nullptr); - if (m_fragment_shader != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_fragment_shader, nullptr); -} - -bool VulkanPostProcessing::Initialize() -{ - if (!CompileDefaultShader()) - return false; - - RecompileShader(); - return true; -} - -void VulkanPostProcessing::BlitFromTexture(const TargetRectangle& dst, const TargetRectangle& src, - const Texture2D* src_tex, int src_layer, - VkRenderPass render_pass) -{ - // If the source layer is negative we simply copy all available layers. - VkShaderModule geometry_shader = - src_layer < 0 ? g_shader_cache->GetPassthroughGeometryShader() : VK_NULL_HANDLE; - VkShaderModule fragment_shader = - m_fragment_shader != VK_NULL_HANDLE ? m_fragment_shader : m_default_fragment_shader; - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), render_pass, - g_shader_cache->GetPassthroughVertexShader(), geometry_shader, - fragment_shader); - - // Source is always bound. - draw.SetPSSampler(0, src_tex->GetView(), g_object_cache->GetLinearSampler()); - - // No need to allocate uniforms for the default shader. - // The config will also still contain the invalid shader at this point. 
- if (fragment_shader != m_default_fragment_shader) - { - size_t uniforms_size = CalculateUniformsSize(); - u8* uniforms = draw.AllocatePSUniforms(uniforms_size); - FillUniformBuffer(uniforms, src, src_tex, src_layer); - draw.CommitPSUniforms(uniforms_size); - } - - draw.DrawQuad(dst.left, dst.top, dst.GetWidth(), dst.GetHeight(), src.left, src.top, src_layer, - src.GetWidth(), src.GetHeight(), static_cast(src_tex->GetWidth()), - static_cast(src_tex->GetHeight())); -} - -struct BuiltinUniforms -{ - float resolution[4]; - float src_rect[4]; - u32 time; - u32 unused[3]; -}; - -size_t VulkanPostProcessing::CalculateUniformsSize() const -{ - // Allocate a vec4 for each uniform to simplify allocation. - return sizeof(BuiltinUniforms) + m_config.GetOptions().size() * sizeof(float) * 4; -} - -void VulkanPostProcessing::FillUniformBuffer(u8* buf, const TargetRectangle& src, - const Texture2D* src_tex, int src_layer) -{ - float src_width_float = static_cast(src_tex->GetWidth()); - float src_height_float = static_cast(src_tex->GetHeight()); - BuiltinUniforms builtin_uniforms = { - {src_width_float, src_height_float, 1.0f / src_width_float, 1.0f / src_height_float}, - {static_cast(src.left) / src_width_float, - static_cast(src.top) / src_height_float, - static_cast(src.GetWidth()) / src_width_float, - static_cast(src.GetHeight()) / src_height_float}, - static_cast(m_timer.GetTimeElapsed())}; - - std::memcpy(buf, &builtin_uniforms, sizeof(builtin_uniforms)); - buf += sizeof(builtin_uniforms); - - for (const auto& it : m_config.GetOptions()) - { - union - { - u32 as_bool[4]; - s32 as_int[4]; - float as_float[4]; - } value = {}; - - switch (it.second.m_type) - { - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_BOOL: - value.as_bool[0] = it.second.m_bool_value ? 
1 : 0; - break; - - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER: - ASSERT(it.second.m_integer_values.size() < 4); - std::copy_n(it.second.m_integer_values.begin(), it.second.m_integer_values.size(), - value.as_int); - break; - - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT: - ASSERT(it.second.m_float_values.size() < 4); - std::copy_n(it.second.m_float_values.begin(), it.second.m_float_values.size(), - value.as_float); - break; - } - - std::memcpy(buf, &value, sizeof(value)); - buf += sizeof(value); - } -} - -constexpr char DEFAULT_FRAGMENT_SHADER_SOURCE[] = R"( - layout(set = 1, binding = 0) uniform sampler2DArray samp0; - - layout(location = 0) in float3 uv0; - layout(location = 1) in float4 col0; - layout(location = 0) out float4 ocol0; - - void main() - { - ocol0 = float4(texture(samp0, uv0).xyz, 1.0); - } -)"; - -constexpr char POSTPROCESSING_SHADER_HEADER[] = R"( - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - SAMPLER_BINDING(1) uniform sampler2DArray samp1; - - layout(location = 0) in float3 uv0; - layout(location = 1) in float4 col0; - layout(location = 0) out float4 ocol0; - - // Interfacing functions - // The EFB may have a zero alpha value, which we don't want to write to the frame dump, so set it to one here. 
- float4 Sample() - { - return float4(texture(samp0, uv0).xyz, 1.0); - } - - float4 SampleLocation(float2 location) - { - return float4(texture(samp0, float3(location, uv0.z)).xyz, 1.0); - } - - float4 SampleLayer(int layer) - { - return float4(texture(samp0, float3(uv0.xy, float(layer))).xyz, 1.0); - } - - #define SampleOffset(offset) float4(textureOffset(samp0, uv0, offset).xyz, 1.0) - - float2 GetResolution() - { - return options.resolution.xy; - } - - float2 GetInvResolution() - { - return options.resolution.zw; - } - - float2 GetCoordinates() - { - return uv0.xy; - } - - uint GetTime() - { - return options.time; - } - - void SetOutput(float4 color) - { - ocol0 = color; - } - - #define GetOption(x) (options.x) - #define OptionEnabled(x) (options.x != 0) - - // Workaround because there is no getter function for src rect/layer. - float4 src_rect = options.src_rect; - int layer = int(uv0.z); -)"; - -void VulkanPostProcessing::UpdateConfig() -{ - if (m_config.GetShader() == g_ActiveConfig.sPostProcessingShader) - return; - - RecompileShader(); -} - -bool VulkanPostProcessing::CompileDefaultShader() -{ - m_default_fragment_shader = Util::CompileAndCreateFragmentShader(DEFAULT_FRAGMENT_SHADER_SOURCE); - if (m_default_fragment_shader == VK_NULL_HANDLE) - { - PanicAlert("Failed to compile default post-processing shader."); - return false; - } - - return true; -} - -bool VulkanPostProcessing::RecompileShader() -{ - // As a driver can return the same new module pointer when destroying a shader and re-compiling, - // we need to wipe out the pipeline cache, otherwise we risk using old pipelines with old shaders. - // We can't just clear a single pipeline, because we don't know which render pass is going to be - // used here either. 
- if (m_fragment_shader != VK_NULL_HANDLE) - { - g_command_buffer_mgr->WaitForGPUIdle(); - g_shader_cache->ClearPipelineCache(); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_fragment_shader, nullptr); - m_fragment_shader = VK_NULL_HANDLE; - } - - // If post-processing is disabled, just use the default shader. - // This way we don't need to allocate uniforms. - if (g_ActiveConfig.sPostProcessingShader.empty()) - return true; - - // Generate GLSL and compile the new shader. - std::string main_code = m_config.LoadShader(); - std::string options_code = GetGLSLUniformBlock(); - std::string code = options_code + POSTPROCESSING_SHADER_HEADER + main_code; - m_fragment_shader = Util::CompileAndCreateFragmentShader(code); - if (m_fragment_shader == VK_NULL_HANDLE) - { - // BlitFromTexture will use the default shader as a fallback. - PanicAlert("Failed to compile post-processing shader %s", m_config.GetShader().c_str()); - return false; - } - - return true; -} - -std::string VulkanPostProcessing::GetGLSLUniformBlock() const -{ - std::stringstream ss; - u32 unused_counter = 1; - ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; - - // Builtin uniforms - ss << " float4 resolution;\n"; - ss << " float4 src_rect;\n"; - ss << " uint time;\n"; - for (u32 i = 0; i < 3; i++) - ss << " uint unused" << unused_counter++ << ";\n\n"; - - // Custom options/uniforms - for (const auto& it : m_config.GetOptions()) - { - if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_BOOL) - { - ss << StringFromFormat(" int %s;\n", it.first.c_str()); - for (u32 i = 0; i < 3; i++) - ss << " int unused" << unused_counter++ << ";\n"; - } - else if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER) - { - u32 count = static_cast(it.second.m_integer_values.size()); - if (count == 1) - ss << StringFromFormat(" int %s;\n", it.first.c_str()); - else - ss << StringFromFormat(" int%u %s;\n", count, 
it.first.c_str()); - - for (u32 i = count; i < 4; i++) - ss << " int unused" << unused_counter++ << ";\n"; - } - else if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT) - { - u32 count = static_cast(it.second.m_float_values.size()); - if (count == 1) - ss << StringFromFormat(" float %s;\n", it.first.c_str()); - else - ss << StringFromFormat(" float%u %s;\n", count, it.first.c_str()); - - for (u32 i = count; i < 4; i++) - ss << " float unused" << unused_counter++ << ";\n"; - } - } - - ss << "} options;\n\n"; - - return ss.str(); -} - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/PostProcessing.h b/Source/Core/VideoBackends/Vulkan/PostProcessing.h deleted file mode 100644 index e9c18b9d2d..0000000000 --- a/Source/Core/VideoBackends/Vulkan/PostProcessing.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2017 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/PostProcessing.h" -#include "VideoCommon/VideoCommon.h" - -namespace Vulkan -{ -class Texture2D; - -class VulkanPostProcessing : public PostProcessingShaderImplementation -{ -public: - VulkanPostProcessing() = default; - ~VulkanPostProcessing(); - - bool Initialize(); - - void BlitFromTexture(const TargetRectangle& dst, const TargetRectangle& src, - const Texture2D* src_tex, int src_layer, VkRenderPass render_pass); - - void UpdateConfig(); - -private: - size_t CalculateUniformsSize() const; - void FillUniformBuffer(u8* buf, const TargetRectangle& src, const Texture2D* src_tex, - int src_layer); - - bool CompileDefaultShader(); - bool RecompileShader(); - std::string GetGLSLUniformBlock() const; - - VkShaderModule m_fragment_shader = VK_NULL_HANDLE; - VkShaderModule m_default_fragment_shader = VK_NULL_HANDLE; -}; - -} // namespace diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp 
b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index a3085aa486..0254f128a8 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -18,27 +18,22 @@ #include "VideoBackends/Vulkan/BoundingBox.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" #include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/PostProcessing.h" +#include "VideoBackends/Vulkan/PerfQuery.h" #include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/StreamBuffer.h" #include "VideoBackends/Vulkan/SwapChain.h" -#include "VideoBackends/Vulkan/TextureCache.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VKPipeline.h" #include "VideoBackends/Vulkan/VKShader.h" #include "VideoBackends/Vulkan/VKTexture.h" +#include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/BPFunctions.h" -#include "VideoCommon/BPMemory.h" #include "VideoCommon/DriverDetails.h" -#include "VideoCommon/OnScreenDisplay.h" -#include "VideoCommon/PixelEngine.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/RenderState.h" -#include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -59,11 +54,6 @@ Renderer::Renderer(std::unique_ptr swap_chain, float backbuffer_scale Renderer::~Renderer() = default; -Renderer* Renderer::GetInstance() -{ - return static_cast(g_renderer.get()); -} - bool Renderer::IsHeadless() const { return m_swap_chain == nullptr; @@ -74,8 +64,6 @@ bool Renderer::Initialize() if (!::Renderer::Initialize()) return false; - BindEFBToStateTracker(); - m_bounding_box = std::make_unique(); if (!m_bounding_box->Initialize()) { @@ -83,34 +71,16 @@ bool Renderer::Initialize() return 
false; } - if (g_vulkan_context->SupportsBoundingBox()) - { - // Bind bounding box to state tracker - StateTracker::GetInstance()->SetBBoxBuffer(m_bounding_box->GetGPUBuffer(), - m_bounding_box->GetGPUBufferOffset(), - m_bounding_box->GetGPUBufferSize()); - } - - // Initialize post processing. - m_post_processor = std::make_unique(); - if (!static_cast(m_post_processor.get())->Initialize()) - { - PanicAlert("failed to initialize post processor."); - return false; - } - // Various initialization routines will have executed commands on the command buffer. // Execute what we have done before beginning the first frame. - g_command_buffer_mgr->PrepareToSubmitCommandBuffer(); - g_command_buffer_mgr->SubmitCommandBuffer(false); - BeginFrame(); - + ExecuteCommandBuffer(true, false); return true; } void Renderer::Shutdown() { ::Renderer::Shutdown(); + m_swap_chain.reset(); } std::unique_ptr Renderer::CreateTexture(const TextureConfig& config) @@ -136,17 +106,22 @@ std::unique_ptr Renderer::CreateShaderFromBinary(ShaderStage sta return VKShader::CreateFromBinary(stage, data, length); } +std::unique_ptr +Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +{ + return std::make_unique(vtx_decl); +} + std::unique_ptr Renderer::CreatePipeline(const AbstractPipelineConfig& config) { return VKPipeline::Create(config); } -std::unique_ptr -Renderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) { - return VKFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return VKFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } void Renderer::SetPipeline(const AbstractPipeline* pipeline) @@ -154,90 +129,6 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline) 
StateTracker::GetInstance()->SetPipeline(static_cast(pipeline)); } -u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) -{ - if (type == EFBAccessType::PeekColor) - { - u32 color = FramebufferManager::GetInstance()->PeekEFBColor(x, y); - - // a little-endian value is expected to be returned - color = ((color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000)); - - // check what to do with the alpha channel (GX_PokeAlphaRead) - PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); - - if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) - { - color = RGBA8ToRGBA6ToRGBA8(color); - } - else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - color = RGBA8ToRGB565ToRGBA8(color); - } - if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) - { - color |= 0xFF000000; - } - - if (alpha_read_mode.ReadMode == 2) - { - return color; // GX_READ_NONE - } - else if (alpha_read_mode.ReadMode == 1) - { - return color | 0xFF000000; // GX_READ_FF - } - else /*if(alpha_read_mode.ReadMode == 0)*/ - { - return color & 0x00FFFFFF; // GX_READ_00 - } - } - else // if (type == EFBAccessType::PeekZ) - { - // Depth buffer is inverted for improved precision near far plane - float depth = 1.0f - FramebufferManager::GetInstance()->PeekEFBDepth(x, y); - u32 ret = 0; - - if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - // if Z is in 16 bit format you must return a 16 bit integer - ret = MathUtil::Clamp(static_cast(depth * 65536.0f), 0, 0xFFFF); - } - else - { - ret = MathUtil::Clamp(static_cast(depth * 16777216.0f), 0, 0xFFFFFF); - } - - return ret; - } -} - -void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - if (type == EFBAccessType::PokeColor) - { - for (size_t i = 0; i < num_points; i++) - { - // Convert to expected format (BGRA->RGBA) - // TODO: Check alpha, depending on mode? 
- const EfbPokeData& point = points[i]; - u32 color = ((point.data & 0xFF00FF00) | ((point.data >> 16) & 0xFF) | - ((point.data << 16) & 0xFF0000)); - FramebufferManager::GetInstance()->PokeEFBColor(point.x, point.y, color); - } - } - else // if (type == EFBAccessType::PokeZ) - { - for (size_t i = 0; i < num_points; i++) - { - // Convert to floating-point depth. - const EfbPokeData& point = points[i]; - float depth = (1.0f - float(point.data & 0xFFFFFF) / 16777216.0f); - FramebufferManager::GetInstance()->PokeEFBDepth(point.x, point.y, depth); - } - } -} - u16 Renderer::BBoxRead(int index) { s32 value = m_bounding_box->Get(static_cast(index)); @@ -285,31 +176,18 @@ void Renderer::BBoxWrite(int index, u16 value) m_bounding_box->Set(static_cast(index), scaled_value); } -TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) +void Renderer::BBoxFlush() { - TargetRectangle result; - result.left = EFBToScaledX(rc.left); - result.top = EFBToScaledY(rc.top); - result.right = EFBToScaledX(rc.right); - result.bottom = EFBToScaledY(rc.bottom); - return result; -} - -void Renderer::BeginFrame() -{ - // Activate a new command list, and restore state ready for the next draw - g_command_buffer_mgr->ActivateCommandBuffer(); - - // Ensure that the state tracker rebinds everything, and allocates a new set - // of descriptors out of the next pool. 
- StateTracker::GetInstance()->InvalidateDescriptorSets(); - StateTracker::GetInstance()->InvalidateConstants(); - StateTracker::GetInstance()->SetPendingRebind(); + m_bounding_box->Flush(); + m_bounding_box->Invalidate(); } void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, u32 color, u32 z) { + g_framebuffer_manager->FlushEFBPokes(); + g_framebuffer_manager->InvalidatePeekCache(); + // Native -> EFB coordinates TargetRectangle target_rc = Renderer::ConvertEFBRectangle(rc); @@ -340,7 +218,9 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha clear_color_value.color.float32[1] = static_cast((color >> 8) & 0xFF) / 255.0f; clear_color_value.color.float32[2] = static_cast((color >> 0) & 0xFF) / 255.0f; clear_color_value.color.float32[3] = static_cast((color >> 24) & 0xFF) / 255.0f; - clear_depth_value.depthStencil.depth = (1.0f - (static_cast(z & 0xFFFFFF) / 16777216.0f)); + clear_depth_value.depthStencil.depth = static_cast(z & 0xFFFFFF) / 16777216.0f; + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + clear_depth_value.depthStencil.depth = 1.0f - clear_depth_value.depthStencil.depth; // If we're not in a render pass (start of the frame), we can use a clear render pass // to discard the data, rather than loading and then clearing. 
@@ -396,7 +276,7 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha } if (num_clear_attachments > 0) { - VkClearRect vk_rect = {target_vk_rc, 0, FramebufferManager::GetInstance()->GetEFBLayers()}; + VkClearRect vk_rect = {target_vk_rc, 0, g_framebuffer_manager->GetEFBLayers()}; if (!StateTracker::GetInstance()->IsWithinRenderArea( target_vk_rc.offset.x, target_vk_rc.offset.y, target_vk_rc.extent.width, target_vk_rc.extent.height)) @@ -414,57 +294,17 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha if (!color_enable && !alpha_enable && !z_enable) return; - // Clearing must occur within a render pass. - if (!StateTracker::GetInstance()->IsWithinRenderArea(target_vk_rc.offset.x, target_vk_rc.offset.y, - target_vk_rc.extent.width, - target_vk_rc.extent.height)) - { - StateTracker::GetInstance()->EndClearRenderPass(); - } - StateTracker::GetInstance()->BeginRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - - // Mask away the appropriate colors and use a shader - BlendingState blend_state = RenderState::GetNoBlendingBlendState(); - blend_state.colorupdate = color_enable; - blend_state.alphaupdate = alpha_enable; - - DepthState depth_state = RenderState::GetNoDepthTestingDepthStencilState(); - depth_state.testenable = z_enable; - depth_state.updateenable = z_enable; - depth_state.func = ZMode::ALWAYS; - - // No need to start a new render pass, but we do need to restore viewport state - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - FramebufferManager::GetInstance()->GetEFBLoadRenderPass(), - g_shader_cache->GetPassthroughVertexShader(), - g_shader_cache->GetPassthroughGeometryShader(), - g_shader_cache->GetClearFragmentShader()); - - draw.SetMultisamplingState(FramebufferManager::GetInstance()->GetEFBMultisamplingState()); - draw.SetDepthState(depth_state); - draw.SetBlendState(blend_state); - - 
draw.DrawColoredQuad(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight(), - clear_color_value.color.float32[0], clear_color_value.color.float32[1], - clear_color_value.color.float32[2], clear_color_value.color.float32[3], - clear_depth_value.depthStencil.depth); -} - -void Renderer::ReinterpretPixelData(unsigned int convtype) -{ - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - FramebufferManager::GetInstance()->ReinterpretPixelData(convtype); - - // EFB framebuffer has now changed, so update accordingly. - BindEFBToStateTracker(); + g_framebuffer_manager->ClearEFB(rc, color_enable, alpha_enable, z_enable, color, z); } void Renderer::Flush() { - Util::ExecuteCurrentCommandsAndRestoreState(true, false); + ExecuteCommandBuffer(true, false); +} + +void Renderer::WaitForGPUIdle() +{ + ExecuteCommandBuffer(false, true); } void Renderer::BindBackbuffer(const ClearColor& clear_color) @@ -475,35 +315,13 @@ void Renderer::BindBackbuffer(const ClearColor& clear_color) CheckForSurfaceChange(); CheckForSurfaceResize(); - // Ensure the worker thread is not still submitting a previous command buffer. - // In other words, the last frame has been submitted (otherwise the next call would - // be a race, as the image may not have been consumed yet). - g_command_buffer_mgr->PrepareToSubmitCommandBuffer(); - - VkResult res; - if (!g_command_buffer_mgr->CheckLastPresentFail()) - { - // Grab the next image from the swap chain in preparation for drawing the window. - res = m_swap_chain->AcquireNextImage(); - } - else - { - // If the last present failed, we need to recreate the swap chain. - res = VK_ERROR_OUT_OF_DATE_KHR; - } - + VkResult res = g_command_buffer_mgr->CheckLastPresentFail() ? VK_ERROR_OUT_OF_DATE_KHR : + m_swap_chain->AcquireNextImage(); if (res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR) { - // There's an issue here. 
We can't resize the swap chain while the GPU is still busy with it, - // but calling WaitForGPUIdle would create a deadlock as PrepareToSubmitCommandBuffer has been - // called by SwapImpl. WaitForGPUIdle waits on the semaphore, which PrepareToSubmitCommandBuffer - // has already done, so it blocks indefinitely. To work around this, we submit the current - // command buffer, resize the swap chain (which calls WaitForGPUIdle), and then finally call - // PrepareToSubmitCommandBuffer to return to the state that the caller expects. - g_command_buffer_mgr->SubmitCommandBuffer(false); + // Execute cmdbuffer before resizing, as the last frame could still be presenting. + ExecuteCommandBuffer(false, true); m_swap_chain->ResizeSwapChain(); - BeginFrame(); - g_command_buffer_mgr->PrepareToSubmitCommandBuffer(); res = m_swap_chain->AcquireNextImage(); } if (res != VK_SUCCESS) @@ -512,30 +330,18 @@ void Renderer::BindBackbuffer(const ClearColor& clear_color) // Transition from undefined (or present src, but it can be substituted) to // color attachment ready for writing. These transitions must occur outside // a render pass, unless the render pass declares a self-dependency. - Texture2D* backbuffer = m_swap_chain->GetCurrentTexture(); - backbuffer->OverrideImageLayout(VK_IMAGE_LAYOUT_UNDEFINED); - backbuffer->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - m_current_framebuffer = nullptr; - m_current_framebuffer_width = backbuffer->GetWidth(); - m_current_framebuffer_height = backbuffer->GetHeight(); - - // Draw to the backbuffer. - VkRect2D region = {{0, 0}, {backbuffer->GetWidth(), backbuffer->GetHeight()}}; - StateTracker::GetInstance()->SetRenderPass(m_swap_chain->GetLoadRenderPass(), - m_swap_chain->GetClearRenderPass()); - StateTracker::GetInstance()->SetFramebuffer(m_swap_chain->GetCurrentFramebuffer(), region); - - // Begin render pass for rendering to the swap chain. 
- VkClearValue clear_value = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; - StateTracker::GetInstance()->BeginClearRenderPass(region, &clear_value, 1); + m_swap_chain->GetCurrentTexture()->OverrideImageLayout(VK_IMAGE_LAYOUT_UNDEFINED); + m_swap_chain->GetCurrentTexture()->TransitionToLayout( + g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + SetAndClearFramebuffer(m_swap_chain->GetCurrentFramebuffer(), + ClearColor{{0.0f, 0.0f, 0.0f, 1.0f}}); } void Renderer::PresentBackbuffer() { // End drawing to backbuffer StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->OnEndFrame(); + PerfQuery::GetInstance()->FlushQueries(); // Transition the backbuffer to PRESENT_SRC to ensure all commands drawing // to it have finished before present. @@ -546,47 +352,25 @@ void Renderer::PresentBackbuffer() // Because this final command buffer is rendering to the swap chain, we need to wait for // the available semaphore to be signaled before executing the buffer. This final submission // can happen off-thread in the background while we're preparing the next frame. - g_command_buffer_mgr->SubmitCommandBuffer(true, m_swap_chain->GetImageAvailableSemaphore(), - m_swap_chain->GetRenderingFinishedSemaphore(), - m_swap_chain->GetSwapChain(), + g_command_buffer_mgr->SubmitCommandBuffer(true, m_swap_chain->GetSwapChain(), m_swap_chain->GetCurrentImageIndex()); - BeginFrame(); + + // New cmdbuffer, so invalidate state. 
+ StateTracker::GetInstance()->InvalidateCachedState(); } -void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) +void Renderer::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion) { - const TargetRectangle target_rc = GetTargetRectangle(); + StateTracker::GetInstance()->EndRenderPass(); + PerfQuery::GetInstance()->FlushQueries(); - VulkanPostProcessing* post_processor = static_cast(m_post_processor.get()); - if (g_ActiveConfig.stereo_mode == StereoMode::SBS || - g_ActiveConfig.stereo_mode == StereoMode::TAB) - { - TargetRectangle left_rect; - TargetRectangle right_rect; - std::tie(left_rect, right_rect) = ConvertStereoRectangle(target_rc); + // If we're waiting for completion, don't bother waking the worker thread. + const VkFence pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread && wait_for_completion); + if (wait_for_completion) + g_command_buffer_mgr->WaitForFence(pending_fence); - post_processor->BlitFromTexture(left_rect, rc, - static_cast(texture)->GetRawTexIdentifier(), - 0, m_swap_chain->GetLoadRenderPass()); - post_processor->BlitFromTexture(right_rect, rc, - static_cast(texture)->GetRawTexIdentifier(), - 1, m_swap_chain->GetLoadRenderPass()); - } - else if (g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer) - { - post_processor->BlitFromTexture(target_rc, rc, - static_cast(texture)->GetRawTexIdentifier(), - -1, m_swap_chain->GetLoadRenderPass()); - } - else - { - post_processor->BlitFromTexture(target_rc, rc, - static_cast(texture)->GetRawTexIdentifier(), - 0, m_swap_chain->GetLoadRenderPass()); - } - - // The post-processor uses the old-style Vulkan draws, which mess with the tracked state. 
- StateTracker::GetInstance()->SetPendingRebind(); + StateTracker::GetInstance()->InvalidateCachedState(); } void Renderer::CheckForSurfaceChange() @@ -595,8 +379,7 @@ void Renderer::CheckForSurfaceChange() return; // Submit the current draws up until rendering the XFB. - g_command_buffer_mgr->ExecuteCommandBuffer(false, false); - g_command_buffer_mgr->WaitForGPUIdle(); + ExecuteCommandBuffer(false, true); // Clear the present failed flag, since we don't want to resize after recreating. g_command_buffer_mgr->CheckLastPresentFail(); @@ -624,8 +407,7 @@ void Renderer::CheckForSurfaceResize() } // Wait for the GPU to catch up since we're going to destroy the swap chain. - g_command_buffer_mgr->ExecuteCommandBuffer(false, false); - g_command_buffer_mgr->WaitForGPUIdle(); + ExecuteCommandBuffer(false, true); // Clear the present failed flag, since we don't want to resize after recreating. g_command_buffer_mgr->CheckLastPresentFail(); @@ -637,45 +419,29 @@ void Renderer::CheckForSurfaceResize() void Renderer::OnConfigChanged(u32 bits) { - // Update texture cache settings with any changed options. - TextureCache::GetInstance()->OnConfigChanged(g_ActiveConfig); - - // Handle settings that can cause the EFB framebuffer to change. - if (bits & CONFIG_CHANGE_BIT_TARGET_SIZE) - RecreateEFBFramebuffer(); - - // MSAA samples changed, we need to recreate the EFB render pass. - // If the stereoscopy mode changed, we need to recreate the buffers as well. - // SSAA changed on/off, we have to recompile shaders. - // Changing stereoscopy from off<->on also requires shaders to be recompiled. 
- if (bits & (CONFIG_CHANGE_BIT_HOST_CONFIG | CONFIG_CHANGE_BIT_MULTISAMPLES)) - { - RecreateEFBFramebuffer(); - FramebufferManager::GetInstance()->RecompileShaders(); - g_shader_cache->ReloadPipelineCache(); - g_shader_cache->RecompileSharedShaders(); - } + if (bits & CONFIG_CHANGE_BIT_HOST_CONFIG) + g_object_cache->ReloadPipelineCache(); // For vsync, we need to change the present mode, which means recreating the swap chain. if (m_swap_chain && bits & CONFIG_CHANGE_BIT_VSYNC) { - g_command_buffer_mgr->WaitForGPUIdle(); + ExecuteCommandBuffer(false, true); m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive); } // For quad-buffered stereo we need to change the layer count, so recreate the swap chain. if (m_swap_chain && bits & CONFIG_CHANGE_BIT_STEREO_MODE) { - g_command_buffer_mgr->WaitForGPUIdle(); + ExecuteCommandBuffer(false, true); m_swap_chain->RecreateSwapChain(); } // Wipe sampler cache if force texture filtering or anisotropy changes. if (bits & (CONFIG_CHANGE_BIT_ANISOTROPY | CONFIG_CHANGE_BIT_FORCE_TEXTURE_FILTERING)) + { + ExecuteCommandBuffer(false, true); ResetSamplerStates(); - - // Check for a changed post-processing shader and recompile if needed. 
- static_cast(m_post_processor.get())->UpdateConfig(); + } } void Renderer::OnSwapChainResized() @@ -684,103 +450,55 @@ void Renderer::OnSwapChainResized() m_backbuffer_height = m_swap_chain->GetHeight(); } -void Renderer::BindEFBToStateTracker() -{ - // Update framebuffer in state tracker - VkRect2D framebuffer_size = {{0, 0}, - {FramebufferManager::GetInstance()->GetEFBWidth(), - FramebufferManager::GetInstance()->GetEFBHeight()}}; - StateTracker::GetInstance()->SetRenderPass( - FramebufferManager::GetInstance()->GetEFBLoadRenderPass(), - FramebufferManager::GetInstance()->GetEFBClearRenderPass()); - StateTracker::GetInstance()->SetFramebuffer( - FramebufferManager::GetInstance()->GetEFBFramebuffer(), framebuffer_size); - m_current_framebuffer = nullptr; - m_current_framebuffer_width = FramebufferManager::GetInstance()->GetEFBWidth(); - m_current_framebuffer_height = FramebufferManager::GetInstance()->GetEFBHeight(); -} - -void Renderer::RecreateEFBFramebuffer() -{ - // Ensure the GPU is finished with the current EFB textures. - g_command_buffer_mgr->WaitForGPUIdle(); - FramebufferManager::GetInstance()->RecreateEFBFramebuffer(); - BindEFBToStateTracker(); - - // Viewport and scissor rect have to be reset since they will be scaled differently. 
- BPFunctions::SetViewport(); - BPFunctions::SetScissor(); -} - -void Renderer::ApplyState() -{ -} - -void Renderer::ResetAPIState() -{ - // End the EFB render pass if active - StateTracker::GetInstance()->EndRenderPass(); -} - -void Renderer::RestoreAPIState() +void Renderer::BindFramebuffer(VKFramebuffer* fb) { StateTracker::GetInstance()->EndRenderPass(); - if (m_current_framebuffer) - static_cast(m_current_framebuffer)->TransitionForSample(); - BindEFBToStateTracker(); - BPFunctions::SetViewport(); - BPFunctions::SetScissor(); - - // Instruct the state tracker to re-bind everything before the next draw - StateTracker::GetInstance()->SetPendingRebind(); -} - -void Renderer::BindFramebuffer(const VKFramebuffer* fb) -{ - const VkRect2D render_area = {static_cast(fb->GetWidth()), - static_cast(fb->GetHeight())}; - - StateTracker::GetInstance()->EndRenderPass(); - if (m_current_framebuffer) - static_cast(m_current_framebuffer)->TransitionForSample(); + // Shouldn't be bound as a texture. 
+ if (fb->GetColorAttachment()) + { + StateTracker::GetInstance()->UnbindTexture( + static_cast(fb->GetColorAttachment())->GetView()); + } + if (fb->GetDepthAttachment()) + { + StateTracker::GetInstance()->UnbindTexture( + static_cast(fb->GetDepthAttachment())->GetView()); + } fb->TransitionForRender(); - StateTracker::GetInstance()->SetFramebuffer(fb->GetFB(), render_area); - StateTracker::GetInstance()->SetRenderPass(fb->GetLoadRenderPass(), fb->GetClearRenderPass()); + StateTracker::GetInstance()->SetFramebuffer(fb); m_current_framebuffer = fb; - m_current_framebuffer_width = fb->GetWidth(); - m_current_framebuffer_height = fb->GetHeight(); } -void Renderer::SetFramebuffer(const AbstractFramebuffer* framebuffer) +void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) { - const VKFramebuffer* vkfb = static_cast(framebuffer); + if (m_current_framebuffer == framebuffer) + return; + + VKFramebuffer* vkfb = static_cast(framebuffer); BindFramebuffer(vkfb); - StateTracker::GetInstance()->BeginRenderPass(); } -void Renderer::SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) +void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) { - const VKFramebuffer* vkfb = static_cast(framebuffer); + if (m_current_framebuffer == framebuffer) + return; + + VKFramebuffer* vkfb = static_cast(framebuffer); BindFramebuffer(vkfb); // If we're discarding, begin the discard pass, then switch to a load pass. // This way if the command buffer is flushed, we don't start another discard pass. 
- StateTracker::GetInstance()->SetRenderPass(vkfb->GetDiscardRenderPass(), - vkfb->GetClearRenderPass()); - StateTracker::GetInstance()->BeginRenderPass(); - StateTracker::GetInstance()->SetRenderPass(vkfb->GetLoadRenderPass(), vkfb->GetClearRenderPass()); + StateTracker::GetInstance()->BeginDiscardRenderPass(); } -void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, +void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value, float depth_value) { - const VKFramebuffer* vkfb = static_cast(framebuffer); + VKFramebuffer* vkfb = static_cast(framebuffer); BindFramebuffer(vkfb); - const VkRect2D render_area = {static_cast(vkfb->GetWidth()), - static_cast(vkfb->GetHeight())}; std::array clear_values; u32 num_clear_values = 0; if (vkfb->GetColorFormat() != AbstractTextureFormat::Undefined) @@ -795,7 +513,7 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, clear_values[num_clear_values].depthStencil.stencil = 0; num_clear_values++; } - StateTracker::GetInstance()->BeginClearRenderPass(render_area, clear_values.data(), + StateTracker::GetInstance()->BeginClearRenderPass(vkfb->GetRect(), clear_values.data(), num_clear_values); } @@ -803,9 +521,27 @@ void Renderer::SetTexture(u32 index, const AbstractTexture* texture) { // Texture should always be in SHADER_READ_ONLY layout prior to use. // This is so we don't need to transition during render passes. - auto* tex = texture ? static_cast(texture)->GetRawTexIdentifier() : nullptr; - DEBUG_ASSERT(!tex || tex->GetLayout() == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - StateTracker::GetInstance()->SetTexture(index, tex ? 
tex->GetView() : VK_NULL_HANDLE); + const VKTexture* tex = static_cast(texture); + if (tex) + { + if (tex->GetLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + { + if (StateTracker::GetInstance()->InRenderPass()) + { + WARN_LOG(VIDEO, "Transitioning image in render pass in Renderer::SetTexture()"); + StateTracker::GetInstance()->EndRenderPass(); + } + + tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + StateTracker::GetInstance()->SetTexture(index, tex->GetView()); + } + else + { + StateTracker::GetInstance()->SetTexture(0, VK_NULL_HANDLE); + } } void Renderer::SetSamplerState(u32 index, const SamplerState& state) @@ -826,10 +562,27 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state) m_sampler_states[index].hex = state.hex; } +void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) +{ + VKTexture* vk_texture = static_cast(texture); + if (vk_texture) + { + StateTracker::GetInstance()->EndRenderPass(); + StateTracker::GetInstance()->SetImageTexture(vk_texture->GetView()); + vk_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + read ? (write ? VKTexture::ComputeImageLayout::ReadWrite : + VKTexture::ComputeImageLayout::ReadOnly) : + VKTexture::ComputeImageLayout::WriteOnly); + } + else + { + StateTracker::GetInstance()->SetImageTexture(VK_NULL_HANDLE); + } +} + void Renderer::UnbindTexture(const AbstractTexture* texture) { - StateTracker::GetInstance()->UnbindTexture( - static_cast(texture)->GetRawTexIdentifier()->GetView()); + StateTracker::GetInstance()->UnbindTexture(static_cast(texture)->GetView()); } void Renderer::ResetSamplerStates() @@ -839,7 +592,7 @@ void Renderer::ResetSamplerStates() g_command_buffer_mgr->WaitForGPUIdle(); // Invalidate all sampler states, next draw will re-initialize them. 
- for (size_t i = 0; i < m_sampler_states.size(); i++) + for (u32 i = 0; i < m_sampler_states.size(); i++) { m_sampler_states[i].hex = RenderState::GetPointSamplerState().hex; StateTracker::GetInstance()->SetSampler(i, g_object_cache->GetPointSampler()); @@ -849,10 +602,6 @@ void Renderer::ResetSamplerStates() g_object_cache->ClearSamplerCache(); } -void Renderer::SetInterlacingMode() -{ -} - void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) { VkRect2D scissor = {{rc.left, rc.top}, @@ -863,14 +612,13 @@ void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) void Renderer::SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) { - VkViewport viewport = {x, y, std::max(width, 1.0f), std::max(height, 1.0f), - near_depth, far_depth}; + VkViewport viewport = {x, y, width, height, near_depth, far_depth}; StateTracker::GetInstance()->SetViewport(viewport); } void Renderer::Draw(u32 base_vertex, u32 num_vertices) { - if (StateTracker::GetInstance()->Bind()) + if (!StateTracker::GetInstance()->Bind()) return; vkCmdDraw(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_vertices, 1, base_vertex, 0); @@ -884,4 +632,13 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_indices, 1, base_index, base_vertex, 0); } + +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) +{ + StateTracker::GetInstance()->SetComputeShader(static_cast(shader)); + if (StateTracker::GetInstance()->BindCompute()) + vkCmdDispatch(g_command_buffer_mgr->GetCurrentCommandBuffer(), groups_x, groups_y, groups_z); +} + } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.h b/Source/Core/VideoBackends/Vulkan/Renderer.h index 43e97279f0..2b3390b263 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.h +++ b/Source/Core/VideoBackends/Vulkan/Renderer.h @@ -17,10 +17,8 @@ struct 
XFBSourceBase; namespace Vulkan { class BoundingBox; -class FramebufferManager; class SwapChain; class StagingTexture2D; -class Texture2D; class VKFramebuffer; class VKPipeline; class VKTexture; @@ -31,7 +29,7 @@ public: Renderer(std::unique_ptr swap_chain, float backbuffer_scale); ~Renderer() override; - static Renderer* GetInstance(); + static Renderer* GetInstance() { return static_cast(g_renderer.get()); } bool IsHeadless() const override; @@ -42,67 +40,60 @@ public: std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; std::unique_ptr CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) override; SwapChain* GetSwapChain() const { return m_swap_chain.get(); } BoundingBox* GetBoundingBox() const { return m_bounding_box.get(); } - u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; - void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; + void BBoxFlush() override; void Flush() override; - void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override; + void WaitForGPUIdle() override; void OnConfigChanged(u32 bits) override; void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, u32 color, u32 z) override; - void 
ReinterpretPixelData(unsigned int convtype) override; - - void ApplyState() override; - - void ResetAPIState() override; - void RestoreAPIState() override; - void SetPipeline(const AbstractPipeline* pipeline) override; - void SetFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, - const ClearColor& color_value = {}, + void SetFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {}, float depth_value = 0.0f) override; void SetScissorRect(const MathUtil::Rectangle& rc) override; void SetTexture(u32 index, const AbstractTexture* texture) override; void SetSamplerState(u32 index, const SamplerState& state) override; + void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override; void UnbindTexture(const AbstractTexture* texture) override; - void SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; -private: - void BeginFrame(); + // Completes the current render pass, executes the command buffer, and restores state ready for + // next render. Use when you want to kick the current buffer to make room for new data. 
+ void ExecuteCommandBuffer(bool execute_off_thread, bool wait_for_completion = false); +private: void CheckForSurfaceChange(); void CheckForSurfaceResize(); void ResetSamplerStates(); void OnSwapChainResized(); - void BindEFBToStateTracker(); - void RecreateEFBFramebuffer(); - void BindFramebuffer(const VKFramebuffer* fb); + void BindFramebuffer(VKFramebuffer* fb); std::unique_ptr m_swap_chain; std::unique_ptr m_bounding_box; @@ -110,4 +101,4 @@ private: // Keep a copy of sampler states to avoid cache lookups every draw std::array m_sampler_states = {}; }; -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp deleted file mode 100644 index 33181e920c..0000000000 --- a/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp +++ /dev/null @@ -1,859 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/Vulkan/ShaderCache.h" - -#include -#include -#include -#include - -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/LinearDiskCache.h" -#include "Common/MsgHandler.h" - -#include "Core/ConfigManager.h" -#include "Core/Host.h" - -#include "VideoBackends/Vulkan/FramebufferManager.h" -#include "VideoBackends/Vulkan/ShaderCompiler.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VertexFormat.h" -#include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/Statistics.h" - -namespace Vulkan -{ -std::unique_ptr g_shader_cache; - -ShaderCache::ShaderCache() -{ -} - -ShaderCache::~ShaderCache() -{ - DestroyPipelineCache(); - DestroySharedShaders(); -} - -bool ShaderCache::Initialize() -{ - if (g_ActiveConfig.bShaderCache) - { - if (!LoadPipelineCache()) - return false; - } - else - { - if (!CreatePipelineCache()) - return false; - } - - if (!CompileSharedShaders()) - return false; - - return true; 
-} - -void ShaderCache::Shutdown() -{ - if (g_ActiveConfig.bShaderCache && m_pipeline_cache != VK_NULL_HANDLE) - SavePipelineCache(); -} - -static bool IsStripPrimitiveTopology(VkPrimitiveTopology topology) -{ - return topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP || - topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP || - topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY || - topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY; -} - -static VkPipelineRasterizationStateCreateInfo -GetVulkanRasterizationState(const RasterizationState& state) -{ - static constexpr std::array cull_modes = { - {VK_CULL_MODE_NONE, VK_CULL_MODE_BACK_BIT, VK_CULL_MODE_FRONT_BIT, - VK_CULL_MODE_FRONT_AND_BACK}}; - - bool depth_clamp = g_ActiveConfig.backend_info.bSupportsDepthClamp; - - return { - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineRasterizationStateCreateFlags flags - depth_clamp, // VkBool32 depthClampEnable - VK_FALSE, // VkBool32 rasterizerDiscardEnable - VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode - cull_modes[state.cullmode], // VkCullModeFlags cullMode - VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace - VK_FALSE, // VkBool32 depthBiasEnable - 0.0f, // float depthBiasConstantFactor - 0.0f, // float depthBiasClamp - 0.0f, // float depthBiasSlopeFactor - 1.0f // float lineWidth - }; -} - -static VkPipelineMultisampleStateCreateInfo -GetVulkanMultisampleState(const MultisamplingState& state) -{ - return { - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineMultisampleStateCreateFlags flags - static_cast( - state.samples.Value()), // VkSampleCountFlagBits rasterizationSamples - state.per_sample_shading, // VkBool32 sampleShadingEnable - 1.0f, // float minSampleShading - nullptr, // const VkSampleMask* pSampleMask; - VK_FALSE, // VkBool32 alphaToCoverageEnable - VK_FALSE // 
VkBool32 alphaToOneEnable - }; -} - -static VkPipelineDepthStencilStateCreateInfo GetVulkanDepthStencilState(const DepthState& state) -{ - // Less/greater are swapped due to inverted depth. - static constexpr std::array funcs = { - {VK_COMPARE_OP_NEVER, VK_COMPARE_OP_GREATER, VK_COMPARE_OP_EQUAL, - VK_COMPARE_OP_GREATER_OR_EQUAL, VK_COMPARE_OP_LESS, VK_COMPARE_OP_NOT_EQUAL, - VK_COMPARE_OP_LESS_OR_EQUAL, VK_COMPARE_OP_ALWAYS}}; - - return { - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineDepthStencilStateCreateFlags flags - state.testenable, // VkBool32 depthTestEnable - state.updateenable, // VkBool32 depthWriteEnable - funcs[state.func], // VkCompareOp depthCompareOp - VK_FALSE, // VkBool32 depthBoundsTestEnable - VK_FALSE, // VkBool32 stencilTestEnable - {}, // VkStencilOpState front - {}, // VkStencilOpState back - 0.0f, // float minDepthBounds - 1.0f // float maxDepthBounds - }; -} - -static VkPipelineColorBlendAttachmentState GetVulkanAttachmentBlendState(const BlendingState& state) -{ - VkPipelineColorBlendAttachmentState vk_state = {}; - vk_state.blendEnable = static_cast(state.blendenable); - vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; - vk_state.alphaBlendOp = state.subtractAlpha ? 
VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; - - if (state.usedualsrc && g_vulkan_context->SupportsDualSourceBlend()) - { - static constexpr std::array src_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - static constexpr std::array dst_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - - vk_state.srcColorBlendFactor = src_factors[state.srcfactor]; - vk_state.srcAlphaBlendFactor = src_factors[state.srcfactoralpha]; - vk_state.dstColorBlendFactor = dst_factors[state.dstfactor]; - vk_state.dstAlphaBlendFactor = dst_factors[state.dstfactoralpha]; - } - else - { - static constexpr std::array src_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - - static constexpr std::array dst_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - - vk_state.srcColorBlendFactor = src_factors[state.srcfactor]; - vk_state.srcAlphaBlendFactor = src_factors[state.srcfactoralpha]; - vk_state.dstColorBlendFactor = dst_factors[state.dstfactor]; - vk_state.dstAlphaBlendFactor = dst_factors[state.dstfactoralpha]; - } - - if (state.colorupdate) - { - vk_state.colorWriteMask = - VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT; - } - else - 
{ - vk_state.colorWriteMask = 0; - } - - if (state.alphaupdate) - vk_state.colorWriteMask |= VK_COLOR_COMPONENT_A_BIT; - - return vk_state; -} - -static VkPipelineColorBlendStateCreateInfo -GetVulkanColorBlendState(const BlendingState& state, - const VkPipelineColorBlendAttachmentState* attachments, - uint32_t num_attachments) -{ - static constexpr std::array vk_logic_ops = { - {VK_LOGIC_OP_CLEAR, VK_LOGIC_OP_AND, VK_LOGIC_OP_AND_REVERSE, VK_LOGIC_OP_COPY, - VK_LOGIC_OP_AND_INVERTED, VK_LOGIC_OP_NO_OP, VK_LOGIC_OP_XOR, VK_LOGIC_OP_OR, - VK_LOGIC_OP_NOR, VK_LOGIC_OP_EQUIVALENT, VK_LOGIC_OP_INVERT, VK_LOGIC_OP_OR_REVERSE, - VK_LOGIC_OP_COPY_INVERTED, VK_LOGIC_OP_OR_INVERTED, VK_LOGIC_OP_NAND, VK_LOGIC_OP_SET}}; - - VkBool32 vk_logic_op_enable = static_cast(state.logicopenable); - if (vk_logic_op_enable && !g_vulkan_context->SupportsLogicOps()) - { - // At the time of writing, Adreno and Mali drivers didn't support logic ops. - // The "emulation" through blending path has been removed, so just disable it completely. - // These drivers don't support dual-source blend either, so issues are to be expected. - vk_logic_op_enable = VK_FALSE; - } - - VkLogicOp vk_logic_op = vk_logic_op_enable ? 
vk_logic_ops[state.logicmode] : VK_LOGIC_OP_CLEAR; - - VkPipelineColorBlendStateCreateInfo vk_state = { - VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineColorBlendStateCreateFlags flags - vk_logic_op_enable, // VkBool32 logicOpEnable - vk_logic_op, // VkLogicOp logicOp - num_attachments, // uint32_t attachmentCount - attachments, // const VkPipelineColorBlendAttachmentState* pAttachments - {1.0f, 1.0f, 1.0f, 1.0f} // float blendConstants[4] - }; - - return vk_state; -} - -VkPipeline ShaderCache::CreatePipeline(const PipelineInfo& info) -{ - // Declare descriptors for empty vertex buffers/attributes - static const VkPipelineVertexInputStateCreateInfo empty_vertex_input_state = { - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineVertexInputStateCreateFlags flags - 0, // uint32_t vertexBindingDescriptionCount - nullptr, // const VkVertexInputBindingDescription* pVertexBindingDescriptions - 0, // uint32_t vertexAttributeDescriptionCount - nullptr // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions - }; - - // Vertex inputs - const VkPipelineVertexInputStateCreateInfo& vertex_input_state = - info.vertex_format ? 
info.vertex_format->GetVertexInputStateInfo() : empty_vertex_input_state; - - // Input assembly - static constexpr std::array vk_primitive_topologies = { - {VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP}}; - VkPipelineInputAssemblyStateCreateInfo input_assembly_state = { - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, nullptr, 0, - vk_primitive_topologies[static_cast(info.rasterization_state.primitive.Value())], - VK_FALSE}; - - // See Vulkan spec, section 19: - // If topology is VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - // VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, - // VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY or VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, - // primitiveRestartEnable must be VK_FALSE - if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart && - IsStripPrimitiveTopology(input_assembly_state.topology)) - { - input_assembly_state.primitiveRestartEnable = VK_TRUE; - } - - // Shaders to stages - VkPipelineShaderStageCreateInfo shader_stages[3]; - uint32_t num_shader_stages = 0; - if (info.vs != VK_NULL_HANDLE) - { - shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - nullptr, - 0, - VK_SHADER_STAGE_VERTEX_BIT, - info.vs, - "main"}; - } - if (info.gs != VK_NULL_HANDLE) - { - shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - nullptr, - 0, - VK_SHADER_STAGE_GEOMETRY_BIT, - info.gs, - "main"}; - } - if (info.ps != VK_NULL_HANDLE) - { - shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - nullptr, - 0, - VK_SHADER_STAGE_FRAGMENT_BIT, - info.ps, - "main"}; - } - - // Fill in Vulkan descriptor structs from our state structures. 
- VkPipelineRasterizationStateCreateInfo rasterization_state = - GetVulkanRasterizationState(info.rasterization_state); - VkPipelineMultisampleStateCreateInfo multisample_state = - GetVulkanMultisampleState(info.multisampling_state); - VkPipelineDepthStencilStateCreateInfo depth_stencil_state = - GetVulkanDepthStencilState(info.depth_state); - VkPipelineColorBlendAttachmentState blend_attachment_state = - GetVulkanAttachmentBlendState(info.blend_state); - VkPipelineColorBlendStateCreateInfo blend_state = - GetVulkanColorBlendState(info.blend_state, &blend_attachment_state, 1); - - // This viewport isn't used, but needs to be specified anyway. - static const VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; - static const VkRect2D scissor = {{0, 0}, {1, 1}}; - static const VkPipelineViewportStateCreateInfo viewport_state = { - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - nullptr, - 0, // VkPipelineViewportStateCreateFlags flags; - 1, // uint32_t viewportCount - &viewport, // const VkViewport* pViewports - 1, // uint32_t scissorCount - &scissor // const VkRect2D* pScissors - }; - - // Set viewport and scissor dynamic state so we can change it elsewhere. - static const VkDynamicState dynamic_states[] = {VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR}; - static const VkPipelineDynamicStateCreateInfo dynamic_state = { - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, nullptr, - 0, // VkPipelineDynamicStateCreateFlags flags - static_cast(ArraySize(dynamic_states)), // uint32_t dynamicStateCount - dynamic_states // const VkDynamicState* pDynamicStates - }; - - // Combine to full pipeline info structure. 
- VkGraphicsPipelineCreateInfo pipeline_info = { - VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - nullptr, // VkStructureType sType - 0, // VkPipelineCreateFlags flags - num_shader_stages, // uint32_t stageCount - shader_stages, // const VkPipelineShaderStageCreateInfo* pStages - &vertex_input_state, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState - &input_assembly_state, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState - nullptr, // const VkPipelineTessellationStateCreateInfo* pTessellationState - &viewport_state, // const VkPipelineViewportStateCreateInfo* pViewportState - &rasterization_state, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState - &multisample_state, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState - &depth_stencil_state, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState - &blend_state, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState - &dynamic_state, // const VkPipelineDynamicStateCreateInfo* pDynamicState - info.pipeline_layout, // VkPipelineLayout layout - info.render_pass, // VkRenderPass renderPass - 0, // uint32_t subpass - VK_NULL_HANDLE, // VkPipeline basePipelineHandle - -1 // int32_t basePipelineIndex - }; - - VkPipeline pipeline; - VkResult res = vkCreateGraphicsPipelines(g_vulkan_context->GetDevice(), m_pipeline_cache, 1, - &pipeline_info, nullptr, &pipeline); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateGraphicsPipelines failed: "); - return VK_NULL_HANDLE; - } - - return pipeline; -} - -VkPipeline ShaderCache::GetPipeline(const PipelineInfo& info) -{ - auto iter = m_pipeline_objects.find(info); - if (iter != m_pipeline_objects.end()) - return iter->second; - - VkPipeline pipeline = CreatePipeline(info); - m_pipeline_objects.emplace(info, pipeline); - return pipeline; -} - -VkPipeline ShaderCache::CreateComputePipeline(const ComputePipelineInfo& info) -{ - VkComputePipelineCreateInfo pipeline_info = 
{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - nullptr, - 0, - {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - nullptr, 0, VK_SHADER_STAGE_COMPUTE_BIT, info.cs, - "main", nullptr}, - info.pipeline_layout, - VK_NULL_HANDLE, - -1}; - - VkPipeline pipeline; - VkResult res = vkCreateComputePipelines(g_vulkan_context->GetDevice(), VK_NULL_HANDLE, 1, - &pipeline_info, nullptr, &pipeline); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: "); - return VK_NULL_HANDLE; - } - - return pipeline; -} - -VkPipeline ShaderCache::GetComputePipeline(const ComputePipelineInfo& info) -{ - auto iter = m_compute_pipeline_objects.find(info); - if (iter != m_compute_pipeline_objects.end()) - return iter->second; - - VkPipeline pipeline = CreateComputePipeline(info); - m_compute_pipeline_objects.emplace(info, pipeline); - return pipeline; -} - -void ShaderCache::ClearPipelineCache() -{ - for (const auto& it : m_pipeline_objects) - { - if (it.second != VK_NULL_HANDLE) - vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr); - } - m_pipeline_objects.clear(); - - for (const auto& it : m_compute_pipeline_objects) - { - if (it.second != VK_NULL_HANDLE) - vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr); - } - m_compute_pipeline_objects.clear(); -} - -class PipelineCacheReadCallback : public LinearDiskCacheReader -{ -public: - PipelineCacheReadCallback(std::vector* data) : m_data(data) {} - void Read(const u32& key, const u8* value, u32 value_size) override - { - m_data->resize(value_size); - if (value_size > 0) - memcpy(m_data->data(), value, value_size); - } - -private: - std::vector* m_data; -}; - -class PipelineCacheReadIgnoreCallback : public LinearDiskCacheReader -{ -public: - void Read(const u32& key, const u8* value, u32 value_size) override {} -}; - -bool ShaderCache::CreatePipelineCache() -{ - // Vulkan pipeline caches can be shared between games for shader compile time reduction. 
- // This assumes that drivers don't create all pipelines in the cache on load time, only - // when a lookup occurs that matches a pipeline (or pipeline data) in the cache. - m_pipeline_cache_filename = GetDiskShaderCacheFileName(APIType::Vulkan, "Pipeline", false, true); - - VkPipelineCacheCreateInfo info = { - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineCacheCreateFlags flags - 0, // size_t initialDataSize - nullptr // const void* pInitialData - }; - - VkResult res = - vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); - if (res == VK_SUCCESS) - return true; - - LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed: "); - return false; -} - -bool ShaderCache::LoadPipelineCache() -{ - // We have to keep the pipeline cache file name around since when we save it - // we delete the old one, by which time the game's unique ID is already cleared. - m_pipeline_cache_filename = GetDiskShaderCacheFileName(APIType::Vulkan, "Pipeline", false, true); - - std::vector disk_data; - LinearDiskCache disk_cache; - PipelineCacheReadCallback read_callback(&disk_data); - if (disk_cache.OpenAndRead(m_pipeline_cache_filename, read_callback) != 1) - disk_data.clear(); - - if (!disk_data.empty() && !ValidatePipelineCache(disk_data.data(), disk_data.size())) - { - // Don't use this data. In fact, we should delete it to prevent it from being used next time. 
- File::Delete(m_pipeline_cache_filename); - return CreatePipelineCache(); - } - - VkPipelineCacheCreateInfo info = { - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineCacheCreateFlags flags - disk_data.size(), // size_t initialDataSize - disk_data.data() // const void* pInitialData - }; - - VkResult res = - vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); - if (res == VK_SUCCESS) - return true; - - // Failed to create pipeline cache, try with it empty. - LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed, trying empty cache: "); - return CreatePipelineCache(); -} - -// Based on Vulkan 1.0 specification, -// Table 9.1. Layout for pipeline cache header version VK_PIPELINE_CACHE_HEADER_VERSION_ONE -// NOTE: This data is assumed to be in little-endian format. -#pragma pack(push, 4) -struct VK_PIPELINE_CACHE_HEADER -{ - u32 header_length; - u32 header_version; - u32 vendor_id; - u32 device_id; - u8 uuid[VK_UUID_SIZE]; -}; -#pragma pack(pop) -static_assert(std::is_trivially_copyable::value, - "VK_PIPELINE_CACHE_HEADER must be trivially copyable"); - -bool ShaderCache::ValidatePipelineCache(const u8* data, size_t data_length) -{ - if (data_length < sizeof(VK_PIPELINE_CACHE_HEADER)) - { - ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header"); - return false; - } - - VK_PIPELINE_CACHE_HEADER header; - std::memcpy(&header, data, sizeof(header)); - if (header.header_length < sizeof(VK_PIPELINE_CACHE_HEADER)) - { - ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header length"); - return false; - } - - if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) - { - ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header version"); - return false; - } - - if (header.vendor_id != g_vulkan_context->GetDeviceProperties().vendorID) - { - ERROR_LOG(VIDEO, - "Pipeline cache failed validation: Incorrect vendor ID (file: 0x%X, 
device: 0x%X)", - header.vendor_id, g_vulkan_context->GetDeviceProperties().vendorID); - return false; - } - - if (header.device_id != g_vulkan_context->GetDeviceProperties().deviceID) - { - ERROR_LOG(VIDEO, - "Pipeline cache failed validation: Incorrect device ID (file: 0x%X, device: 0x%X)", - header.device_id, g_vulkan_context->GetDeviceProperties().deviceID); - return false; - } - - if (std::memcmp(header.uuid, g_vulkan_context->GetDeviceProperties().pipelineCacheUUID, - VK_UUID_SIZE) != 0) - { - ERROR_LOG(VIDEO, "Pipeline cache failed validation: Incorrect UUID"); - return false; - } - - return true; -} - -void ShaderCache::DestroyPipelineCache() -{ - ClearPipelineCache(); - vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr); - m_pipeline_cache = VK_NULL_HANDLE; -} - -void ShaderCache::SavePipelineCache() -{ - size_t data_size; - VkResult res = - vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); - return; - } - - std::vector data(data_size); - res = vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, - data.data()); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); - return; - } - - // Delete the old cache and re-create. - File::Delete(m_pipeline_cache_filename); - - // We write a single key of 1, with the entire pipeline cache data. - // Not ideal, but our disk cache class does not support just writing a single blob - // of data without specifying a key. 
- LinearDiskCache disk_cache; - PipelineCacheReadIgnoreCallback callback; - disk_cache.OpenAndRead(m_pipeline_cache_filename, callback); - disk_cache.Append(1, data.data(), static_cast(data.size())); - disk_cache.Close(); -} - -void ShaderCache::RecompileSharedShaders() -{ - DestroySharedShaders(); - if (!CompileSharedShaders()) - PanicAlert("Failed to recompile shared shaders."); -} - -void ShaderCache::ReloadPipelineCache() -{ - SavePipelineCache(); - DestroyPipelineCache(); - - if (g_ActiveConfig.bShaderCache) - LoadPipelineCache(); - else - CreatePipelineCache(); -} - -std::string ShaderCache::GetUtilityShaderHeader() const -{ - std::stringstream ss; - if (g_ActiveConfig.iMultisamples > 1) - { - ss << "#define MSAA_ENABLED 1" << std::endl; - ss << "#define MSAA_SAMPLES " << g_ActiveConfig.iMultisamples << std::endl; - if (g_ActiveConfig.bSSAA) - ss << "#define SSAA_ENABLED 1" << std::endl; - } - - u32 efb_layers = (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 2 : 1; - ss << "#define EFB_LAYERS " << efb_layers << std::endl; - - return ss.str(); -} - -std::size_t PipelineInfoHash::operator()(const PipelineInfo& key) const -{ - return static_cast(XXH64(&key, sizeof(key), 0)); -} - -bool operator==(const PipelineInfo& lhs, const PipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0; -} - -bool operator!=(const PipelineInfo& lhs, const PipelineInfo& rhs) -{ - return !operator==(lhs, rhs); -} - -bool operator<(const PipelineInfo& lhs, const PipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) < 0; -} - -bool operator>(const PipelineInfo& lhs, const PipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) > 0; -} - -std::size_t ComputePipelineInfoHash::operator()(const ComputePipelineInfo& key) const -{ - return static_cast(XXH64(&key, sizeof(key), 0)); -} - -bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0; -} - -bool operator!=(const 
ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs) -{ - return !operator==(lhs, rhs); -} - -bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) < 0; -} - -bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) > 0; -} - -bool ShaderCache::CompileSharedShaders() -{ - static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"( - layout(location = 0) in vec4 ipos; - layout(location = 5) in vec4 icol0; - layout(location = 8) in vec3 itex0; - - layout(location = 0) out vec3 uv0; - layout(location = 1) out vec4 col0; - - void main() - { - gl_Position = ipos; - uv0 = itex0; - col0 = icol0; - } - )"; - - static const char PASSTHROUGH_GEOMETRY_SHADER_SOURCE[] = R"( - layout(triangles) in; - layout(triangle_strip, max_vertices = EFB_LAYERS * 3) out; - - layout(location = 0) in vec3 in_uv0[]; - layout(location = 1) in vec4 in_col0[]; - - layout(location = 0) out vec3 out_uv0; - layout(location = 1) out vec4 out_col0; - - void main() - { - for (int j = 0; j < EFB_LAYERS; j++) - { - for (int i = 0; i < 3; i++) - { - gl_Layer = j; - gl_Position = gl_in[i].gl_Position; - out_uv0 = vec3(in_uv0[i].xy, float(j)); - out_col0 = in_col0[i]; - EmitVertex(); - } - EndPrimitive(); - } - } - )"; - - static const char SCREEN_QUAD_VERTEX_SHADER_SOURCE[] = R"( - layout(location = 0) out vec3 uv0; - - void main() - { - /* - * id &1 &2 clamp(*2-1) - * 0 0,0 0,0 -1,-1 TL - * 1 1,0 1,0 1,-1 TR - * 2 0,2 0,1 -1,1 BL - * 3 1,2 1,1 1,1 BR - */ - vec2 rawpos = vec2(float(gl_VertexID & 1), clamp(float(gl_VertexID & 2), 0.0f, 1.0f)); - gl_Position = vec4(rawpos * 2.0f - 1.0f, 0.0f, 1.0f); - uv0 = vec3(rawpos, 0.0f); - } - )"; - - static const char SCREEN_QUAD_GEOMETRY_SHADER_SOURCE[] = R"( - layout(triangles) in; - layout(triangle_strip, max_vertices = EFB_LAYERS * 3) out; - - layout(location = 0) in vec3 in_uv0[]; - - layout(location = 0) out vec3 
out_uv0; - - void main() - { - for (int j = 0; j < EFB_LAYERS; j++) - { - for (int i = 0; i < 3; i++) - { - gl_Layer = j; - gl_Position = gl_in[i].gl_Position; - out_uv0 = vec3(in_uv0[i].xy, float(j)); - EmitVertex(); - } - EndPrimitive(); - } - } - )"; - - static const char CLEAR_FRAGMENT_SHADER_SOURCE[] = R"( - layout(location = 0) in float3 uv0; - layout(location = 1) in float4 col0; - layout(location = 0) out float4 ocol0; - - void main() - { - ocol0 = col0; - } - - )"; - - const std::string header = GetUtilityShaderHeader(); - - m_screen_quad_vertex_shader = - Util::CompileAndCreateVertexShader(header + SCREEN_QUAD_VERTEX_SHADER_SOURCE); - m_passthrough_vertex_shader = - Util::CompileAndCreateVertexShader(header + PASSTHROUGH_VERTEX_SHADER_SOURCE); - if (m_screen_quad_vertex_shader == VK_NULL_HANDLE || - m_passthrough_vertex_shader == VK_NULL_HANDLE) - { - return false; - } - - if (g_ActiveConfig.stereo_mode != StereoMode::Off && g_vulkan_context->SupportsGeometryShaders()) - { - m_screen_quad_geometry_shader = - Util::CompileAndCreateGeometryShader(header + SCREEN_QUAD_GEOMETRY_SHADER_SOURCE); - m_passthrough_geometry_shader = - Util::CompileAndCreateGeometryShader(header + PASSTHROUGH_GEOMETRY_SHADER_SOURCE); - if (m_screen_quad_geometry_shader == VK_NULL_HANDLE || - m_passthrough_geometry_shader == VK_NULL_HANDLE) - { - return false; - } - } - - m_clear_fragment_shader = - Util::CompileAndCreateFragmentShader(header + CLEAR_FRAGMENT_SHADER_SOURCE); - if (m_clear_fragment_shader == VK_NULL_HANDLE) - return false; - - return true; -} - -void ShaderCache::DestroySharedShaders() -{ - auto DestroyShader = [this](VkShaderModule& shader) { - if (shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); - shader = VK_NULL_HANDLE; - } - }; - - DestroyShader(m_screen_quad_vertex_shader); - DestroyShader(m_passthrough_vertex_shader); - DestroyShader(m_screen_quad_geometry_shader); - 
DestroyShader(m_passthrough_geometry_shader); - DestroyShader(m_clear_fragment_shader); -} -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCache.h b/Source/Core/VideoBackends/Vulkan/ShaderCache.h deleted file mode 100644 index 51060358e2..0000000000 --- a/Source/Core/VideoBackends/Vulkan/ShaderCache.h +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/LinearDiskCache.h" - -#include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCompiler.h" - -#include "VideoCommon/RenderState.h" - -namespace Vulkan -{ -class CommandBufferManager; -class VertexFormat; -class StreamBuffer; - -struct PipelineInfo -{ - // These are packed in descending order of size, to avoid any padding so that the structure - // can be copied/compared as a single block of memory. 64-bit pointer size is assumed. 
- const VertexFormat* vertex_format; - VkPipelineLayout pipeline_layout; - VkShaderModule vs; - VkShaderModule gs; - VkShaderModule ps; - VkRenderPass render_pass; - BlendingState blend_state; - RasterizationState rasterization_state; - DepthState depth_state; - MultisamplingState multisampling_state; -}; - -struct PipelineInfoHash -{ - std::size_t operator()(const PipelineInfo& key) const; -}; - -bool operator==(const PipelineInfo& lhs, const PipelineInfo& rhs); -bool operator!=(const PipelineInfo& lhs, const PipelineInfo& rhs); -bool operator<(const PipelineInfo& lhs, const PipelineInfo& rhs); -bool operator>(const PipelineInfo& lhs, const PipelineInfo& rhs); - -struct ComputePipelineInfo -{ - VkPipelineLayout pipeline_layout; - VkShaderModule cs; -}; - -struct ComputePipelineInfoHash -{ - std::size_t operator()(const ComputePipelineInfo& key) const; -}; - -bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs); -bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs); -bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs); -bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs); - -class ShaderCache -{ -public: - ShaderCache(); - ~ShaderCache(); - - // Get utility shader header based on current config. - std::string GetUtilityShaderHeader() const; - - // Perform at startup, create descriptor layouts, compiles all static shaders. - bool Initialize(); - void Shutdown(); - - // Creates a pipeline for the specified description. The resulting pipeline, if successful - // is not stored anywhere, this is left up to the caller. - VkPipeline CreatePipeline(const PipelineInfo& info); - - // Find a pipeline by the specified description, if not found, attempts to create it. - VkPipeline GetPipeline(const PipelineInfo& info); - - // Creates a compute pipeline, and does not track the handle. 
- VkPipeline CreateComputePipeline(const ComputePipelineInfo& info); - - // Find a pipeline by the specified description, if not found, attempts to create it - VkPipeline GetComputePipeline(const ComputePipelineInfo& info); - - // Clears our pipeline cache of all objects. This is necessary when recompiling shaders, - // as drivers are free to return the same pointer again, which means that we may end up using - // and old pipeline object if they are not cleared first. Some stutter may be experienced - // while our cache is rebuilt on use, but the pipeline cache object should mitigate this. - // NOTE: Ensure that none of these objects are in use before calling. - void ClearPipelineCache(); - - // Saves the pipeline cache to disk. Call when shutting down. - void SavePipelineCache(); - - // Recompile shared shaders, call when stereo mode changes. - void RecompileSharedShaders(); - - // Reload pipeline cache. This will destroy all pipelines. - void ReloadPipelineCache(); - - // Shared shader accessors - VkShaderModule GetScreenQuadVertexShader() const { return m_screen_quad_vertex_shader; } - VkShaderModule GetPassthroughVertexShader() const { return m_passthrough_vertex_shader; } - VkShaderModule GetScreenQuadGeometryShader() const { return m_screen_quad_geometry_shader; } - VkShaderModule GetPassthroughGeometryShader() const { return m_passthrough_geometry_shader; } - VkShaderModule GetClearFragmentShader() const { return m_clear_fragment_shader; } - -private: - bool CreatePipelineCache(); - bool LoadPipelineCache(); - bool ValidatePipelineCache(const u8* data, size_t data_length); - void DestroyPipelineCache(); - bool CompileSharedShaders(); - void DestroySharedShaders(); - - std::unordered_map m_pipeline_objects; - std::unordered_map - m_compute_pipeline_objects; - VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; - std::string m_pipeline_cache_filename; - - // Utility/shared shaders - VkShaderModule m_screen_quad_vertex_shader = VK_NULL_HANDLE; - VkShaderModule 
m_passthrough_vertex_shader = VK_NULL_HANDLE; - VkShaderModule m_screen_quad_geometry_shader = VK_NULL_HANDLE; - VkShaderModule m_passthrough_geometry_shader = VK_NULL_HANDLE; - VkShaderModule m_clear_fragment_shader = VK_NULL_HANDLE; -}; - -extern std::unique_ptr g_shader_cache; - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp index d430c8bab9..abe6df2653 100644 --- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp +++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp @@ -53,12 +53,13 @@ static const char SHADER_HEADER[] = R"( #define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y) #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) #define SAMPLER_BINDING(x) layout(set = 1, binding = x) + #define TEXEL_BUFFER_BINDING(x) layout(set = 1, binding = (x + 8)) #define SSBO_BINDING(x) layout(set = 2, binding = x) - #define TEXEL_BUFFER_BINDING(x) layout(set = 2, binding = x) #define VARYING_LOCATION(x) layout(location = x) #define FORCE_EARLY_Z layout(early_fragment_tests) in // hlsl to glsl function translation + #define API_VULKAN 1 #define float2 vec2 #define float3 vec3 #define float4 vec4 @@ -79,12 +80,13 @@ static const char COMPUTE_SHADER_HEADER[] = R"( // Target GLSL 4.5. #version 450 core // All resources are packed into one descriptor set for compute. 
- #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (0 + x)) + #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) #define SAMPLER_BINDING(x) layout(set = 0, binding = (1 + x)) - #define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (5 + x)) - #define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (7 + x)) + #define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (3 + x)) + #define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (5 + x)) // hlsl to glsl function translation + #define API_VULKAN 1 #define float2 vec2 #define float3 vec3 #define float4 vec4 diff --git a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp index 7a2c238489..50d7c2f16a 100644 --- a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp +++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp @@ -9,7 +9,6 @@ #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/StagingBuffer.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan @@ -30,6 +29,28 @@ StagingBuffer::~StagingBuffer() g_command_buffer_mgr->DeferBufferDestruction(m_buffer); } +void StagingBuffer::BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, + VkAccessFlags src_access_mask, + VkAccessFlags dst_access_mask, VkDeviceSize offset, + VkDeviceSize size, VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask) +{ + VkBufferMemoryBarrier buffer_info = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + src_access_mask, // VkAccessFlags srcAccessMask + dst_access_mask, // VkAccessFlags dstAccessMask + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + buffer, // VkBuffer buffer + offset, // VkDeviceSize offset + size // VkDeviceSize size + }; + + 
vkCmdPipelineBarrier(command_buffer, src_stage_mask, dst_stage_mask, 0, 0, nullptr, 1, + &buffer_info, 0, nullptr); +} + bool StagingBuffer::Map(VkDeviceSize offset, VkDeviceSize size) { m_map_offset = offset; @@ -84,8 +105,8 @@ void StagingBuffer::InvalidateGPUCache(VkCommandBuffer command_buffer, return; ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); - Util::BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, dest_access_flags, - offset, size, VK_PIPELINE_STAGE_HOST_BIT, dest_pipeline_stage); + BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, dest_access_flags, offset, + size, VK_PIPELINE_STAGE_HOST_BIT, dest_pipeline_stage); } void StagingBuffer::PrepareForGPUWrite(VkCommandBuffer command_buffer, @@ -97,8 +118,8 @@ void StagingBuffer::PrepareForGPUWrite(VkCommandBuffer command_buffer, return; ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); - Util::BufferMemoryBarrier(command_buffer, m_buffer, 0, dst_access_flags, offset, size, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dst_pipeline_stage); + BufferMemoryBarrier(command_buffer, m_buffer, 0, dst_access_flags, offset, size, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dst_pipeline_stage); } void StagingBuffer::FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBits src_access_flags, @@ -109,8 +130,8 @@ void StagingBuffer::FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBi return; ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); - Util::BufferMemoryBarrier(command_buffer, m_buffer, src_access_flags, VK_ACCESS_HOST_READ_BIT, - offset, size, src_pipeline_stage, VK_PIPELINE_STAGE_HOST_BIT); + BufferMemoryBarrier(command_buffer, m_buffer, src_access_flags, VK_ACCESS_HOST_READ_BIT, offset, + size, src_pipeline_stage, VK_PIPELINE_STAGE_HOST_BIT); } void StagingBuffer::InvalidateCPUCache(VkDeviceSize offset, VkDeviceSize size) diff --git 
a/Source/Core/VideoBackends/Vulkan/StagingBuffer.h b/Source/Core/VideoBackends/Vulkan/StagingBuffer.h index 2ecb21cb22..bf2f9fb2e5 100644 --- a/Source/Core/VideoBackends/Vulkan/StagingBuffer.h +++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.h @@ -63,6 +63,13 @@ public: static bool AllocateBuffer(STAGING_BUFFER_TYPE type, VkDeviceSize size, VkBufferUsageFlags usage, VkBuffer* out_buffer, VkDeviceMemory* out_memory, bool* out_coherent); + // Wrapper for creating an barrier on a buffer + static void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask); + protected: STAGING_BUFFER_TYPE m_type; VkBuffer m_buffer; @@ -74,4 +81,4 @@ protected: VkDeviceSize m_map_offset = 0; VkDeviceSize m_map_size = 0; }; -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp index 089b385ffe..6bb1b8981d 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp @@ -4,32 +4,25 @@ #include "VideoBackends/Vulkan/StateTracker.h" -#include - -#include "Common/Align.h" #include "Common/Assert.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/Constants.h" #include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/VKPipeline.h" +#include "VideoBackends/Vulkan/VKShader.h" +#include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/GeometryShaderManager.h" -#include "VideoCommon/PixelShaderManager.h" -#include 
"VideoCommon/Statistics.h" -#include "VideoCommon/VertexLoaderManager.h" -#include "VideoCommon/VertexShaderManager.h" -#include "VideoCommon/VideoConfig.h" - namespace Vulkan { static std::unique_ptr s_state_tracker; +StateTracker::StateTracker() = default; + +StateTracker::~StateTracker() = default; + StateTracker* StateTracker::GetInstance() { return s_state_tracker.get(); @@ -49,46 +42,37 @@ bool StateTracker::CreateInstance() void StateTracker::DestroyInstance() { + if (!s_state_tracker) + return; + + // When the dummy texture is destroyed, it unbinds itself, then references itself. + // Clear everything out so this doesn't happen. + for (auto& it : s_state_tracker->m_bindings.samplers) + it.imageView = VK_NULL_HANDLE; + s_state_tracker->m_bindings.image_texture.imageView = VK_NULL_HANDLE; + s_state_tracker->m_dummy_texture.reset(); + s_state_tracker.reset(); } bool StateTracker::Initialize() { + // Create a dummy texture which can be used in place of a real binding. + m_dummy_texture = + VKTexture::Create(TextureConfig(1, 1, 1, 1, 1, AbstractTextureFormat::RGBA8, 0)); + if (!m_dummy_texture) + return false; + // Initialize all samplers to point by default for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) { - m_bindings.ps_samplers[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - m_bindings.ps_samplers[i].imageView = g_object_cache->GetDummyImageView(); - m_bindings.ps_samplers[i].sampler = g_object_cache->GetPointSampler(); + m_bindings.samplers[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + m_bindings.samplers[i].imageView = m_dummy_texture->GetView(); + m_bindings.samplers[i].sampler = g_object_cache->GetPointSampler(); } - // Create the streaming uniform buffer - m_uniform_stream_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, INITIAL_UNIFORM_STREAM_BUFFER_SIZE, - MAXIMUM_UNIFORM_STREAM_BUFFER_SIZE); - if (!m_uniform_stream_buffer) - { - PanicAlert("Failed to create uniform stream buffer"); - return false; - } - - // The 
validation layer complains if max(offsets) + max(ubo_ranges) >= ubo_size. - // To work around this we reserve the maximum buffer size at all times, but only commit - // as many bytes as we use. - m_uniform_buffer_reserve_size = sizeof(PixelShaderConstants); - m_uniform_buffer_reserve_size = Common::AlignUp(m_uniform_buffer_reserve_size, - g_vulkan_context->GetUniformBufferAlignment()) + - sizeof(VertexShaderConstants); - m_uniform_buffer_reserve_size = Common::AlignUp(m_uniform_buffer_reserve_size, - g_vulkan_context->GetUniformBufferAlignment()) + - sizeof(GeometryShaderConstants); - // Default dirty flags include all descriptors - InvalidateDescriptorSets(); - SetPendingRebind(); - - // Set default constants - UploadAllConstants(); + InvalidateCachedState(); return true; } @@ -113,20 +97,11 @@ void StateTracker::SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexT m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER; } -void StateTracker::SetRenderPass(VkRenderPass load_render_pass, VkRenderPass clear_render_pass) -{ - // Should not be changed within a render pass. - ASSERT(!InRenderPass()); - m_load_render_pass = load_render_pass; - m_clear_render_pass = clear_render_pass; -} - -void StateTracker::SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& render_area) +void StateTracker::SetFramebuffer(VKFramebuffer* framebuffer) { // Should not be changed within a render pass. ASSERT(!InRenderPass()); m_framebuffer = framebuffer; - m_framebuffer_size = render_area; } void StateTracker::SetPipeline(const VKPipeline* pipeline) @@ -134,264 +109,143 @@ void StateTracker::SetPipeline(const VKPipeline* pipeline) if (m_pipeline == pipeline) return; + // If the usage changes, we need to re-bind everything, as the layout is different. 
const bool new_usage = pipeline && (!m_pipeline || m_pipeline->GetUsage() != pipeline->GetUsage()); m_pipeline = pipeline; m_dirty_flags |= DIRTY_FLAG_PIPELINE; if (new_usage) - m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS; + m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SETS; } -void StateTracker::UpdateVertexShaderConstants() +void StateTracker::SetComputeShader(const VKShader* shader) { - if (!VertexShaderManager::dirty || !ReserveConstantStorage()) + if (m_compute_shader == shader) return; - // Buffer allocation changed? - if (m_uniform_stream_buffer->GetBuffer() != - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].buffer) - { - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].buffer = - m_uniform_stream_buffer->GetBuffer(); - m_dirty_flags |= DIRTY_FLAG_VS_UBO; - } - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset()); - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; - - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &VertexShaderManager::constants, - sizeof(VertexShaderConstants)); - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants)); - m_uniform_stream_buffer->CommitMemory(sizeof(VertexShaderConstants)); - VertexShaderManager::dirty = false; + m_compute_shader = shader; + m_dirty_flags |= DIRTY_FLAG_COMPUTE_SHADER; } -void StateTracker::UpdateGeometryShaderConstants() +void StateTracker::SetGXUniformBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size) { - if (!GeometryShaderManager::dirty || !ReserveConstantStorage()) + auto& binding = m_bindings.gx_ubo_bindings[index]; + if (binding.buffer != buffer || binding.range != size) + { + binding.buffer = buffer; + binding.range = size; + m_dirty_flags |= DIRTY_FLAG_GX_UBOS; + } + + if (m_bindings.gx_ubo_offsets[index] != offset) + { + m_bindings.gx_ubo_offsets[index] = offset; + m_dirty_flags |= DIRTY_FLAG_GX_UBO_OFFSETS; + } +} + +void 
StateTracker::SetUtilityUniformBuffer(VkBuffer buffer, u32 offset, u32 size) +{ + auto& binding = m_bindings.utility_ubo_binding; + if (binding.buffer != buffer || binding.range != size) + { + binding.buffer = buffer; + binding.range = size; + m_dirty_flags |= DIRTY_FLAG_UTILITY_UBO; + } + + if (m_bindings.utility_ubo_offset != offset) + { + m_bindings.utility_ubo_offset = offset; + m_dirty_flags |= DIRTY_FLAG_UTILITY_UBO_OFFSET | DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET; + } +} + +void StateTracker::SetTexture(u32 index, VkImageView view) +{ + if (m_bindings.samplers[index].imageView == view) return; - // Buffer allocation changed? - if (m_uniform_stream_buffer->GetBuffer() != - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].buffer) - { - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].buffer = - m_uniform_stream_buffer->GetBuffer(); - m_dirty_flags |= DIRTY_FLAG_GS_UBO; - } - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_GS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset()); - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; - - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &GeometryShaderManager::constants, - sizeof(GeometryShaderConstants)); - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants)); - m_uniform_stream_buffer->CommitMemory(sizeof(GeometryShaderConstants)); - GeometryShaderManager::dirty = false; + m_bindings.samplers[index].imageView = view; + m_bindings.samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + m_dirty_flags |= + DIRTY_FLAG_GX_SAMPLERS | DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS; } -void StateTracker::UpdatePixelShaderConstants() +void StateTracker::SetSampler(u32 index, VkSampler sampler) { - if (!PixelShaderManager::dirty || !ReserveConstantStorage()) + if (m_bindings.samplers[index].sampler == sampler) return; - // Buffer allocation changed? 
- if (m_uniform_stream_buffer->GetBuffer() != - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].buffer) - { - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].buffer = - m_uniform_stream_buffer->GetBuffer(); - m_dirty_flags |= DIRTY_FLAG_PS_UBO; - } - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset()); - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; - - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &PixelShaderManager::constants, - sizeof(PixelShaderConstants)); - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants)); - m_uniform_stream_buffer->CommitMemory(sizeof(PixelShaderConstants)); - PixelShaderManager::dirty = false; + m_bindings.samplers[index].sampler = sampler; + m_dirty_flags |= + DIRTY_FLAG_GX_SAMPLERS | DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS; } -void StateTracker::UpdateConstants(const void* data, u32 data_size) +void StateTracker::SetSSBO(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range) { - if (!m_uniform_stream_buffer->ReserveMemory( - data_size, g_vulkan_context->GetUniformBufferAlignment(), true, true, false)) - { - WARN_LOG(VIDEO, "Executing command buffer while waiting for ext space in uniform buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); - } - - for (u32 binding = 0; binding < NUM_UBO_DESCRIPTOR_SET_BINDINGS; binding++) - { - if (m_bindings.uniform_buffer_bindings[binding].buffer != m_uniform_stream_buffer->GetBuffer()) - { - m_bindings.uniform_buffer_bindings[binding].buffer = m_uniform_stream_buffer->GetBuffer(); - m_dirty_flags |= DIRTY_FLAG_VS_UBO << binding; - } - m_bindings.uniform_buffer_offsets[binding] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset()); - } - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; - - std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), data, data_size); - ADDSTAT(stats.thisFrame.bytesUniformStreamed, 
data_size); - m_uniform_stream_buffer->CommitMemory(data_size); - - // Cached data is now out-of-sync. - VertexShaderManager::dirty = true; - GeometryShaderManager::dirty = true; - PixelShaderManager::dirty = true; -} - -bool StateTracker::ReserveConstantStorage() -{ - // Since we invalidate all constants on command buffer execution, it doesn't matter if this - // causes the stream buffer to be resized. - if (m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size, - g_vulkan_context->GetUniformBufferAlignment(), true, - true, false)) - { - return true; - } - - // The only places that call constant updates are safe to have state restored. - WARN_LOG(VIDEO, "Executing command buffer while waiting for space in uniform buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); - - // Since we are on a new command buffer, all constants have been invalidated, and we need - // to reupload them. We may as well do this now, since we're issuing a draw anyway. - UploadAllConstants(); - return false; -} - -void StateTracker::UploadAllConstants() -{ - // We are free to re-use parts of the buffer now since we're uploading all constants. - size_t ub_alignment = g_vulkan_context->GetUniformBufferAlignment(); - size_t pixel_constants_offset = 0; - size_t vertex_constants_offset = - Common::AlignUp(pixel_constants_offset + sizeof(PixelShaderConstants), ub_alignment); - size_t geometry_constants_offset = - Common::AlignUp(vertex_constants_offset + sizeof(VertexShaderConstants), ub_alignment); - size_t allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants); - - // Allocate everything at once. - // We should only be here if the buffer was full and a command buffer was submitted anyway. 
- if (!m_uniform_stream_buffer->ReserveMemory(allocation_size, ub_alignment, true, true, false)) - { - PanicAlert("Failed to allocate space for constants in streaming buffer"); - return; - } - - // Update bindings - for (size_t i = 0; i < NUM_UBO_DESCRIPTOR_SET_BINDINGS; i++) - { - m_bindings.uniform_buffer_bindings[i].buffer = m_uniform_stream_buffer->GetBuffer(); - m_bindings.uniform_buffer_bindings[i].offset = 0; - } - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].range = - sizeof(PixelShaderConstants); - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].range = - sizeof(VertexShaderConstants); - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].range = - sizeof(GeometryShaderConstants); - - // Update dynamic offsets - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset() + pixel_constants_offset); - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset() + vertex_constants_offset); - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_GS] = static_cast( - m_uniform_stream_buffer->GetCurrentOffset() + geometry_constants_offset); - - m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_VS_UBO | - DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO; - - // Copy the actual data in - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + pixel_constants_offset, - &PixelShaderManager::constants, sizeof(PixelShaderConstants)); - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + vertex_constants_offset, - &VertexShaderManager::constants, sizeof(VertexShaderConstants)); - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + geometry_constants_offset, - &GeometryShaderManager::constants, sizeof(GeometryShaderConstants)); - - // Finally, flush buffer memory after copying - m_uniform_stream_buffer->CommitMemory(allocation_size); - - 
// Clear dirty flags - VertexShaderManager::dirty = false; - GeometryShaderManager::dirty = false; - PixelShaderManager::dirty = false; -} - -void StateTracker::SetTexture(size_t index, VkImageView view) -{ - if (m_bindings.ps_samplers[index].imageView == view) - return; - - m_bindings.ps_samplers[index].imageView = view; - m_bindings.ps_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - m_dirty_flags |= DIRTY_FLAG_PS_SAMPLERS; -} - -void StateTracker::SetSampler(size_t index, VkSampler sampler) -{ - if (m_bindings.ps_samplers[index].sampler == sampler) - return; - - m_bindings.ps_samplers[index].sampler = sampler; - m_dirty_flags |= DIRTY_FLAG_PS_SAMPLERS; -} - -void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range) -{ - if (m_bindings.ps_ssbo.buffer == buffer && m_bindings.ps_ssbo.offset == offset && - m_bindings.ps_ssbo.range == range) + if (m_bindings.ssbo.buffer == buffer && m_bindings.ssbo.offset == offset && + m_bindings.ssbo.range == range) { return; } - m_bindings.ps_ssbo.buffer = buffer; - m_bindings.ps_ssbo.offset = offset; - m_bindings.ps_ssbo.range = range; - m_dirty_flags |= DIRTY_FLAG_PS_SSBO; + m_bindings.ssbo.buffer = buffer; + m_bindings.ssbo.offset = offset; + m_bindings.ssbo.range = range; + m_dirty_flags |= DIRTY_FLAG_GX_SSBO; +} + +void StateTracker::SetTexelBuffer(u32 index, VkBufferView view) +{ + if (m_bindings.texel_buffers[index] == view) + return; + + m_bindings.texel_buffers[index] = view; + m_dirty_flags |= DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS; +} + +void StateTracker::SetImageTexture(VkImageView view) +{ + if (m_bindings.image_texture.imageView == view) + return; + + m_bindings.image_texture.imageView = view; + m_bindings.image_texture.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + m_dirty_flags |= DIRTY_FLAG_COMPUTE_BINDINGS; } void StateTracker::UnbindTexture(VkImageView view) { - for (VkDescriptorImageInfo& it : m_bindings.ps_samplers) + for 
(VkDescriptorImageInfo& it : m_bindings.samplers) { if (it.imageView == view) - it.imageView = g_object_cache->GetDummyImageView(); + { + it.imageView = m_dummy_texture->GetView(); + it.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + } + } + + if (m_bindings.image_texture.imageView == view) + { + m_bindings.image_texture.imageView = m_dummy_texture->GetView(); + m_bindings.image_texture.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } } -void StateTracker::InvalidateDescriptorSets() +void StateTracker::InvalidateCachedState() { - m_descriptor_sets.fill(VK_NULL_HANDLE); - m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS; -} - -void StateTracker::InvalidateConstants() -{ - VertexShaderManager::dirty = true; - GeometryShaderManager::dirty = true; - PixelShaderManager::dirty = true; -} - -void StateTracker::SetPendingRebind() -{ - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_DESCRIPTOR_SET_BINDING | - DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_VIEWPORT | - DIRTY_FLAG_SCISSOR | DIRTY_FLAG_PIPELINE; + m_gx_descriptor_sets.fill(VK_NULL_HANDLE); + m_utility_descriptor_sets.fill(VK_NULL_HANDLE); + m_compute_descriptor_set = VK_NULL_HANDLE; + m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTORS | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | + DIRTY_FLAG_PIPELINE | DIRTY_FLAG_COMPUTE_SHADER | DIRTY_FLAG_DESCRIPTOR_SETS | + DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET; + if (m_vertex_buffer != VK_NULL_HANDLE) + m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER; + if (m_index_buffer != VK_NULL_HANDLE) + m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER; } void StateTracker::BeginRenderPass() @@ -399,13 +253,33 @@ void StateTracker::BeginRenderPass() if (InRenderPass()) return; - m_current_render_pass = m_load_render_pass; - m_framebuffer_render_area = m_framebuffer_size; + m_current_render_pass = m_framebuffer->GetLoadRenderPass(); + m_framebuffer_render_area = m_framebuffer->GetRect(); VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, 
- m_framebuffer, + m_framebuffer->GetFB(), + m_framebuffer_render_area, + 0, + nullptr}; + + vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, + VK_SUBPASS_CONTENTS_INLINE); +} + +void StateTracker::BeginDiscardRenderPass() +{ + if (InRenderPass()) + return; + + m_current_render_pass = m_framebuffer->GetDiscardRenderPass(); + m_framebuffer_render_area = m_framebuffer->GetRect(); + + VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + m_current_render_pass, + m_framebuffer->GetFB(), m_framebuffer_render_area, 0, nullptr}; @@ -428,13 +302,13 @@ void StateTracker::BeginClearRenderPass(const VkRect2D& area, const VkClearValue { ASSERT(!InRenderPass()); - m_current_render_pass = m_clear_render_pass; + m_current_render_pass = m_framebuffer->GetClearRenderPass(); m_framebuffer_render_area = area; VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, - m_framebuffer, + m_framebuffer->GetFB(), m_framebuffer_render_area, num_clear_values, clear_values}; @@ -461,22 +335,22 @@ void StateTracker::SetScissor(const VkRect2D& scissor) m_dirty_flags |= DIRTY_FLAG_SCISSOR; } -bool StateTracker::Bind(bool rebind_all /*= false*/) +bool StateTracker::Bind() { // Must have a pipeline. if (!m_pipeline) return false; // Check the render area if we were in a clear pass. - if (m_current_render_pass == m_clear_render_pass && !IsViewportWithinRenderArea()) + if (m_current_render_pass == m_framebuffer->GetClearRenderPass() && !IsViewportWithinRenderArea()) EndRenderPass(); // Get a new descriptor set if any parts have changed - if (m_dirty_flags & DIRTY_FLAG_ALL_DESCRIPTOR_SETS && !UpdateDescriptorSet()) + if (!UpdateDescriptorSet()) { // We can fail to allocate descriptors if we exhaust the pool for this command buffer. 
WARN_LOG(VIDEO, "Failed to get a descriptor set, executing buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false, false); + Renderer::GetInstance()->ExecuteCommandBuffer(false, false); if (!UpdateDescriptorSet()) { // Something strange going on. @@ -490,151 +364,57 @@ bool StateTracker::Bind(bool rebind_all /*= false*/) BeginRenderPass(); // Re-bind parts of the pipeline - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - if (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER || rebind_all) + const VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + if (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER) vkCmdBindVertexBuffers(command_buffer, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset); - if (m_dirty_flags & DIRTY_FLAG_INDEX_BUFFER || rebind_all) + if (m_dirty_flags & DIRTY_FLAG_INDEX_BUFFER) vkCmdBindIndexBuffer(command_buffer, m_index_buffer, m_index_buffer_offset, m_index_type); - if (m_dirty_flags & DIRTY_FLAG_PIPELINE || rebind_all) + if (m_dirty_flags & DIRTY_FLAG_PIPELINE) vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipeline()); - if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SET_BINDING || rebind_all) - { - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline->GetVkPipelineLayout(), 0, m_num_active_descriptor_sets, - m_descriptor_sets.data(), m_num_dynamic_offsets, - m_bindings.uniform_buffer_offsets.data()); - } - else if (m_dirty_flags & DIRTY_FLAG_DYNAMIC_OFFSETS) - { - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline->GetVkPipelineLayout(), - DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS, 1, - &m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS], - m_num_dynamic_offsets, m_bindings.uniform_buffer_offsets.data()); - } - - if (m_dirty_flags & DIRTY_FLAG_VIEWPORT || rebind_all) + if (m_dirty_flags & DIRTY_FLAG_VIEWPORT) vkCmdSetViewport(command_buffer, 0, 1, &m_viewport); - if 
(m_dirty_flags & DIRTY_FLAG_SCISSOR || rebind_all) + if (m_dirty_flags & DIRTY_FLAG_SCISSOR) vkCmdSetScissor(command_buffer, 0, 1, &m_scissor); - m_dirty_flags = 0; + m_dirty_flags &= ~(DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | + DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR); return true; } -void StateTracker::OnDraw() +bool StateTracker::BindCompute() { - m_draw_counter++; + if (!m_compute_shader) + return false; - // If we didn't have any CPU access last frame, do nothing. - if (m_scheduled_command_buffer_kicks.empty() || !m_allow_background_execution) - return; + // Can't kick compute in a render pass. + if (InRenderPass()) + EndRenderPass(); - // Check if this draw is scheduled to kick a command buffer. - // The draw counters will always be sorted so a binary search is possible here. - if (std::binary_search(m_scheduled_command_buffer_kicks.begin(), - m_scheduled_command_buffer_kicks.end(), m_draw_counter)) + const VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + if (m_dirty_flags & DIRTY_FLAG_COMPUTE_SHADER) { - // Kick a command buffer on the background thread. - Util::ExecuteCurrentCommandsAndRestoreState(true); - } -} - -void StateTracker::OnCPUEFBAccess() -{ - // Check this isn't another access without any draws inbetween. - if (!m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter) - return; - - // Store the current draw counter for scheduling in OnEndFrame. - m_cpu_accesses_this_frame.emplace_back(m_draw_counter); -} - -void StateTracker::OnEFBCopyToRAM() -{ - // If we're not deferring, try to preempt it next frame. - if (!g_ActiveConfig.bDeferEFBCopies) - { - OnCPUEFBAccess(); - return; + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + m_compute_shader->GetComputePipeline()); } - // Otherwise, only execute if we have at least 10 objects between us and the last copy. 
- const u32 diff = m_draw_counter - m_last_efb_copy_draw_counter; - m_last_efb_copy_draw_counter = m_draw_counter; - if (diff < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK) - return; - - Util::ExecuteCurrentCommandsAndRestoreState(true); -} - -void StateTracker::OnEndFrame() -{ - m_draw_counter = 0; - m_last_efb_copy_draw_counter = 0; - m_scheduled_command_buffer_kicks.clear(); - - // If we have no CPU access at all, leave everything in the one command buffer for maximum - // parallelism between CPU/GPU, at the cost of slightly higher latency. - if (m_cpu_accesses_this_frame.empty()) - return; - - // In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway - // between the draw counters that invoked the readback, or every 250 draws, whichever is smaller. - if (g_ActiveConfig.iCommandBufferExecuteInterval > 0) + if (!UpdateComputeDescriptorSet()) { - u32 last_draw_counter = 0; - u32 interval = static_cast(g_ActiveConfig.iCommandBufferExecuteInterval); - for (u32 draw_counter : m_cpu_accesses_this_frame) + WARN_LOG(VIDEO, "Failed to get a compute descriptor set, executing buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false, false); + if (!UpdateComputeDescriptorSet()) { - // We don't want to waste executing command buffers for only a few draws, so set a minimum. - // Leave last_draw_counter as-is, so we get the correct number of draws between submissions. - u32 draw_count = draw_counter - last_draw_counter; - if (draw_count < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK) - continue; - - if (draw_count <= interval) - { - u32 mid_point = draw_count / 2; - m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + mid_point); - } - else - { - u32 counter = interval; - while (counter < draw_count) - { - m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + counter); - counter += interval; - } - } - - last_draw_counter = draw_counter; + // Something strange going on. 
+ ERROR_LOG(VIDEO, "Failed to get descriptor set, skipping dispatch"); + return false; } } -#if 0 - { - std::stringstream ss; - std::for_each(m_cpu_accesses_this_frame.begin(), m_cpu_accesses_this_frame.end(), [&ss](u32 idx) { ss << idx << ","; }); - WARN_LOG(VIDEO, "CPU EFB accesses in last frame: %s", ss.str().c_str()); - } - { - std::stringstream ss; - std::for_each(m_scheduled_command_buffer_kicks.begin(), m_scheduled_command_buffer_kicks.end(), [&ss](u32 idx) { ss << idx << ","; }); - WARN_LOG(VIDEO, "Scheduled command buffer kicks: %s", ss.str().c_str()); - } -#endif - - m_cpu_accesses_this_frame.clear(); -} - -void StateTracker::SetBackgroundCommandBufferExecution(bool enabled) -{ - m_allow_background_execution = enabled; + m_dirty_flags &= ~DIRTY_FLAG_COMPUTE_SHADER; + return true; } bool StateTracker::IsWithinRenderArea(s32 x, s32 y, u32 width, u32 height) const @@ -661,7 +441,7 @@ bool StateTracker::IsViewportWithinRenderArea() const void StateTracker::EndClearRenderPass() { - if (m_current_render_pass != m_clear_render_pass) + if (m_current_render_pass != m_framebuffer->GetClearRenderPass()) return; // End clear render pass. 
Bind() will call BeginRenderPass() which @@ -685,135 +465,244 @@ bool StateTracker::UpdateGXDescriptorSet() std::array writes; u32 num_writes = 0; - if (m_dirty_flags & (DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO) || - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS] == VK_NULL_HANDLE) + if (m_dirty_flags & DIRTY_FLAG_GX_UBOS || m_gx_descriptor_sets[0] == VK_NULL_HANDLE) { - VkDescriptorSetLayout layout = - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS); - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(layout); - if (set == VK_NULL_HANDLE) + m_gx_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS)); + if (m_gx_descriptor_sets[0] == VK_NULL_HANDLE) return false; for (size_t i = 0; i < NUM_UBO_DESCRIPTOR_SET_BINDINGS; i++) { - if (i == UBO_DESCRIPTOR_SET_BINDING_GS && !g_vulkan_context->SupportsGeometryShaders()) + if (i == UBO_DESCRIPTOR_SET_BINDING_GS && + !g_ActiveConfig.backend_info.bSupportsGeometryShaders) + { continue; + } writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, - set, + m_gx_descriptor_sets[0], static_cast(i), 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, nullptr, - &m_bindings.uniform_buffer_bindings[i], + &m_bindings.gx_ubo_bindings[i], nullptr}; } - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS] = set; - m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_UBOS) | DIRTY_FLAG_DESCRIPTOR_SETS; } - if (m_dirty_flags & DIRTY_FLAG_PS_SAMPLERS || - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] == VK_NULL_HANDLE) + if (m_dirty_flags & DIRTY_FLAG_GX_SAMPLERS || m_gx_descriptor_sets[1] == VK_NULL_HANDLE) { - VkDescriptorSetLayout layout = - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS); - VkDescriptorSet set = 
g_command_buffer_mgr->AllocateDescriptorSet(layout); - if (set == VK_NULL_HANDLE) + m_gx_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS)); + if (m_gx_descriptor_sets[1] == VK_NULL_HANDLE) return false; writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, - set, + m_gx_descriptor_sets[1], 0, 0, static_cast(NUM_PIXEL_SHADER_SAMPLERS), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - m_bindings.ps_samplers.data(), + m_bindings.samplers.data(), nullptr, nullptr}; - - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set; - m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_SAMPLERS) | DIRTY_FLAG_DESCRIPTOR_SETS; } - if (g_vulkan_context->SupportsBoundingBox() && - (m_dirty_flags & DIRTY_FLAG_PS_SSBO || - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE)) + if (g_ActiveConfig.backend_info.bSupportsBBox && + (m_dirty_flags & DIRTY_FLAG_GX_SSBO || m_gx_descriptor_sets[2] == VK_NULL_HANDLE)) { - VkDescriptorSetLayout layout = - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS); - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(layout); - if (set == VK_NULL_HANDLE) + m_gx_descriptor_sets[2] = + g_command_buffer_mgr->AllocateDescriptorSet(g_object_cache->GetDescriptorSetLayout( + DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS)); + if (m_gx_descriptor_sets[2] == VK_NULL_HANDLE) return false; - writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - 1, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - nullptr, - &m_bindings.ps_ssbo, - nullptr}; - - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] = set; - m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + writes[num_writes++] = { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, 
m_gx_descriptor_sets[2], 0, 0, 1, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, nullptr, &m_bindings.ssbo, nullptr}; + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_SSBO) | DIRTY_FLAG_DESCRIPTOR_SETS; } if (num_writes > 0) vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_writes, writes.data(), 0, nullptr); - m_num_active_descriptor_sets = g_vulkan_context->SupportsBoundingBox() ? - NUM_GX_DRAW_DESCRIPTOR_SETS_SSBO : - NUM_GX_DRAW_DESCRIPTOR_SETS; - m_num_dynamic_offsets = NUM_UBO_DESCRIPTOR_SET_BINDINGS; + if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SETS) + { + vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, + g_ActiveConfig.backend_info.bSupportsBBox ? + NUM_GX_DESCRIPTOR_SETS : + (NUM_GX_DESCRIPTOR_SETS - 1), + m_gx_descriptor_sets.data(), NUM_UBO_DESCRIPTOR_SET_BINDINGS, + m_bindings.gx_ubo_offsets.data()); + m_dirty_flags &= ~(DIRTY_FLAG_DESCRIPTOR_SETS | DIRTY_FLAG_GX_UBO_OFFSETS); + } + else if (m_dirty_flags & DIRTY_FLAG_GX_UBO_OFFSETS) + { + vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, + 1, m_gx_descriptor_sets.data(), NUM_UBO_DESCRIPTOR_SET_BINDINGS, + m_bindings.gx_ubo_offsets.data()); + m_dirty_flags &= ~DIRTY_FLAG_GX_UBO_OFFSETS; + } + return true; } bool StateTracker::UpdateUtilityDescriptorSet() { + // Max number of updates - UBO, Samplers, TexelBuffer + std::array dswrites; + u32 writes = 0; + // Allocate descriptor sets. 
- m_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER)); - m_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS)); - if (m_descriptor_sets[0] == VK_NULL_HANDLE || m_descriptor_sets[1] == VK_NULL_HANDLE) + if (m_dirty_flags & DIRTY_FLAG_UTILITY_UBO || m_utility_descriptor_sets[0] == VK_NULL_HANDLE) { - return false; + m_utility_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER)); + if (!m_utility_descriptor_sets[0]) + return false; + + dswrites[writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_utility_descriptor_sets[0], + 0, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + nullptr, + &m_bindings.utility_ubo_binding, + nullptr}; + + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_UTILITY_UBO) | DIRTY_FLAG_DESCRIPTOR_SETS; } - // Build UBO descriptor set. 
- std::array dswrites; - dswrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - m_descriptor_sets[0], - 0, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - &m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS], - nullptr}; - dswrites[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - m_descriptor_sets[1], - 0, - 0, - NUM_PIXEL_SHADER_SAMPLERS, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - m_bindings.ps_samplers.data(), - nullptr, - nullptr}; + if (m_dirty_flags & DIRTY_FLAG_UTILITY_BINDINGS || m_utility_descriptor_sets[1] == VK_NULL_HANDLE) + { + m_utility_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS)); + if (!m_utility_descriptor_sets[1]) + return false; + + dswrites[writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_utility_descriptor_sets[1], + 0, + 0, + NUM_PIXEL_SHADER_SAMPLERS, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + m_bindings.samplers.data(), + nullptr, + nullptr}; + dswrites[writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_utility_descriptor_sets[1], + 8, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + nullptr, + nullptr, + m_bindings.texel_buffers.data()}; + + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_UTILITY_BINDINGS) | DIRTY_FLAG_DESCRIPTOR_SETS; + } + + if (writes > 0) + vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), writes, dswrites.data(), 0, nullptr); + + if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SETS) + { + vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, + NUM_UTILITY_DESCRIPTOR_SETS, m_utility_descriptor_sets.data(), 1, + &m_bindings.utility_ubo_offset); + m_dirty_flags &= ~(DIRTY_FLAG_DESCRIPTOR_SETS | DIRTY_FLAG_UTILITY_UBO_OFFSET); + } + else if (m_dirty_flags & DIRTY_FLAG_UTILITY_UBO_OFFSET) + { + 
vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, + 1, m_utility_descriptor_sets.data(), 1, &m_bindings.utility_ubo_offset); + m_dirty_flags &= ~(DIRTY_FLAG_DESCRIPTOR_SETS | DIRTY_FLAG_UTILITY_UBO_OFFSET); + } + + return true; +} + +bool StateTracker::UpdateComputeDescriptorSet() +{ + // Max number of updates - UBO, Samplers, TexelBuffer, Image + std::array dswrites; + + // Allocate descriptor sets. + if (m_dirty_flags & DIRTY_FLAG_COMPUTE_BINDINGS) + { + m_compute_descriptor_set = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE)); + dswrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_compute_descriptor_set, + 0, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + nullptr, + &m_bindings.utility_ubo_binding, + nullptr}; + dswrites[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_compute_descriptor_set, + 1, + 0, + NUM_COMPUTE_SHADER_SAMPLERS, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + m_bindings.samplers.data(), + nullptr, + nullptr}; + dswrites[2] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_compute_descriptor_set, + 3, + 0, + NUM_COMPUTE_TEXEL_BUFFERS, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + nullptr, + nullptr, + m_bindings.texel_buffers.data()}; + dswrites[3] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_compute_descriptor_set, + 5, + 0, + 1, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + &m_bindings.image_texture, + nullptr, + nullptr}; + + vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), static_cast(dswrites.size()), + dswrites.data(), 0, nullptr); + m_dirty_flags = + (m_dirty_flags & ~DIRTY_FLAG_COMPUTE_BINDINGS) | DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET; + } + + if (m_dirty_flags & DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET) + { + vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_COMPUTE, + 
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE), 0, 1, + &m_compute_descriptor_set, 1, &m_bindings.utility_ubo_offset); + m_dirty_flags &= ~DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET; + } - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), static_cast(dswrites.size()), - dswrites.data(), 0, nullptr); - m_num_active_descriptor_sets = NUM_UTILITY_DRAW_DESCRIPTOR_SETS; - m_num_dynamic_offsets = 1; - m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; return true; } diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.h b/Source/Core/VideoBackends/Vulkan/StateTracker.h index f4cbc8e180..10e804d69f 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.h +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.h @@ -10,64 +10,53 @@ #include "Common/CommonTypes.h" #include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/RenderBase.h" namespace Vulkan { +class VKFramebuffer; +class VKShader; class VKPipeline; +class VKTexture; class StreamBuffer; class VertexFormat; class StateTracker { public: - StateTracker() = default; - ~StateTracker() = default; + StateTracker(); + ~StateTracker(); static StateTracker* GetInstance(); static bool CreateInstance(); static void DestroyInstance(); - VkFramebuffer GetFramebuffer() const { return m_framebuffer; } + VKFramebuffer* GetFramebuffer() const { return m_framebuffer; } const VKPipeline* GetPipeline() const { return m_pipeline; } void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset); void SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type); - - void SetRenderPass(VkRenderPass load_render_pass, VkRenderPass clear_render_pass); - void SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& render_area); + void SetFramebuffer(VKFramebuffer* framebuffer); void SetPipeline(const VKPipeline* pipeline); - - void UpdateVertexShaderConstants(); - void UpdateGeometryShaderConstants(); - void 
UpdatePixelShaderConstants(); - - // Updates constants from external data, e.g. utility draws. - void UpdateConstants(const void* data, u32 data_size); - - void SetTexture(size_t index, VkImageView view); - void SetSampler(size_t index, VkSampler sampler); - - void SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range); + void SetComputeShader(const VKShader* shader); + void SetGXUniformBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); + void SetUtilityUniformBuffer(VkBuffer buffer, u32 offset, u32 size); + void SetTexture(u32 index, VkImageView view); + void SetSampler(u32 index, VkSampler sampler); + void SetSSBO(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range); + void SetTexelBuffer(u32 index, VkBufferView view); + void SetImageTexture(VkImageView view); void UnbindTexture(VkImageView view); - // When executing a command buffer, we want to recreate the descriptor set, as it will - // now be in a different pool for the new command buffer. - void InvalidateDescriptorSets(); - - // Same with the uniforms, as the current storage will belong to the previous command buffer. - void InvalidateConstants(); - // Set dirty flags on everything to force re-bind at next draw time. - void SetPendingRebind(); + void InvalidateCachedState(); // Ends a render pass if we're currently in one. // When Bind() is next called, the pass will be restarted. // Calling this function is allowed even if a pass has not begun. bool InRenderPass() const { return m_current_render_pass != VK_NULL_HANDLE; } void BeginRenderPass(); + void BeginDiscardRenderPass(); void EndRenderPass(); // Ends the current render pass if it was a clear render pass. @@ -78,53 +67,48 @@ public: void SetViewport(const VkViewport& viewport); void SetScissor(const VkRect2D& scissor); - bool Bind(bool rebind_all = false); + // Binds all dirty state to the command buffer. + // If this returns false, you should not issue the draw. 
+ bool Bind(); - // CPU Access Tracking - // Call after a draw call is made. - void OnDraw(); - - // Call after CPU access is requested. - void OnCPUEFBAccess(); - - // Call after an EFB copy to RAM. If true, the current command buffer should be executed. - void OnEFBCopyToRAM(); - - // Call at the end of a frame. - void OnEndFrame(); - - // Prevent/allow background command buffer execution. - // Use when queries are active. - void SetBackgroundCommandBufferExecution(bool enabled); + // Binds all dirty compute state to the command buffer. + // If this returns false, you should not dispatch the shader. + bool BindCompute(); + // Returns true if the specified rectangle is inside the current render area (used for clears). bool IsWithinRenderArea(s32 x, s32 y, u32 width, u32 height) const; private: // Number of descriptor sets for game draws. enum { - NUM_GX_DRAW_DESCRIPTOR_SETS = DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS + 1, - NUM_GX_DRAW_DESCRIPTOR_SETS_SSBO = DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER + 1, - NUM_UTILITY_DRAW_DESCRIPTOR_SETS = 2 + NUM_GX_DESCRIPTOR_SETS = 3, + NUM_UTILITY_DESCRIPTOR_SETS = 2, + NUM_COMPUTE_DESCRIPTOR_SETS = 1 }; - enum DITRY_FLAG : u32 + enum DIRTY_FLAG : u32 { - DIRTY_FLAG_VS_UBO = (1 << 0), - DIRTY_FLAG_GS_UBO = (1 << 1), - DIRTY_FLAG_PS_UBO = (1 << 2), - DIRTY_FLAG_PS_SAMPLERS = (1 << 3), - DIRTY_FLAG_PS_SSBO = (1 << 4), - DIRTY_FLAG_DYNAMIC_OFFSETS = (1 << 5), - DIRTY_FLAG_VERTEX_BUFFER = (1 << 6), - DIRTY_FLAG_INDEX_BUFFER = (1 << 7), - DIRTY_FLAG_VIEWPORT = (1 << 8), - DIRTY_FLAG_SCISSOR = (1 << 9), - DIRTY_FLAG_PIPELINE = (1 << 10), - DIRTY_FLAG_DESCRIPTOR_SET_BINDING = (1 << 11), + DIRTY_FLAG_GX_UBOS = (1 << 0), + DIRTY_FLAG_GX_UBO_OFFSETS = (1 << 1), + DIRTY_FLAG_GX_SAMPLERS = (1 << 4), + DIRTY_FLAG_GX_SSBO = (1 << 5), + DIRTY_FLAG_UTILITY_UBO = (1 << 2), + DIRTY_FLAG_UTILITY_UBO_OFFSET = (1 << 3), + DIRTY_FLAG_UTILITY_BINDINGS = (1 << 6), + DIRTY_FLAG_COMPUTE_BINDINGS = (1 << 7), + DIRTY_FLAG_VERTEX_BUFFER = 
(1 << 8), + DIRTY_FLAG_INDEX_BUFFER = (1 << 9), + DIRTY_FLAG_VIEWPORT = (1 << 10), + DIRTY_FLAG_SCISSOR = (1 << 11), + DIRTY_FLAG_PIPELINE = (1 << 12), + DIRTY_FLAG_COMPUTE_SHADER = (1 << 13), + DIRTY_FLAG_DESCRIPTOR_SETS = (1 << 14), + DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET = (1 << 15), - DIRTY_FLAG_ALL_DESCRIPTOR_SETS = DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO | - DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO + DIRTY_FLAG_ALL_DESCRIPTORS = DIRTY_FLAG_GX_UBOS | DIRTY_FLAG_UTILITY_UBO | + DIRTY_FLAG_GX_SAMPLERS | DIRTY_FLAG_GX_SSBO | + DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS }; bool Initialize(); @@ -136,12 +120,7 @@ private: bool UpdateDescriptorSet(); bool UpdateGXDescriptorSet(); bool UpdateUtilityDescriptorSet(); - - // Allocates storage in the uniform buffer of the specified size. If this storage cannot be - // allocated immediately, the current command buffer will be submitted and all stage's - // constants will be re-uploaded. false will be returned in this case, otherwise true. - bool ReserveConstantStorage(); - void UploadAllConstants(); + bool UpdateComputeDescriptorSet(); // Which bindings/state has to be updated before the next draw. 
u32 m_dirty_flags = 0; @@ -155,42 +134,33 @@ private: // pipeline state const VKPipeline* m_pipeline = nullptr; + const VKShader* m_compute_shader = nullptr; // shader bindings - std::array m_descriptor_sets = {}; struct { - std::array uniform_buffer_bindings = - {}; - std::array uniform_buffer_offsets = {}; - - std::array ps_samplers = {}; - - VkDescriptorBufferInfo ps_ssbo = {}; - } m_bindings; - size_t m_uniform_buffer_reserve_size = 0; - u32 m_num_active_descriptor_sets = 0; - u32 m_num_dynamic_offsets = 0; + std::array gx_ubo_bindings; + std::array gx_ubo_offsets; + VkDescriptorBufferInfo utility_ubo_binding; + u32 utility_ubo_offset; + std::array samplers; + std::array texel_buffers; + VkDescriptorBufferInfo ssbo; + VkDescriptorImageInfo image_texture; + } m_bindings = {}; + std::array m_gx_descriptor_sets = {}; + std::array m_utility_descriptor_sets = {}; + VkDescriptorSet m_compute_descriptor_set = VK_NULL_HANDLE; // rasterization VkViewport m_viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; VkRect2D m_scissor = {{0, 0}, {1, 1}}; // uniform buffers - std::unique_ptr m_uniform_stream_buffer; + std::unique_ptr m_dummy_texture; - VkFramebuffer m_framebuffer = VK_NULL_HANDLE; - VkRenderPass m_load_render_pass = VK_NULL_HANDLE; - VkRenderPass m_clear_render_pass = VK_NULL_HANDLE; + VKFramebuffer* m_framebuffer = nullptr; VkRenderPass m_current_render_pass = VK_NULL_HANDLE; - VkRect2D m_framebuffer_size = {}; VkRect2D m_framebuffer_render_area = {}; - - // CPU access tracking - u32 m_draw_counter = 0; - u32 m_last_efb_copy_draw_counter = 0; - std::vector m_cpu_accesses_this_frame; - std::vector m_scheduled_command_buffer_kicks; - bool m_allow_background_execution = true; }; } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp index cb8e660af2..ea610f09cf 100644 --- a/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp +++ b/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp @@ -8,29 
+8,24 @@ #include #include +#include "Common/Align.h" #include "Common/Assert.h" #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan { -StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, size_t max_size) - : m_usage(usage), m_maximum_size(max_size) +StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, u32 size) : m_usage(usage), m_size(size) { - // Add a callback that fires on fence point creation and signal - g_command_buffer_mgr->AddFencePointCallback( - this, - std::bind(&StreamBuffer::OnCommandBufferQueued, this, std::placeholders::_1, - std::placeholders::_2), - std::bind(&StreamBuffer::OnCommandBufferExecuted, this, std::placeholders::_1)); + g_command_buffer_mgr->AddFenceSignaledCallback( + this, std::bind(&StreamBuffer::OnFenceSignaled, this, std::placeholders::_1)); } StreamBuffer::~StreamBuffer() { - g_command_buffer_mgr->RemoveFencePointCallback(this); + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); if (m_host_pointer) vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory); @@ -41,24 +36,23 @@ StreamBuffer::~StreamBuffer() g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_memory); } -std::unique_ptr StreamBuffer::Create(VkBufferUsageFlags usage, size_t initial_size, - size_t max_size) +std::unique_ptr StreamBuffer::Create(VkBufferUsageFlags usage, u32 size) { - std::unique_ptr buffer = std::make_unique(usage, max_size); - if (!buffer->ResizeBuffer(initial_size)) + std::unique_ptr buffer = std::make_unique(usage, size); + if (!buffer->AllocateBuffer()) return nullptr; return buffer; } -bool StreamBuffer::ResizeBuffer(size_t size) +bool StreamBuffer::AllocateBuffer() { // Create the buffer descriptor VkBufferCreateInfo buffer_create_info = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType nullptr, // const void* pNext 0, // VkBufferCreateFlags flags - static_cast(size), // VkDeviceSize 
size + static_cast(m_size), // VkDeviceSize size m_usage, // VkBufferUsageFlags usage VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode 0, // uint32_t queueFamilyIndexCount @@ -110,7 +104,7 @@ bool StreamBuffer::ResizeBuffer(size_t size) // Map this buffer into user-space void* mapped_ptr = nullptr; - res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, size, 0, &mapped_ptr); + res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, m_size, 0, &mapped_ptr); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkMapMemory failed: "); @@ -133,36 +127,34 @@ bool StreamBuffer::ResizeBuffer(size_t size) m_buffer = buffer; m_memory = memory; m_host_pointer = reinterpret_cast(mapped_ptr); - m_current_size = size; m_current_offset = 0; m_current_gpu_position = 0; m_tracked_fences.clear(); return true; } -bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_reuse /* = true */, - bool allow_growth /* = true */, - bool reallocate_if_full /* = false */) +bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) { - size_t required_bytes = num_bytes + alignment; + const u32 required_bytes = num_bytes + alignment; // Check for sane allocations - if (required_bytes > m_maximum_size) + if (required_bytes > m_size) { PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer", - static_cast(num_bytes), static_cast(m_maximum_size)); + static_cast(num_bytes), static_cast(m_size)); return false; } // Is the GPU behind or up to date with our current offset? + UpdateCurrentFencePosition(); if (m_current_offset >= m_current_gpu_position) { - size_t remaining_bytes = m_current_size - m_current_offset; + const u32 remaining_bytes = m_size - m_current_offset; if (required_bytes <= remaining_bytes) { // Place at the current position, after the GPU position. 
- m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_current_offset = Common::AlignUp(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } @@ -171,7 +163,7 @@ bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_ // We use < here because we don't want to have the case of m_current_offset == // m_current_gpu_position. That would mean the code above would assume the // GPU has caught up to us, which it hasn't. - if (allow_reuse && required_bytes < m_current_gpu_position) + if (required_bytes < m_current_gpu_position) { // Reset offset to zero, since we're allocating behind the gpu now m_current_offset = 0; @@ -184,56 +176,35 @@ bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_ if (m_current_offset < m_current_gpu_position) { // We have from m_current_offset..m_current_gpu_position space to use. - size_t remaining_bytes = m_current_gpu_position - m_current_offset; + const u32 remaining_bytes = m_current_gpu_position - m_current_offset; if (required_bytes < remaining_bytes) { // Place at the current position, since this is still behind the GPU. - m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_current_offset = Common::AlignUp(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } } - // Try to grow the buffer up to the maximum size before waiting. - // Double each time until the maximum size is reached. - if (allow_growth && m_current_size < m_maximum_size) - { - size_t new_size = std::min(std::max(num_bytes, m_current_size * 2), m_maximum_size); - if (ResizeBuffer(new_size)) - { - // Allocating from the start of the buffer. - m_last_allocation_size = new_size; - return true; - } - } - // Can we find a fence to wait on that will give us enough memory? 
- if (allow_reuse && WaitForClearSpace(required_bytes)) + if (WaitForClearSpace(required_bytes)) { ASSERT(m_current_offset == m_current_gpu_position || (m_current_offset + required_bytes) < m_current_gpu_position); - m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_current_offset = Common::AlignUp(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } - // If we are not allowed to execute in our current state (e.g. in the middle of a render pass), - // as a last resort, reallocate the buffer. This will incur a performance hit and is not - // encouraged. - if (reallocate_if_full && ResizeBuffer(m_current_size)) - { - m_last_allocation_size = num_bytes; - return true; - } - - // We tried everything we could, and still couldn't get anything. If we're not at a point - // where the state is known and can be resumed, this is probably a fatal error. + // We tried everything we could, and still couldn't get anything. This means that too much space + // in the buffer is being used by the command buffer currently being recorded. Therefore, the + // only option is to execute it, and wait until it's done. return false; } -void StreamBuffer::CommitMemory(size_t final_num_bytes) +void StreamBuffer::CommitMemory(u32 final_num_bytes) { - ASSERT((m_current_offset + final_num_bytes) <= m_current_size); + ASSERT((m_current_offset + final_num_bytes) <= m_size); ASSERT(final_num_bytes <= m_last_allocation_size); // For non-coherent mappings, flush the memory range @@ -247,23 +218,25 @@ void StreamBuffer::CommitMemory(size_t final_num_bytes) m_current_offset += final_num_bytes; } -void StreamBuffer::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence) +void StreamBuffer::UpdateCurrentFencePosition() { // Don't create a tracking entry if the GPU is caught up with the buffer. if (m_current_offset == m_current_gpu_position) return; // Has the offset changed since the last fence? 
- if (!m_tracked_fences.empty() && m_tracked_fences.back().second == m_current_offset) + const VkFence fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence) { - // No need to track the new fence, the old one is sufficient. + // Still haven't executed a command buffer, so just update the offset. + m_tracked_fences.back().second = m_current_offset; return; } m_tracked_fences.emplace_back(fence, m_current_offset); } -void StreamBuffer::OnCommandBufferExecuted(VkFence fence) +void StreamBuffer::OnFenceSignaled(VkFence fence) { // Locate the entry for this fence (if any, we may have been forced to wait already) auto iter = std::find_if(m_tracked_fences.begin(), m_tracked_fences.end(), @@ -279,10 +252,9 @@ void StreamBuffer::OnCommandBufferExecuted(VkFence fence) } } -bool StreamBuffer::WaitForClearSpace(size_t num_bytes) +bool StreamBuffer::WaitForClearSpace(u32 num_bytes) { - size_t new_offset = 0; - size_t new_gpu_position = 0; + u32 new_offset = 0; auto iter = m_tracked_fences.begin(); for (; iter != m_tracked_fences.end(); iter++) { @@ -290,12 +262,11 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes) // This is the "last resort" case, where a command buffer execution has been forced // after no additional data has been written to it, so we can assume that after the // fence has been signaled the entire buffer is now consumed. - size_t gpu_position = iter->second; + u32 gpu_position = iter->second; if (m_current_offset == gpu_position) { // Start at the start of the buffer again. new_offset = 0; - new_gpu_position = 0; break; } @@ -308,7 +279,6 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes) if (gpu_position > num_bytes) { new_offset = 0; - new_gpu_position = gpu_position; break; } } @@ -317,31 +287,27 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes) // We're currently allocating behind the GPU. 
This would give us between the current // offset and the GPU position worth of space to work with. Again, > because we can't // align the GPU position with the buffer offset. - size_t available_space_inbetween = gpu_position - m_current_offset; + u32 available_space_inbetween = gpu_position - m_current_offset; if (available_space_inbetween > num_bytes) { // Leave the offset as-is, but update the GPU position. new_offset = m_current_offset; - new_gpu_position = gpu_position; break; } } } // Did any fences satisfy this condition? - if (iter == m_tracked_fences.end()) + // Has the command buffer been executed yet? If not, the caller should execute it. + if (iter == m_tracked_fences.end() || + iter->first == g_command_buffer_mgr->GetCurrentCommandBufferFence()) + { return false; + } - // Wait until this fence is signaled. - VkResult res = - vkWaitForFences(g_vulkan_context->GetDevice(), 1, &iter->first, VK_TRUE, UINT64_MAX); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); - - // Update GPU position, and remove all fences up to (and including) this fence. + // Wait until this fence is signaled. This will fire the callback, updating the GPU position. 
+ g_command_buffer_mgr->WaitForFence(iter->first); m_current_offset = new_offset; - m_current_gpu_position = new_gpu_position; - m_tracked_fences.erase(m_tracked_fences.begin(), ++iter); return true; } diff --git a/Source/Core/VideoBackends/Vulkan/StreamBuffer.h b/Source/Core/VideoBackends/Vulkan/StreamBuffer.h index ce1b02164f..b52ce6cd35 100644 --- a/Source/Core/VideoBackends/Vulkan/StreamBuffer.h +++ b/Source/Core/VideoBackends/Vulkan/StreamBuffer.h @@ -17,43 +17,40 @@ namespace Vulkan class StreamBuffer { public: - StreamBuffer(VkBufferUsageFlags usage, size_t max_size); + StreamBuffer(VkBufferUsageFlags usage, u32 size); ~StreamBuffer(); VkBuffer GetBuffer() const { return m_buffer; } VkDeviceMemory GetDeviceMemory() const { return m_memory; } u8* GetHostPointer() const { return m_host_pointer; } u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } - size_t GetCurrentSize() const { return m_current_size; } - size_t GetCurrentOffset() const { return m_current_offset; } - bool ReserveMemory(size_t num_bytes, size_t alignment, bool allow_reuse = true, - bool allow_growth = true, bool reallocate_if_full = false); - void CommitMemory(size_t final_num_bytes); + u32 GetCurrentSize() const { return m_size; } + u32 GetCurrentOffset() const { return m_current_offset; } + bool ReserveMemory(u32 num_bytes, u32 alignment); + void CommitMemory(u32 final_num_bytes); - static std::unique_ptr Create(VkBufferUsageFlags usage, size_t initial_size, - size_t max_size); + static std::unique_ptr Create(VkBufferUsageFlags usage, u32 size); private: - bool ResizeBuffer(size_t size); - void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence); - void OnCommandBufferExecuted(VkFence fence); + bool AllocateBuffer(); + void UpdateCurrentFencePosition(); + void OnFenceSignaled(VkFence fence); // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. 
- bool WaitForClearSpace(size_t num_bytes); + bool WaitForClearSpace(u32 num_bytes); VkBufferUsageFlags m_usage; - size_t m_current_size = 0; - size_t m_maximum_size; - size_t m_current_offset = 0; - size_t m_current_gpu_position = 0; - size_t m_last_allocation_size = 0; + u32 m_size; + u32 m_current_offset = 0; + u32 m_current_gpu_position = 0; + u32 m_last_allocation_size = 0; VkBuffer m_buffer = VK_NULL_HANDLE; VkDeviceMemory m_memory = VK_NULL_HANDLE; u8* m_host_pointer = nullptr; // List of fences and the corresponding positions in the buffer - std::deque> m_tracked_fences; + std::deque> m_tracked_fences; bool m_coherent_mapping = false; }; diff --git a/Source/Core/VideoBackends/Vulkan/SwapChain.cpp b/Source/Core/VideoBackends/Vulkan/SwapChain.cpp index 94cd0ade67..ee53005b93 100644 --- a/Source/Core/VideoBackends/Vulkan/SwapChain.cpp +++ b/Source/Core/VideoBackends/Vulkan/SwapChain.cpp @@ -13,6 +13,8 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VulkanContext.h" #include "VideoCommon/RenderBase.h" @@ -32,7 +34,6 @@ SwapChain::~SwapChain() DestroySwapChainImages(); DestroySwapChain(); DestroySurface(); - DestroySemaphores(); } VkSurfaceKHR SwapChain::CreateVulkanSurface(VkInstance instance, const WindowSystemInfo& wsi) @@ -130,53 +131,12 @@ std::unique_ptr SwapChain::Create(const WindowSystemInfo& wsi, VkSurf bool vsync) { std::unique_ptr swap_chain = std::make_unique(wsi, surface, vsync); - if (!swap_chain->CreateSemaphores() || !swap_chain->CreateSwapChain() || - !swap_chain->SetupSwapChainImages()) - { + if (!swap_chain->CreateSwapChain() || !swap_chain->SetupSwapChainImages()) return nullptr; - } return swap_chain; } -bool SwapChain::CreateSemaphores() -{ - // Create two semaphores, one that is triggered when the swapchain buffer is ready, another after - // submit and before present - 
VkSemaphoreCreateInfo semaphore_info = { - VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0 // VkSemaphoreCreateFlags flags - }; - - VkResult res; - if ((res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_info, nullptr, - &m_image_available_semaphore)) != VK_SUCCESS || - (res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_info, nullptr, - &m_rendering_finished_semaphore)) != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); - return false; - } - - return true; -} - -void SwapChain::DestroySemaphores() -{ - if (m_image_available_semaphore) - { - vkDestroySemaphore(g_vulkan_context->GetDevice(), m_image_available_semaphore, nullptr); - m_image_available_semaphore = VK_NULL_HANDLE; - } - - if (m_rendering_finished_semaphore) - { - vkDestroySemaphore(g_vulkan_context->GetDevice(), m_rendering_finished_semaphore, nullptr); - m_rendering_finished_semaphore = VK_NULL_HANDLE; - } -} - bool SwapChain::SelectSurfaceFormat() { u32 format_count; @@ -207,7 +167,7 @@ bool SwapChain::SelectSurfaceFormat() // Some drivers seem to return a SRGB format here (Intel Mesa). // This results in gamma correction when presenting to the screen, which we don't want. // Use a linear format instead, if this is the case. 
- VkFormat format = Util::GetLinearFormat(surface_format.format); + VkFormat format = VKTexture::GetLinearFormat(surface_format.format); if (format == VK_FORMAT_R8G8B8A8_UNORM) m_texture_format = AbstractTextureFormat::RGBA8; else if (format == VK_FORMAT_B8G8R8A8_UNORM) @@ -399,11 +359,13 @@ bool SwapChain::SetupSwapChainImages() images.data()); ASSERT(res == VK_SUCCESS); - m_render_pass = g_object_cache->GetRenderPass(m_surface_format.format, VK_FORMAT_UNDEFINED, 1, - VK_ATTACHMENT_LOAD_OP_LOAD); - m_clear_render_pass = g_object_cache->GetRenderPass(m_surface_format.format, VK_FORMAT_UNDEFINED, - 1, VK_ATTACHMENT_LOAD_OP_CLEAR); - if (m_render_pass == VK_NULL_HANDLE || m_clear_render_pass == VK_NULL_HANDLE) + const TextureConfig texture_config(TextureConfig( + m_width, m_height, 1, m_layers, 1, m_texture_format, AbstractTextureFlag_RenderTarget)); + const VkRenderPass load_render_pass = g_object_cache->GetRenderPass( + m_surface_format.format, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_LOAD); + const VkRenderPass clear_render_pass = g_object_cache->GetRenderPass( + m_surface_format.format, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_CLEAR); + if (load_render_pass == VK_NULL_HANDLE || clear_render_pass == VK_NULL_HANDLE) { PanicAlert("Failed to get swap chain render passes."); return false; @@ -416,26 +378,17 @@ bool SwapChain::SetupSwapChainImages() image.image = images[i]; // Create texture object, which creates a view of the backbuffer - image.texture = Texture2D::CreateFromExistingImage( - m_width, m_height, 1, 1, m_surface_format.format, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D, image.image); + image.texture = + VKTexture::CreateAdopted(texture_config, image.image, + m_layers > 1 ? 
VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_LAYOUT_UNDEFINED); + if (!image.texture) + return false; - VkImageView view = image.texture->GetView(); - VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - m_render_pass, - 1, - &view, - m_width, - m_height, - m_layers}; - - res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &image.framebuffer); - if (res != VK_SUCCESS) + image.framebuffer = VKFramebuffer::Create(image.texture.get(), nullptr); + if (!image.framebuffer) { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + image.texture.reset(); return false; } @@ -447,10 +400,11 @@ bool SwapChain::SetupSwapChainImages() void SwapChain::DestroySwapChainImages() { - for (const auto& it : m_swap_chain_images) + for (auto& it : m_swap_chain_images) { // Images themselves are cleaned up by the swap chain object - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), it.framebuffer, nullptr); + it.framebuffer.reset(); + it.texture.reset(); } m_swap_chain_images.clear(); } @@ -467,8 +421,8 @@ void SwapChain::DestroySwapChain() VkResult SwapChain::AcquireNextImage() { VkResult res = vkAcquireNextImageKHR(g_vulkan_context->GetDevice(), m_swap_chain, UINT64_MAX, - m_image_available_semaphore, VK_NULL_HANDLE, - &m_current_swap_chain_image_index); + g_command_buffer_mgr->GetCurrentCommandBufferSemaphore(), + VK_NULL_HANDLE, &m_current_swap_chain_image_index); if (res != VK_SUCCESS && res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR failed: "); diff --git a/Source/Core/VideoBackends/Vulkan/SwapChain.h b/Source/Core/VideoBackends/Vulkan/SwapChain.h index 0687249acd..4359f4663c 100644 --- a/Source/Core/VideoBackends/Vulkan/SwapChain.h +++ b/Source/Core/VideoBackends/Vulkan/SwapChain.h @@ -10,13 +10,14 @@ #include "Common/CommonTypes.h" #include "Common/WindowSystemInfo.h" #include "VideoBackends/Vulkan/Constants.h" 
-#include "VideoBackends/Vulkan/Texture2D.h" #include "VideoCommon/TextureConfig.h" namespace Vulkan { class CommandBufferManager; class ObjectCache; +class VKTexture; +class VKFramebuffer; class SwapChain { @@ -44,19 +45,14 @@ public: { return m_swap_chain_images[m_current_swap_chain_image_index].image; } - Texture2D* GetCurrentTexture() const + VKTexture* GetCurrentTexture() const { return m_swap_chain_images[m_current_swap_chain_image_index].texture.get(); } - VkFramebuffer GetCurrentFramebuffer() const + VKFramebuffer* GetCurrentFramebuffer() const { - return m_swap_chain_images[m_current_swap_chain_image_index].framebuffer; + return m_swap_chain_images[m_current_swap_chain_image_index].framebuffer.get(); } - VkRenderPass GetLoadRenderPass() const { return m_render_pass; } - VkRenderPass GetClearRenderPass() const { return m_clear_render_pass; } - VkSemaphore GetImageAvailableSemaphore() const { return m_image_available_semaphore; } - VkSemaphore GetRenderingFinishedSemaphore() const { return m_rendering_finished_semaphore; } - VkResult AcquireNextImage(); bool RecreateSurface(void* native_handle); @@ -67,9 +63,6 @@ public: bool SetVSync(bool enabled); private: - bool CreateSemaphores(); - void DestroySemaphores(); - bool SelectSurfaceFormat(); bool SelectPresentMode(); @@ -84,8 +77,8 @@ private: struct SwapChainImage { VkImage image; - std::unique_ptr texture; - VkFramebuffer framebuffer; + std::unique_ptr texture; + std::unique_ptr framebuffer; }; WindowSystemInfo m_wsi; @@ -99,12 +92,6 @@ private: std::vector m_swap_chain_images; u32 m_current_swap_chain_image_index = 0; - VkSemaphore m_image_available_semaphore = VK_NULL_HANDLE; - VkSemaphore m_rendering_finished_semaphore = VK_NULL_HANDLE; - - VkRenderPass m_render_pass = VK_NULL_HANDLE; - VkRenderPass m_clear_render_pass = VK_NULL_HANDLE; - u32 m_width = 0; u32 m_height = 0; u32 m_layers = 0; diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp b/Source/Core/VideoBackends/Vulkan/Texture2D.cpp 
deleted file mode 100644 index 96bfba6538..0000000000 --- a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp +++ /dev/null @@ -1,404 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include - -#include "Common/Assert.h" -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -namespace Vulkan -{ -Texture2D::Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, - VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image, - VkDeviceMemory device_memory, VkImageView view) - : m_width(width), m_height(height), m_levels(levels), m_layers(layers), m_format(format), - m_samples(samples), m_view_type(view_type), m_image(image), m_device_memory(device_memory), - m_view(view) -{ -} - -Texture2D::~Texture2D() -{ - g_command_buffer_mgr->DeferImageViewDestruction(m_view); - - // If we don't have device memory allocated, the image is not owned by us (e.g. 
swapchain) - if (m_device_memory != VK_NULL_HANDLE) - { - g_command_buffer_mgr->DeferImageDestruction(m_image); - g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_device_memory); - } -} - -std::unique_ptr Texture2D::Create(u32 width, u32 height, u32 levels, u32 layers, - VkFormat format, VkSampleCountFlagBits samples, - VkImageViewType view_type, VkImageTiling tiling, - VkImageUsageFlags usage) -{ - VkImageCreateInfo image_info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - nullptr, - 0, - VK_IMAGE_TYPE_2D, - format, - {width, height, 1}, - levels, - layers, - samples, - tiling, - usage, - VK_SHARING_MODE_EXCLUSIVE, - 0, - nullptr, - VK_IMAGE_LAYOUT_UNDEFINED}; - - VkImage image = VK_NULL_HANDLE; - VkResult res = vkCreateImage(g_vulkan_context->GetDevice(), &image_info, nullptr, &image); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImage failed: "); - return nullptr; - } - - // Allocate memory to back this texture, we want device local memory in this case - VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(g_vulkan_context->GetDevice(), image, &memory_requirements); - - VkMemoryAllocateInfo memory_info = { - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size, - g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)}; - - VkDeviceMemory device_memory; - res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); - vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); - return nullptr; - } - - res = vkBindImageMemory(g_vulkan_context->GetDevice(), image, device_memory, 0); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkBindImageMemory failed: "); - vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); - vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); - return nullptr; - } - - 
VkImageViewCreateInfo view_info = { - VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - nullptr, - 0, - image, - view_type, - format, - {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY}, - {Util::IsDepthFormat(format) ? static_cast(VK_IMAGE_ASPECT_DEPTH_BIT) : - static_cast(VK_IMAGE_ASPECT_COLOR_BIT), - 0, levels, 0, layers}}; - - VkImageView view = VK_NULL_HANDLE; - res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); - vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); - vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); - return nullptr; - } - - return std::make_unique(width, height, levels, layers, format, samples, view_type, - image, device_memory, view); -} - -std::unique_ptr Texture2D::CreateFromExistingImage(u32 width, u32 height, u32 levels, - u32 layers, VkFormat format, - VkSampleCountFlagBits samples, - VkImageViewType view_type, - VkImage existing_image) -{ - // Only need to create the image view, this is mainly for swap chains. - VkImageViewCreateInfo view_info = { - VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - nullptr, - 0, - existing_image, - view_type, - format, - {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY}, - {Util::IsDepthFormat(format) ? static_cast(VK_IMAGE_ASPECT_DEPTH_BIT) : - static_cast(VK_IMAGE_ASPECT_COLOR_BIT), - 0, levels, 0, layers}}; - - // Memory is managed by the owner of the image. 
- VkDeviceMemory memory = VK_NULL_HANDLE; - VkImageView view = VK_NULL_HANDLE; - VkResult res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); - return nullptr; - } - - return std::make_unique(width, height, levels, layers, format, samples, view_type, - existing_image, memory, view); -} - -void Texture2D::OverrideImageLayout(VkImageLayout new_layout) -{ - m_layout = new_layout; -} - -void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) -{ - if (m_layout == new_layout) - return; - - VkImageMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkAccessFlags srcAccessMask - 0, // VkAccessFlags dstAccessMask - m_layout, // VkImageLayout oldLayout - new_layout, // VkImageLayout newLayout - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - m_image, // VkImage image - {Util::GetImageAspectForFormat(m_format), 0, m_levels, 0, - m_layers} // VkImageSubresourceRange subresourceRange - }; - - // srcStageMask -> Stages that must complete before the barrier - // dstStageMask -> Stages that must wait for after the barrier before beginning - VkPipelineStageFlags srcStageMask, dstStageMask; - switch (m_layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - // Layout undefined therefore contents undefined, and we don't care what happens to it. - barrier.srcAccessMask = 0; - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - case VK_IMAGE_LAYOUT_PREINITIALIZED: - // Image has been pre-initialized by the host, so ensure all writes have completed. 
- barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; - break; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - // Image was being used as a color attachment, so ensure all writes have completed. - barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - break; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - // Image was being used as a depthstencil attachment, so ensure all writes have completed. - barrier.srcAccessMask = - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - srcStageMask = - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - break; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - // Image was being used as a shader resource, make sure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - // Image was being used as a copy source, ensure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - // Image was being used as a copy destination, ensure all writes have finished. 
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - default: - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - } - - switch (new_layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - barrier.dstAccessMask = 0; - dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - barrier.dstAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - break; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - barrier.dstAccessMask = - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dstStageMask = - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - break; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - default: - dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - break; - } - - // If we were using a compute layout, the stages need to reflect that - switch (m_compute_layout) - { - case ComputeImageLayout::Undefined: - break; - case ComputeImageLayout::ReadOnly: - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - case ComputeImageLayout::WriteOnly: - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; 
- srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - case ComputeImageLayout::ReadWrite: - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - } - m_compute_layout = ComputeImageLayout::Undefined; - - vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, - &barrier); - - m_layout = new_layout; -} - -void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout) -{ - ASSERT(new_layout != ComputeImageLayout::Undefined); - if (m_compute_layout == new_layout) - return; - - VkImageMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkAccessFlags srcAccessMask - 0, // VkAccessFlags dstAccessMask - m_layout, // VkImageLayout oldLayout - VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - m_image, // VkImage image - {Util::GetImageAspectForFormat(m_format), 0, m_levels, 0, - m_layers} // VkImageSubresourceRange subresourceRange - }; - - VkPipelineStageFlags srcStageMask, dstStageMask; - switch (m_layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - // Layout undefined therefore contents undefined, and we don't care what happens to it. - barrier.srcAccessMask = 0; - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - case VK_IMAGE_LAYOUT_PREINITIALIZED: - // Image has been pre-initialized by the host, so ensure all writes have completed. - barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; - break; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - // Image was being used as a color attachment, so ensure all writes have completed. 
- barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - break; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - // Image was being used as a depthstencil attachment, so ensure all writes have completed. - barrier.srcAccessMask = - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - srcStageMask = - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - break; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - // Image was being used as a shader resource, make sure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - // Image was being used as a copy source, ensure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - // Image was being used as a copy destination, ensure all writes have finished. 
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - default: - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - } - - switch (new_layout) - { - case ComputeImageLayout::ReadOnly: - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - case ComputeImageLayout::WriteOnly: - barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - case ComputeImageLayout::ReadWrite: - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - default: - dstStageMask = 0; - break; - } - - m_layout = barrier.newLayout; - m_compute_layout = new_layout; - - vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, - &barrier); -} - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.h b/Source/Core/VideoBackends/Vulkan/Texture2D.h deleted file mode 100644 index 3fce48d758..0000000000 --- a/Source/Core/VideoBackends/Vulkan/Texture2D.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/Constants.h" - -namespace Vulkan -{ -class CommandBufferManager; -class ObjectCache; - -class Texture2D -{ -public: - // Custom image layouts, mainly used for switching to/from compute - enum class ComputeImageLayout - { - Undefined, - ReadOnly, - WriteOnly, - ReadWrite - }; - - Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, - VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image, - VkDeviceMemory device_memory, VkImageView view); - ~Texture2D(); - - static std::unique_ptr Create(u32 width, u32 height, u32 levels, u32 layers, - VkFormat format, VkSampleCountFlagBits samples, - VkImageViewType view_type, VkImageTiling tiling, - VkImageUsageFlags usage); - - static std::unique_ptr CreateFromExistingImage(u32 width, u32 height, u32 levels, - u32 layers, VkFormat format, - VkSampleCountFlagBits samples, - VkImageViewType view_type, - VkImage existing_image); - - u32 GetWidth() const { return m_width; } - u32 GetHeight() const { return m_height; } - u32 GetLevels() const { return m_levels; } - u32 GetLayers() const { return m_layers; } - VkFormat GetFormat() const { return m_format; } - VkSampleCountFlagBits GetSamples() const { return m_samples; } - VkImageLayout GetLayout() const { return m_layout; } - VkImageViewType GetViewType() const { return m_view_type; } - VkImage GetImage() const { return m_image; } - VkDeviceMemory GetDeviceMemory() const { return m_device_memory; } - VkImageView GetView() const { return m_view; } - // Used when the render pass is changing the image layout, or to force it to - // VK_IMAGE_LAYOUT_UNDEFINED, if the existing contents of the image is - // irrelevant and will not be loaded. 
- void OverrideImageLayout(VkImageLayout new_layout); - - void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout); - void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout); - -private: - u32 m_width; - u32 m_height; - u32 m_levels; - u32 m_layers; - VkFormat m_format; - VkSampleCountFlagBits m_samples; - VkImageViewType m_view_type; - VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; - ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined; - - VkImage m_image; - VkDeviceMemory m_device_memory; - VkImageView m_view; -}; -} diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp deleted file mode 100644 index 3f26b28a7c..0000000000 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/Vulkan/TextureCache.h" - -#include -#include -#include -#include - -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/Renderer.h" -#include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/TextureConverter.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VKTexture.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConfig.h" - -namespace Vulkan -{ -TextureCache::TextureCache() -{ -} - -TextureCache::~TextureCache() -{ - TextureCache::DeleteShaders(); -} - -VkShaderModule TextureCache::GetCopyShader() const -{ - return 
m_copy_shader; -} - -TextureCache* TextureCache::GetInstance() -{ - return static_cast(g_texture_cache.get()); -} - -bool TextureCache::Initialize() -{ - m_texture_converter = std::make_unique(); - if (!m_texture_converter->Initialize()) - { - PanicAlert("Failed to initialize texture converter"); - return false; - } - - if (!CompileShaders()) - { - PanicAlert("Failed to compile one or more shaders"); - return false; - } - - return true; -} - -void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, - const void* palette, TLUTFormat format) -{ - m_texture_converter->ConvertTexture(destination, source, palette, format); - - // Ensure both textures remain in the SHADER_READ_ONLY layout so they can be bound. - static_cast(source->texture.get()) - ->GetRawTexIdentifier() - ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - static_cast(destination->texture.get()) - ->GetRawTexIdentifier() - ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); -} - -void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - // Flush EFB pokes first, as they're expected to be included. - FramebufferManager::GetInstance()->FlushEFBPokes(); - - // MSAA case where we need to resolve first. - // An out-of-bounds source region is valid here, and fine for the draw (since it is converted - // to texture coordinates), but it's not valid to resolve an out-of-range rectangle. 
- TargetRectangle scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); - VkRect2D region = {{scaled_src_rect.left, scaled_src_rect.top}, - {static_cast(scaled_src_rect.GetWidth()), - static_cast(scaled_src_rect.GetHeight())}}; - region = Util::ClampRect2D(region, FramebufferManager::GetInstance()->GetEFBWidth(), - FramebufferManager::GetInstance()->GetEFBHeight()); - Texture2D* src_texture; - if (params.depth) - src_texture = FramebufferManager::GetInstance()->ResolveEFBDepthTexture(region); - else - src_texture = FramebufferManager::GetInstance()->ResolveEFBColorTexture(region); - - // End render pass before barrier (since we have no self-dependencies). - // The barrier has to happen after the render pass, not inside it, as we are going to be - // reading from the texture immediately afterwards. - StateTracker::GetInstance()->EndRenderPass(); - - // Transition to shader resource before reading. - VkImageLayout original_layout = src_texture->GetLayout(); - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - m_texture_converter->EncodeTextureToMemory( - src_texture->GetView(), dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, - src_rect, scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients); - - // Transition back to original state - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout); - - StateTracker::GetInstance()->OnEFBCopyToRAM(); -} - -bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) -{ - return m_texture_converter->SupportsTextureDecoding(format, palette_format); -} - -void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, - size_t data_size, TextureFormat format, u32 width, u32 height, - u32 aligned_width, u32 aligned_height, u32 row_stride, - const u8* palette, TLUTFormat palette_format) -{ - // Group 
compute shader dispatches together in the init command buffer. That way we don't have to - // pay a penalty for switching from graphics->compute, or end/restart our render pass. - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentInitCommandBuffer(); - m_texture_converter->DecodeTexture(command_buffer, entry, dst_level, data, data_size, format, - width, height, aligned_width, aligned_height, row_stride, - palette, palette_format); - - // Last mip level? Ensure the texture is ready for use. - if (dst_level == (entry->GetNumLevels() - 1)) - { - static_cast(entry->texture.get()) - ->GetRawTexIdentifier() - ->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } -} - -bool TextureCache::CompileShaders() -{ - static const char COPY_SHADER_SOURCE[] = R"( - layout(set = 1, binding = 0) uniform sampler2DArray samp0; - - layout(location = 0) in float3 uv0; - layout(location = 1) in float4 col0; - layout(location = 0) out float4 ocol0; - - void main() - { - ocol0 = texture(samp0, uv0); - } - )"; - - std::string header = g_shader_cache->GetUtilityShaderHeader(); - std::string source = header + COPY_SHADER_SOURCE; - - m_copy_shader = Util::CompileAndCreateFragmentShader(source); - - return m_copy_shader != VK_NULL_HANDLE; -} - -void TextureCache::DeleteShaders() -{ - // It is safe to destroy shader modules after they are consumed by creating a pipeline. - // Therefore, no matter where this function is called from, it won't cause an issue due to - // pending commands, although at the time of writing should only be called at the end of - // a frame. See Vulkan spec, section 2.3.1. Object Lifetime. 
- if (m_copy_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_shader, nullptr); - m_copy_shader = VK_NULL_HANDLE; - } - - for (auto& shader : m_efb_copy_to_tex_shaders) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader.second, nullptr); - } - m_efb_copy_to_tex_shaders.clear(); -} - -void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, - const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - VKTexture* texture = static_cast(entry->texture.get()); - - // A better way of doing this would be nice. - FramebufferManager* framebuffer_mgr = - static_cast(g_framebuffer_manager.get()); - TargetRectangle scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); - - // Flush EFB pokes first, as they're expected to be included. - framebuffer_mgr->FlushEFBPokes(); - - // Has to be flagged as a render target. - ASSERT(texture->GetFramebuffer() != VK_NULL_HANDLE); - - // Can't be done in a render pass, since we're doing our own render pass! - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - StateTracker::GetInstance()->EndRenderPass(); - - // Fill uniform buffer. - struct PixelUniforms - { - float filter_coefficients[3]; - float gamma_rcp; - float clamp_top; - float clamp_bottom; - float pixel_height; - u32 padding; - }; - PixelUniforms uniforms; - for (size_t i = 0; i < filter_coefficients.size(); i++) - uniforms.filter_coefficients[i] = filter_coefficients[i]; - uniforms.gamma_rcp = 1.0f / gamma; - uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; - uniforms.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; - uniforms.pixel_height = - g_ActiveConfig.bCopyEFBScaled ? 
1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT; - uniforms.padding = 0; - - // Transition EFB to shader resource before binding. - // An out-of-bounds source region is valid here, and fine for the draw (since it is converted - // to texture coordinates), but it's not valid to resolve an out-of-range rectangle. - VkRect2D region = {{scaled_src_rect.left, scaled_src_rect.top}, - {static_cast(scaled_src_rect.GetWidth()), - static_cast(scaled_src_rect.GetHeight())}}; - region = Util::ClampRect2D(region, FramebufferManager::GetInstance()->GetEFBWidth(), - FramebufferManager::GetInstance()->GetEFBHeight()); - Texture2D* src_texture; - if (is_depth_copy) - src_texture = FramebufferManager::GetInstance()->ResolveEFBDepthTexture(region); - else - src_texture = FramebufferManager::GetInstance()->ResolveEFBColorTexture(region); - - VkSampler src_sampler = - scale_by_half ? g_object_cache->GetLinearSampler() : g_object_cache->GetPointSampler(); - VkImageLayout original_layout = src_texture->GetLayout(); - src_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - texture->GetRawTexIdentifier()->TransitionToLayout(command_buffer, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, - scale_by_half, - NeedsCopyFilterInShader(filter_coefficients)); - - auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE)); - VkShaderModule& shader = it.first->second; - bool created = it.second; - - if (created) - { - std::string source = g_shader_cache->GetUtilityShaderHeader(); - source += - TextureConversionShaderGen::GenerateShader(APIType::Vulkan, uid.GetUidData()).GetBuffer(); - - shader = Util::CompileAndCreateFragmentShader(source); - } - - VkRenderPass render_pass = g_object_cache->GetRenderPass( - texture->GetRawTexIdentifier()->GetFormat(), VK_FORMAT_UNDEFINED, - texture->GetRawTexIdentifier()->GetSamples(), 
VK_ATTACHMENT_LOAD_OP_DONT_CARE); - - UtilityShaderDraw draw(command_buffer, - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), render_pass, - g_shader_cache->GetPassthroughVertexShader(), - g_shader_cache->GetPassthroughGeometryShader(), shader); - - u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(PixelUniforms)); - std::memcpy(ubo_ptr, &uniforms, sizeof(PixelUniforms)); - draw.CommitPSUniforms(sizeof(PixelUniforms)); - - draw.SetPSSampler(0, src_texture->GetView(), src_sampler); - - VkRect2D dest_region = {{0, 0}, {texture->GetConfig().width, texture->GetConfig().height}}; - - draw.BeginRenderPass(texture->GetFramebuffer(), dest_region); - - draw.DrawQuad(0, 0, texture->GetConfig().width, texture->GetConfig().height, scaled_src_rect.left, - scaled_src_rect.top, 0, scaled_src_rect.GetWidth(), scaled_src_rect.GetHeight(), - framebuffer_mgr->GetEFBWidth(), framebuffer_mgr->GetEFBHeight()); - - draw.EndRenderPass(); - - // We touched everything, so put it back. - StateTracker::GetInstance()->SetPendingRebind(); - - // Transition the EFB back to its original layout. - src_texture->TransitionToLayout(command_buffer, original_layout); - - // Ensure texture is in SHADER_READ_ONLY layout, ready for usage. - texture->GetRawTexIdentifier()->TransitionToLayout(command_buffer, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); -} - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.h b/Source/Core/VideoBackends/Vulkan/TextureCache.h deleted file mode 100644 index 35b3b6c360..0000000000 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/TextureConverterShaderGen.h" - -namespace Vulkan -{ -class TextureConverter; -class StateTracker; -class Texture2D; -class VKTexture; - -class TextureCache : public TextureCacheBase -{ -public: - TextureCache(); - ~TextureCache(); - - static TextureCache* GetInstance(); - - TextureConverter* GetTextureConverter() const { return m_texture_converter.get(); } - bool Initialize(); - - bool CompileShaders() override; - void DeleteShaders() override; - - void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, - TLUTFormat format) override; - - void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override; - - void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size, - TextureFormat format, u32 width, u32 height, u32 aligned_width, - u32 aligned_height, u32 row_stride, const u8* palette, - TLUTFormat palette_format) override; - - VkShaderModule GetCopyShader() const; - -private: - void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - std::unique_ptr m_texture_converter; - - VkShaderModule m_copy_shader = VK_NULL_HANDLE; - std::map m_efb_copy_to_tex_shaders; -}; - -} // namespace Vulkan diff --git 
a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp deleted file mode 100644 index c5acdc8e7f..0000000000 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp +++ /dev/null @@ -1,634 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/Vulkan/TextureConverter.h" - -#include -#include -#include -#include -#include - -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VKTexture.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/TextureDecoder.h" -#include "VideoCommon/VideoConfig.h" - -namespace Vulkan -{ -namespace -{ -struct EFBEncodeParams -{ - std::array position_uniform; - float y_scale; - float gamma_rcp; - float clamp_top; - float clamp_bottom; - float filter_coefficients[3]; - u32 padding; -}; -} // namespace -TextureConverter::TextureConverter() -{ -} - -TextureConverter::~TextureConverter() -{ - for (const auto& it : m_palette_conversion_shaders) - { - if (it != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), it, nullptr); - } - - if (m_texel_buffer_view_r8_uint != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r8_uint, nullptr); - if (m_texel_buffer_view_r16_uint != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r16_uint, nullptr); - if 
(m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r32g32_uint, nullptr); - if (m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_unorm, nullptr); - if (m_texel_buffer_view_rgba8_uint != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_uint, nullptr); - - for (auto& it : m_encoding_shaders) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second, nullptr); - - for (const auto& it : m_decoding_pipelines) - { - if (it.second.compute_shader != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.compute_shader, nullptr); - } -} - -bool TextureConverter::Initialize() -{ - if (!CreateTexelBuffer()) - { - PanicAlert("Failed to create uniform buffer"); - return false; - } - - if (!CompilePaletteConversionShaders()) - { - PanicAlert("Failed to compile palette conversion shaders"); - return false; - } - - if (!CreateEncodingTexture()) - { - PanicAlert("Failed to create encoding texture"); - return false; - } - - if (!CreateDecodingTexture()) - { - PanicAlert("Failed to create decoding texture"); - return false; - } - - return true; -} - -bool TextureConverter::ReserveTexelBufferStorage(size_t size, size_t alignment) -{ - // Enforce the minimum alignment for texture buffers on the device. - size_t actual_alignment = - std::max(static_cast(g_vulkan_context->GetTexelBufferAlignment()), alignment); - if (m_texel_buffer->ReserveMemory(size, actual_alignment)) - return true; - - WARN_LOG(VIDEO, "Executing command list while waiting for space in palette buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); - - // This next call should never fail, since a command buffer is now in-flight and we can - // wait on the fence for the GPU to finish. If this returns false, it's probably because - // the device has been lost, which is fatal anyway. 
- if (!m_texel_buffer->ReserveMemory(size, actual_alignment)) - { - PanicAlert("Failed to allocate space for texture conversion"); - return false; - } - - return true; -} - -VkCommandBuffer -TextureConverter::GetCommandBufferForTextureConversion(const TextureCache::TCacheEntry* src_entry) -{ - // EFB copies can be used as paletted textures as well. For these, we can't assume them to be - // contain the correct data before the frame begins (when the init command buffer is executed), - // so we must convert them at the appropriate time, during the drawing command buffer. - if (src_entry->IsCopy()) - { - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - return g_command_buffer_mgr->GetCurrentCommandBuffer(); - } - else - { - // Use initialization command buffer and perform conversion before the drawing commands. - return g_command_buffer_mgr->GetCurrentInitCommandBuffer(); - } -} - -void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, - TextureCacheBase::TCacheEntry* src_entry, const void* palette, - TLUTFormat palette_format) -{ - struct PSUniformBlock - { - float multiplier; - int texel_buffer_offset; - int pad[2]; - }; - - VKTexture* source_texture = static_cast(src_entry->texture.get()); - VKTexture* destination_texture = static_cast(dst_entry->texture.get()); - - ASSERT(static_cast(palette_format) < NUM_PALETTE_CONVERSION_SHADERS); - ASSERT(destination_texture->GetConfig().rendertarget); - - // We want to align to 2 bytes (R16) or the device's texel buffer alignment, whichever is greater. - size_t palette_size = src_entry->format == TextureFormat::I4 ? 32 : 512; - if (!ReserveTexelBufferStorage(palette_size, sizeof(u16))) - return; - - // Copy in palette to texel buffer. 
- u32 palette_offset = static_cast(m_texel_buffer->GetCurrentOffset()); - memcpy(m_texel_buffer->GetCurrentHostPointer(), palette, palette_size); - m_texel_buffer->CommitMemory(palette_size); - - VkCommandBuffer command_buffer = GetCommandBufferForTextureConversion(src_entry); - source_texture->GetRawTexIdentifier()->TransitionToLayout( - command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - destination_texture->GetRawTexIdentifier()->TransitionToLayout( - command_buffer, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // Bind and draw to the destination. - VkRenderPass render_pass = g_object_cache->GetRenderPass( - destination_texture->GetRawTexIdentifier()->GetFormat(), VK_FORMAT_UNDEFINED, - destination_texture->GetRawTexIdentifier()->GetSamples(), VK_ATTACHMENT_LOAD_OP_DONT_CARE); - UtilityShaderDraw draw(command_buffer, - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_TEXTURE_CONVERSION), - render_pass, g_shader_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, - m_palette_conversion_shaders[static_cast(palette_format)]); - - VkRect2D region = {{0, 0}, {dst_entry->GetWidth(), dst_entry->GetHeight()}}; - draw.BeginRenderPass(destination_texture->GetFramebuffer(), region); - - PSUniformBlock uniforms = {}; - uniforms.multiplier = src_entry->format == TextureFormat::I4 ? 
15.0f : 255.0f; - uniforms.texel_buffer_offset = static_cast(palette_offset / sizeof(u16)); - draw.SetPushConstants(&uniforms, sizeof(uniforms)); - draw.SetPSSampler(0, source_texture->GetRawTexIdentifier()->GetView(), - g_object_cache->GetPointSampler()); - draw.SetPSTexelBuffer(m_texel_buffer_view_r16_uint); - draw.SetViewportAndScissor(0, 0, dst_entry->GetWidth(), dst_entry->GetHeight()); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); -} - -void TextureConverter::EncodeTextureToMemory( - VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, - bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) -{ - VkShaderModule shader = GetEncodingShader(params); - if (shader == VK_NULL_HANDLE) - { - ERROR_LOG(VIDEO, "Missing encoding fragment shader for format %u->%u", - static_cast(params.efb_format), static_cast(params.copy_format)); - return; - } - - // Can't do our own draw within a render pass. 
- StateTracker::GetInstance()->EndRenderPass(); - - static_cast(m_encoding_render_texture.get()) - ->GetRawTexIdentifier() - ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - VkRenderPass render_pass = g_object_cache->GetRenderPass( - Util::GetVkFormatForHostTextureFormat(m_encoding_render_texture->GetConfig().format), - VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_PUSH_CONSTANT), - render_pass, g_shader_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, - shader); - - // Uniform - int4 of left,top,native_width,scale - EFBEncodeParams encoder_params; - encoder_params.position_uniform[0] = src_rect.left; - encoder_params.position_uniform[1] = src_rect.top; - encoder_params.position_uniform[2] = static_cast(native_width); - encoder_params.position_uniform[3] = scale_by_half ? 2 : 1; - encoder_params.y_scale = y_scale; - encoder_params.gamma_rcp = 1.0f / gamma; - encoder_params.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; - encoder_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; - for (size_t i = 0; i < filter_coefficients.size(); i++) - encoder_params.filter_coefficients[i] = filter_coefficients[i]; - u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(EFBEncodeParams)); - std::memcpy(ubo_ptr, &encoder_params, sizeof(EFBEncodeParams)); - draw.CommitPSUniforms(sizeof(EFBEncodeParams)); - - // We also linear filtering for both box filtering and downsampling higher resolutions to 1x - // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more - // complex down filtering to average all pixels and produce the correct result. - bool linear_filter = - (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f; - draw.SetPSSampler(0, src_texture, - linear_filter ? 
g_object_cache->GetLinearSampler() : - g_object_cache->GetPointSampler()); - - u32 render_width = bytes_per_row / sizeof(u32); - u32 render_height = num_blocks_y; - Util::SetViewportAndScissor(g_command_buffer_mgr->GetCurrentCommandBuffer(), 0, 0, render_width, - render_height); - - VkRect2D render_region = {{0, 0}, {render_width, render_height}}; - draw.BeginRenderPass(static_cast(m_encoding_render_texture.get())->GetFramebuffer(), - render_region); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - MathUtil::Rectangle copy_rect(0, 0, render_width, render_height); - dest->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); -} - -bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format) -{ - auto key = std::make_pair(format, palette_format); - auto iter = m_decoding_pipelines.find(key); - if (iter != m_decoding_pipelines.end()) - return iter->second.valid; - - TextureDecodingPipeline pipeline; - pipeline.base_info = TextureConversionShaderTiled::GetDecodingShaderInfo(format); - pipeline.compute_shader = VK_NULL_HANDLE; - pipeline.valid = false; - - if (!pipeline.base_info) - { - m_decoding_pipelines.emplace(key, pipeline); - return false; - } - - std::string shader_source = - TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::Vulkan); - - pipeline.compute_shader = Util::CompileAndCreateComputeShader(shader_source); - if (pipeline.compute_shader == VK_NULL_HANDLE) - { - m_decoding_pipelines.emplace(key, pipeline); - return false; - } - - pipeline.valid = true; - m_decoding_pipelines.emplace(key, pipeline); - return true; -} - -void TextureConverter::DecodeTexture(VkCommandBuffer command_buffer, - TextureCache::TCacheEntry* entry, u32 dst_level, - const u8* data, size_t data_size, TextureFormat format, - u32 width, u32 height, u32 aligned_width, u32 aligned_height, - u32 row_stride, const u8* palette, TLUTFormat palette_format) -{ - VKTexture* 
destination_texture = static_cast(entry->texture.get()); - auto key = std::make_pair(format, palette_format); - auto iter = m_decoding_pipelines.find(key); - if (iter == m_decoding_pipelines.end()) - return; - - struct PushConstants - { - u32 dst_size[2]; - u32 src_size[2]; - u32 src_offset; - u32 src_row_stride; - u32 palette_offset; - }; - - // Copy to GPU-visible buffer, aligned to the data type - auto info = iter->second; - u32 bytes_per_buffer_elem = - TextureConversionShaderTiled::GetBytesPerBufferElement(info.base_info->buffer_format); - - // Calculate total data size, including palette. - // Only copy palette if it is required. - u32 total_upload_size = static_cast(data_size); - u32 palette_size = iter->second.base_info->palette_size; - u32 palette_offset = total_upload_size; - bool has_palette = palette_size > 0; - if (has_palette) - { - // Align to u16. - if ((total_upload_size % sizeof(u16)) != 0) - { - total_upload_size++; - palette_offset++; - } - - total_upload_size += palette_size; - } - - // Allocate space for upload, if it fails, execute the buffer. - if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem)) - { - Util::ExecuteCurrentCommandsAndRestoreState(true, false); - if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem)) - PanicAlert("Failed to reserve memory for encoded texture upload"); - } - - // Copy/commit upload buffer. 
- u32 texel_buffer_offset = static_cast(m_texel_buffer->GetCurrentOffset()); - - Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_texel_buffer->GetBuffer(), VK_ACCESS_SHADER_READ_BIT, - VK_ACCESS_HOST_WRITE_BIT, texel_buffer_offset, total_upload_size, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT); - - std::memcpy(m_texel_buffer->GetCurrentHostPointer(), data, data_size); - if (has_palette) - std::memcpy(m_texel_buffer->GetCurrentHostPointer() + palette_offset, palette, palette_size); - m_texel_buffer->CommitMemory(total_upload_size); - - Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_texel_buffer->GetBuffer(), VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, texel_buffer_offset, total_upload_size, - VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); - - // Determine uniforms. - PushConstants constants = { - {width, height}, - {aligned_width, aligned_height}, - texel_buffer_offset / bytes_per_buffer_elem, - row_stride / bytes_per_buffer_elem, - static_cast((texel_buffer_offset + palette_offset) / sizeof(u16))}; - - // Determine view to use for texel buffers. - VkBufferView data_view = VK_NULL_HANDLE; - switch (iter->second.base_info->buffer_format) - { - case TextureConversionShaderTiled::BUFFER_FORMAT_R8_UINT: - data_view = m_texel_buffer_view_r8_uint; - break; - case TextureConversionShaderTiled::BUFFER_FORMAT_R16_UINT: - data_view = m_texel_buffer_view_r16_uint; - break; - case TextureConversionShaderTiled::BUFFER_FORMAT_R32G32_UINT: - data_view = m_texel_buffer_view_r32g32_uint; - break; - case TextureConversionShaderTiled::BUFFER_FORMAT_RGBA8_UINT: - data_view = m_texel_buffer_view_rgba8_uint; - break; - default: - break; - } - - // Dispatch compute to temporary texture. 
- ComputeShaderDispatcher dispatcher(command_buffer, - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE), - iter->second.compute_shader); - m_decoding_texture->TransitionToLayout(command_buffer, Texture2D::ComputeImageLayout::WriteOnly); - dispatcher.SetPushConstants(&constants, sizeof(constants)); - dispatcher.SetStorageImage(m_decoding_texture->GetView(), m_decoding_texture->GetLayout()); - dispatcher.SetTexelBuffer(0, data_view); - if (has_palette) - dispatcher.SetTexelBuffer(1, m_texel_buffer_view_r16_uint); - auto groups = TextureConversionShaderTiled::GetDispatchCount(iter->second.base_info, - aligned_width, aligned_height); - dispatcher.Dispatch(groups.first, groups.second, 1); - - // Copy from temporary texture to final destination. - Texture2D* vulkan_tex_identifier = destination_texture->GetRawTexIdentifier(); - m_decoding_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - vulkan_tex_identifier->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkImageCopy image_copy = {{VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, - {0, 0, 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, 0, 1}, - {0, 0, 0}, - {width, height, 1}}; - vkCmdCopyImage(command_buffer, m_decoding_texture->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vulkan_tex_identifier->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); -} - -bool TextureConverter::CreateTexelBuffer() -{ - // Prefer an 8MB buffer if possible, but use less if the device doesn't support this. - // This buffer is potentially going to be addressed as R8s in the future, so we assume - // that one element is one byte. 
- m_texel_buffer_size = - std::min(TEXTURE_CONVERSION_TEXEL_BUFFER_SIZE, - static_cast(g_vulkan_context->GetDeviceLimits().maxTexelBufferElements)); - - m_texel_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, - m_texel_buffer_size, m_texel_buffer_size); - if (!m_texel_buffer) - return false; - - // Create views of the formats that we will be using. - m_texel_buffer_view_r8_uint = CreateTexelBufferView(VK_FORMAT_R8_UINT); - m_texel_buffer_view_r16_uint = CreateTexelBufferView(VK_FORMAT_R16_UINT); - m_texel_buffer_view_r32g32_uint = CreateTexelBufferView(VK_FORMAT_R32G32_UINT); - m_texel_buffer_view_rgba8_unorm = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UNORM); - m_texel_buffer_view_rgba8_uint = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UINT); - return m_texel_buffer_view_r8_uint != VK_NULL_HANDLE && - m_texel_buffer_view_r16_uint != VK_NULL_HANDLE && - m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE && - m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE && - m_texel_buffer_view_rgba8_uint != VK_NULL_HANDLE; -} - -VkBufferView TextureConverter::CreateTexelBufferView(VkFormat format) const -{ - // Create a view of the whole buffer, we'll offset our texel load into it - VkBufferViewCreateInfo view_info = { - VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkBufferViewCreateFlags flags - m_texel_buffer->GetBuffer(), // VkBuffer buffer - format, // VkFormat format - 0, // VkDeviceSize offset - m_texel_buffer_size // VkDeviceSize range - }; - - VkBufferView view; - VkResult res = vkCreateBufferView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateBufferView failed: "); - return VK_NULL_HANDLE; - } - - return view; -} - -bool TextureConverter::CompilePaletteConversionShaders() -{ - static const char PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE[] = R"( - layout(std140, push_constant) uniform PCBlock - { - float 
multiplier; - int texture_buffer_offset; - } PC; - - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp1; - - layout(location = 0) in vec3 f_uv0; - layout(location = 0) out vec4 ocol0; - - int Convert3To8(int v) - { - // Swizzle bits: 00000123 -> 12312312 - return (v << 5) | (v << 2) | (v >> 1); - } - int Convert4To8(int v) - { - // Swizzle bits: 00001234 -> 12341234 - return (v << 4) | v; - } - int Convert5To8(int v) - { - // Swizzle bits: 00012345 -> 12345123 - return (v << 3) | (v >> 2); - } - int Convert6To8(int v) - { - // Swizzle bits: 00123456 -> 12345612 - return (v << 2) | (v >> 4); - } - float4 DecodePixel_RGB5A3(int val) - { - int r,g,b,a; - if ((val&0x8000) > 0) - { - r=Convert5To8((val>>10) & 0x1f); - g=Convert5To8((val>>5 ) & 0x1f); - b=Convert5To8((val ) & 0x1f); - a=0xFF; - } - else - { - a=Convert3To8((val>>12) & 0x7); - r=Convert4To8((val>>8 ) & 0xf); - g=Convert4To8((val>>4 ) & 0xf); - b=Convert4To8((val ) & 0xf); - } - return float4(r, g, b, a) / 255.0; - } - float4 DecodePixel_RGB565(int val) - { - int r, g, b, a; - r = Convert5To8((val >> 11) & 0x1f); - g = Convert6To8((val >> 5) & 0x3f); - b = Convert5To8((val) & 0x1f); - a = 0xFF; - return float4(r, g, b, a) / 255.0; - } - float4 DecodePixel_IA8(int val) - { - int i = val & 0xFF; - int a = val >> 8; - return float4(i, i, i, a) / 255.0; - } - void main() - { - int src = int(round(texture(samp0, f_uv0).r * PC.multiplier)); - src = int(texelFetch(samp1, src + PC.texture_buffer_offset).r); - src = ((src << 8) & 0xFF00) | (src >> 8); - ocol0 = DECODE(src); - } - - )"; - - std::string palette_ia8_program = StringFromFormat("%s\n%s", "#define DECODE DecodePixel_IA8", - PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); - std::string palette_rgb565_program = StringFromFormat( - "%s\n%s", "#define DECODE DecodePixel_RGB565", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); - std::string palette_rgb5a3_program = StringFromFormat( - "%s\n%s", "#define DECODE 
DecodePixel_RGB5A3", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); - - m_palette_conversion_shaders[static_cast(TLUTFormat::IA8)] = - Util::CompileAndCreateFragmentShader(palette_ia8_program); - m_palette_conversion_shaders[static_cast(TLUTFormat::RGB565)] = - Util::CompileAndCreateFragmentShader(palette_rgb565_program); - m_palette_conversion_shaders[static_cast(TLUTFormat::RGB5A3)] = - Util::CompileAndCreateFragmentShader(palette_rgb5a3_program); - - return m_palette_conversion_shaders[static_cast(TLUTFormat::IA8)] != VK_NULL_HANDLE && - m_palette_conversion_shaders[static_cast(TLUTFormat::RGB565)] != VK_NULL_HANDLE && - m_palette_conversion_shaders[static_cast(TLUTFormat::RGB5A3)] != VK_NULL_HANDLE; -} - -VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyParams& params) -{ - const char* shader = - TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::Vulkan); - VkShaderModule module = Util::CompileAndCreateFragmentShader(shader); - if (module == VK_NULL_HANDLE) - PanicAlert("Failed to compile texture encoding shader."); - - return module; -} - -VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyParams& params) -{ - auto iter = m_encoding_shaders.find(params); - if (iter != m_encoding_shaders.end()) - return iter->second; - - VkShaderModule shader = CompileEncodingShader(params); - m_encoding_shaders.emplace(params, shader); - return shader; -} - -bool TextureConverter::CreateEncodingTexture() -{ - m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); - return m_encoding_render_texture != nullptr; -} - -bool TextureConverter::CreateDecodingTexture() -{ - m_decoding_texture = Texture2D::Create( - DECODING_TEXTURE_WIDTH, DECODING_TEXTURE_HEIGHT, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT); - if 
(!m_decoding_texture) - return false; - - VkClearColorValue clear_value = {{0.0f, 0.0f, 0.0f, 1.0f}}; - VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - m_decoding_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - m_decoding_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - &clear_value, 1, &clear_range); - return true; -} -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.h b/Source/Core/VideoBackends/Vulkan/TextureConverter.h deleted file mode 100644 index 9c035296b2..0000000000 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.h +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/TextureCache.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/TextureDecoder.h" -#include "VideoCommon/VideoCommon.h" - -class AbstractTexture; -class AbstractStagingTexture; - -namespace Vulkan -{ -class Texture2D; -class VKTexture; - -class TextureConverter -{ -public: - TextureConverter(); - ~TextureConverter(); - - bool Initialize(); - - // Applies palette to dst_entry, using indices from src_entry. - void ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, - TextureCache::TCacheEntry* src_entry, const void* palette, - TLUTFormat palette_format); - - // Uses an encoding shader to copy src_texture to dest. 
- void EncodeTextureToMemory( - VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, - bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); - - bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format); - void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry, - u32 dst_level, const u8* data, size_t data_size, TextureFormat format, - u32 width, u32 height, u32 aligned_width, u32 aligned_height, u32 row_stride, - const u8* palette, TLUTFormat palette_format); - -private: - static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3; - - // Maximum size of a texture based on BP registers. - static const u32 DECODING_TEXTURE_WIDTH = 1024; - static const u32 DECODING_TEXTURE_HEIGHT = 1024; - - bool CreateTexelBuffer(); - VkBufferView CreateTexelBufferView(VkFormat format) const; - - bool CompilePaletteConversionShaders(); - - VkShaderModule CompileEncodingShader(const EFBCopyParams& params); - VkShaderModule GetEncodingShader(const EFBCopyParams& params); - - bool CreateEncodingTexture(); - bool CreateDecodingTexture(); - - // Allocates storage in the texel command buffer of the specified size. - // If the buffer does not have enough space, executes the current command buffer and tries again. - // If this is done, g_command_buffer_mgr->GetCurrentCommandBuffer() will return a different value, - // so it always should be re-obtained after calling this method. - // Once the data copy is done, call m_texel_buffer->CommitMemory(size). - bool ReserveTexelBufferStorage(size_t size, size_t alignment); - - // Returns the command buffer that the texture conversion should occur in for the given texture. - // This can be the initialization/copy command buffer, or the drawing command buffer. 
- VkCommandBuffer GetCommandBufferForTextureConversion(const TextureCache::TCacheEntry* src_entry); - - // Shared between conversion types - std::unique_ptr m_texel_buffer; - VkBufferView m_texel_buffer_view_r8_uint = VK_NULL_HANDLE; - VkBufferView m_texel_buffer_view_r16_uint = VK_NULL_HANDLE; - VkBufferView m_texel_buffer_view_r32g32_uint = VK_NULL_HANDLE; - VkBufferView m_texel_buffer_view_rgba8_uint = VK_NULL_HANDLE; - VkBufferView m_texel_buffer_view_rgba8_unorm = VK_NULL_HANDLE; - size_t m_texel_buffer_size = 0; - - // Palette conversion - taking an indexed texture and applying palette - std::array m_palette_conversion_shaders = {}; - - // Texture encoding - RGBA8->GX format in memory - std::map m_encoding_shaders; - std::unique_ptr m_encoding_render_texture; - - // Texture decoding - GX format in memory->RGBA8 - struct TextureDecodingPipeline - { - const TextureConversionShaderTiled::DecodingShaderInfo* base_info; - VkShaderModule compute_shader; - bool valid; - }; - std::map, TextureDecodingPipeline> m_decoding_pipelines; - std::unique_ptr m_decoding_texture; -}; - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Util.cpp b/Source/Core/VideoBackends/Vulkan/Util.cpp deleted file mode 100644 index 35c9bcfbaf..0000000000 --- a/Source/Core/VideoBackends/Vulkan/Util.cpp +++ /dev/null @@ -1,925 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/Vulkan/Util.h" - -#include "Common/Align.h" -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/MathUtil.h" -#include "Common/MsgHandler.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoBackends/Vulkan/ShaderCompiler.h" -#include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -namespace Vulkan -{ -namespace Util -{ -size_t AlignBufferOffset(size_t offset, size_t alignment) -{ - // Assume an offset of zero is already aligned to a value larger than alignment. - if (offset == 0) - return 0; - - return Common::AlignUp(offset, alignment); -} - -u32 MakeRGBA8Color(float r, float g, float b, float a) -{ - return (static_cast(MathUtil::Clamp(static_cast(r * 255.0f), 0, 255)) << 0) | - (static_cast(MathUtil::Clamp(static_cast(g * 255.0f), 0, 255)) << 8) | - (static_cast(MathUtil::Clamp(static_cast(b * 255.0f), 0, 255)) << 16) | - (static_cast(MathUtil::Clamp(static_cast(a * 255.0f), 0, 255)) << 24); -} - -bool IsDepthFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D16_UNORM_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - return true; - default: - return false; - } -} - -bool IsCompressedFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return true; - - default: - return false; - } -} - -VkFormat GetLinearFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_R8_SRGB: - return VK_FORMAT_R8_UNORM; - case VK_FORMAT_R8G8_SRGB: - return VK_FORMAT_R8G8_UNORM; - case VK_FORMAT_R8G8B8_SRGB: - return VK_FORMAT_R8G8B8_UNORM; - case VK_FORMAT_R8G8B8A8_SRGB: - 
return VK_FORMAT_R8G8B8A8_UNORM; - case VK_FORMAT_B8G8R8_SRGB: - return VK_FORMAT_B8G8R8_UNORM; - case VK_FORMAT_B8G8R8A8_SRGB: - return VK_FORMAT_B8G8R8A8_UNORM; - default: - return format; - } -} - -VkFormat GetVkFormatForHostTextureFormat(AbstractTextureFormat format) -{ - switch (format) - { - case AbstractTextureFormat::DXT1: - return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; - - case AbstractTextureFormat::DXT3: - return VK_FORMAT_BC2_UNORM_BLOCK; - - case AbstractTextureFormat::DXT5: - return VK_FORMAT_BC3_UNORM_BLOCK; - - case AbstractTextureFormat::BPTC: - return VK_FORMAT_BC7_UNORM_BLOCK; - - case AbstractTextureFormat::RGBA8: - return VK_FORMAT_R8G8B8A8_UNORM; - - case AbstractTextureFormat::BGRA8: - return VK_FORMAT_B8G8R8A8_UNORM; - - case AbstractTextureFormat::R16: - return VK_FORMAT_R16_UNORM; - - case AbstractTextureFormat::D16: - return VK_FORMAT_D16_UNORM; - - case AbstractTextureFormat::D24_S8: - return VK_FORMAT_D24_UNORM_S8_UINT; - - case AbstractTextureFormat::R32F: - return VK_FORMAT_R32_SFLOAT; - - case AbstractTextureFormat::D32F: - return VK_FORMAT_D32_SFLOAT; - - case AbstractTextureFormat::D32F_S8: - return VK_FORMAT_D32_SFLOAT_S8_UINT; - - case AbstractTextureFormat::Undefined: - return VK_FORMAT_UNDEFINED; - - default: - PanicAlert("Unhandled texture format."); - return VK_FORMAT_R8G8B8A8_UNORM; - } -} - -VkImageAspectFlags GetImageAspectForFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_D16_UNORM_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D32_SFLOAT: - return VK_IMAGE_ASPECT_DEPTH_BIT; - - default: - return VK_IMAGE_ASPECT_COLOR_BIT; - } -} - -u32 GetTexelSize(VkFormat format) -{ - // Only contains pixel formats we use. 
- switch (format) - { - case VK_FORMAT_R32_SFLOAT: - return 4; - - case VK_FORMAT_D32_SFLOAT: - return 4; - - case VK_FORMAT_R8G8B8A8_UNORM: - return 4; - - case VK_FORMAT_B8G8R8A8_UNORM: - return 4; - - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - return 8; - - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return 16; - - default: - PanicAlert("Unhandled pixel format"); - return 1; - } -} - -u32 GetBlockSize(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return 4; - - default: - return 1; - } -} - -VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height) -{ - VkRect2D out; - out.offset.x = MathUtil::Clamp(rect.offset.x, 0, static_cast(width - 1)); - out.offset.y = MathUtil::Clamp(rect.offset.y, 0, static_cast(height - 1)); - out.extent.width = std::min(rect.extent.width, width - static_cast(rect.offset.x)); - out.extent.height = std::min(rect.extent.height, height - static_cast(rect.offset.y)); - return out; -} - -VkBlendFactor GetAlphaBlendFactor(VkBlendFactor factor) -{ - switch (factor) - { - case VK_BLEND_FACTOR_SRC_COLOR: - return VK_BLEND_FACTOR_SRC_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - case VK_BLEND_FACTOR_DST_COLOR: - return VK_BLEND_FACTOR_DST_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; - default: - return factor; - } -} - -void SetViewportAndScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, - float min_depth /*= 0.0f*/, float max_depth /*= 1.0f*/) -{ - VkViewport viewport = {static_cast(x), - static_cast(y), - static_cast(width), - static_cast(height), - min_depth, - max_depth}; - - VkRect2D scissor = {{x, y}, {static_cast(width), static_cast(height)}}; - - vkCmdSetViewport(command_buffer, 0, 1, &viewport); - 
vkCmdSetScissor(command_buffer, 0, 1, &scissor); -} - -void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, - VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, - VkDeviceSize offset, VkDeviceSize size, - VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask) -{ - VkBufferMemoryBarrier buffer_info = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - src_access_mask, // VkAccessFlags srcAccessMask - dst_access_mask, // VkAccessFlags dstAccessMask - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - buffer, // VkBuffer buffer - offset, // VkDeviceSize offset - size // VkDeviceSize size - }; - - vkCmdPipelineBarrier(command_buffer, src_stage_mask, dst_stage_mask, 0, 0, nullptr, 1, - &buffer_info, 0, nullptr); -} - -void ExecuteCurrentCommandsAndRestoreState(bool execute_off_thread, bool wait_for_completion) -{ - StateTracker::GetInstance()->EndRenderPass(); - g_command_buffer_mgr->ExecuteCommandBuffer(execute_off_thread, wait_for_completion); - StateTracker::GetInstance()->InvalidateDescriptorSets(); - StateTracker::GetInstance()->InvalidateConstants(); - StateTracker::GetInstance()->SetPendingRebind(); -} - -VkShaderModule CreateShaderModule(const u32* spv, size_t spv_word_count) -{ - VkShaderModuleCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - info.codeSize = spv_word_count * sizeof(u32); - info.pCode = spv; - - VkShaderModule module; - VkResult res = vkCreateShaderModule(g_vulkan_context->GetDevice(), &info, nullptr, &module); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateShaderModule failed: "); - return VK_NULL_HANDLE; - } - - return module; -} - -VkShaderModule CompileAndCreateVertexShader(const std::string& source_code) -{ - ShaderCompiler::SPIRVCodeVector code; - if (!ShaderCompiler::CompileVertexShader(&code, source_code.c_str(), 
source_code.length())) - return VK_NULL_HANDLE; - - return CreateShaderModule(code.data(), code.size()); -} - -VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code) -{ - ShaderCompiler::SPIRVCodeVector code; - if (!ShaderCompiler::CompileGeometryShader(&code, source_code.c_str(), source_code.length())) - return VK_NULL_HANDLE; - - return CreateShaderModule(code.data(), code.size()); -} - -VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code) -{ - ShaderCompiler::SPIRVCodeVector code; - if (!ShaderCompiler::CompileFragmentShader(&code, source_code.c_str(), source_code.length())) - return VK_NULL_HANDLE; - - return CreateShaderModule(code.data(), code.size()); -} - -VkShaderModule CompileAndCreateComputeShader(const std::string& source_code) -{ - ShaderCompiler::SPIRVCodeVector code; - if (!ShaderCompiler::CompileComputeShader(&code, source_code.c_str(), source_code.length())) - return VK_NULL_HANDLE; - - return CreateShaderModule(code.data(), code.size()); -} - -} // namespace Util - -UtilityShaderDraw::UtilityShaderDraw(VkCommandBuffer command_buffer, - VkPipelineLayout pipeline_layout, VkRenderPass render_pass, - VkShaderModule vertex_shader, VkShaderModule geometry_shader, - VkShaderModule pixel_shader, PrimitiveType primitive) - : m_command_buffer(command_buffer) -{ - // Populate minimal pipeline state - m_pipeline_info.vertex_format = g_object_cache->GetUtilityShaderVertexFormat(); - m_pipeline_info.pipeline_layout = pipeline_layout; - m_pipeline_info.render_pass = render_pass; - m_pipeline_info.vs = vertex_shader; - m_pipeline_info.gs = geometry_shader; - m_pipeline_info.ps = pixel_shader; - m_pipeline_info.rasterization_state.hex = RenderState::GetNoCullRasterizationState().hex; - m_pipeline_info.rasterization_state.primitive = primitive; - m_pipeline_info.depth_state.hex = RenderState::GetNoDepthTestingDepthStencilState().hex; - m_pipeline_info.blend_state.hex = RenderState::GetNoBlendingBlendState().hex; - 
m_pipeline_info.multisampling_state.per_sample_shading = false; - m_pipeline_info.multisampling_state.samples = 1; -} - -UtilityShaderVertex* UtilityShaderDraw::ReserveVertices(size_t count) -{ - if (!g_object_cache->GetUtilityShaderVertexBuffer()->ReserveMemory( - sizeof(UtilityShaderVertex) * count, sizeof(UtilityShaderVertex), true, true, true)) - PanicAlert("Failed to allocate space for vertices in backend shader"); - - m_vertex_buffer = g_object_cache->GetUtilityShaderVertexBuffer()->GetBuffer(); - m_vertex_buffer_offset = g_object_cache->GetUtilityShaderVertexBuffer()->GetCurrentOffset(); - - return reinterpret_cast( - g_object_cache->GetUtilityShaderVertexBuffer()->GetCurrentHostPointer()); -} - -void UtilityShaderDraw::CommitVertices(size_t count) -{ - g_object_cache->GetUtilityShaderVertexBuffer()->CommitMemory(sizeof(UtilityShaderVertex) * count); - m_vertex_count = static_cast(count); -} - -void UtilityShaderDraw::UploadVertices(UtilityShaderVertex* vertices, size_t count) -{ - UtilityShaderVertex* upload_vertices = ReserveVertices(count); - memcpy(upload_vertices, vertices, sizeof(UtilityShaderVertex) * count); - CommitVertices(count); -} - -u8* UtilityShaderDraw::AllocateVSUniforms(size_t size) -{ - if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory( - size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true)) - PanicAlert("Failed to allocate util uniforms"); - - return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer(); -} - -void UtilityShaderDraw::CommitVSUniforms(size_t size) -{ - m_vs_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); - m_vs_uniform_buffer.offset = 0; - m_vs_uniform_buffer.range = size; - m_ubo_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = - static_cast(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset()); - - g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size); -} - -u8* UtilityShaderDraw::AllocatePSUniforms(size_t 
size) -{ - if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory( - size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true)) - PanicAlert("Failed to allocate util uniforms"); - - return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer(); -} - -void UtilityShaderDraw::CommitPSUniforms(size_t size) -{ - m_ps_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); - m_ps_uniform_buffer.offset = 0; - m_ps_uniform_buffer.range = size; - m_ubo_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = - static_cast(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset()); - - g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size); -} - -void UtilityShaderDraw::SetPushConstants(const void* data, size_t data_size) -{ - ASSERT(static_cast(data_size) < PUSH_CONSTANT_BUFFER_SIZE); - - vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, - static_cast(data_size), data); -} - -void UtilityShaderDraw::SetPSSampler(size_t index, VkImageView view, VkSampler sampler) -{ - m_ps_samplers[index].sampler = sampler; - m_ps_samplers[index].imageView = view; - m_ps_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; -} - -void UtilityShaderDraw::SetPSTexelBuffer(VkBufferView view) -{ - // Should only be used with the texture conversion pipeline layout. 
- ASSERT(m_pipeline_info.pipeline_layout == - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_TEXTURE_CONVERSION)); - - m_ps_texel_buffer = view; -} - -void UtilityShaderDraw::SetRasterizationState(const RasterizationState& state) -{ - m_pipeline_info.rasterization_state.hex = state.hex; -} - -void UtilityShaderDraw::SetMultisamplingState(const MultisamplingState& state) -{ - m_pipeline_info.multisampling_state.hex = state.hex; -} - -void UtilityShaderDraw::SetDepthState(const DepthState& state) -{ - m_pipeline_info.depth_state.hex = state.hex; -} - -void UtilityShaderDraw::SetBlendState(const BlendingState& state) -{ - m_pipeline_info.blend_state.hex = state.hex; -} - -void UtilityShaderDraw::BeginRenderPass(VkFramebuffer framebuffer, const VkRect2D& region, - const VkClearValue* clear_value) -{ - VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - nullptr, - m_pipeline_info.render_pass, - framebuffer, - region, - clear_value ? 1u : 0u, - clear_value}; - - vkCmdBeginRenderPass(m_command_buffer, &begin_info, VK_SUBPASS_CONTENTS_INLINE); -} - -void UtilityShaderDraw::EndRenderPass() -{ - vkCmdEndRenderPass(m_command_buffer); -} - -void UtilityShaderDraw::Draw() -{ - BindVertexBuffer(); - BindDescriptors(); - if (!BindPipeline()) - return; - - vkCmdDraw(m_command_buffer, m_vertex_count, 1, 0, 0); -} - -void UtilityShaderDraw::DrawQuad(int x, int y, int width, int height, float z) -{ - UtilityShaderVertex vertices[4]; - vertices[0].SetPosition(-1.0f, 1.0f, z); - vertices[0].SetTextureCoordinates(0.0f, 1.0f); - vertices[0].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[1].SetPosition(1.0f, 1.0f, z); - vertices[1].SetTextureCoordinates(1.0f, 1.0f); - vertices[1].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[2].SetPosition(-1.0f, -1.0f, z); - vertices[2].SetTextureCoordinates(0.0f, 0.0f); - vertices[2].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[3].SetPosition(1.0f, -1.0f, z); - vertices[3].SetTextureCoordinates(1.0f, 0.0f); - 
vertices[3].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - - Util::SetViewportAndScissor(m_command_buffer, x, y, width, height); - UploadVertices(vertices, ArraySize(vertices)); - Draw(); -} - -void UtilityShaderDraw::DrawQuad(int dst_x, int dst_y, int dst_width, int dst_height, int src_x, - int src_y, int src_layer, int src_width, int src_height, - int src_full_width, int src_full_height, float z) -{ - float u0 = float(src_x) / float(src_full_width); - float v0 = float(src_y) / float(src_full_height); - float u1 = float(src_x + src_width) / float(src_full_width); - float v1 = float(src_y + src_height) / float(src_full_height); - float w = static_cast(src_layer); - - UtilityShaderVertex vertices[4]; - vertices[0].SetPosition(-1.0f, 1.0f, z); - vertices[0].SetTextureCoordinates(u0, v1, w); - vertices[0].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[1].SetPosition(1.0f, 1.0f, z); - vertices[1].SetTextureCoordinates(u1, v1, w); - vertices[1].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[2].SetPosition(-1.0f, -1.0f, z); - vertices[2].SetTextureCoordinates(u0, v0, w); - vertices[2].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[3].SetPosition(1.0f, -1.0f, z); - vertices[3].SetTextureCoordinates(u1, v0, w); - vertices[3].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - - Util::SetViewportAndScissor(m_command_buffer, dst_x, dst_y, dst_width, dst_height); - UploadVertices(vertices, ArraySize(vertices)); - Draw(); -} - -void UtilityShaderDraw::DrawColoredQuad(int x, int y, int width, int height, float r, float g, - float b, float a, float z) -{ - return DrawColoredQuad(x, y, width, height, Util::MakeRGBA8Color(r, g, b, a), z); -} - -void UtilityShaderDraw::DrawColoredQuad(int x, int y, int width, int height, u32 color, float z) -{ - UtilityShaderVertex vertices[4]; - vertices[0].SetPosition(-1.0f, 1.0f, z); - vertices[0].SetTextureCoordinates(0.0f, 1.0f); - vertices[0].SetColor(color); - vertices[1].SetPosition(1.0f, 1.0f, z); - vertices[1].SetTextureCoordinates(1.0f, 1.0f); - 
vertices[1].SetColor(color); - vertices[2].SetPosition(-1.0f, -1.0f, z); - vertices[2].SetTextureCoordinates(0.0f, 0.0f); - vertices[2].SetColor(color); - vertices[3].SetPosition(1.0f, -1.0f, z); - vertices[3].SetTextureCoordinates(1.0f, 0.0f); - vertices[3].SetColor(color); - - Util::SetViewportAndScissor(m_command_buffer, x, y, width, height); - UploadVertices(vertices, ArraySize(vertices)); - Draw(); -} - -void UtilityShaderDraw::SetViewportAndScissor(int x, int y, int width, int height) -{ - Util::SetViewportAndScissor(m_command_buffer, x, y, width, height, 0.0f, 1.0f); -} - -void UtilityShaderDraw::DrawWithoutVertexBuffer(u32 vertex_count) -{ - m_pipeline_info.vertex_format = nullptr; - - BindDescriptors(); - if (!BindPipeline()) - return; - - vkCmdDraw(m_command_buffer, vertex_count, 1, 0, 0); -} - -void UtilityShaderDraw::BindVertexBuffer() -{ - vkCmdBindVertexBuffers(m_command_buffer, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset); -} - -void UtilityShaderDraw::BindDescriptors() -{ - // TODO: This method is a mess, clean it up - std::array bind_descriptor_sets = {}; - std::array set_writes = {}; - uint32_t num_set_writes = 0; - - VkDescriptorBufferInfo dummy_uniform_buffer = { - g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(), 0, 1}; - - // uniform buffers - if (m_vs_uniform_buffer.buffer != VK_NULL_HANDLE || m_ps_uniform_buffer.buffer != VK_NULL_HANDLE) - { - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS)); - if (set == VK_NULL_HANDLE) - PanicAlert("Failed to allocate descriptor set for utility draw"); - - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - UBO_DESCRIPTOR_SET_BINDING_VS, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - (m_vs_uniform_buffer.buffer != VK_NULL_HANDLE) ? 
- &m_vs_uniform_buffer : - &dummy_uniform_buffer, - nullptr}; - - if (g_vulkan_context->SupportsGeometryShaders()) - { - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - UBO_DESCRIPTOR_SET_BINDING_GS, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - &dummy_uniform_buffer, - nullptr}; - } - - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - UBO_DESCRIPTOR_SET_BINDING_PS, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - (m_ps_uniform_buffer.buffer != VK_NULL_HANDLE) ? - &m_ps_uniform_buffer : - &dummy_uniform_buffer, - nullptr}; - - bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS] = set; - } - - // PS samplers - size_t first_active_sampler; - for (first_active_sampler = 0; first_active_sampler < NUM_PIXEL_SHADER_SAMPLERS; - first_active_sampler++) - { - if (m_ps_samplers[first_active_sampler].imageView != VK_NULL_HANDLE && - m_ps_samplers[first_active_sampler].sampler != VK_NULL_HANDLE) - { - break; - } - } - - // Check if we have any at all, skip the binding process entirely if we don't - if (first_active_sampler != NUM_PIXEL_SHADER_SAMPLERS) - { - // We need to fill it with non-empty images. 
- for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) - { - if (m_ps_samplers[i].imageView == VK_NULL_HANDLE) - { - m_ps_samplers[i].imageView = g_object_cache->GetDummyImageView(); - m_ps_samplers[i].sampler = g_object_cache->GetPointSampler(); - } - } - - // Allocate a new descriptor set - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS)); - if (set == VK_NULL_HANDLE) - PanicAlert("Failed to allocate descriptor set for utility draw"); - - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - static_cast(NUM_PIXEL_SHADER_SAMPLERS), - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - m_ps_samplers.data(), - nullptr, - nullptr}; - - bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set; - } - - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0, - nullptr); - - if (m_ps_texel_buffer != VK_NULL_HANDLE) - { - // TODO: Handle case where this fails. - // This'll only be when we do over say, 1024 allocations per frame, which shouldn't happen. - // TODO: Execute the command buffer, reset render passes and then try again. 
- VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS)); - if (set == VK_NULL_HANDLE) - { - PanicAlert("Failed to allocate texel buffer descriptor set for utility draw"); - return; - } - - VkWriteDescriptorSet set_write = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - nullptr, - nullptr, - &m_ps_texel_buffer}; - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), 1, &set_write, 0, nullptr); - bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] = set; - } - - // Fast path when there are no gaps in the set bindings - u32 bind_point_index; - for (bind_point_index = 0; bind_point_index < NUM_DESCRIPTOR_SET_BIND_POINTS; bind_point_index++) - { - if (bind_descriptor_sets[bind_point_index] == VK_NULL_HANDLE) - break; - } - if (bind_point_index > 0) - { - // Bind the contiguous sets, any others after any gaps will be handled below - vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline_info.pipeline_layout, 0, bind_point_index, - &bind_descriptor_sets[0], NUM_UBO_DESCRIPTOR_SET_BINDINGS, - m_ubo_offsets.data()); - } - - // Handle any remaining sets - for (u32 i = bind_point_index; i < NUM_DESCRIPTOR_SET_BIND_POINTS; i++) - { - if (bind_descriptor_sets[i] == VK_NULL_HANDLE) - continue; - - // No need to worry about dynamic offsets here, since #0 will always be bound above. 
- vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline_info.pipeline_layout, i, 1, &bind_descriptor_sets[i], 0, - nullptr); - } -} - -bool UtilityShaderDraw::BindPipeline() -{ - VkPipeline pipeline = g_shader_cache->GetPipeline(m_pipeline_info); - if (pipeline == VK_NULL_HANDLE) - { - PanicAlert("Failed to get pipeline for backend shader draw"); - return false; - } - - vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - return true; -} - -ComputeShaderDispatcher::ComputeShaderDispatcher(VkCommandBuffer command_buffer, - VkPipelineLayout pipeline_layout, - VkShaderModule compute_shader) - : m_command_buffer(command_buffer) -{ - // Populate minimal pipeline state - m_pipeline_info.pipeline_layout = pipeline_layout; - m_pipeline_info.cs = compute_shader; -} - -u8* ComputeShaderDispatcher::AllocateUniformBuffer(size_t size) -{ - if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory( - size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true)) - PanicAlert("Failed to allocate util uniforms"); - - return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer(); -} - -void ComputeShaderDispatcher::CommitUniformBuffer(size_t size) -{ - m_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); - m_uniform_buffer.offset = 0; - m_uniform_buffer.range = size; - m_uniform_buffer_offset = - static_cast(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset()); - - g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size); -} - -void ComputeShaderDispatcher::SetPushConstants(const void* data, size_t data_size) -{ - ASSERT(static_cast(data_size) < PUSH_CONSTANT_BUFFER_SIZE); - - vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, static_cast(data_size), data); -} - -void ComputeShaderDispatcher::SetSampler(size_t index, VkImageView view, VkSampler sampler) -{ - 
m_samplers[index].sampler = sampler; - m_samplers[index].imageView = view; - m_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; -} - -void ComputeShaderDispatcher::SetStorageImage(VkImageView view, VkImageLayout image_layout) -{ - m_storage_image.sampler = VK_NULL_HANDLE; - m_storage_image.imageView = view; - m_storage_image.imageLayout = image_layout; -} - -void ComputeShaderDispatcher::SetTexelBuffer(size_t index, VkBufferView view) -{ - m_texel_buffers[index] = view; -} - -void ComputeShaderDispatcher::Dispatch(u32 groups_x, u32 groups_y, u32 groups_z) -{ - BindDescriptors(); - if (!BindPipeline()) - return; - - vkCmdDispatch(m_command_buffer, groups_x, groups_y, groups_z); -} - -void ComputeShaderDispatcher::BindDescriptors() -{ - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE)); - if (set == VK_NULL_HANDLE) - { - PanicAlert("Failed to allocate descriptor set for compute dispatch"); - return; - } - - // Reserve enough descriptors to write every binding. 
- std::array set_writes = {}; - u32 num_set_writes = 0; - - if (m_uniform_buffer.buffer != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - &m_uniform_buffer, - nullptr}; - } - - // Samplers - for (size_t i = 0; i < m_samplers.size(); i++) - { - const VkDescriptorImageInfo& info = m_samplers[i]; - if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - static_cast(1 + i), - 0, - 1, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - &info, - nullptr, - nullptr}; - } - } - - for (size_t i = 0; i < m_texel_buffers.size(); i++) - { - if (m_texel_buffers[i] != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = { - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, 5 + static_cast(i), 0, 1, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, nullptr, nullptr, &m_texel_buffers[i]}; - } - } - - if (m_storage_image.imageView != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = { - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, 7, 0, 1, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &m_storage_image, nullptr, nullptr}; - } - - if (num_set_writes > 0) - { - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0, - nullptr); - } - - vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - m_pipeline_info.pipeline_layout, 0, 1, &set, 1, &m_uniform_buffer_offset); -} - -bool ComputeShaderDispatcher::BindPipeline() -{ - VkPipeline pipeline = g_shader_cache->GetComputePipeline(m_pipeline_info); - if (pipeline == VK_NULL_HANDLE) - { - PanicAlert("Failed to get pipeline for backend compute dispatch"); - return false; - } - - vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - return true; -} - -} // namespace Vulkan diff --git 
a/Source/Core/VideoBackends/Vulkan/Util.h b/Source/Core/VideoBackends/Vulkan/Util.h deleted file mode 100644 index ced63ef06b..0000000000 --- a/Source/Core/VideoBackends/Vulkan/Util.h +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoCommon/RenderState.h" -#include "VideoCommon/TextureConfig.h" - -namespace Vulkan -{ -class CommandBufferManager; -class StateTracker; - -namespace Util -{ -size_t AlignBufferOffset(size_t offset, size_t alignment); - -u32 MakeRGBA8Color(float r, float g, float b, float a); - -bool IsDepthFormat(VkFormat format); -bool IsCompressedFormat(VkFormat format); -VkFormat GetLinearFormat(VkFormat format); -VkFormat GetVkFormatForHostTextureFormat(AbstractTextureFormat format); -VkImageAspectFlags GetImageAspectForFormat(VkFormat format); -u32 GetTexelSize(VkFormat format); -u32 GetBlockSize(VkFormat format); - -// Clamps a VkRect2D to the specified dimensions. 
-VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height); - -// Map {SRC,DST}_COLOR to {SRC,DST}_ALPHA -VkBlendFactor GetAlphaBlendFactor(VkBlendFactor factor); - -// Combines viewport and scissor updates -void SetViewportAndScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, - float min_depth = 0.0f, float max_depth = 1.0f); - -// Wrapper for creating an barrier on a buffer -void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, - VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, - VkDeviceSize offset, VkDeviceSize size, - VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask); - -// Completes the current render pass, executes the command buffer, and restores state ready for next -// render. Use when you want to kick the current buffer to make room for new data. -void ExecuteCurrentCommandsAndRestoreState(bool execute_off_thread, - bool wait_for_completion = false); - -// Create a shader module from the specified SPIR-V. -VkShaderModule CreateShaderModule(const u32* spv, size_t spv_word_count); - -// Compile a vertex shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateVertexShader(const std::string& source_code); - -// Compile a geometry shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code); - -// Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code); - -// Compile a compute shader and create a shader module, discarding the intermediate SPIR-V. 
-VkShaderModule CompileAndCreateComputeShader(const std::string& source_code); -} - -// Utility shader vertex format -#pragma pack(push, 1) -struct UtilityShaderVertex -{ - float Position[4]; - float TexCoord[4]; - u32 Color; - - void SetPosition(float x, float y) - { - Position[0] = x; - Position[1] = y; - Position[2] = 0.0f; - Position[3] = 1.0f; - } - void SetPosition(float x, float y, float z) - { - Position[0] = x; - Position[1] = y; - Position[2] = z; - Position[3] = 1.0f; - } - void SetTextureCoordinates(float u, float v) - { - TexCoord[0] = u; - TexCoord[1] = v; - TexCoord[2] = 0.0f; - TexCoord[3] = 0.0f; - } - void SetTextureCoordinates(float u, float v, float w) - { - TexCoord[0] = u; - TexCoord[1] = v; - TexCoord[2] = w; - TexCoord[3] = 0.0f; - } - void SetTextureCoordinates(float u, float v, float w, float x) - { - TexCoord[0] = u; - TexCoord[1] = v; - TexCoord[2] = w; - TexCoord[3] = x; - } - void SetColor(u32 color) { Color = color; } - void SetColor(float r, float g, float b) { Color = Util::MakeRGBA8Color(r, g, b, 1.0f); } - void SetColor(float r, float g, float b, float a) { Color = Util::MakeRGBA8Color(r, g, b, a); } -}; -#pragma pack(pop) - -class UtilityShaderDraw -{ -public: - UtilityShaderDraw(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout, - VkRenderPass render_pass, VkShaderModule vertex_shader, - VkShaderModule geometry_shader, VkShaderModule pixel_shader, - PrimitiveType primitive = PrimitiveType::TriangleStrip); - - UtilityShaderVertex* ReserveVertices(size_t count); - void CommitVertices(size_t count); - - void UploadVertices(UtilityShaderVertex* vertices, size_t count); - - u8* AllocateVSUniforms(size_t size); - void CommitVSUniforms(size_t size); - - u8* AllocatePSUniforms(size_t size); - void CommitPSUniforms(size_t size); - - void SetPushConstants(const void* data, size_t data_size); - - void SetPSSampler(size_t index, VkImageView view, VkSampler sampler); - - void SetPSTexelBuffer(VkBufferView view); - - void 
SetRasterizationState(const RasterizationState& state); - void SetMultisamplingState(const MultisamplingState& state); - void SetDepthState(const DepthState& state); - void SetBlendState(const BlendingState& state); - - void BeginRenderPass(VkFramebuffer framebuffer, const VkRect2D& region, - const VkClearValue* clear_value = nullptr); - void EndRenderPass(); - - void Draw(); - - // NOTE: These methods alter the viewport state of the command buffer. - - // Sets texture coordinates to 0..1 - void DrawQuad(int x, int y, int width, int height, float z = 0.0f); - - // Sets texture coordinates to the specified range - void DrawQuad(int dst_x, int dst_y, int dst_width, int dst_height, int src_x, int src_y, - int src_layer, int src_width, int src_height, int src_full_width, - int src_full_height, float z = 0.0f); - - void DrawColoredQuad(int x, int y, int width, int height, u32 color, float z = 0.0f); - - void DrawColoredQuad(int x, int y, int width, int height, float r, float g, float b, float a, - float z = 0.0f); - - // Draw without a vertex buffer. Assumes viewport has been initialized separately. 
- void SetViewportAndScissor(int x, int y, int width, int height); - void DrawWithoutVertexBuffer(u32 vertex_count); - -private: - void BindVertexBuffer(); - void BindDescriptors(); - bool BindPipeline(); - - VkCommandBuffer m_command_buffer = VK_NULL_HANDLE; - VkBuffer m_vertex_buffer = VK_NULL_HANDLE; - VkDeviceSize m_vertex_buffer_offset = 0; - uint32_t m_vertex_count = 0; - - VkDescriptorBufferInfo m_vs_uniform_buffer = {}; - VkDescriptorBufferInfo m_ps_uniform_buffer = {}; - std::array m_ubo_offsets = {}; - - std::array m_ps_samplers = {}; - - VkBufferView m_ps_texel_buffer = VK_NULL_HANDLE; - - PipelineInfo m_pipeline_info = {}; -}; - -class ComputeShaderDispatcher -{ -public: - ComputeShaderDispatcher(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout, - VkShaderModule compute_shader); - - u8* AllocateUniformBuffer(size_t size); - void CommitUniformBuffer(size_t size); - - void SetPushConstants(const void* data, size_t data_size); - - void SetSampler(size_t index, VkImageView view, VkSampler sampler); - - void SetTexelBuffer(size_t index, VkBufferView view); - - void SetStorageImage(VkImageView view, VkImageLayout image_layout); - - void Dispatch(u32 groups_x, u32 groups_y, u32 groups_z); - -private: - void BindDescriptors(); - bool BindPipeline(); - - VkCommandBuffer m_command_buffer = VK_NULL_HANDLE; - - VkDescriptorBufferInfo m_uniform_buffer = {}; - u32 m_uniform_buffer_offset = 0; - - std::array m_samplers = {}; - - std::array m_texel_buffers = {}; - - VkDescriptorImageInfo m_storage_image = {}; - - ComputePipelineInfo m_pipeline_info = {}; -}; - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp index fa513cef7d..8ff1a30009 100644 --- a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp @@ -6,9 +6,9 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/ObjectCache.h" -#include 
"VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VKPipeline.h" #include "VideoBackends/Vulkan/VKShader.h" +#include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" @@ -25,14 +25,213 @@ VKPipeline::~VKPipeline() vkDestroyPipeline(g_vulkan_context->GetDevice(), m_pipeline, nullptr); } +static bool IsStripPrimitiveTopology(VkPrimitiveTopology topology) +{ + return topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP || + topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP || + topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY || + topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY; +} + +static VkPipelineRasterizationStateCreateInfo +GetVulkanRasterizationState(const RasterizationState& state) +{ + static constexpr std::array cull_modes = { + {VK_CULL_MODE_NONE, VK_CULL_MODE_BACK_BIT, VK_CULL_MODE_FRONT_BIT, + VK_CULL_MODE_FRONT_AND_BACK}}; + + bool depth_clamp = g_ActiveConfig.backend_info.bSupportsDepthClamp; + + return { + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineRasterizationStateCreateFlags flags + depth_clamp, // VkBool32 depthClampEnable + VK_FALSE, // VkBool32 rasterizerDiscardEnable + VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode + cull_modes[state.cullmode], // VkCullModeFlags cullMode + VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace + VK_FALSE, // VkBool32 depthBiasEnable + 0.0f, // float depthBiasConstantFactor + 0.0f, // float depthBiasClamp + 0.0f, // float depthBiasSlopeFactor + 1.0f // float lineWidth + }; +} + +static VkPipelineMultisampleStateCreateInfo GetVulkanMultisampleState(const FramebufferState& state) +{ + return { + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineMultisampleStateCreateFlags flags + static_cast( + state.samples.Value()), // 
VkSampleCountFlagBits rasterizationSamples + state.per_sample_shading, // VkBool32 sampleShadingEnable + 1.0f, // float minSampleShading + nullptr, // const VkSampleMask* pSampleMask; + VK_FALSE, // VkBool32 alphaToCoverageEnable + VK_FALSE // VkBool32 alphaToOneEnable + }; +} + +static VkPipelineDepthStencilStateCreateInfo GetVulkanDepthStencilState(const DepthState& state) +{ + // Less/greater are swapped due to inverted depth. + VkCompareOp compare_op; + bool inverted_depth = !g_ActiveConfig.backend_info.bSupportsReversedDepthRange; + switch (state.func) + { + case ZMode::NEVER: + compare_op = VK_COMPARE_OP_NEVER; + break; + case ZMode::LESS: + compare_op = inverted_depth ? VK_COMPARE_OP_GREATER : VK_COMPARE_OP_LESS; + break; + case ZMode::EQUAL: + compare_op = VK_COMPARE_OP_EQUAL; + break; + case ZMode::LEQUAL: + compare_op = inverted_depth ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_LESS_OR_EQUAL; + break; + case ZMode::GREATER: + compare_op = inverted_depth ? VK_COMPARE_OP_LESS : VK_COMPARE_OP_GREATER; + break; + case ZMode::NEQUAL: + compare_op = VK_COMPARE_OP_NOT_EQUAL; + break; + case ZMode::GEQUAL: + compare_op = inverted_depth ? 
VK_COMPARE_OP_LESS_OR_EQUAL : VK_COMPARE_OP_GREATER_OR_EQUAL; + break; + case ZMode::ALWAYS: + compare_op = VK_COMPARE_OP_ALWAYS; + break; + default: + compare_op = VK_COMPARE_OP_ALWAYS; + break; + } + + return { + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineDepthStencilStateCreateFlags flags + state.testenable, // VkBool32 depthTestEnable + state.updateenable, // VkBool32 depthWriteEnable + compare_op, // VkCompareOp depthCompareOp + VK_FALSE, // VkBool32 depthBoundsTestEnable + VK_FALSE, // VkBool32 stencilTestEnable + {}, // VkStencilOpState front + {}, // VkStencilOpState back + 0.0f, // float minDepthBounds + 1.0f // float maxDepthBounds + }; +} + +static VkPipelineColorBlendAttachmentState GetVulkanAttachmentBlendState(const BlendingState& state) +{ + VkPipelineColorBlendAttachmentState vk_state = {}; + vk_state.blendEnable = static_cast(state.blendenable); + vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; + vk_state.alphaBlendOp = state.subtractAlpha ? 
VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; + + if (state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend) + { + static constexpr std::array src_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + static constexpr std::array dst_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + + vk_state.srcColorBlendFactor = src_factors[state.srcfactor]; + vk_state.srcAlphaBlendFactor = src_factors[state.srcfactoralpha]; + vk_state.dstColorBlendFactor = dst_factors[state.dstfactor]; + vk_state.dstAlphaBlendFactor = dst_factors[state.dstfactoralpha]; + } + else + { + static constexpr std::array src_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + + static constexpr std::array dst_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + + vk_state.srcColorBlendFactor = src_factors[state.srcfactor]; + vk_state.srcAlphaBlendFactor = src_factors[state.srcfactoralpha]; + vk_state.dstColorBlendFactor = dst_factors[state.dstfactor]; + vk_state.dstAlphaBlendFactor = dst_factors[state.dstfactoralpha]; + } + + if (state.colorupdate) + { + vk_state.colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT; + } 
+ else + { + vk_state.colorWriteMask = 0; + } + + if (state.alphaupdate) + vk_state.colorWriteMask |= VK_COLOR_COMPONENT_A_BIT; + + return vk_state; +} + +static VkPipelineColorBlendStateCreateInfo +GetVulkanColorBlendState(const BlendingState& state, + const VkPipelineColorBlendAttachmentState* attachments, + uint32_t num_attachments) +{ + static constexpr std::array vk_logic_ops = { + {VK_LOGIC_OP_CLEAR, VK_LOGIC_OP_AND, VK_LOGIC_OP_AND_REVERSE, VK_LOGIC_OP_COPY, + VK_LOGIC_OP_AND_INVERTED, VK_LOGIC_OP_NO_OP, VK_LOGIC_OP_XOR, VK_LOGIC_OP_OR, + VK_LOGIC_OP_NOR, VK_LOGIC_OP_EQUIVALENT, VK_LOGIC_OP_INVERT, VK_LOGIC_OP_OR_REVERSE, + VK_LOGIC_OP_COPY_INVERTED, VK_LOGIC_OP_OR_INVERTED, VK_LOGIC_OP_NAND, VK_LOGIC_OP_SET}}; + + VkBool32 vk_logic_op_enable = static_cast(state.logicopenable); + if (vk_logic_op_enable && !g_ActiveConfig.backend_info.bSupportsLogicOp) + { + // At the time of writing, Adreno and Mali drivers didn't support logic ops. + // The "emulation" through blending path has been removed, so just disable it completely. + // These drivers don't support dual-source blend either, so issues are to be expected. + vk_logic_op_enable = VK_FALSE; + } + + VkLogicOp vk_logic_op = vk_logic_op_enable ? vk_logic_ops[state.logicmode] : VK_LOGIC_OP_CLEAR; + + VkPipelineColorBlendStateCreateInfo vk_state = { + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineColorBlendStateCreateFlags flags + vk_logic_op_enable, // VkBool32 logicOpEnable + vk_logic_op, // VkLogicOp logicOp + num_attachments, // uint32_t attachmentCount + attachments, // const VkPipelineColorBlendAttachmentState* pAttachments + {1.0f, 1.0f, 1.0f, 1.0f} // float blendConstants[4] + }; + + return vk_state; +} + std::unique_ptr VKPipeline::Create(const AbstractPipelineConfig& config) { DEBUG_ASSERT(config.vertex_shader && config.pixel_shader); // Get render pass for config. 
VkRenderPass render_pass = g_object_cache->GetRenderPass( - Util::GetVkFormatForHostTextureFormat(config.framebuffer_state.color_texture_format), - Util::GetVkFormatForHostTextureFormat(config.framebuffer_state.depth_texture_format), + VKTexture::GetVkFormatForHostTextureFormat(config.framebuffer_state.color_texture_format), + VKTexture::GetVkFormatForHostTextureFormat(config.framebuffer_state.depth_texture_format), config.framebuffer_state.samples, VK_ATTACHMENT_LOAD_OP_LOAD); // Get pipeline layout. @@ -50,26 +249,144 @@ std::unique_ptr VKPipeline::Create(const AbstractPipelineConfig& con return nullptr; } - // TODO: Move ShaderCache stuff to here. - PipelineInfo pinfo; - pinfo.vertex_format = static_cast(config.vertex_format); - pinfo.pipeline_layout = pipeline_layout; - pinfo.vs = static_cast(config.vertex_shader)->GetShaderModule(); - pinfo.ps = static_cast(config.pixel_shader)->GetShaderModule(); - pinfo.gs = config.geometry_shader ? - static_cast(config.geometry_shader)->GetShaderModule() : - VK_NULL_HANDLE; - pinfo.render_pass = render_pass; - pinfo.rasterization_state.hex = config.rasterization_state.hex; - pinfo.depth_state.hex = config.depth_state.hex; - pinfo.blend_state.hex = config.blending_state.hex; - pinfo.multisampling_state.hex = 0; - pinfo.multisampling_state.samples = config.framebuffer_state.samples; - pinfo.multisampling_state.per_sample_shading = config.framebuffer_state.per_sample_shading; + // Declare descriptors for empty vertex buffers/attributes + static const VkPipelineVertexInputStateCreateInfo empty_vertex_input_state = { + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineVertexInputStateCreateFlags flags + 0, // uint32_t vertexBindingDescriptionCount + nullptr, // const VkVertexInputBindingDescription* pVertexBindingDescriptions + 0, // uint32_t vertexAttributeDescriptionCount + nullptr // const VkVertexInputAttributeDescription* 
pVertexAttributeDescriptions + }; - VkPipeline pipeline = g_shader_cache->CreatePipeline(pinfo); - if (pipeline == VK_NULL_HANDLE) - return nullptr; + // Vertex inputs + const VkPipelineVertexInputStateCreateInfo& vertex_input_state = + config.vertex_format ? + static_cast(config.vertex_format)->GetVertexInputStateInfo() : + empty_vertex_input_state; + + // Input assembly + static constexpr std::array vk_primitive_topologies = { + {VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP}}; + VkPipelineInputAssemblyStateCreateInfo input_assembly_state = { + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, nullptr, 0, + vk_primitive_topologies[static_cast(config.rasterization_state.primitive.Value())], + VK_FALSE}; + + // See Vulkan spec, section 19: + // If topology is VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + // VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + // VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY or VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // primitiveRestartEnable must be VK_FALSE + if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart && + IsStripPrimitiveTopology(input_assembly_state.topology)) + { + input_assembly_state.primitiveRestartEnable = VK_TRUE; + } + + // Shaders to stages + VkPipelineShaderStageCreateInfo shader_stages[3]; + uint32_t num_shader_stages = 0; + if (config.vertex_shader) + { + shader_stages[num_shader_stages++] = { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_VERTEX_BIT, + static_cast(config.vertex_shader)->GetShaderModule(), + "main"}; + } + if (config.geometry_shader) + { + shader_stages[num_shader_stages++] = { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_GEOMETRY_BIT, + static_cast(config.geometry_shader)->GetShaderModule(), + "main"}; + } + if (config.pixel_shader) + { + 
shader_stages[num_shader_stages++] = { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_FRAGMENT_BIT, + static_cast(config.pixel_shader)->GetShaderModule(), + "main"}; + } + + // Fill in Vulkan descriptor structs from our state structures. + VkPipelineRasterizationStateCreateInfo rasterization_state = + GetVulkanRasterizationState(config.rasterization_state); + VkPipelineMultisampleStateCreateInfo multisample_state = + GetVulkanMultisampleState(config.framebuffer_state); + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = + GetVulkanDepthStencilState(config.depth_state); + VkPipelineColorBlendAttachmentState blend_attachment_state = + GetVulkanAttachmentBlendState(config.blending_state); + VkPipelineColorBlendStateCreateInfo blend_state = + GetVulkanColorBlendState(config.blending_state, &blend_attachment_state, 1); + + // This viewport isn't used, but needs to be specified anyway. + static const VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + static const VkRect2D scissor = {{0, 0}, {1, 1}}; + static const VkPipelineViewportStateCreateInfo viewport_state = { + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + nullptr, + 0, // VkPipelineViewportStateCreateFlags flags; + 1, // uint32_t viewportCount + &viewport, // const VkViewport* pViewports + 1, // uint32_t scissorCount + &scissor // const VkRect2D* pScissors + }; + + // Set viewport and scissor dynamic state so we can change it elsewhere. + static const VkDynamicState dynamic_states[] = {VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + static const VkPipelineDynamicStateCreateInfo dynamic_state = { + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, nullptr, + 0, // VkPipelineDynamicStateCreateFlags flags + static_cast(ArraySize(dynamic_states)), // uint32_t dynamicStateCount + dynamic_states // const VkDynamicState* pDynamicStates + }; + + // Combine to full pipeline info structure. 
+ VkGraphicsPipelineCreateInfo pipeline_info = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + nullptr, // VkStructureType sType + 0, // VkPipelineCreateFlags flags + num_shader_stages, // uint32_t stageCount + shader_stages, // const VkPipelineShaderStageCreateInfo* pStages + &vertex_input_state, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState + &input_assembly_state, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState + nullptr, // const VkPipelineTessellationStateCreateInfo* pTessellationState + &viewport_state, // const VkPipelineViewportStateCreateInfo* pViewportState + &rasterization_state, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState + &multisample_state, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState + &depth_stencil_state, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState + &blend_state, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState + &dynamic_state, // const VkPipelineDynamicStateCreateInfo* pDynamicState + pipeline_layout, // VkPipelineLayout layout + render_pass, // VkRenderPass renderPass + 0, // uint32_t subpass + VK_NULL_HANDLE, // VkPipeline basePipelineHandle + -1 // int32_t basePipelineIndex + }; + + VkPipeline pipeline; + VkResult res = + vkCreateGraphicsPipelines(g_vulkan_context->GetDevice(), g_object_cache->GetPipelineCache(), + 1, &pipeline_info, nullptr, &pipeline); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateGraphicsPipelines failed: "); + return VK_NULL_HANDLE; + } return std::make_unique(pipeline, pipeline_layout, config.usage); } diff --git a/Source/Core/VideoBackends/Vulkan/VKShader.cpp b/Source/Core/VideoBackends/Vulkan/VKShader.cpp index 5b44ed99a4..1a95a9c1f8 100644 --- a/Source/Core/VideoBackends/Vulkan/VKShader.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKShader.cpp @@ -5,8 +5,8 @@ #include "Common/Align.h" #include "Common/Assert.h" +#include "VideoBackends/Vulkan/ObjectCache.h" #include 
"VideoBackends/Vulkan/ShaderCompiler.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VKShader.h" #include "VideoBackends/Vulkan/VulkanContext.h" @@ -48,26 +48,47 @@ AbstractShader::BinaryData VKShader::GetBinary() const static std::unique_ptr CreateShaderObject(ShaderStage stage, ShaderCompiler::SPIRVCodeVector spv) { - VkShaderModule mod = Util::CreateShaderModule(spv.data(), spv.size()); - if (mod == VK_NULL_HANDLE) - return nullptr; + VkShaderModuleCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + info.codeSize = spv.size() * sizeof(u32); + info.pCode = spv.data(); + + VkShaderModule mod; + VkResult res = vkCreateShaderModule(g_vulkan_context->GetDevice(), &info, nullptr, &mod); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateShaderModule failed: "); + return VK_NULL_HANDLE; + } // If it's a graphics shader, we defer pipeline creation. if (stage != ShaderStage::Compute) return std::make_unique(stage, std::move(spv), mod); // If it's a compute shader, we create the pipeline straight away. - ComputePipelineInfo pinfo; - pinfo.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE); - pinfo.cs = mod; - VkPipeline pipeline = g_shader_cache->CreateComputePipeline(pinfo); - if (pipeline == VK_NULL_HANDLE) + const VkComputePipelineCreateInfo pipeline_info = { + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + nullptr, + 0, + {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr, 0, VK_SHADER_STAGE_COMPUTE_BIT, + mod, "main", nullptr}, + g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE), + VK_NULL_HANDLE, + -1}; + + VkPipeline pipeline; + res = vkCreateComputePipelines(g_vulkan_context->GetDevice(), g_object_cache->GetPipelineCache(), + 1, &pipeline_info, nullptr, &pipeline); + + // Shader module is no longer needed, now it is compiled to a pipeline. 
+ vkDestroyShaderModule(g_vulkan_context->GetDevice(), mod, nullptr); + + if (res != VK_SUCCESS) { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), mod, nullptr); + LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: "); return nullptr; } - // Shader module is no longer needed, now it is compiled to a pipeline. return std::make_unique(std::move(spv), pipeline); } diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp index 929307edb3..a19e169412 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp @@ -13,115 +13,228 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StagingBuffer.h" #include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" #include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConfig.h" - namespace Vulkan { -VKTexture::VKTexture(const TextureConfig& tex_config, std::unique_ptr texture, - VkFramebuffer framebuffer) - : AbstractTexture(tex_config), m_texture(std::move(texture)), m_framebuffer(framebuffer) +VKTexture::VKTexture(const TextureConfig& tex_config, VkDeviceMemory device_memory, VkImage image, + VkImageLayout layout /* = VK_IMAGE_LAYOUT_UNDEFINED */, + ComputeImageLayout compute_layout /* = ComputeImageLayout::Undefined */) + : AbstractTexture(tex_config), m_device_memory(device_memory), m_image(image), m_layout(layout), + m_compute_layout(compute_layout) { } +VKTexture::~VKTexture() +{ + StateTracker::GetInstance()->UnbindTexture(m_view); + 
g_command_buffer_mgr->DeferImageViewDestruction(m_view); + + // If we don't have device memory allocated, the image is not owned by us (e.g. swapchain) + if (m_device_memory != VK_NULL_HANDLE) + { + g_command_buffer_mgr->DeferImageDestruction(m_image); + g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_device_memory); + } +} + std::unique_ptr VKTexture::Create(const TextureConfig& tex_config) { // Determine image usage, we need to flag as an attachment if it can be used as a rendertarget. VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - if (tex_config.rendertarget) - usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - // Allocate texture object - VkFormat vk_format = Util::GetVkFormatForHostTextureFormat(tex_config.format); - auto texture = - Texture2D::Create(tex_config.width, tex_config.height, tex_config.levels, tex_config.layers, - vk_format, static_cast(tex_config.samples), - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage); - - if (!texture) + if (tex_config.IsRenderTarget()) { + usage |= IsDepthFormat(tex_config.format) ? 
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + if (tex_config.IsComputeImage()) + usage |= VK_IMAGE_USAGE_STORAGE_BIT; + + VkImageCreateInfo image_info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + nullptr, + 0, + VK_IMAGE_TYPE_2D, + GetVkFormatForHostTextureFormat(tex_config.format), + {tex_config.width, tex_config.height, 1}, + tex_config.levels, + tex_config.layers, + static_cast(tex_config.samples), + VK_IMAGE_TILING_OPTIMAL, + usage, + VK_SHARING_MODE_EXCLUSIVE, + 0, + nullptr, + VK_IMAGE_LAYOUT_UNDEFINED}; + + VkImage image = VK_NULL_HANDLE; + VkResult res = vkCreateImage(g_vulkan_context->GetDevice(), &image_info, nullptr, &image); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImage failed: "); return nullptr; } - // If this is a render target (for efb copies), allocate a framebuffer - VkFramebuffer framebuffer = VK_NULL_HANDLE; - if (tex_config.rendertarget) + // Allocate memory to back this texture, we want device local memory in this case + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(g_vulkan_context->GetDevice(), image, &memory_requirements); + + VkMemoryAllocateInfo memory_info = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size, + g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)}; + + VkDeviceMemory device_memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory); + if (res != VK_SUCCESS) { - VkImageView framebuffer_attachments[] = {texture->GetView()}; - VkRenderPass render_pass = - g_object_cache->GetRenderPass(texture->GetFormat(), VK_FORMAT_UNDEFINED, tex_config.samples, - VK_ATTACHMENT_LOAD_OP_DONT_CARE); - VkFramebufferCreateInfo framebuffer_info = { - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - render_pass, - static_cast(ArraySize(framebuffer_attachments)), - framebuffer_attachments, - texture->GetWidth(), - 
texture->GetHeight(), - texture->GetLayers()}; - - VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return nullptr; - } - - if (!IsDepthFormat(tex_config.format)) - { - // Clear render targets before use to prevent reading uninitialized memory. - VkClearColorValue clear_value = {{0.0f, 0.0f, 0.0f, 1.0f}}; - VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, tex_config.levels, 0, - tex_config.layers}; - texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), texture->GetImage(), - texture->GetLayout(), &clear_value, 1, &clear_range); - } - else - { - // Clear render targets before use to prevent reading uninitialized memory. - VkClearDepthStencilValue clear_value = {0.0f, 0}; - VkImageSubresourceRange clear_range = {Util::GetImageAspectForFormat(vk_format), 0, - tex_config.levels, 0, tex_config.layers}; - texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdClearDepthStencilImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - texture->GetImage(), texture->GetLayout(), &clear_value, 1, - &clear_range); - } + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + return nullptr; } - return std::unique_ptr(new VKTexture(tex_config, std::move(texture), framebuffer)); + res = vkBindImageMemory(g_vulkan_context->GetDevice(), image, device_memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindImageMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); + return nullptr; + } + + std::unique_ptr texture = 
std::make_unique( + tex_config, device_memory, image, VK_IMAGE_LAYOUT_UNDEFINED, ComputeImageLayout::Undefined); + if (!texture->CreateView(VK_IMAGE_VIEW_TYPE_2D_ARRAY)) + return nullptr; + + return texture; } -VKTexture::~VKTexture() +std::unique_ptr VKTexture::CreateAdopted(const TextureConfig& tex_config, VkImage image, + VkImageViewType view_type, VkImageLayout layout) { - // Texture is automatically cleaned up, however, we don't want to leave it bound. - g_renderer->UnbindTexture(this); - if (m_framebuffer != VK_NULL_HANDLE) - g_command_buffer_mgr->DeferFramebufferDestruction(m_framebuffer); + std::unique_ptr texture = std::make_unique( + tex_config, nullptr, image, layout, ComputeImageLayout::Undefined); + if (!texture->CreateView(VK_IMAGE_VIEW_TYPE_2D_ARRAY)) + return nullptr; + + return texture; } -Texture2D* VKTexture::GetRawTexIdentifier() const +bool VKTexture::CreateView(VkImageViewType type) { - return m_texture.get(); + VkImageViewCreateInfo view_info = { + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + nullptr, + 0, + m_image, + type, + GetVkFormat(), + {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY}, + {GetImageAspectForFormat(GetFormat()), 0, GetLevels(), 0, GetLayers()}}; + + VkResult res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &m_view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); + return false; + } + + return true; } -VkFramebuffer VKTexture::GetFramebuffer() const + +VkFormat VKTexture::GetLinearFormat(VkFormat format) { - return m_framebuffer; + switch (format) + { + case VK_FORMAT_R8_SRGB: + return VK_FORMAT_R8_UNORM; + case VK_FORMAT_R8G8_SRGB: + return VK_FORMAT_R8G8_UNORM; + case VK_FORMAT_R8G8B8_SRGB: + return VK_FORMAT_R8G8B8_UNORM; + case VK_FORMAT_R8G8B8A8_SRGB: + return VK_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_B8G8R8_SRGB: + return VK_FORMAT_B8G8R8_UNORM; + case VK_FORMAT_B8G8R8A8_SRGB: 
+ return VK_FORMAT_B8G8R8A8_UNORM; + default: + return format; + } +} + +VkFormat VKTexture::GetVkFormatForHostTextureFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::DXT1: + return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + + case AbstractTextureFormat::DXT3: + return VK_FORMAT_BC2_UNORM_BLOCK; + + case AbstractTextureFormat::DXT5: + return VK_FORMAT_BC3_UNORM_BLOCK; + + case AbstractTextureFormat::BPTC: + return VK_FORMAT_BC7_UNORM_BLOCK; + + case AbstractTextureFormat::RGBA8: + return VK_FORMAT_R8G8B8A8_UNORM; + + case AbstractTextureFormat::BGRA8: + return VK_FORMAT_B8G8R8A8_UNORM; + + case AbstractTextureFormat::R16: + return VK_FORMAT_R16_UNORM; + + case AbstractTextureFormat::D16: + return VK_FORMAT_D16_UNORM; + + case AbstractTextureFormat::D24_S8: + return VK_FORMAT_D24_UNORM_S8_UINT; + + case AbstractTextureFormat::R32F: + return VK_FORMAT_R32_SFLOAT; + + case AbstractTextureFormat::D32F: + return VK_FORMAT_D32_SFLOAT; + + case AbstractTextureFormat::D32F_S8: + return VK_FORMAT_D32_SFLOAT_S8_UINT; + + case AbstractTextureFormat::Undefined: + return VK_FORMAT_UNDEFINED; + + default: + PanicAlert("Unhandled texture format."); + return VK_FORMAT_R8G8B8A8_UNORM; + } +} + +VkImageAspectFlags VKTexture::GetImageAspectForFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::D24_S8: + case AbstractTextureFormat::D32F_S8: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + + case AbstractTextureFormat::D16: + case AbstractTextureFormat::D32F: + return VK_IMAGE_ASPECT_DEPTH_BIT; + + default: + return VK_IMAGE_ASPECT_COLOR_BIT; + } } void VKTexture::CopyRectangleFromTexture(const AbstractTexture* src, @@ -129,7 +242,7 @@ void VKTexture::CopyRectangleFromTexture(const AbstractTexture* src, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) { - Texture2D* src_texture = static_cast(src)->GetRawTexIdentifier(); + const VKTexture* src_texture = 
static_cast(src); ASSERT_MSG(VIDEO, static_cast(src_rect.GetWidth()) <= src_texture->GetWidth() && @@ -151,67 +264,18 @@ void VKTexture::CopyRectangleFromTexture(const AbstractTexture* src, // Must be called outside of a render pass. StateTracker::GetInstance()->EndRenderPass(); + const VkImageLayout old_src_layout = src_texture->GetLayout(); src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdCopyImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_texture->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_texture->GetImage(), + vkCmdCopyImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_texture->m_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); - // Ensure both textures remain in the SHADER_READ_ONLY layout so they can be bound. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); -} - -void VKTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect) -{ - Texture2D* src_texture = static_cast(source)->GetRawTexIdentifier(); - - // Can't do this within a game render pass. - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - - // Can't render to a non-rendertarget (no framebuffer). 
- ASSERT_MSG(VIDEO, m_config.rendertarget, - "Destination texture for partial copy is not a rendertarget"); - - // Render pass expects dst_texture to be in COLOR_ATTACHMENT_OPTIMAL state. - // src_texture should already be in SHADER_READ_ONLY state, but transition in case (XFB). - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - VkRenderPass render_pass = g_object_cache->GetRenderPass( - m_texture->GetFormat(), VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), render_pass, - g_shader_cache->GetPassthroughVertexShader(), - g_shader_cache->GetPassthroughGeometryShader(), - TextureCache::GetInstance()->GetCopyShader()); - - VkRect2D region = { - {dst_rect.left, dst_rect.top}, - {static_cast(dst_rect.GetWidth()), static_cast(dst_rect.GetHeight())}}; - draw.BeginRenderPass(m_framebuffer, region); - draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetLinearSampler()); - draw.DrawQuad(dst_rect.left, dst_rect.top, dst_rect.GetWidth(), dst_rect.GetHeight(), - src_rect.left, src_rect.top, 0, src_rect.GetWidth(), src_rect.GetHeight(), - static_cast(src_texture->GetWidth()), - static_cast(src_texture->GetHeight())); - draw.EndRenderPass(); - - // Ensure both textures remain in the SHADER_READ_ONLY layout so they can be bound. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + // Only restore the source layout. Destination is restored by FinishedRendering(). 
+ src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_src_layout); } void VKTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, @@ -225,11 +289,11 @@ void VKTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::R // Resolving is considered to be a transfer operation. StateTracker::GetInstance()->EndRenderPass(); - VkImageLayout old_src_layout = srcentry->m_texture->GetLayout(); - srcentry->m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + VkImageLayout old_src_layout = srcentry->m_layout; + srcentry->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); VkImageResolve resolve = { {VK_IMAGE_ASPECT_COLOR_BIT, level, layer, 1}, // srcSubresource @@ -238,23 +302,18 @@ void VKTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::R {rect.left, rect.top, 0}, // dstOffset {static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), 1} // extent }; - vkCmdResolveImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - srcentry->m_texture->GetImage(), srcentry->m_texture->GetLayout(), - m_texture->GetImage(), m_texture->GetLayout(), 1, &resolve); + vkCmdResolveImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), srcentry->m_image, + srcentry->m_layout, m_image, m_layout, 1, &resolve); - // Restore old source texture layout. Destination is assumed to be bound as a shader resource. 
- srcentry->m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - old_src_layout); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + srcentry->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_src_layout); } void VKTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) { // Can't copy data larger than the texture extents. - width = std::max(1u, std::min(width, m_texture->GetWidth() >> level)); - height = std::max(1u, std::min(height, m_texture->GetHeight() >> level)); + width = std::max(1u, std::min(width, GetWidth() >> level)); + height = std::max(1u, std::min(height, GetHeight() >> level)); // We don't care about the existing contents of the texture, so we could the image layout to // VK_IMAGE_LAYOUT_UNDEFINED here. However, under section 2.2.1, Queue Operation of the Vulkan @@ -272,30 +331,29 @@ void VKTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* // When the last mip level is uploaded, we transition to SHADER_READ_ONLY, ready for use. This is // because we can't transition in a render pass, and we don't necessarily know when this texture // is going to be used. - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); // For unaligned textures, we can save some memory in the transfer buffer by skipping the rows // that lie outside of the texture's dimensions. 
- u32 upload_alignment = static_cast(g_vulkan_context->GetBufferImageGranularity()); - u32 block_size = Util::GetBlockSize(m_texture->GetFormat()); - u32 num_rows = Common::AlignUp(height, block_size) / block_size; - size_t source_pitch = CalculateStrideForFormat(m_config.format, row_length); - size_t upload_size = source_pitch * num_rows; + const u32 upload_alignment = static_cast(g_vulkan_context->GetBufferImageGranularity()); + const u32 block_size = GetBlockSizeForFormat(GetFormat()); + const u32 num_rows = Common::AlignUp(height, block_size) / block_size; + const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length); + const u32 upload_size = source_pitch * num_rows; std::unique_ptr temp_buffer; VkBuffer upload_buffer; VkDeviceSize upload_buffer_offset; // Does this texture data fit within the streaming buffer? - if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD && - upload_size <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE) + if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD) { StreamBuffer* stream_buffer = g_object_cache->GetTextureUploadBuffer(); if (!stream_buffer->ReserveMemory(upload_size, upload_alignment)) { // Execute the command buffer first. WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); + Renderer::GetInstance()->ExecuteCommandBuffer(false); // Try allocating again. This may cause a fence wait. if (!stream_buffer->ReserveMemory(upload_size, upload_alignment)) @@ -334,17 +392,282 @@ void VKTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* {width, height, 1} // VkExtent3D imageExtent }; vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), upload_buffer, - m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, - &image_copy); + m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); - // Last mip level? 
We shouldn't be doing any further uploads now, so transition for rendering. + // Preemptively transition to shader read only after uploading the last mip level, as we're + // likely finished with writes to this texture for now. We can't do this in common with a + // FinishedRendering() call because the upload happens in the init command buffer, and we + // don't want to interrupt the render pass with calls which were executed ages before. if (level == (m_config.levels - 1)) { - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } } +void VKTexture::FinishedRendering() +{ + if (m_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + return; + + StateTracker::GetInstance()->EndRenderPass(); + TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); +} + +void VKTexture::OverrideImageLayout(VkImageLayout new_layout) +{ + m_layout = new_layout; +} + +void VKTexture::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) const +{ + if (m_layout == new_layout) + return; + + VkImageMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAccessFlags srcAccessMask + 0, // VkAccessFlags dstAccessMask + m_layout, // VkImageLayout oldLayout + new_layout, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_image, // VkImage image + {GetImageAspectForFormat(GetFormat()), 0, GetLevels(), 0, + GetLayers()} // VkImageSubresourceRange subresourceRange + }; + + // srcStageMask -> Stages that must complete before the barrier + // dstStageMask -> Stages that must wait for after the barrier before beginning + VkPipelineStageFlags srcStageMask, 
dstStageMask; + switch (m_layout) + { + case VK_IMAGE_LAYOUT_UNDEFINED: + // Layout undefined therefore contents undefined, and we don't care what happens to it. + barrier.srcAccessMask = 0; + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_PREINITIALIZED: + // Image has been pre-initialized by the host, so ensure all writes have completed. + barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; + break; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + // Image was being used as a color attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + // Image was being used as a depthstencil attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + srcStageMask = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + break; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + // Image was being used as a shader resource, make sure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + // Image was being used as a copy source, ensure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + // Image was being used as a copy destination, ensure all writes have finished. 
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + default: + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + } + + switch (new_layout) + { + case VK_IMAGE_LAYOUT_UNDEFINED: + barrier.dstAccessMask = 0; + dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dstStageMask = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + break; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + default: + dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + break; + } + + // If we were using a compute layout, the stages need to reflect that + switch (m_compute_layout) + { + case ComputeImageLayout::Undefined: + break; + case ComputeImageLayout::ReadOnly: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + case ComputeImageLayout::WriteOnly: + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; 
+ srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + case ComputeImageLayout::ReadWrite: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + } + m_compute_layout = ComputeImageLayout::Undefined; + + vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, + &barrier); + + m_layout = new_layout; +} + +void VKTexture::TransitionToLayout(VkCommandBuffer command_buffer, + ComputeImageLayout new_layout) const +{ + ASSERT(new_layout != ComputeImageLayout::Undefined); + if (m_compute_layout == new_layout) + return; + + VkImageMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAccessFlags srcAccessMask + 0, // VkAccessFlags dstAccessMask + m_layout, // VkImageLayout oldLayout + VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_image, // VkImage image + {GetImageAspectForFormat(GetFormat()), 0, GetLevels(), 0, + GetLayers()} // VkImageSubresourceRange subresourceRange + }; + + VkPipelineStageFlags srcStageMask, dstStageMask; + switch (m_layout) + { + case VK_IMAGE_LAYOUT_UNDEFINED: + // Layout undefined therefore contents undefined, and we don't care what happens to it. + barrier.srcAccessMask = 0; + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_PREINITIALIZED: + // Image has been pre-initialized by the host, so ensure all writes have completed. + barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; + break; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + // Image was being used as a color attachment, so ensure all writes have completed. 
+ barrier.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + // Image was being used as a depthstencil attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + srcStageMask = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + break; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + // Image was being used as a shader resource, make sure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + // Image was being used as a copy source, ensure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + // Image was being used as a copy destination, ensure all writes have finished. 
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + default: + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + } + + switch (new_layout) + { + case ComputeImageLayout::ReadOnly: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + case ComputeImageLayout::WriteOnly: + barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + case ComputeImageLayout::ReadWrite: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + default: + dstStageMask = 0; + break; + } + + m_layout = barrier.newLayout; + m_compute_layout = new_layout; + + vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, + &barrier); +} + VKStagingTexture::VKStagingTexture(StagingTextureType type, const TextureConfig& config, std::unique_ptr buffer) : AbstractStagingTexture(type, config), m_staging_buffer(std::move(buffer)) @@ -407,38 +730,32 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect) { + const VKTexture* src_tex = static_cast(src); ASSERT(m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); - ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetConfig().width && - src_rect.top >= 0 && static_cast(src_rect.bottom) <= src->GetConfig().height); + ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src_tex->GetWidth() && + src_rect.top >= 0 && 
static_cast(src_rect.bottom) <= src_tex->GetHeight()); ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); - Texture2D* src_tex = static_cast(src)->GetRawTexIdentifier(); - CopyFromTexture(src_tex, src_rect, src_layer, src_level, dst_rect); -} - -void VKStagingTexture::CopyFromTexture(Texture2D* src, const MathUtil::Rectangle& src_rect, - u32 src_layer, u32 src_level, - const MathUtil::Rectangle& dst_rect) -{ if (m_needs_flush) { // Drop copy before reusing it. - g_command_buffer_mgr->RemoveFencePointCallback(this); + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); m_flush_fence = VK_NULL_HANDLE; m_needs_flush = false; } - VkImageLayout old_layout = src->GetLayout(); - src->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + StateTracker::GetInstance()->EndRenderPass(); + + VkImageLayout old_layout = src_tex->GetLayout(); + src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); // Issue the image->buffer copy, but delay it for now. VkBufferImageCopy image_copy = {}; - VkImageAspectFlags aspect = - Util::IsDepthFormat(src->GetFormat()) ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + const VkImageAspectFlags aspect = VKTexture::GetImageAspectForFormat(src_tex->GetFormat()); image_copy.bufferOffset = static_cast(static_cast(dst_rect.top) * m_config.GetStride() + static_cast(dst_rect.left) * m_texel_size); @@ -448,58 +765,51 @@ void VKStagingTexture::CopyFromTexture(Texture2D* src, const MathUtil::Rectangle image_copy.imageOffset = {src_rect.left, src_rect.top, 0}; image_copy.imageExtent = {static_cast(src_rect.GetWidth()), static_cast(src_rect.GetHeight()), 1u}; - vkCmdCopyImageToBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), src->GetImage(), + vkCmdCopyImageToBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_tex->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_staging_buffer->GetBuffer(), 1, &image_copy); // Restore old source texture layout. - src->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); + src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); m_needs_flush = true; - g_command_buffer_mgr->AddFencePointCallback(this, - [this](VkCommandBuffer buf, VkFence fence) { - ASSERT(m_needs_flush); - if (m_flush_fence != VK_NULL_HANDLE) - return; + m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) { + if (m_flush_fence != fence) + return; - m_flush_fence = fence; - }, - [this](VkFence fence) { - if (m_flush_fence != fence) - return; - - m_flush_fence = VK_NULL_HANDLE; - m_needs_flush = false; - g_command_buffer_mgr->RemoveFencePointCallback( - this); - m_staging_buffer->InvalidateCPUCache(); - }); + m_flush_fence = VK_NULL_HANDLE; + m_needs_flush = false; + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); + m_staging_buffer->InvalidateCPUCache(); + }); } void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, const MathUtil::Rectangle& dst_rect, u32 dst_layer, 
u32 dst_level) { + const VKTexture* dst_tex = static_cast(dst); ASSERT(m_type == StagingTextureType::Upload || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= m_config.width && src_rect.top >= 0 && static_cast(src_rect.bottom) <= m_config.height); - ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetConfig().width && - dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetConfig().height); + ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst_tex->GetWidth() && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst_tex->GetHeight()); if (m_needs_flush) { // Drop copy before reusing it. - g_command_buffer_mgr->RemoveFencePointCallback(this); + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); m_flush_fence = VK_NULL_HANDLE; m_needs_flush = false; } // Flush caches before copying. m_staging_buffer->FlushCPUCache(); + StateTracker::GetInstance()->EndRenderPass(); - Texture2D* dst_tex = static_cast(dst)->GetRawTexIdentifier(); VkImageLayout old_layout = dst_tex->GetLayout(); dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); @@ -523,23 +833,15 @@ void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, A dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); m_needs_flush = true; - g_command_buffer_mgr->AddFencePointCallback(this, - [this](VkCommandBuffer buf, VkFence fence) { - ASSERT(m_needs_flush); - if (m_flush_fence != VK_NULL_HANDLE) - return; + m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) { + if (m_flush_fence != fence) + return; - m_flush_fence = fence; - }, - [this](VkFence fence) { - if (m_flush_fence != fence) - return; - - m_flush_fence = 
VK_NULL_HANDLE; - m_needs_flush = false; - g_command_buffer_mgr->RemoveFencePointCallback( - this); - }); + m_flush_fence = VK_NULL_HANDLE; + m_needs_flush = false; + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); + }); } bool VKStagingTexture::Map() @@ -559,19 +861,19 @@ void VKStagingTexture::Flush() return; // Either of the below two calls will cause the callback to fire. - g_command_buffer_mgr->RemoveFencePointCallback(this); - if (m_flush_fence != VK_NULL_HANDLE) + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); + if (m_flush_fence == g_command_buffer_mgr->GetCurrentCommandBufferFence()) { - // WaitForFence should fire the callback. - g_command_buffer_mgr->WaitForFence(m_flush_fence); - m_flush_fence = VK_NULL_HANDLE; + // The readback is in the current command buffer, and we must execute it. + Renderer::GetInstance()->ExecuteCommandBuffer(false, true); } else { - // We don't have a fence, and are pending. That means the readback is in the current - // command buffer, and must execute it to populate the staging texture. - Util::ExecuteCurrentCommandsAndRestoreState(false, true); + // WaitForFence should fire the callback. + g_command_buffer_mgr->WaitForFence(m_flush_fence); } + + DEBUG_ASSERT(m_flush_fence == VK_NULL_HANDLE); m_needs_flush = false; // For readback textures, invalidate the CPU cache as there is new data there. @@ -579,16 +881,16 @@ void VKStagingTexture::Flush() m_staging_buffer->InvalidateCPUCache(); } -VKFramebuffer::VKFramebuffer(const VKTexture* color_attachment, const VKTexture* depth_attachment, - u32 width, u32 height, u32 layers, u32 samples, VkFramebuffer fb, +VKFramebuffer::VKFramebuffer(VKTexture* color_attachment, VKTexture* depth_attachment, u32 width, + u32 height, u32 layers, u32 samples, VkFramebuffer fb, VkRenderPass load_render_pass, VkRenderPass discard_render_pass, VkRenderPass clear_render_pass) : AbstractFramebuffer( + color_attachment, depth_attachment, color_attachment ? 
color_attachment->GetFormat() : AbstractTextureFormat::Undefined, depth_attachment ? depth_attachment->GetFormat() : AbstractTextureFormat::Undefined, width, height, layers, samples), - m_color_attachment(color_attachment), m_depth_attachment(depth_attachment), m_fb(fb), - m_load_render_pass(load_render_pass), m_discard_render_pass(discard_render_pass), + m_fb(fb), m_load_render_pass(load_render_pass), m_discard_render_pass(discard_render_pass), m_clear_render_pass(clear_render_pass) { } @@ -598,16 +900,16 @@ VKFramebuffer::~VKFramebuffer() g_command_buffer_mgr->DeferFramebufferDestruction(m_fb); } -std::unique_ptr VKFramebuffer::Create(const VKTexture* color_attachment, - const VKTexture* depth_attachment) +std::unique_ptr VKFramebuffer::Create(VKTexture* color_attachment, + VKTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; const VkFormat vk_color_format = - color_attachment ? color_attachment->GetRawTexIdentifier()->GetFormat() : VK_FORMAT_UNDEFINED; + color_attachment ? color_attachment->GetVkFormat() : VK_FORMAT_UNDEFINED; const VkFormat vk_depth_format = - depth_attachment ? depth_attachment->GetRawTexIdentifier()->GetFormat() : VK_FORMAT_UNDEFINED; + depth_attachment ? depth_attachment->GetVkFormat() : VK_FORMAT_UNDEFINED; const VKTexture* either_attachment = color_attachment ? 
color_attachment : depth_attachment; const u32 width = either_attachment->GetWidth(); const u32 height = either_attachment->GetHeight(); @@ -618,10 +920,10 @@ std::unique_ptr VKFramebuffer::Create(const VKTexture* color_atta u32 num_attachments = 0; if (color_attachment) - attachment_views[num_attachments++] = color_attachment->GetRawTexIdentifier()->GetView(); + attachment_views[num_attachments++] = color_attachment->GetView(); if (depth_attachment) - attachment_views[num_attachments++] = depth_attachment->GetRawTexIdentifier()->GetView(); + attachment_views[num_attachments++] = depth_attachment->GetView(); VkRenderPass load_render_pass = g_object_cache->GetRenderPass( vk_color_format, vk_depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD); @@ -659,38 +961,20 @@ std::unique_ptr VKFramebuffer::Create(const VKTexture* color_atta clear_render_pass); } -void VKFramebuffer::TransitionForRender() const +void VKFramebuffer::TransitionForRender() { if (m_color_attachment) { - m_color_attachment->GetRawTexIdentifier()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + static_cast(m_color_attachment) + ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } if (m_depth_attachment) { - m_depth_attachment->GetRawTexIdentifier()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + static_cast(m_depth_attachment) + ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); } } - -void VKFramebuffer::TransitionForSample() const -{ - if (StateTracker::GetInstance()->GetFramebuffer() == m_fb) - StateTracker::GetInstance()->EndRenderPass(); - - if (m_color_attachment) - { - m_color_attachment->GetRawTexIdentifier()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - - if (m_depth_attachment) - { - m_depth_attachment->GetRawTexIdentifier()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } -} - } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.h b/Source/Core/VideoBackends/Vulkan/VKTexture.h index 3a5c8cadc8..bab11ec108 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.h +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.h @@ -5,8 +5,8 @@ #pragma once #include -#include +#include "VideoBackends/Vulkan/VulkanLoader.h" #include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" @@ -19,33 +19,64 @@ class Texture2D; class VKTexture final : public AbstractTexture { public: + // Custom image layouts, mainly used for switching to/from compute + enum class ComputeImageLayout + { + Undefined, + ReadOnly, + WriteOnly, + ReadWrite + }; + VKTexture() = delete; + VKTexture(const TextureConfig& tex_config, VkDeviceMemory device_memory, VkImage image, + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED, + ComputeImageLayout compute_layout = ComputeImageLayout::Undefined); ~VKTexture(); + static VkFormat GetLinearFormat(VkFormat format); + static VkFormat GetVkFormatForHostTextureFormat(AbstractTextureFormat format); + static VkImageAspectFlags GetImageAspectForFormat(AbstractTextureFormat format); + void CopyRectangleFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; - void Load(u32 level, u32 width, u32 height, u32 row_length, 
const u8* buffer, size_t buffer_size) override; + void FinishedRendering() override; - Texture2D* GetRawTexIdentifier() const; - VkFramebuffer GetFramebuffer() const; + VkImage GetImage() const { return m_image; } + VkDeviceMemory GetDeviceMemory() const { return m_device_memory; } + VkImageView GetView() const { return m_view; } + VkImageLayout GetLayout() const { return m_layout; } + VkFormat GetVkFormat() const { return GetVkFormatForHostTextureFormat(m_config.format); } + bool IsAdopted() const { return m_device_memory != nullptr; } static std::unique_ptr Create(const TextureConfig& tex_config); + static std::unique_ptr + CreateAdopted(const TextureConfig& tex_config, VkImage image, + VkImageViewType view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY, + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED); + + // Used when the render pass is changing the image layout, or to force it to + // VK_IMAGE_LAYOUT_UNDEFINED, if the existing contents of the image is + // irrelevant and will not be loaded. + void OverrideImageLayout(VkImageLayout new_layout); + + void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) const; + void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout) const; private: - VKTexture(const TextureConfig& tex_config, std::unique_ptr texture, - VkFramebuffer framebuffer); + bool CreateView(VkImageViewType type); - std::unique_ptr m_texture; - VkFramebuffer m_framebuffer; + VkDeviceMemory m_device_memory; + VkImage m_image; + VkImageView m_view = VK_NULL_HANDLE; + mutable VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; + mutable ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined; }; class VKStagingTexture final : public AbstractStagingTexture @@ -65,11 +96,6 @@ public: void Unmap() override; void Flush() override; - // This overload is provided for compatibility as we dropped StagingTexture2D. - // For now, FramebufferManager relies on them. 
But we can drop it once we move that to common. - void CopyFromTexture(Texture2D* src, const MathUtil::Rectangle& src_rect, u32 src_layer, - u32 src_level, const MathUtil::Rectangle& dst_rect); - static std::unique_ptr Create(StagingTextureType type, const TextureConfig& config); @@ -84,25 +110,23 @@ private: class VKFramebuffer final : public AbstractFramebuffer { public: - VKFramebuffer(const VKTexture* color_attachment, const VKTexture* depth_attachment, u32 width, - u32 height, u32 layers, u32 samples, VkFramebuffer fb, - VkRenderPass load_render_pass, VkRenderPass discard_render_pass, - VkRenderPass clear_render_pass); + VKFramebuffer(VKTexture* color_attachment, VKTexture* depth_attachment, u32 width, u32 height, + u32 layers, u32 samples, VkFramebuffer fb, VkRenderPass load_render_pass, + VkRenderPass discard_render_pass, VkRenderPass clear_render_pass); ~VKFramebuffer() override; VkFramebuffer GetFB() const { return m_fb; } + VkRect2D GetRect() const { return VkRect2D{{0, 0}, {m_width, m_height}}; } + VkRenderPass GetLoadRenderPass() const { return m_load_render_pass; } VkRenderPass GetDiscardRenderPass() const { return m_discard_render_pass; } VkRenderPass GetClearRenderPass() const { return m_clear_render_pass; } - void TransitionForRender() const; - void TransitionForSample() const; + void TransitionForRender(); - static std::unique_ptr Create(const VKTexture* color_attachments, - const VKTexture* depth_attachment); + static std::unique_ptr Create(VKTexture* color_attachments, + VKTexture* depth_attachment); protected: - const VKTexture* m_color_attachment; - const VKTexture* m_depth_attachment; VkFramebuffer m_fb; VkRenderPass m_load_render_pass; VkRenderPass m_discard_render_pass; diff --git a/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp b/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp index 3ea824e530..5d8006c2c0 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp +++ b/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp @@ -46,9 
+46,8 @@ static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer) return integer ? integer_type_lookup[t][components - 1] : float_type_lookup[t][components - 1]; } -VertexFormat::VertexFormat(const PortableVertexDeclaration& in_vtx_decl) +VertexFormat::VertexFormat(const PortableVertexDeclaration& vtx_decl) : NativeVertexFormat(vtx_decl) { - vtx_decl = in_vtx_decl; MapAttributes(); SetupInputState(); } @@ -62,50 +61,49 @@ void VertexFormat::MapAttributes() { m_num_attributes = 0; - if (vtx_decl.position.enable) - AddAttribute(SHADER_POSITION_ATTRIB, 0, - VarToVkFormat(vtx_decl.position.type, vtx_decl.position.components, - vtx_decl.position.integer), - vtx_decl.position.offset); + if (m_decl.position.enable) + AddAttribute( + SHADER_POSITION_ATTRIB, 0, + VarToVkFormat(m_decl.position.type, m_decl.position.components, m_decl.position.integer), + m_decl.position.offset); for (uint32_t i = 0; i < 3; i++) { - if (vtx_decl.normals[i].enable) + if (m_decl.normals[i].enable) AddAttribute(SHADER_NORM0_ATTRIB + i, 0, - VarToVkFormat(vtx_decl.normals[i].type, vtx_decl.normals[i].components, - vtx_decl.normals[i].integer), - vtx_decl.normals[i].offset); + VarToVkFormat(m_decl.normals[i].type, m_decl.normals[i].components, + m_decl.normals[i].integer), + m_decl.normals[i].offset); } for (uint32_t i = 0; i < 2; i++) { - if (vtx_decl.colors[i].enable) + if (m_decl.colors[i].enable) AddAttribute(SHADER_COLOR0_ATTRIB + i, 0, - VarToVkFormat(vtx_decl.colors[i].type, vtx_decl.colors[i].components, - vtx_decl.colors[i].integer), - vtx_decl.colors[i].offset); + VarToVkFormat(m_decl.colors[i].type, m_decl.colors[i].components, + m_decl.colors[i].integer), + m_decl.colors[i].offset); } for (uint32_t i = 0; i < 8; i++) { - if (vtx_decl.texcoords[i].enable) + if (m_decl.texcoords[i].enable) AddAttribute(SHADER_TEXTURE0_ATTRIB + i, 0, - VarToVkFormat(vtx_decl.texcoords[i].type, vtx_decl.texcoords[i].components, - vtx_decl.texcoords[i].integer), - 
vtx_decl.texcoords[i].offset); + VarToVkFormat(m_decl.texcoords[i].type, m_decl.texcoords[i].components, + m_decl.texcoords[i].integer), + m_decl.texcoords[i].offset); } - if (vtx_decl.posmtx.enable) - AddAttribute( - SHADER_POSMTX_ATTRIB, 0, - VarToVkFormat(vtx_decl.posmtx.type, vtx_decl.posmtx.components, vtx_decl.posmtx.integer), - vtx_decl.posmtx.offset); + if (m_decl.posmtx.enable) + AddAttribute(SHADER_POSMTX_ATTRIB, 0, + VarToVkFormat(m_decl.posmtx.type, m_decl.posmtx.components, m_decl.posmtx.integer), + m_decl.posmtx.offset); } void VertexFormat::SetupInputState() { m_binding_description.binding = 0; - m_binding_description.stride = vtx_decl.stride; + m_binding_description.stride = m_decl.stride; m_binding_description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; m_input_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; diff --git a/Source/Core/VideoBackends/Vulkan/VertexFormat.h b/Source/Core/VideoBackends/Vulkan/VertexFormat.h index 9b5810ced7..617967202b 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexFormat.h +++ b/Source/Core/VideoBackends/Vulkan/VertexFormat.h @@ -14,7 +14,7 @@ namespace Vulkan class VertexFormat : public ::NativeVertexFormat { public: - VertexFormat(const PortableVertexDeclaration& in_vtx_decl); + VertexFormat(const PortableVertexDeclaration& vtx_decl); // Passed to pipeline state creation const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const; @@ -35,4 +35,4 @@ private: uint32_t m_num_attributes = 0; }; -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp index bd0ab0b54d..7fcbf84013 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp @@ -4,87 +4,140 @@ #include "VideoBackends/Vulkan/VertexManager.h" +#include "Common/Align.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "Common/MsgHandler.h" 
-#include "VideoBackends/Vulkan/BoundingBox.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" #include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/BoundingBox.h" +#include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/IndexGenerator.h" +#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" namespace Vulkan { -// TODO: Clean up this mess -constexpr size_t INITIAL_VERTEX_BUFFER_SIZE = VertexManager::MAXVBUFFERSIZE * 2; -constexpr size_t MAX_VERTEX_BUFFER_SIZE = VertexManager::MAXVBUFFERSIZE * 16; -constexpr size_t INITIAL_INDEX_BUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 2; -constexpr size_t MAX_INDEX_BUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 16; - -VertexManager::VertexManager() - : m_cpu_vertex_buffer(MAXVBUFFERSIZE), m_cpu_index_buffer(MAXIBUFFERSIZE) +static VkBufferView CreateTexelBufferView(VkBuffer buffer, VkFormat vk_format) { + // Create a view of the whole buffer, we'll offset our texel load into it + VkBufferViewCreateInfo view_info = { + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkBufferViewCreateFlags flags + buffer, // VkBuffer buffer + vk_format, // VkFormat format + 0, // VkDeviceSize offset + VK_WHOLE_SIZE // VkDeviceSize range + }; + + VkBufferView view; + VkResult res = vkCreateBufferView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBufferView failed: "); + return VK_NULL_HANDLE; + } + + return view; } 
+VertexManager::VertexManager() = default; + VertexManager::~VertexManager() { -} - -VertexManager* VertexManager::GetInstance() -{ - return static_cast(g_vertex_manager.get()); + DestroyTexelBufferViews(); } bool VertexManager::Initialize() { - m_vertex_stream_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - INITIAL_VERTEX_BUFFER_SIZE, MAX_VERTEX_BUFFER_SIZE); - - m_index_stream_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, - INITIAL_INDEX_BUFFER_SIZE, MAX_INDEX_BUFFER_SIZE); - - if (!m_vertex_stream_buffer || !m_index_stream_buffer) + m_vertex_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE * 4); + m_index_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE * 4); + m_uniform_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE * 4); + if (!m_vertex_stream_buffer || !m_index_stream_buffer || !m_uniform_stream_buffer) { PanicAlert("Failed to allocate streaming buffers"); return false; } + // The validation layer complains if max(offsets) + max(ubo_ranges) >= ubo_size. + // To work around this we reserve the maximum buffer size at all times, but only commit + // as many bytes as we use. + m_uniform_buffer_reserve_size = sizeof(PixelShaderConstants); + m_uniform_buffer_reserve_size = Common::AlignUp(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment()) + + sizeof(VertexShaderConstants); + m_uniform_buffer_reserve_size = Common::AlignUp(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment()) + + sizeof(GeometryShaderConstants); + + // Prefer an 8MB buffer if possible, but use less if the device doesn't support this. + // This buffer is potentially going to be addressed as R8s in the future, so we assume + // that one element is one byte. This doesn't use min() because of an NDK compiler bug.
+ const u32 texel_buffer_size = + TEXEL_STREAM_BUFFER_SIZE > g_vulkan_context->GetDeviceLimits().maxTexelBufferElements ? + g_vulkan_context->GetDeviceLimits().maxTexelBufferElements : + TEXEL_STREAM_BUFFER_SIZE; + m_texel_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, texel_buffer_size); + if (!m_texel_stream_buffer) + { + PanicAlert("Failed to allocate streaming texel buffer"); + return false; + } + + // One buffer view per TexelBufferFormat, all over the same stream buffer. Each Vulkan format + // must match the numeric type named by its TexelBufferFormat (every entry is unsigned-integer). + static constexpr std::array<std::pair<TexelBufferFormat, VkFormat>, NUM_TEXEL_BUFFER_FORMATS> + format_mapping = {{ + {TEXEL_BUFFER_FORMAT_R8_UINT, VK_FORMAT_R8_UINT}, + {TEXEL_BUFFER_FORMAT_R16_UINT, VK_FORMAT_R16_UINT}, + {TEXEL_BUFFER_FORMAT_RGBA8_UINT, VK_FORMAT_R8G8B8A8_UINT}, + {TEXEL_BUFFER_FORMAT_R32G32_UINT, VK_FORMAT_R32G32_UINT}, + }}; + for (const auto& it : format_mapping) + { + if ((m_texel_buffer_views[it.first] = CreateTexelBufferView(m_texel_stream_buffer->GetBuffer(), + it.second)) == VK_NULL_HANDLE) + { + PanicAlert("Failed to create texel buffer view"); + return false; + } + } + + // Bind the buffers to all the known spots even if it's not used, to keep the driver happy.
+ UploadAllConstants(); + StateTracker::GetInstance()->SetUtilityUniformBuffer(m_uniform_stream_buffer->GetBuffer(), 0, + sizeof(VertexShaderConstants)); + for (u32 i = 0; i < NUM_COMPUTE_TEXEL_BUFFERS; i++) + { + StateTracker::GetInstance()->SetTexelBuffer(i, + m_texel_buffer_views[TEXEL_BUFFER_FORMAT_R8_UINT]); + } + return true; } -std::unique_ptr -VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +void VertexManager::DestroyTexelBufferViews() { - return std::make_unique(vtx_decl); -} - -void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) -{ - StateTracker::GetInstance()->UpdateConstants(uniforms, uniforms_size); -} - -void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) -{ - if (cull_all) + for (VkBufferView view : m_texel_buffer_views) { - // Not drawing on the gpu, so store in a heap buffer instead - m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data(); - m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); - IndexGenerator::Start(m_cpu_index_buffer.data()); - return; + if (view != VK_NULL_HANDLE) + vkDestroyBufferView(g_vulkan_context->GetDevice(), view, nullptr); } +} +void VertexManager::ResetBuffer(u32 vertex_stride) +{ // Attempt to allocate from buffers bool has_vbuffer_allocation = m_vertex_stream_buffer->ReserveMemory(MAXVBUFFERSIZE, vertex_stride); @@ -94,7 +147,7 @@ void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) { // Flush any pending commands first, so that we can wait on the fences WARN_LOG(VIDEO, "Executing command list while waiting for space in vertex/index buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); + Renderer::GetInstance()->ExecuteCommandBuffer(false); // Attempt to allocate again, this may cause a fence wait if (!has_vbuffer_allocation) @@ -122,10 +175,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in const u32 index_data_size = num_indices * 
sizeof(u16); *out_base_vertex = - vertex_stride > 0 ? - static_cast(m_vertex_stream_buffer->GetCurrentOffset() / vertex_stride) : - 0; - *out_base_index = static_cast(m_index_stream_buffer->GetCurrentOffset() / sizeof(u16)); + vertex_stride > 0 ? (m_vertex_stream_buffer->GetCurrentOffset() / vertex_stride) : 0; + *out_base_index = m_index_stream_buffer->GetCurrentOffset() / sizeof(u16); m_vertex_stream_buffer->CommitMemory(vertex_data_size); m_index_stream_buffer->CommitMemory(index_data_size); @@ -138,43 +189,206 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in VK_INDEX_TYPE_UINT16); } -void VertexManager::UploadConstants() +void VertexManager::UploadUniforms() { - StateTracker::GetInstance()->UpdateVertexShaderConstants(); - StateTracker::GetInstance()->UpdateGeometryShaderConstants(); - StateTracker::GetInstance()->UpdatePixelShaderConstants(); + UpdateVertexShaderConstants(); + UpdateGeometryShaderConstants(); + UpdatePixelShaderConstants(); } -void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) +void VertexManager::UpdateVertexShaderConstants() { - // Flush all EFB pokes and invalidate the peek cache. - FramebufferManager::GetInstance()->InvalidatePeekCache(); - FramebufferManager::GetInstance()->FlushEFBPokes(); + if (!VertexShaderManager::dirty || !ReserveConstantStorage()) + return; - // If bounding box is enabled, we need to flush any changes first, then invalidate what we have. 
- if (g_vulkan_context->SupportsBoundingBox()) + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_VS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset(), sizeof(VertexShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &VertexShaderManager::constants, + sizeof(VertexShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(VertexShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants)); + VertexShaderManager::dirty = false; +} + +void VertexManager::UpdateGeometryShaderConstants() +{ + if (!GeometryShaderManager::dirty || !ReserveConstantStorage()) + return; + + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_GS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset(), sizeof(GeometryShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &GeometryShaderManager::constants, + sizeof(GeometryShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(GeometryShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants)); + GeometryShaderManager::dirty = false; +} + +void VertexManager::UpdatePixelShaderConstants() +{ + if (!PixelShaderManager::dirty || !ReserveConstantStorage()) + return; + + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_PS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset(), sizeof(PixelShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &PixelShaderManager::constants, + sizeof(PixelShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(PixelShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants)); + PixelShaderManager::dirty = false; +} + +bool VertexManager::ReserveConstantStorage() +{ + if 
(m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment())) { - BoundingBox* bounding_box = Renderer::GetInstance()->GetBoundingBox(); - bool bounding_box_enabled = (::BoundingBox::active && g_ActiveConfig.bBBoxEnable); - if (bounding_box_enabled) + return true; + } + + // The only places that call constant updates are safe to have state restored. + WARN_LOG(VIDEO, "Executing command buffer while waiting for space in uniform buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false); + + // Since we are on a new command buffer, all constants have been invalidated, and we need + // to reupload them. We may as well do this now, since we're issuing a draw anyway. + UploadAllConstants(); + return false; +} + +void VertexManager::UploadAllConstants() +{ + // We are free to re-use parts of the buffer now since we're uploading all constants. + const u32 ub_alignment = static_cast(g_vulkan_context->GetUniformBufferAlignment()); + const u32 pixel_constants_offset = 0; + const u32 vertex_constants_offset = + Common::AlignUp(pixel_constants_offset + sizeof(PixelShaderConstants), ub_alignment); + const u32 geometry_constants_offset = + Common::AlignUp(vertex_constants_offset + sizeof(VertexShaderConstants), ub_alignment); + const u32 allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants); + + // Allocate everything at once. + // We should only be here if the buffer was full and a command buffer was submitted anyway. 
+ if (!m_uniform_stream_buffer->ReserveMemory(allocation_size, ub_alignment)) + { + PanicAlert("Failed to allocate space for constants in streaming buffer"); + return; + } + + // Update bindings + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_PS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset() + pixel_constants_offset, + sizeof(PixelShaderConstants)); + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_VS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset() + vertex_constants_offset, + sizeof(VertexShaderConstants)); + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_GS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset() + geometry_constants_offset, + sizeof(GeometryShaderConstants)); + + // Copy the actual data in + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + pixel_constants_offset, + &PixelShaderManager::constants, sizeof(PixelShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + vertex_constants_offset, + &VertexShaderManager::constants, sizeof(VertexShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + geometry_constants_offset, + &GeometryShaderManager::constants, sizeof(GeometryShaderConstants)); + + // Finally, flush buffer memory after copying + m_uniform_stream_buffer->CommitMemory(allocation_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, allocation_size); + + // Clear dirty flags + VertexShaderManager::dirty = false; + GeometryShaderManager::dirty = false; + PixelShaderManager::dirty = false; +} + +void VertexManager::UploadUtilityUniforms(const void* data, u32 data_size) +{ + InvalidateConstants(); + if (!m_uniform_stream_buffer->ReserveMemory(data_size, + g_vulkan_context->GetUniformBufferAlignment())) + { + WARN_LOG(VIDEO, "Executing command buffer while waiting for ext space in 
uniform buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false); + } + + StateTracker::GetInstance()->SetUtilityUniformBuffer( + m_uniform_stream_buffer->GetBuffer(), m_uniform_stream_buffer->GetCurrentOffset(), data_size); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), data, data_size); + m_uniform_stream_buffer->CommitMemory(data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); +} + +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) +{ + if (data_size > m_texel_stream_buffer->GetCurrentSize()) + return false; + + const u32 elem_size = GetTexelBufferElementSize(format); + if (!m_texel_stream_buffer->ReserveMemory(data_size, elem_size)) + { + // Try submitting cmdbuffer. + WARN_LOG(VIDEO, "Submitting command buffer while waiting for space in texel buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false, false); + if (!m_texel_stream_buffer->ReserveMemory(data_size, elem_size)) { - bounding_box->Flush(); - bounding_box->Invalidate(); + PanicAlert("Failed to allocate %u bytes from texel buffer", data_size); + return false; } } - // Bind all pending state to the command buffer - if (StateTracker::GetInstance()->Bind()) - { - vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_indices, 1, base_index, - base_vertex, 0); - } - else - { - WARN_LOG(VIDEO, "Skipped draw of %u indices", num_indices); - } - - StateTracker::GetInstance()->OnDraw(); + std::memcpy(m_texel_stream_buffer->GetCurrentHostPointer(), data, data_size); + *out_offset = static_cast(m_texel_stream_buffer->GetCurrentOffset()) / elem_size; + m_texel_stream_buffer->CommitMemory(data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); + StateTracker::GetInstance()->SetTexelBuffer(0, m_texel_buffer_views[format]); + return true; } +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* 
palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) +{ + const u32 elem_size = GetTexelBufferElementSize(format); + const u32 palette_elem_size = GetTexelBufferElementSize(palette_format); + const u32 reserve_size = data_size + palette_size + palette_elem_size; + if (reserve_size > m_texel_stream_buffer->GetCurrentSize()) + return false; + + if (!m_texel_stream_buffer->ReserveMemory(reserve_size, elem_size)) + { + // Try submitting cmdbuffer. + WARN_LOG(VIDEO, "Submitting command buffer while waiting for space in texel buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false, false); + if (!m_texel_stream_buffer->ReserveMemory(reserve_size, elem_size)) + { + PanicAlert("Failed to allocate %u bytes from texel buffer", reserve_size); + return false; + } + } + + const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size); + std::memcpy(m_texel_stream_buffer->GetCurrentHostPointer(), data, data_size); + std::memcpy(m_texel_stream_buffer->GetCurrentHostPointer() + palette_byte_offset, palette_data, + palette_size); + *out_offset = static_cast(m_texel_stream_buffer->GetCurrentOffset()) / elem_size; + *out_palette_offset = + (static_cast(m_texel_stream_buffer->GetCurrentOffset()) + palette_byte_offset) / + palette_elem_size; + + m_texel_stream_buffer->CommitMemory(palette_byte_offset + palette_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size); + StateTracker::GetInstance()->SetTexelBuffer(0, m_texel_buffer_views[format]); + StateTracker::GetInstance()->SetTexelBuffer(1, m_texel_buffer_views[palette_format]); + return true; +} } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.h b/Source/Core/VideoBackends/Vulkan/VertexManager.h index 65c31e11f4..0a71903c83 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexManager.h +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.h @@ -8,6 +8,7 @@ #include #include "Common/CommonTypes.h" +#include 
"VideoBackends/Vulkan/VulkanLoader.h" #include "VideoCommon/VertexManagerBase.h" namespace Vulkan @@ -20,26 +21,38 @@ public: VertexManager(); ~VertexManager(); - static VertexManager* GetInstance(); - - bool Initialize(); - - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + bool Initialize() override; void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) override; protected: - void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void ResetBuffer(u32 vertex_stride) override; void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, u32* out_base_index) override; - void UploadConstants() override; - void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; + void UploadUniforms() override; - std::vector m_cpu_vertex_buffer; - std::vector m_cpu_index_buffer; + void DestroyTexelBufferViews(); + + void UpdateVertexShaderConstants(); + void UpdateGeometryShaderConstants(); + void UpdatePixelShaderConstants(); + + // Allocates storage in the uniform buffer of the specified size. If this storage cannot be + // allocated immediately, the current command buffer will be submitted and all stage's + // constants will be re-uploaded. false will be returned in this case, otherwise true. 
+ bool ReserveConstantStorage(); + void UploadAllConstants(); std::unique_ptr m_vertex_stream_buffer; std::unique_ptr m_index_stream_buffer; + std::unique_ptr m_uniform_stream_buffer; + std::unique_ptr m_texel_stream_buffer; + std::array m_texel_buffer_views = {}; + u32 m_uniform_buffer_reserve_size = 0; }; -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj b/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj index 5b465ff733..fab11bab7e 100644 --- a/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj +++ b/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj @@ -38,14 +38,9 @@ - - - - - @@ -53,8 +48,6 @@ - - @@ -65,13 +58,8 @@ - - - - - @@ -80,8 +68,6 @@ - - diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 4eaea78865..72f929af1c 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -255,6 +255,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support. config->backend_info.bSupportsPostProcessing = true; // Assumed support. config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support. + config->backend_info.bSupportsCopyToVram = true; // Assumed support. config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features. config->backend_info.bSupportsGeometryShaders = false; // Dependent on features. config->backend_info.bSupportsGSInstancing = false; // Dependent on features. @@ -264,10 +265,10 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsDepthClamp = false; // Dependent on features. config->backend_info.bSupportsST3CTextures = false; // Dependent on features. config->backend_info.bSupportsBPTCTextures = false; // Dependent on features. + config->backend_info.bSupportsLogicOp = false; // Dependent on features. 
+ config->backend_info.bSupportsLargePoints = false; // Dependent on features. config->backend_info.bSupportsReversedDepthRange = false; // No support yet due to driver bugs. - config->backend_info.bSupportsLogicOp = false; // Dependent on features. - config->backend_info.bSupportsCopyToVram = true; // Assumed support. - config->backend_info.bSupportsFramebufferFetch = false; + config->backend_info.bSupportsFramebufferFetch = false; // No support. } void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) @@ -286,6 +287,7 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD const VkPhysicalDeviceFeatures& features) { config->backend_info.MaxTextureSize = properties.limits.maxImageDimension2D; + config->backend_info.bUsesLowerLeftOrigin = false; config->backend_info.bSupportsDualSourceBlend = (features.dualSrcBlend == VK_TRUE); config->backend_info.bSupportsGeometryShaders = (features.geometryShader == VK_TRUE); config->backend_info.bSupportsGSInstancing = (features.geometryShader == VK_TRUE); @@ -311,6 +313,13 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD config->backend_info.bSupportsST3CTextures = supports_bc; config->backend_info.bSupportsBPTCTextures = supports_bc; + // Some devices don't support point sizes >1 (e.g. Adreno). + // If we can't use a point size above our maximum IR, use triangles instead for EFB pokes. + // This means a 6x increase in the size of the vertices, though. + config->backend_info.bSupportsLargePoints = features.largePoints && + properties.limits.pointSizeRange[0] <= 1.0f && + properties.limits.pointSizeRange[1] >= 16; + // Our usage of primitive restart appears to be broken on AMD's binary drivers. // Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4. 
if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART)) @@ -323,11 +332,11 @@ void VulkanContext::PopulateBackendInfoMultisampleModes( // Query image support for the EFB texture formats. VkImageFormatProperties efb_color_properties = {}; vkGetPhysicalDeviceImageFormatProperties( - gpu, EFB_COLOR_TEXTURE_FORMAT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + gpu, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, &efb_color_properties); VkImageFormatProperties efb_depth_properties = {}; vkGetPhysicalDeviceImageFormatProperties( - gpu, EFB_DEPTH_TEXTURE_FORMAT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + gpu, VK_FORMAT_D32_SFLOAT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0, &efb_depth_properties); // We can only support MSAA if it's supported on our render target formats. @@ -456,15 +465,6 @@ bool VulkanContext::SelectDeviceFeatures() if (!available_features.occlusionQueryPrecise) WARN_LOG(VIDEO, "Vulkan: Missing precise occlusion queries. Perf queries will be inaccurate."); - // Check push constant size. - if (properties.limits.maxPushConstantsSize < static_cast(PUSH_CONSTANT_BUFFER_SIZE)) - { - PanicAlert("Vulkan: Push contant buffer size %u is below minimum %u.", - properties.limits.maxPushConstantsSize, static_cast(PUSH_CONSTANT_BUFFER_SIZE)); - - return false; - } - // Enable the features we use. 
m_device_features.dualSrcBlend = available_features.dualSrcBlend; m_device_features.geometryShader = available_features.geometryShader; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.h b/Source/Core/VideoBackends/Vulkan/VulkanContext.h index 6b254e24c0..3f4492bc4a 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.h +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.h @@ -76,10 +76,6 @@ public: { return m_device_features.samplerAnisotropy == VK_TRUE; } - bool SupportsGeometryShaders() const { return m_device_features.geometryShader == VK_TRUE; } - bool SupportsDualSourceBlend() const { return m_device_features.dualSrcBlend == VK_TRUE; } - bool SupportsLogicOps() const { return m_device_features.logicOp == VK_TRUE; } - bool SupportsBoundingBox() const { return m_device_features.fragmentStoresAndAtomics == VK_TRUE; } bool SupportsPreciseOcclusionQueries() const { return m_device_features.occlusionQueryPrecise == VK_TRUE; diff --git a/Source/Core/VideoBackends/Vulkan/main.cpp b/Source/Core/VideoBackends/Vulkan/main.cpp index 8055aab178..911f8d1991 100644 --- a/Source/Core/VideoBackends/Vulkan/main.cpp +++ b/Source/Core/VideoBackends/Vulkan/main.cpp @@ -9,18 +9,17 @@ #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" #include "VideoBackends/Vulkan/ObjectCache.h" #include "VideoBackends/Vulkan/PerfQuery.h" #include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/SwapChain.h" -#include "VideoBackends/Vulkan/TextureCache.h" #include "VideoBackends/Vulkan/VertexManager.h" #include "VideoBackends/Vulkan/VideoBackend.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoConfig.h" @@ -200,10 
+199,9 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) return false; } - // Remaining classes are also dependent on object/shader cache. + // Remaining classes are also dependent on object cache. g_object_cache = std::make_unique(); - g_shader_cache = std::make_unique(); - if (!g_object_cache->Initialize() || !g_shader_cache->Initialize()) + if (!g_object_cache->Initialize()) { PanicAlert("Failed to initialize Vulkan object cache."); Shutdown(); @@ -223,29 +221,31 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) } } - // Create main wrapper instances. - g_framebuffer_manager = std::make_unique(); - g_renderer = std::make_unique(std::move(swap_chain), wsi.render_surface_scale); - g_vertex_manager = std::make_unique(); - g_texture_cache = std::make_unique(); - ::g_shader_cache = std::make_unique(); - g_perf_query = std::make_unique(); - - // Invoke init methods on main wrapper classes. - // These have to be done before the others because the destructors - // for the remaining classes may call methods on these. - if (!StateTracker::CreateInstance() || !FramebufferManager::GetInstance()->Initialize() || - !Renderer::GetInstance()->Initialize() || !VertexManager::GetInstance()->Initialize() || - !TextureCache::GetInstance()->Initialize() || !PerfQuery::GetInstance()->Initialize() || - !::g_shader_cache->Initialize()) + if (!StateTracker::CreateInstance()) { - PanicAlert("Failed to initialize Vulkan classes."); + PanicAlert("Failed to create state tracker"); Shutdown(); return false; } - // Display the name so the user knows which device was actually created. - INFO_LOG(VIDEO, "Vulkan Device: %s", g_vulkan_context->GetDeviceProperties().deviceName); + // Create main wrapper instances. 
+ g_renderer = std::make_unique(std::move(swap_chain), wsi.render_surface_scale); + g_vertex_manager = std::make_unique(); + g_shader_cache = std::make_unique(); + g_framebuffer_manager = std::make_unique(); + g_texture_cache = std::make_unique(); + g_perf_query = std::make_unique(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize() || !PerfQuery::GetInstance()->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); return true; } @@ -254,23 +254,23 @@ void VideoBackend::Shutdown() if (g_command_buffer_mgr) g_command_buffer_mgr->WaitForGPUIdle(); - if (::g_shader_cache) - ::g_shader_cache->Shutdown(); + if (g_shader_cache) + g_shader_cache->Shutdown(); + + if (g_object_cache) + g_object_cache->Shutdown(); if (g_renderer) g_renderer->Shutdown(); g_perf_query.reset(); - ::g_shader_cache.reset(); g_texture_cache.reset(); + g_framebuffer_manager.reset(); + g_shader_cache.reset(); g_vertex_manager.reset(); g_renderer.reset(); - g_framebuffer_manager.reset(); - StateTracker::DestroyInstance(); - if (g_shader_cache) - g_shader_cache->Shutdown(); - g_shader_cache.reset(); g_object_cache.reset(); + StateTracker::DestroyInstance(); g_command_buffer_mgr.reset(); g_vulkan_context.reset(); ShutdownShared(); diff --git a/Source/Core/VideoCommon/AbstractFramebuffer.cpp b/Source/Core/VideoCommon/AbstractFramebuffer.cpp index f8a9b07ba8..c6a1693788 100644 --- a/Source/Core/VideoCommon/AbstractFramebuffer.cpp +++ b/Source/Core/VideoCommon/AbstractFramebuffer.cpp @@ -5,10 +5,13 @@ #include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractTexture.h" -AbstractFramebuffer::AbstractFramebuffer(AbstractTextureFormat color_format, +AbstractFramebuffer::AbstractFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment, + 
AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples) - : m_color_format(color_format), m_depth_format(depth_format), m_width(width), m_height(height), + : m_color_attachment(color_attachment), m_depth_attachment(depth_attachment), + m_color_format(color_format), m_depth_format(depth_format), m_width(width), m_height(height), m_layers(layers), m_samples(samples) { } @@ -26,7 +29,7 @@ bool AbstractFramebuffer::ValidateConfig(const AbstractTexture* color_attachment // MSAA textures are not supported with mip levels on most backends, and it simplifies our // handling of framebuffers. auto CheckAttachment = [](const AbstractTexture* tex) { - return tex->GetConfig().rendertarget && tex->GetConfig().levels == 1; + return tex->GetConfig().IsRenderTarget() && tex->GetConfig().levels == 1; }; if ((color_attachment && !CheckAttachment(color_attachment)) || (depth_attachment && !CheckAttachment(depth_attachment))) diff --git a/Source/Core/VideoCommon/AbstractFramebuffer.h b/Source/Core/VideoCommon/AbstractFramebuffer.h index 9d4b2d29cd..33b243e3b3 100644 --- a/Source/Core/VideoCommon/AbstractFramebuffer.h +++ b/Source/Core/VideoCommon/AbstractFramebuffer.h @@ -18,13 +18,16 @@ class AbstractTexture; class AbstractFramebuffer { public: - AbstractFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, + AbstractFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples); virtual ~AbstractFramebuffer(); static bool ValidateConfig(const AbstractTexture* color_attachment, const AbstractTexture* depth_attachment); + AbstractTexture* GetColorAttachment() const { return m_color_attachment; } + AbstractTexture* GetDepthAttachment() const { return m_depth_attachment; } AbstractTextureFormat GetColorFormat() const { return m_color_format; } 
AbstractTextureFormat GetDepthFormat() const { return m_depth_format; } bool HasColorBuffer() const { return m_color_format != AbstractTextureFormat::Undefined; } @@ -36,6 +39,8 @@ public: MathUtil::Rectangle GetRect() const; protected: + AbstractTexture* m_color_attachment; + AbstractTexture* m_depth_attachment; AbstractTextureFormat m_color_format; AbstractTextureFormat m_depth_format; u32 m_width; diff --git a/Source/Core/VideoCommon/AbstractPipeline.h b/Source/Core/VideoCommon/AbstractPipeline.h index c0ae61af28..8c7d7482de 100644 --- a/Source/Core/VideoCommon/AbstractPipeline.h +++ b/Source/Core/VideoCommon/AbstractPipeline.h @@ -45,24 +45,7 @@ struct AbstractPipelineConfig RasterizationState rasterization_state; DepthState depth_state; BlendingState blending_state; - - union FramebufferState - { - BitField<0, 8, AbstractTextureFormat> color_texture_format; - BitField<8, 8, AbstractTextureFormat> depth_texture_format; - BitField<16, 8, u32> samples; - BitField<24, 1, u32> per_sample_shading; - - bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; } - bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; } - FramebufferState& operator=(const FramebufferState& rhs) - { - hex = rhs.hex; - return *this; - } - - u32 hex; - } framebuffer_state; + FramebufferState framebuffer_state; AbstractPipelineUsage usage; diff --git a/Source/Core/VideoCommon/AbstractStagingTexture.h b/Source/Core/VideoCommon/AbstractStagingTexture.h index c87dfd70b0..759f1e79e8 100644 --- a/Source/Core/VideoCommon/AbstractStagingTexture.h +++ b/Source/Core/VideoCommon/AbstractStagingTexture.h @@ -20,8 +20,16 @@ public: virtual ~AbstractStagingTexture(); const TextureConfig& GetConfig() const { return m_config; } + u32 GetWidth() const { return m_config.width; } + u32 GetHeight() const { return m_config.height; } + u32 GetLevels() const { return m_config.levels; } + u32 GetLayers() const { return m_config.layers; } + u32 GetSamples() const { return 
m_config.samples; } + AbstractTextureFormat GetFormat() const { return m_config.format; } + MathUtil::Rectangle GetRect() const { return m_config.GetRect(); } StagingTextureType GetType() const { return m_type; } size_t GetTexelSize() const { return m_texel_size; } + bool IsMapped() const { return m_map_pointer != nullptr; } char* GetMappedPointer() const { return m_map_pointer; } size_t GetMappedStride() const { return m_map_stride; } diff --git a/Source/Core/VideoCommon/AbstractTexture.cpp b/Source/Core/VideoCommon/AbstractTexture.cpp index 6190f0e51b..d0c27617db 100644 --- a/Source/Core/VideoCommon/AbstractTexture.cpp +++ b/Source/Core/VideoCommon/AbstractTexture.cpp @@ -15,6 +15,10 @@ AbstractTexture::AbstractTexture(const TextureConfig& c) : m_config(c) { } +void AbstractTexture::FinishedRendering() +{ +} + bool AbstractTexture::Save(const std::string& filename, unsigned int level) { // We can't dump compressed textures currently (it would mean drawing them to a RGBA8 @@ -30,7 +34,7 @@ bool AbstractTexture::Save(const std::string& filename, unsigned int level) // Use a temporary staging texture for the download. Certainly not optimal, // but this is not a frequently-executed code path.. 
TextureConfig readback_texture_config(level_width, level_height, 1, 1, 1, - AbstractTextureFormat::RGBA8, false); + AbstractTextureFormat::RGBA8, 0); auto readback_texture = g_renderer->CreateStagingTexture(StagingTextureType::Readback, readback_texture_config); if (!readback_texture) @@ -84,7 +88,24 @@ bool AbstractTexture::IsStencilFormat(AbstractTextureFormat format) return format == AbstractTextureFormat::D24_S8 || format == AbstractTextureFormat::D32F_S8; } -size_t AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length) +AbstractTextureFormat AbstractTexture::GetColorFormatForDepthFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::D16: + return AbstractTextureFormat::R16; + + case AbstractTextureFormat::D24_S8: // TODO: Incorrect + case AbstractTextureFormat::D32F: + case AbstractTextureFormat::D32F_S8: + return AbstractTextureFormat::R32F; + + default: + return format; + } +} + +u32 AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length) { switch (format) { @@ -111,7 +132,7 @@ size_t AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u } } -size_t AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format) +u32 AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format) { switch (format) { @@ -138,6 +159,21 @@ size_t AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format) } } +u32 AbstractTexture::GetBlockSizeForFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::DXT1: + case AbstractTextureFormat::DXT3: + case AbstractTextureFormat::DXT5: + case AbstractTextureFormat::BPTC: + return 4; + + default: + return 1; + } +} + const TextureConfig& AbstractTexture::GetConfig() const { return m_config; diff --git a/Source/Core/VideoCommon/AbstractTexture.h b/Source/Core/VideoCommon/AbstractTexture.h index b193e5459e..6ff80e9703 100644 --- 
a/Source/Core/VideoCommon/AbstractTexture.h +++ b/Source/Core/VideoCommon/AbstractTexture.h @@ -21,28 +21,33 @@ public: const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) = 0; - virtual void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) = 0; virtual void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) = 0; virtual void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) = 0; + // Hints to the backend that we have finished rendering to this texture, and it will be used + // as a shader resource and sampled. For Vulkan, this transitions the image layout. + virtual void FinishedRendering(); + u32 GetWidth() const { return m_config.width; } u32 GetHeight() const { return m_config.height; } u32 GetLevels() const { return m_config.levels; } u32 GetLayers() const { return m_config.layers; } u32 GetSamples() const { return m_config.samples; } AbstractTextureFormat GetFormat() const { return m_config.format; } + MathUtil::Rectangle GetRect() const { return m_config.GetRect(); } + MathUtil::Rectangle GetMipRect(u32 level) const { return m_config.GetMipRect(level); } bool IsMultisampled() const { return m_config.IsMultisampled(); } bool Save(const std::string& filename, unsigned int level); static bool IsCompressedFormat(AbstractTextureFormat format); static bool IsDepthFormat(AbstractTextureFormat format); static bool IsStencilFormat(AbstractTextureFormat format); - static size_t CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length); - static size_t GetTexelSizeForFormat(AbstractTextureFormat format); + static AbstractTextureFormat GetColorFormatForDepthFormat(AbstractTextureFormat format); + static u32 CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length); + static u32 
GetTexelSizeForFormat(AbstractTextureFormat format); + static u32 GetBlockSizeForFormat(AbstractTextureFormat format); const TextureConfig& GetConfig() const; diff --git a/Source/Core/VideoCommon/BPFunctions.cpp b/Source/Core/VideoCommon/BPFunctions.cpp index 61e4c54b27..7db9d58cad 100644 --- a/Source/Core/VideoCommon/BPFunctions.cpp +++ b/Source/Core/VideoCommon/BPFunctions.cpp @@ -5,8 +5,10 @@ #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" +#include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/BPFunctions.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/RenderState.h" #include "VideoCommon/VertexManagerBase.h" @@ -51,8 +53,10 @@ void SetScissor() bpmem.scissorBR.x - xoff + 1, bpmem.scissorBR.y - yoff + 1); native_rc.ClampUL(0, 0, EFB_WIDTH, EFB_HEIGHT); - TargetRectangle target_rc = g_renderer->ConvertEFBRectangle(native_rc); - g_renderer->SetScissorRect(target_rc); + auto target_rc = g_renderer->ConvertEFBRectangle(native_rc); + auto converted_rc = + g_renderer->ConvertFramebufferRectangle(target_rc, g_renderer->GetCurrentFramebuffer()); + g_renderer->SetScissorRect(converted_rc); } void SetViewport() @@ -122,6 +126,21 @@ void SetViewport() far_depth = 1.0f - min_depth; } + // Clamp to size if oversized not supported. Required for D3D. + if (!g_ActiveConfig.backend_info.bSupportsOversizedViewports) + { + const float max_width = static_cast(g_renderer->GetCurrentFramebuffer()->GetWidth()); + const float max_height = static_cast(g_renderer->GetCurrentFramebuffer()->GetHeight()); + x = MathUtil::Clamp(x, 0.0f, max_width - 1.0f); + y = MathUtil::Clamp(y, 0.0f, max_height - 1.0f); + width = MathUtil::Clamp(width, 1.0f, max_width - x); + height = MathUtil::Clamp(height, 1.0f, max_height - y); + } + + // Lower-left flip. 
+ if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = static_cast(g_renderer->GetCurrentFramebuffer()->GetHeight()) - y - height; + g_renderer->SetViewport(x, y, width, height, near_depth, far_depth); } @@ -188,8 +207,6 @@ void ClearScreen(const EFBRectangle& rc) void OnPixelFormatChange() { - int convtype = -1; - // TODO : Check for Z compression format change // When using 16bit Z, the game may enable a special compression format which we need to handle // If we don't, Z values will be completely screwed up, currently only Star Wars:RS2 uses that. @@ -205,58 +222,74 @@ void OnPixelFormatChange() auto old_format = g_renderer->GetPrevPixelFormat(); auto new_format = bpmem.zcontrol.pixel_format; + g_renderer->StorePixelFormat(new_format); + + DEBUG_LOG(VIDEO, "pixelfmt: pixel=%d, zc=%d", static_cast(new_format), + static_cast(bpmem.zcontrol.zformat)); // no need to reinterpret pixel data in these cases if (new_format == old_format || old_format == PEControl::INVALID_FMT) - goto skip; + return; // Check for pixel format changes switch (old_format) { case PEControl::RGB8_Z24: case PEControl::Z24: + { // Z24 and RGB8_Z24 are treated equal, so just return in this case if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24) - goto skip; + return; if (new_format == PEControl::RGBA6_Z24) - convtype = 0; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB8ToRGBA6); + return; + } else if (new_format == PEControl::RGB565_Z16) - convtype = 1; - break; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB8ToRGB565); + return; + } + } + break; case PEControl::RGBA6_Z24: + { if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24) - convtype = 2; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGBA6ToRGB8); + return; + } else if (new_format == PEControl::RGB565_Z16) - convtype = 3; - break; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGBA6ToRGB565); + return; + } + } + break; case 
PEControl::RGB565_Z16: + { if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24) - convtype = 4; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB565ToRGB8); + return; + } else if (new_format == PEControl::RGBA6_Z24) - convtype = 5; - break; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB565ToRGBA6); + return; + } + } + break; default: break; } - if (convtype == -1) - { - ERROR_LOG(VIDEO, "Unhandled EFB format change: %d to %d", static_cast(old_format), - static_cast(new_format)); - goto skip; - } - - g_renderer->ReinterpretPixelData(convtype); - -skip: - DEBUG_LOG(VIDEO, "pixelfmt: pixel=%d, zc=%d", static_cast(new_format), - static_cast(bpmem.zcontrol.zformat)); - - g_renderer->StorePixelFormat(new_format); + ERROR_LOG(VIDEO, "Unhandled EFB format change: %d to %d", static_cast(old_format), + static_cast(new_format)); } void SetInterlacingMode(const BPCmd& bp) @@ -286,4 +319,4 @@ void SetInterlacingMode(const BPCmd& bp) break; } } -}; +}; // namespace BPFunctions diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index b29492be50..b118080eb5 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -10,11 +10,11 @@ add_library(videocommon BPStructs.cpp CPMemory.cpp CommandProcessor.cpp - Debugger.cpp DriverDetails.cpp Fifo.cpp FPSCounter.cpp - FramebufferManagerBase.cpp + FramebufferManager.cpp + FramebufferShaderGen.cpp GeometryShaderGen.cpp GeometryShaderManager.cpp HiresTextures.cpp diff --git a/Source/Core/VideoCommon/Debugger.cpp b/Source/Core/VideoCommon/Debugger.cpp deleted file mode 100644 index 4dbaa1865a..0000000000 --- a/Source/Core/VideoCommon/Debugger.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "Common/FileUtil.h" -#include "Common/StringUtil.h" -#include "Common/Thread.h" - -#include "VideoCommon/BPMemory.h" -#include "VideoCommon/Debugger.h" -#include "VideoCommon/VideoConfig.h" - -GFXDebuggerBase* g_pdebugger = nullptr; -volatile bool GFXDebuggerPauseFlag = - false; // if true, the GFX thread will be spin locked until it's false again -volatile PauseEvent GFXDebuggerToPauseAtNext = - NOT_PAUSE; // Event which will trigger spin locking the GFX thread -volatile int GFXDebuggerEventToPauseCount = - 0; // Number of events to wait for until GFX thread will be paused - -void GFXDebuggerUpdateScreen() -{ - // TODO: Implement this in a backend-independent way - /* // update screen - if (D3D::bFrameInProgress) - { - D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface()); - D3D::dev->SetDepthStencilSurface(nullptr); - - D3D::dev->StretchRect(FramebufferManager::GetEFBColorRTSurface(), nullptr, - D3D::GetBackBufferSurface(), nullptr, - D3DTEXF_LINEAR); - - D3D::dev->EndScene(); - D3D::dev->Present(nullptr, nullptr, nullptr, nullptr); - - D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface()); - D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface()); - D3D::dev->BeginScene(); - } - else - { - D3D::dev->EndScene(); - D3D::dev->Present(nullptr, nullptr, nullptr, nullptr); - D3D::dev->BeginScene(); - }*/ -} - -// GFX thread -void GFXDebuggerCheckAndPause(bool update) -{ - if (GFXDebuggerPauseFlag) - { - g_pdebugger->OnPause(); - while (GFXDebuggerPauseFlag) - { - if (update) - GFXDebuggerUpdateScreen(); - Common::SleepCurrentThread(5); - } - g_pdebugger->OnContinue(); - } -} - -// GFX thread -void GFXDebuggerToPause(bool update) -{ - GFXDebuggerToPauseAtNext = NOT_PAUSE; - GFXDebuggerPauseFlag = true; - GFXDebuggerCheckAndPause(update); -} - -void ContinueGFXDebugger() -{ - GFXDebuggerPauseFlag = false; -} - -void GFXDebuggerBase::DumpPixelShader(const std::string& path) -{ - const std::string 
filename = StringFromFormat("%sdump_ps.txt", path.c_str()); - - std::string output; - bool useDstAlpha = bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && - bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24; - if (!useDstAlpha) - { - output = "Destination alpha disabled:\n"; - /// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, - /// g_nativeVertexFmt->m_components); - } - else - { - if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend) - { - output = "Using dual source blending for destination alpha:\n"; - /// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, - /// g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); - } - else - { - output = "Using two passes for emulating destination alpha:\n"; - /// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, - /// g_nativeVertexFmt->m_components); - output += "\n\nDestination alpha pass shader:\n"; - /// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, - /// g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); - } - } - - File::CreateEmptyFile(filename); - File::WriteStringToFile(output, filename); -} - -void GFXDebuggerBase::DumpVertexShader(const std::string& path) -{ - const std::string filename = StringFromFormat("%sdump_vs.txt", path.c_str()); - - File::CreateEmptyFile(filename); - /// File::WriteStringToFile(GenerateVertexShaderCode(g_nativeVertexFmt->m_components, - /// g_ActiveConfig.backend_info.APIType), filename); -} - -void GFXDebuggerBase::DumpPixelShaderConstants(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpVertexShaderConstants(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpTextures(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpFrameBuffer(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpGeometry(const std::string& path) -{ - // TODO -} - -void 
GFXDebuggerBase::DumpVertexDecl(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpMatrices(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpStats(const std::string& path) -{ - // TODO -} diff --git a/Source/Core/VideoCommon/Debugger.h b/Source/Core/VideoCommon/Debugger.h deleted file mode 100644 index 1c2d293edc..0000000000 --- a/Source/Core/VideoCommon/Debugger.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include - -class GFXDebuggerBase -{ -public: - virtual ~GFXDebuggerBase() {} - // if paused, debugging functions can be enabled - virtual void OnPause() {} - virtual void OnContinue() {} - void DumpPixelShader(const std::string& path); - void DumpVertexShader(const std::string& path); - void DumpPixelShaderConstants(const std::string& path); - void DumpVertexShaderConstants(const std::string& path); - void DumpTextures(const std::string& path); - void DumpFrameBuffer(const std::string& path); - void DumpGeometry(const std::string& path); - void DumpVertexDecl(const std::string& path); - void DumpMatrices(const std::string& path); - void DumpStats(const std::string& path); -}; - -enum PauseEvent -{ - NOT_PAUSE = 0, - NEXT_FRAME = 1 << 0, - NEXT_FLUSH = 1 << 1, - - NEXT_PIXEL_SHADER_CHANGE = 1 << 2, - NEXT_VERTEX_SHADER_CHANGE = 1 << 3, - NEXT_TEXTURE_CHANGE = 1 << 4, - NEXT_NEW_TEXTURE = 1 << 5, - - NEXT_XFB_CMD = 1 << 6, // TODO - NEXT_EFB_CMD = 1 << 7, // TODO - - NEXT_MATRIX_CMD = 1 << 8, // TODO - NEXT_VERTEX_CMD = 1 << 9, // TODO - NEXT_TEXTURE_CMD = 1 << 10, // TODO - NEXT_LIGHT_CMD = 1 << 11, // TODO - NEXT_FOG_CMD = 1 << 12, // TODO - - NEXT_SET_TLUT = 1 << 13, // TODO - - NEXT_ERROR = 1 << 14, // TODO -}; - -extern GFXDebuggerBase* g_pdebugger; -extern volatile bool GFXDebuggerPauseFlag; -extern volatile PauseEvent GFXDebuggerToPauseAtNext; -extern volatile int GFXDebuggerEventToPauseCount; 
-void ContinueGFXDebugger(); -void GFXDebuggerCheckAndPause(bool update); -void GFXDebuggerToPause(bool update); -void GFXDebuggerUpdateScreen(); - -#define GFX_DEBUGGER_PAUSE_AT(event, update) \ - { \ - if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount <= 0) || \ - GFXDebuggerPauseFlag) \ - GFXDebuggerToPause(update); \ - } -#define GFX_DEBUGGER_PAUSE_LOG_AT(event, update, dumpfunc) \ - { \ - if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount <= 0) || \ - GFXDebuggerPauseFlag) \ - { \ - {dumpfunc}; \ - GFXDebuggerToPause(update); \ - } \ - } -#define GFX_DEBUGGER_LOG_AT(event, dumpfunc) \ - { \ - if ((GFXDebuggerToPauseAtNext & event)) \ - { \ - {dumpfunc}; \ - } \ - } diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp new file mode 100644 index 0000000000..0af19012ea --- /dev/null +++ b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -0,0 +1,764 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "VideoCommon/FramebufferManager.h" +#include +#include "VideoCommon/FramebufferShaderGen.h" +#include "VideoCommon/VertexManagerBase.h" + +#include "Common/Logging/Log.h" +#include "Common/MsgHandler.h" +#include "VideoCommon/AbstractFramebuffer.h" +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractShader.h" +#include "VideoCommon/AbstractStagingTexture.h" +#include "VideoCommon/AbstractTexture.h" +#include "VideoCommon/DriverDetails.h" +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/VideoConfig.h" + +// Maximum number of pixels poked in one batch * 6 +constexpr size_t MAX_POKE_VERTICES = 32768; + +std::unique_ptr g_framebuffer_manager; + +FramebufferManager::FramebufferManager() = default; + +FramebufferManager::~FramebufferManager() +{ + DestroyClearPipelines(); + DestroyPokePipelines(); + DestroyConversionPipelines(); + DestroyReadbackPipelines(); + DestroyReadbackFramebuffer(); + DestroyEFBFramebuffer(); +} + +bool FramebufferManager::Initialize() +{ + if (!CreateEFBFramebuffer()) + { + PanicAlert("Failed to create EFB framebuffer"); + return false; + } + + if (!CreateReadbackFramebuffer()) + { + PanicAlert("Failed to create EFB readback framebuffer"); + return false; + } + + if (!CompileReadbackPipelines()) + { + PanicAlert("Failed to compile EFB readback pipelines"); + return false; + } + + if (!CompileConversionPipelines()) + { + PanicAlert("Failed to compile EFB conversion pipelines"); + return false; + } + + if (!CompileClearPipelines()) + { + PanicAlert("Failed to compile EFB clear pipelines"); + return false; + } + + if (!CompilePokePipelines()) + { + PanicAlert("Failed to compile EFB poke pipelines"); + return false; + } + + return true; +} + +void FramebufferManager::RecreateEFBFramebuffer() +{ + FlushEFBPokes(); + InvalidatePeekCache(); + + DestroyReadbackFramebuffer(); + DestroyEFBFramebuffer(); + if (!CreateEFBFramebuffer() || !CreateReadbackFramebuffer()) + PanicAlert("Failed to recreate EFB 
framebuffer"); +} + +void FramebufferManager::RecompileShaders() +{ + DestroyPokePipelines(); + DestroyClearPipelines(); + DestroyConversionPipelines(); + DestroyReadbackPipelines(); + if (!CompileReadbackPipelines() || !CompileConversionPipelines() || !CompileClearPipelines() || + !CompilePokePipelines()) + { + PanicAlert("Failed to recompile EFB pipelines"); + } +} + +AbstractTextureFormat FramebufferManager::GetEFBColorFormat() +{ + // The EFB can be set to different pixel formats by the game through the + // BPMEM_ZCOMPARE register (which should probably have a different name). + // They are: + // - 24-bit RGB (8-bit components) with 24-bit Z + // - 24-bit RGBA (6-bit components) with 24-bit Z + // - Multisampled 16-bit RGB (5-6-5 format) with 16-bit Z + // We only use one EFB format here: 32-bit ARGB with 32-bit Z. + // Multisampling depends on user settings. + // The distinction becomes important for certain operations, i.e. the + // alpha channel should be ignored if the EFB does not have one. + return AbstractTextureFormat::RGBA8; +} + +AbstractTextureFormat FramebufferManager::GetEFBDepthFormat() +{ + // 32-bit depth clears are broken in the Adreno Vulkan driver, and have no effect. + // To work around this, we use a D24_S8 buffer instead, which results in a loss of accuracy. + // We still resolve this to a R32F texture, as there is no 24-bit format. + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_D32F_CLEAR)) + return AbstractTextureFormat::D24_S8; + else + return AbstractTextureFormat::D32F; +} + +static u32 CalculateEFBLayers() +{ + return (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 
// Creates every GPU object backing the EFB: the colour and depth textures, a second colour
// texture used as the destination for pixel-format conversion, and a framebuffer for each colour
// texture (both share the one depth texture). When multisampling is enabled, additional
// single-sample resolve targets are created as well.
// Returns false as soon as any texture or framebuffer could not be created; objects created
// before the failure are left in their members. Returns true after the main EFB framebuffer has
// been handed to SetAndClearFramebuffer().
bool FramebufferManager::CreateEFBFramebuffer()
{
  const TextureConfig efb_color_texture_config = GetEFBColorTextureConfig();
  const TextureConfig efb_depth_texture_config = GetEFBDepthTextureConfig();

  // We need a second texture to swap with for changing pixel formats
  // (m_efb_convert_color_texture, created from the same config as the main colour texture).
  m_efb_color_texture = g_renderer->CreateTexture(efb_color_texture_config);
  m_efb_depth_texture = g_renderer->CreateTexture(efb_depth_texture_config);
  m_efb_convert_color_texture = g_renderer->CreateTexture(efb_color_texture_config);
  if (!m_efb_color_texture || !m_efb_depth_texture || !m_efb_convert_color_texture)
    return false;

  // Both framebuffers attach the same depth texture; only the colour attachment differs.
  m_efb_framebuffer =
      g_renderer->CreateFramebuffer(m_efb_color_texture.get(), m_efb_depth_texture.get());
  m_efb_convert_framebuffer =
      g_renderer->CreateFramebuffer(m_efb_convert_color_texture.get(), m_efb_depth_texture.get());
  if (!m_efb_framebuffer || !m_efb_convert_framebuffer)
    return false;

  // Create resolved textures if MSAA is on
  if (g_ActiveConfig.MultisamplingEnabled())
  {
    // Same dimensions, layers and format as the EFB colour texture, but with the fifth
    // TextureConfig argument set to 1 (presumably the sample count - compare with
    // GetEFBColorTextureConfig(), which passes iMultisamples there) and no flags.
    m_efb_resolve_color_texture = g_renderer->CreateTexture(
        TextureConfig(efb_color_texture_config.width, efb_color_texture_config.height, 1,
                      efb_color_texture_config.layers, 1, efb_color_texture_config.format, 0));
    // Depth is resolved into a colour-format texture (the colour equivalent of the depth
    // format), flagged as a render target so it can be attached to a framebuffer below.
    m_efb_depth_resolve_texture = g_renderer->CreateTexture(TextureConfig(
        efb_depth_texture_config.width, efb_depth_texture_config.height, 1,
        efb_depth_texture_config.layers, 1,
        AbstractTexture::GetColorFormatForDepthFormat(efb_depth_texture_config.format),
        AbstractTextureFlag_RenderTarget));
    if (!m_efb_resolve_color_texture || !m_efb_depth_resolve_texture)
      return false;

    // Colour-only framebuffer (no depth attachment) targeted by the depth resolve draw.
    m_efb_depth_resolve_framebuffer =
        g_renderer->CreateFramebuffer(m_efb_depth_resolve_texture.get(), nullptr);
    if (!m_efb_depth_resolve_framebuffer)
      return false;
  }

  // Clear the renderable textures out.
  // Colour is cleared to all zeroes; the depth clear value is 1.0 when the backend reports
  // bSupportsReversedDepthRange and 0.0 otherwise.
  g_renderer->SetAndClearFramebuffer(
      m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}},
      g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 1.0f : 0.0f);
  return true;
}
// Returns a texture holding the EFB depth contents that can be sampled without multisampling.
// If the EFB is not multisampled this is the EFB depth texture itself. Otherwise a utility draw
// using m_efb_depth_resolve_pipeline writes the requested region into
// m_efb_depth_resolve_texture, which is then returned.
// The returned pointer is owned by this manager.
// NOTE(review): the template argument of MathUtil::Rectangle appears to be missing in this copy
// of the source (here and in the local below) - confirm against the original file.
AbstractTexture* FramebufferManager::ResolveEFBDepthTexture(const MathUtil::Rectangle& region)
{
  if (!IsEFBMultisampled())
  {
    m_efb_depth_texture->FinishedRendering();
    return m_efb_depth_texture.get();
  }

  // It's not valid to resolve an out-of-range rectangle.
  MathUtil::Rectangle clamped_region = region;
  clamped_region.ClampUL(0, 0, GetEFBWidth(), GetEFBHeight());
  clamped_region = g_renderer->ConvertFramebufferRectangle(clamped_region, m_efb_framebuffer.get());

  // The multisampled depth texture is bound as the input texture in slot 0, so it is marked as
  // finished before the resolve draw reads from it.
  m_efb_depth_texture->FinishedRendering();
  g_renderer->BeginUtilityDrawing();
  g_renderer->SetAndDiscardFramebuffer(m_efb_depth_resolve_framebuffer.get());
  g_renderer->SetPipeline(m_efb_depth_resolve_pipeline.get());
  g_renderer->SetTexture(0, m_efb_depth_texture.get());
  g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState());
  // Viewport and scissor are limited to the clamped region, so only that area is written.
  g_renderer->SetViewportAndScissor(clamped_region);
  // Three vertices starting at index 0, with no vertex buffer uploaded here - presumably the
  // vertex shader generates a screen-covering triangle; confirm against the shader source.
  g_renderer->Draw(0, 3);
  m_efb_depth_resolve_texture->FinishedRendering();
  g_renderer->EndUtilityDrawing();

  return m_efb_depth_resolve_texture.get();
}
+ m_efb_color_texture->FinishedRendering(); + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_efb_convert_framebuffer.get()); + g_renderer->SetViewportAndScissor(m_efb_framebuffer->GetRect()); + g_renderer->SetPipeline(m_format_conversion_pipelines[static_cast(convtype)].get()); + g_renderer->SetTexture(0, m_efb_color_texture.get()); + g_renderer->Draw(0, 3); + + // And swap the framebuffers around, so we do new drawing to the converted framebuffer. + std::swap(m_efb_color_texture, m_efb_convert_color_texture); + std::swap(m_efb_framebuffer, m_efb_convert_framebuffer); + g_renderer->EndUtilityDrawing(); + return true; +} + +bool FramebufferManager::CompileConversionPipelines() +{ + for (u32 i = 0; i < NUM_EFB_REINTERPRET_TYPES; i++) + { + std::unique_ptr pixel_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, FramebufferShaderGen::GenerateFormatConversionShader( + static_cast(i), GetEFBSamples())); + if (!pixel_shader) + return false; + + AbstractPipelineConfig config = {}; + config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader(); + config.geometry_shader = IsEFBStereo() ? 
g_shader_cache->GetTexcoordGeometryShader() : nullptr; + config.pixel_shader = pixel_shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = GetEFBFramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + m_format_conversion_pipelines[i] = g_renderer->CreatePipeline(config); + if (!m_format_conversion_pipelines[i]) + return false; + } + + return true; +} + +void FramebufferManager::DestroyConversionPipelines() +{ + for (auto& pipeline : m_format_conversion_pipelines) + pipeline.reset(); +} + +bool FramebufferManager::PopulateColorReadbackTexture() +{ + g_vertex_manager->OnCPUEFBAccess(); + + // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. + AbstractTexture* src_texture = + ResolveEFBColorTexture(MathUtil::Rectangle(0, 0, GetEFBWidth(), GetEFBHeight())); + if (g_renderer->GetEFBScale() != 1) + { + // Downsample from internal resolution to 1x. + // TODO: This won't produce correct results at IRs above 2x. + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_color_copy_framebuffer.get()); + g_renderer->SetViewportAndScissor(m_color_copy_framebuffer->GetRect()); + g_renderer->SetPipeline(m_color_copy_pipeline.get()); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState()); + g_renderer->Draw(0, 3); + + // Copy from EFB or copy texture to staging texture. 
+ m_color_readback_texture->CopyFromTexture(m_color_copy_texture.get(), + m_color_readback_texture->GetRect(), 0, 0, + m_color_readback_texture->GetRect()); + + g_renderer->EndUtilityDrawing(); + } + else + { + m_color_readback_texture->CopyFromTexture(src_texture, m_color_readback_texture->GetRect(), 0, + 0, m_color_readback_texture->GetRect()); + } + + // Wait until the copy is complete. + m_color_readback_texture->Flush(); + m_color_readback_texture_valid = true; + return true; +} + +bool FramebufferManager::PopulateDepthReadbackTexture() +{ + g_vertex_manager->OnCPUEFBAccess(); + + // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. + AbstractTexture* src_texture = + ResolveEFBDepthTexture(MathUtil::Rectangle(0, 0, GetEFBWidth(), GetEFBHeight())); + if (g_renderer->GetEFBScale() != 1) + { + // Downsample from internal resolution to 1x. + // TODO: This won't produce correct results at IRs above 2x. + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_depth_copy_framebuffer.get()); + g_renderer->SetViewportAndScissor(m_depth_copy_framebuffer->GetRect()); + g_renderer->SetPipeline(m_depth_copy_pipeline.get()); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState()); + g_renderer->Draw(0, 3); + + // No need to call FinishedRendering() here because CopyFromTexture() transitions. + m_depth_readback_texture->CopyFromTexture(m_depth_copy_texture.get(), + m_depth_readback_texture->GetRect(), 0, 0, + m_depth_readback_texture->GetRect()); + + g_renderer->EndUtilityDrawing(); + } + else + { + m_depth_readback_texture->CopyFromTexture(src_texture, m_depth_readback_texture->GetRect(), 0, + 0, m_depth_readback_texture->GetRect()); + } + + // Wait until the copy is complete. 
// Builds the utility pipelines used when reading the EFB back to the CPU:
//  - m_color_copy_pipeline: texture-copy pixel shader, targeting the EFB colour format.
//  - m_depth_copy_pipeline: same state, targeting the colour equivalent of the EFB depth format.
//  - m_efb_depth_resolve_pipeline: only when the EFB is multisampled; uses a generated
//    depth-resolve pixel shader.
// All three are created from one config object that is modified between CreatePipeline() calls,
// so the statement order below matters.
// Returns false if any shader or pipeline could not be created, true otherwise.
bool FramebufferManager::CompileReadbackPipelines()
{
  AbstractPipelineConfig config = {};
  config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader();
  // A geometry shader is only attached when the EFB has more than one layer (stereo).
  config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetTexcoordGeometryShader() : nullptr;
  config.pixel_shader = g_shader_cache->GetTextureCopyPixelShader();
  config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
  config.depth_state = RenderState::GetNoDepthTestingDepthState();
  config.blending_state = RenderState::GetNoBlendingBlendState();
  config.framebuffer_state = RenderState::GetColorFramebufferState(GetEFBColorFormat());
  config.usage = AbstractPipelineUsage::Utility;
  m_color_copy_pipeline = g_renderer->CreatePipeline(config);
  if (!m_color_copy_pipeline)
    return false;

  // same for depth, except different format
  config.framebuffer_state.color_texture_format =
      AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat());
  m_depth_copy_pipeline = g_renderer->CreatePipeline(config);
  if (!m_depth_copy_pipeline)
    return false;

  if (IsEFBMultisampled())
  {
    // The resolve shader is generated for the current EFB sample count.
    auto depth_resolve_shader = g_renderer->CreateShaderFromSource(
        ShaderStage::Pixel, FramebufferShaderGen::GenerateResolveDepthPixelShader(GetEFBSamples()));
    if (!depth_resolve_shader)
      return false;

    // Reuses the depth-copy config (colour-format target), swapping only the pixel shader.
    // NOTE(review): depth_resolve_shader is destroyed when this scope ends; presumably
    // CreatePipeline() does not keep the raw pointer afterwards - confirm per backend.
    config.pixel_shader = depth_resolve_shader.get();
    m_efb_depth_resolve_pipeline = g_renderer->CreatePipeline(config);
    if (!m_efb_depth_resolve_pipeline)
      return false;
  }

  return true;
}
// Releases the intermediate copy textures/framebuffers used for EFB readback and marks both
// CPU-side peek caches as invalid, so the next peek repopulates them.
// Each framebuffer is released before the texture it was created from.
// NOTE(review): m_color_readback_texture and m_depth_readback_texture (the staging textures
// created in CreateReadbackFramebuffer()) are not released here, only flagged invalid - confirm
// that keeping them alive until they are next reassigned is intentional.
void FramebufferManager::DestroyReadbackFramebuffer()
{
  m_depth_copy_framebuffer.reset();
  m_depth_copy_texture.reset();
  m_depth_readback_texture_valid = false;
  m_color_copy_framebuffer.reset();
  m_color_copy_texture.reset();
  m_color_readback_texture_valid = false;
}
+ struct Uniforms + { + float clear_color[4]; + float clear_depth; + float padding1, padding2, padding3; + }; + static_assert(std::is_standard_layout::value); + Uniforms uniforms = {{static_cast((color >> 16) & 0xFF) / 255.0f, + static_cast((color >> 8) & 0xFF) / 255.0f, + static_cast((color >> 0) & 0xFF) / 255.0f, + static_cast((color >> 24) & 0xFF) / 255.0f}, + static_cast(z & 0xFFFFFF) / 16777216.0f}; + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + uniforms.clear_depth = 1.0f - uniforms.clear_depth; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + const auto target_rc = g_renderer->ConvertFramebufferRectangle( + g_renderer->ConvertEFBRectangle(rc), m_efb_framebuffer.get()); + g_renderer->SetPipeline(m_efb_clear_pipelines[clear_color][clear_alpha][clear_z].get()); + g_renderer->SetViewportAndScissor(target_rc); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); +} + +bool FramebufferManager::CompileClearPipelines() +{ + auto vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, FramebufferShaderGen::GenerateClearVertexShader()); + if (!vertex_shader) + return false; + + AbstractPipelineConfig config; + config.vertex_format = nullptr; + config.vertex_shader = vertex_shader.get(); + config.geometry_shader = IsEFBStereo() ? 
g_shader_cache->GetColorGeometryShader() : nullptr; + config.pixel_shader = g_shader_cache->GetColorPixelShader(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetAlwaysWriteDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = GetEFBFramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + + for (u32 color_enable = 0; color_enable < 2; color_enable++) + { + config.blending_state.colorupdate = color_enable != 0; + for (u32 alpha_enable = 0; alpha_enable < 2; alpha_enable++) + { + config.blending_state.alphaupdate = alpha_enable != 0; + for (u32 depth_enable = 0; depth_enable < 2; depth_enable++) + { + config.depth_state.testenable = depth_enable != 0; + config.depth_state.updateenable = depth_enable != 0; + + m_efb_clear_pipelines[color_enable][alpha_enable][depth_enable] = + g_renderer->CreatePipeline(config); + if (!m_efb_clear_pipelines[color_enable][alpha_enable][depth_enable]) + return false; + } + } + } + + return true; +} + +void FramebufferManager::DestroyClearPipelines() +{ + for (u32 color_enable = 0; color_enable < 2; color_enable++) + { + for (u32 alpha_enable = 0; alpha_enable < 2; alpha_enable++) + { + for (u32 depth_enable = 0; depth_enable < 2; depth_enable++) + { + m_efb_clear_pipelines[color_enable][alpha_enable][depth_enable].reset(); + } + } + } +} + +u32 FramebufferManager::PeekEFBColor(u32 x, u32 y) +{ + if (!m_color_readback_texture_valid && !PopulateColorReadbackTexture()) + return 0; + + // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL. 
+ if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + u32 value; + m_color_readback_texture->ReadTexel(x, y, &value); + return value; +} + +float FramebufferManager::PeekEFBDepth(u32 x, u32 y) +{ + if (!m_depth_readback_texture_valid && !PopulateDepthReadbackTexture()) + return 0.0f; + + // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL. + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + float value; + m_depth_readback_texture->ReadTexel(x, y, &value); + return value; +} + +void FramebufferManager::PokeEFBColor(u32 x, u32 y, u32 color) +{ + // Flush if we exceeded the number of vertices per batch. + if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) + FlushEFBPokes(); + + CreatePokeVertices(&m_color_poke_vertices, x, y, 0.0f, color); + + // Update the peek cache if it's valid, since we know the color of the pixel now. + if (m_color_readback_texture_valid) + { + // See comment above for reasoning for lower-left coordinates. + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + m_color_readback_texture->WriteTexel(x, y, &color); + } +} + +void FramebufferManager::PokeEFBDepth(u32 x, u32 y, float depth) +{ + // Flush if we exceeded the number of vertices per batch. + if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) + FlushEFBPokes(); + + CreatePokeVertices(&m_depth_poke_vertices, x, y, depth, 0); + + // Update the peek cache if it's valid, since we know the color of the pixel now. + if (m_depth_readback_texture_valid) + { + // See comment above for reasoning for lower-left coordinates. 
+ if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + m_depth_readback_texture->WriteTexel(x, y, &depth); + } +} + +void FramebufferManager::CreatePokeVertices(std::vector* destination_list, u32 x, + u32 y, float z, u32 color) +{ + const float cs_pixel_width = 1.0f / EFB_WIDTH * 2.0f; + const float cs_pixel_height = 1.0f / EFB_HEIGHT * 2.0f; + if (g_ActiveConfig.backend_info.bSupportsLargePoints) + { + // GPU will expand the point to a quad. + const float cs_x = (static_cast(x) + 0.5f) * cs_pixel_width - 1.0f; + const float cs_y = 1.0f - (static_cast(y) + 0.5f) * cs_pixel_height; + const float point_size = static_cast(g_renderer->GetEFBScale()); + destination_list->push_back({{cs_x, cs_y, z, point_size}, color}); + return; + } + + // Generate quad from the single point (clip-space coordinates). + const float x1 = static_cast(x) * cs_pixel_width - 1.0f; + const float y1 = 1.0f - static_cast(y) * cs_pixel_height; + const float x2 = x1 + cs_pixel_width; + const float y2 = y1 + cs_pixel_height; + destination_list->push_back({{x1, y1, z, 1.0f}, color}); + destination_list->push_back({{x2, y1, z, 1.0f}, color}); + destination_list->push_back({{x1, y2, z, 1.0f}, color}); + destination_list->push_back({{x1, y2, z, 1.0f}, color}); + destination_list->push_back({{x2, y1, z, 1.0f}, color}); + destination_list->push_back({{x2, y2, z, 1.0f}, color}); +} + +void FramebufferManager::FlushEFBPokes() +{ + if (!m_color_poke_vertices.empty()) + { + DrawPokeVertices(m_color_poke_vertices.data(), static_cast(m_color_poke_vertices.size()), + m_color_poke_pipeline.get()); + m_color_poke_vertices.clear(); + } + + if (!m_depth_poke_vertices.empty()) + { + DrawPokeVertices(m_depth_poke_vertices.data(), static_cast(m_depth_poke_vertices.size()), + m_depth_poke_pipeline.get()); + m_depth_poke_vertices.clear(); + } +} + +void FramebufferManager::DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count, + const AbstractPipeline* pipeline) +{ + // Copy to 
// Creates the vertex format and the two pipelines used to draw batched EFB pokes:
//  - m_color_poke_pipeline: writes colour, no depth testing.
//  - m_depth_poke_pipeline: always writes depth, colour writes disabled.
// Both are built from the same config object, which is modified between the two
// CreatePipeline() calls, so the statement order below matters.
// Returns false if the vertex format, the vertex shader or either pipeline cannot be created.
bool FramebufferManager::CompilePokePipelines()
{
  // Describe EFBPokeVertex: four floats of position followed by four unsigned bytes of colour.
  PortableVertexDeclaration vtx_decl = {};
  vtx_decl.position.enable = true;
  vtx_decl.position.type = VAR_FLOAT;
  vtx_decl.position.components = 4;
  vtx_decl.position.integer = false;
  vtx_decl.position.offset = offsetof(EFBPokeVertex, position);
  vtx_decl.colors[0].enable = true;
  vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE;
  vtx_decl.colors[0].components = 4;
  vtx_decl.colors[0].integer = false;
  vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color);
  vtx_decl.stride = sizeof(EFBPokeVertex);

  m_poke_vertex_format = g_renderer->CreateNativeVertexFormat(vtx_decl);
  if (!m_poke_vertex_format)
    return false;

  auto poke_vertex_shader = g_renderer->CreateShaderFromSource(
      ShaderStage::Vertex, FramebufferShaderGen::GenerateEFBPokeVertexShader());
  if (!poke_vertex_shader)
    return false;

  AbstractPipelineConfig config = {};
  config.vertex_format = m_poke_vertex_format.get();
  config.vertex_shader = poke_vertex_shader.get();
  // A geometry shader is only attached when the EFB has more than one layer (stereo).
  config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetColorGeometryShader() : nullptr;
  config.pixel_shader = g_shader_cache->GetColorPixelShader();
  // The primitive type must agree with what CreatePokeVertices() emits: one point per poke when
  // the backend supports large points, otherwise two triangles per poke.
  config.rasterization_state = RenderState::GetNoCullRasterizationState(
      g_ActiveConfig.backend_info.bSupportsLargePoints ? PrimitiveType::Points :
                                                         PrimitiveType::Triangles);
  config.depth_state = RenderState::GetNoDepthTestingDepthState();
  config.blending_state = RenderState::GetNoBlendingBlendState();
  config.framebuffer_state = GetEFBFramebufferState();
  config.usage = AbstractPipelineUsage::Utility;
  m_color_poke_pipeline = g_renderer->CreatePipeline(config);
  if (!m_color_poke_pipeline)
    return false;

  // Turn off color writes, depth writes on for depth pokes.
  config.depth_state = RenderState::GetAlwaysWriteDepthState();
  config.blending_state = RenderState::GetNoColorWriteBlendState();
  m_depth_poke_pipeline = g_renderer->CreatePipeline(config);
  if (!m_depth_poke_pipeline)
    return false;

  return true;
}
// Tests whether two address ranges share at least one address. Each range is treated as
// half-open, [lower, upper): ranges that merely touch (one's upper equals the other's lower)
// do not count as overlapping.
inline bool AddressRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
{
  const bool a_begins_before_b_ends = aLower < bUpper;
  const bool b_begins_before_a_ends = bLower < aUpper;
  return a_begins_before_b_ends && b_begins_before_a_ends;
}
+ void RecreateEFBFramebuffer(); + + // Recompile shaders, use when MSAA mode changes. + void RecompileShaders(); + + // Sets the EFB framebuffer as the renderer's current framebuffer. + void BindEFBFramebuffer(); + + // Resolve color/depth textures to a non-msaa texture, and return it. + AbstractTexture* ResolveEFBColorTexture(const MathUtil::Rectangle& region); + AbstractTexture* ResolveEFBDepthTexture(const MathUtil::Rectangle& region); + + // Reinterpret pixel format of EFB color texture. + // Assumes no render pass is currently in progress. + // Swaps EFB framebuffers, so re-bind afterwards. + bool ReinterpretPixelData(EFBReinterpretType convtype); + + // Clears the EFB using shaders. + void ClearEFB(const MathUtil::Rectangle& rc, bool clear_color, bool clear_alpha, + bool clear_z, u32 color, u32 z); + + // Reads a framebuffer value back from the GPU. This may block if the cache is not current. + u32 PeekEFBColor(u32 x, u32 y); + float PeekEFBDepth(u32 x, u32 y); + void InvalidatePeekCache(); + + // Writes a value to the framebuffer. This will never block, and writes will be batched. 
+ void PokeEFBColor(u32 x, u32 y, u32 color); + void PokeEFBDepth(u32 x, u32 y, float depth); + void FlushEFBPokes(); + +protected: + struct EFBPokeVertex + { + float position[4]; + u32 color; + }; + static_assert(std::is_standard_layout::value, "EFBPokeVertex is standard-layout"); + + bool CreateEFBFramebuffer(); + void DestroyEFBFramebuffer(); + + bool CompileConversionPipelines(); + void DestroyConversionPipelines(); + + bool CompileReadbackPipelines(); + void DestroyReadbackPipelines(); + + bool CreateReadbackFramebuffer(); + void DestroyReadbackFramebuffer(); + + bool CompileClearPipelines(); + void DestroyClearPipelines(); + + bool CompilePokePipelines(); + void DestroyPokePipelines(); + + bool PopulateColorReadbackTexture(); + bool PopulateDepthReadbackTexture(); + + void CreatePokeVertices(std::vector* destination_list, u32 x, u32 y, float z, + u32 color); + + void DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count, + const AbstractPipeline* pipeline); + + std::unique_ptr m_efb_color_texture; + std::unique_ptr m_efb_convert_color_texture; + std::unique_ptr m_efb_depth_texture; + std::unique_ptr m_efb_resolve_color_texture; + std::unique_ptr m_efb_depth_resolve_texture; + + std::unique_ptr m_efb_framebuffer; + std::unique_ptr m_efb_convert_framebuffer; + std::unique_ptr m_efb_depth_resolve_framebuffer; + std::unique_ptr m_efb_depth_resolve_pipeline; + + // Format conversion shaders + std::array, 6> m_format_conversion_pipelines; + + // EFB readback texture + std::unique_ptr m_color_copy_texture; + std::unique_ptr m_depth_copy_texture; + std::unique_ptr m_color_copy_framebuffer; + std::unique_ptr m_depth_copy_framebuffer; + std::unique_ptr m_color_copy_pipeline; + std::unique_ptr m_depth_copy_pipeline; + + // CPU-side EFB readback texture + std::unique_ptr m_color_readback_texture; + std::unique_ptr m_depth_readback_texture; + bool m_color_readback_texture_valid = false; + bool m_depth_readback_texture_valid = false; + + // EFB clear pipelines + 
// Indexed by [color_write_enabled][alpha_write_enabled][depth_write_enabled] + std::array, 2>, 2>, 2> + m_efb_clear_pipelines; + + // EFB poke drawing setup + std::unique_ptr m_poke_vertex_format; + std::unique_ptr m_color_poke_pipeline; + std::unique_ptr m_depth_poke_pipeline; + std::vector m_color_poke_vertices; + std::vector m_depth_poke_vertices; +}; + +extern std::unique_ptr g_framebuffer_manager; diff --git a/Source/Core/VideoCommon/FramebufferManagerBase.cpp b/Source/Core/VideoCommon/FramebufferManagerBase.cpp deleted file mode 100644 index 530aac7584..0000000000 --- a/Source/Core/VideoCommon/FramebufferManagerBase.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoCommon/FramebufferManagerBase.h" - -#include - -#include "VideoCommon/AbstractTexture.h" -#include "VideoCommon/DriverDetails.h" -#include "VideoCommon/RenderBase.h" - -std::unique_ptr g_framebuffer_manager; - -unsigned int FramebufferManagerBase::m_EFBLayers = 1; - -FramebufferManagerBase::~FramebufferManagerBase() = default; - -AbstractTextureFormat FramebufferManagerBase::GetEFBDepthFormat() -{ - // 32-bit depth clears are broken in the Adreno Vulkan driver, and have no effect. - // To work around this, we use a D24_S8 buffer instead, which results in a loss of accuracy. - // We still resolve this to a R32F texture, as there is no 24-bit format. - if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_D32F_CLEAR)) - return AbstractTextureFormat::D24_S8; - else - return AbstractTextureFormat::D32F; -} diff --git a/Source/Core/VideoCommon/FramebufferManagerBase.h b/Source/Core/VideoCommon/FramebufferManagerBase.h deleted file mode 100644 index ff9ff0cc2e..0000000000 --- a/Source/Core/VideoCommon/FramebufferManagerBase.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "Common/CommonTypes.h" - -enum class AbstractTextureFormat : u32; - -inline bool AddressRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper) -{ - return !((aLower >= bUpper) || (bLower >= aUpper)); -} - -class FramebufferManagerBase -{ -public: - virtual ~FramebufferManagerBase(); - - static unsigned int GetEFBLayers() { return m_EFBLayers; } - static AbstractTextureFormat GetEFBDepthFormat(); - -protected: - static unsigned int m_EFBLayers; -}; - -extern std::unique_ptr g_framebuffer_manager; diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp new file mode 100644 index 0000000000..de7bdf3ee8 --- /dev/null +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -0,0 +1,464 @@ +#include "VideoCommon/FramebufferShaderGen.h" +#include +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/VertexShaderGen.h" + +namespace FramebufferShaderGen +{ +static APIType GetAPIType() +{ + return g_ActiveConfig.backend_info.api_type; +} + +static void EmitUniformBufferDeclaration(std::stringstream& ss) +{ + if (GetAPIType() == APIType::D3D) + ss << "cbuffer UBO : register(b0)\n"; + else + ss << "UBO_BINDING(std140, 1) uniform UBO\n"; +} + +static void EmitSamplerDeclarations(std::stringstream& ss, u32 start = 0, u32 end = 1, + bool multisampled = false) +{ + switch (GetAPIType()) + { + case APIType::D3D: + { + for (u32 i = start; i < end; i++) + { + ss << (multisampled ? "Texture2DMSArray" : "Texture2DArray") << " tex" << i + << " : register(t" << i << ");\n"; + ss << "SamplerState" + << " samp" << i << " : register(s" << i << ");\n"; + } + } + break; + + case APIType::OpenGL: + case APIType::Vulkan: + { + for (u32 i = start; i < end; i++) + { + ss << "SAMPLER_BINDING(" << i << ") uniform " + << (multisampled ? 
"sampler2DMSArray" : "sampler2DArray") << " samp" << i << ";\n"; + } + } + break; + default: + break; + } +} + +static void EmitSampleTexture(std::stringstream& ss, u32 n, const char* coords) +{ + switch (GetAPIType()) + { + case APIType::D3D: + ss << "tex" << n << ".Sample(samp" << n << ", " << coords << ")"; + break; + + case APIType::OpenGL: + case APIType::Vulkan: + ss << "texture(samp" << n << ", " << coords << ")"; + break; + + default: + break; + } +} + +static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, + u32 num_color_inputs, bool position_input, + u32 num_tex_outputs, u32 num_color_outputs, + const char* extra_inputs = "") +{ + switch (GetAPIType()) + { + case APIType::D3D: + { + ss << "void main("; + for (u32 i = 0; i < num_tex_inputs; i++) + ss << "in float3 rawtex" << i << " : TEXCOORD" << i << ", "; + for (u32 i = 0; i < num_color_inputs; i++) + ss << "in float4 rawcolor" << i << " : COLOR" << i << ", "; + if (position_input) + ss << "in float4 rawpos : POSITION, "; + ss << extra_inputs; + for (u32 i = 0; i < num_tex_outputs; i++) + ss << "out float3 v_tex" << i << " : TEXCOORD" << i << ", "; + for (u32 i = 0; i < num_color_outputs; i++) + ss << "out float4 v_col" << i << " : COLOR" << i << ", "; + ss << "out float4 opos : SV_Position)\n"; + } + break; + + case APIType::OpenGL: + case APIType::Vulkan: + { + for (u32 i = 0; i < num_tex_inputs; i++) + ss << "ATTRIBUTE_LOCATION(" << (SHADER_TEXTURE0_ATTRIB + i) << ") in float3 rawtex" << i + << ";\n"; + for (u32 i = 0; i < num_color_inputs; i++) + ss << "ATTRIBUTE_LOCATION(" << (SHADER_COLOR0_ATTRIB + i) << ") in float4 rawcolor" << i + << ";\n"; + if (position_input) + ss << "ATTRIBUTE_LOCATION(" << SHADER_POSITION_ATTRIB << ") in float4 rawpos;\n"; + for (u32 i = 0; i < num_tex_outputs; i++) + ss << "VARYING_LOCATION(" << i << ") out float3 v_tex" << i << ";\n"; + for (u32 i = 0; i < num_color_outputs; i++) + ss << "VARYING_LOCATION(" << (num_tex_inputs + i) << ") out 
float4 v_col" << i << ";\n"; + ss << "#define opos gl_Position\n"; + ss << extra_inputs << "\n"; + ss << "void main()\n"; + } + break; + default: + break; + } +} + +static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, + u32 num_color_inputs, const char* output_type = "float4", + const char* extra_vars = "") +{ + switch (GetAPIType()) + { + case APIType::D3D: + { + ss << "void main("; + for (u32 i = 0; i < num_tex_inputs; i++) + ss << "in float3 v_tex" << i << " : TEXCOORD" << i << ", "; + for (u32 i = 0; i < num_color_inputs; i++) + ss << "in float4 v_col" << i << " : COLOR" << i << ", "; + ss << extra_vars << "out " << output_type << " ocol0 : SV_Target)\n"; + } + break; + + case APIType::OpenGL: + case APIType::Vulkan: + { + for (u32 i = 0; i < num_tex_inputs; i++) + ss << "VARYING_LOCATION(" << i << ") in float3 v_tex" << i << ";\n"; + for (u32 i = 0; i < num_color_inputs; i++) + ss << "VARYING_LOCATION(" << (num_tex_inputs + i) << ") in float4 v_col" << i << ";\n"; + ss << "FRAGMENT_OUTPUT_LOCATION(0) out " << output_type << " ocol0;\n"; + ss << extra_vars << "\n"; + ss << "void main()\n"; + } + break; + + default: + break; + } +} + +std::string GenerateScreenQuadVertexShader() +{ + std::stringstream ss; + EmitVertexMainDeclaration(ss, 0, 0, false, 1, 0, + GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : + "#define id gl_VertexID\n"); + ss << "{\n"; + ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"; + ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"; + + // NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left. 
+ if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + + return ss.str(); +} + +std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors) +{ + std::stringstream ss; + if (GetAPIType() == APIType::D3D) + { + ss << "struct VS_OUTPUT\n"; + ss << "{\n"; + for (u32 i = 0; i < num_tex; i++) + ss << " float3 tex" << i << " : TEXCOORD" << i << ";\n"; + for (u32 i = 0; i < num_colors; i++) + ss << " float4 color" << i << " : COLOR" << i << ";\n"; + ss << " float4 position : SV_Position;\n"; + ss << "};\n"; + ss << "struct GS_OUTPUT\n"; + ss << "{"; + for (u32 i = 0; i < num_tex; i++) + ss << " float3 tex" << i << " : TEXCOORD" << i << ";\n"; + for (u32 i = 0; i < num_colors; i++) + ss << " float4 color" << i << " : COLOR" << i << ";\n"; + ss << " float4 position : SV_Position;\n"; + ss << " uint slice : SV_RenderTargetArrayIndex;\n"; + ss << "};\n\n"; + ss << "[maxvertexcount(6)]\n"; + ss << "void main(triangle VS_OUTPUT vso[3], inout TriangleStream output)\n"; + ss << "{\n"; + ss << " for (uint slice = 0; slice < 2u; slice++)\n"; + ss << " {\n"; + ss << " for (int i = 0; i < 3; i++)\n"; + ss << " {\n"; + ss << " GS_OUTPUT gso;\n"; + ss << " gso.position = vso[i].position;\n"; + for (u32 i = 0; i < num_tex; i++) + ss << " gso.tex" << i << " = float3(vso[i].tex" << i << ".xy, float(slice));\n"; + for (u32 i = 0; i < num_colors; i++) + ss << " gso.color" << i << " = vso[i].color" << i << ";\n"; + ss << " gso.slice = slice;\n"; + ss << " output.Append(gso);\n"; + ss << " }\n"; + ss << " output.RestartStrip();\n"; + ss << " }\n"; + ss << "}\n"; + } + else if (GetAPIType() == APIType::OpenGL || GetAPIType() == APIType::Vulkan) + { + ss << "layout(triangles) in;\n"; + ss << "layout(triangle_strip, max_vertices = 6) out;\n"; + for (u32 i = 0; i < num_tex; i++) + { + ss << "layout(location = " << i << ") in float3 v_tex" << i << "[];\n"; + ss << "layout(location = " << i << ") out float3 out_tex" 
<< i << ";\n"; + } + for (u32 i = 0; i < num_colors; i++) + { + ss << "layout(location = " << (num_tex + i) << ") in float4 v_col" << i << "[];\n"; + ss << "layout(location = " << (num_tex + i) << ") out float4 out_col" << i << ";\n"; + } + ss << "\n"; + ss << "void main()\n"; + ss << "{\n"; + ss << " for (int j = 0; j < 2; j++)\n"; + ss << " {\n"; + ss << " gl_Layer = j;\n"; + + // We have to explicitly unroll this loop otherwise the GL compiler gets cranky. + for (u32 v = 0; v < 3; v++) + { + ss << " gl_Position = gl_in[" << v << "].gl_Position;\n"; + for (u32 i = 0; i < num_tex; i++) + ss << " out_tex" << i << " = float3(v_tex" << i << "[" << v << "].xy, float(j));\n"; + for (u32 i = 0; i < num_colors; i++) + ss << " out_col" << i << " = v_col" << i << "[" << v << "];\n"; + ss << " EmitVertex();\n\n"; + } + ss << " EndPrimitive();\n"; + ss << " }\n"; + ss << "}\n"; + } + + return ss.str(); +} + +std::string GenerateTextureCopyVertexShader() +{ + std::stringstream ss; + EmitUniformBufferDeclaration(ss); + ss << "{"; + ss << " float2 src_offset;\n"; + ss << " float2 src_size;\n"; + ss << "};\n\n"; + + EmitVertexMainDeclaration(ss, 0, 0, false, 1, 0, + GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : + "#define id gl_VertexID"); + ss << "{\n"; + ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"; + ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"; + ss << " v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n"; + + // NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left. 
+ if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + + return ss.str(); +} + +std::string GenerateTextureCopyPixelShader() +{ + std::stringstream ss; + EmitSamplerDeclarations(ss, 0, 1, false); + EmitPixelMainDeclaration(ss, 1, 0); + ss << "{\n"; + ss << " ocol0 = "; + EmitSampleTexture(ss, 0, "v_tex0"); + ss << ";\n"; + ss << "}\n"; + return ss.str(); +} + +std::string GenerateColorPixelShader() +{ + std::stringstream ss; + EmitPixelMainDeclaration(ss, 0, 1); + ss << "{\n"; + ss << " ocol0 = v_col0;\n"; + ss << "}\n"; + return ss.str(); +} + +std::string GenerateResolveDepthPixelShader(u32 samples) +{ + std::stringstream ss; + EmitSamplerDeclarations(ss, 0, 1, true); + EmitPixelMainDeclaration(ss, 1, 0, "float", + GetAPIType() == APIType::D3D ? "in float4 ipos : SV_Position, " : ""); + ss << "{\n"; + ss << " int layer = int(v_tex0.z);\n"; + if (GetAPIType() == APIType::D3D) + ss << " int3 coords = int3(int2(ipos.xy), layer);\n"; + else + ss << " int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"; + + // Take the minimum of all depth samples. + if (GetAPIType() == APIType::D3D) + ss << " ocol0 = tex0.Load(coords, 0).r;\n"; + else + ss << " ocol0 = texelFetch(samp0, coords, 0).r;\n"; + ss << " for (int i = 1; i < " << samples << "; i++)\n"; + if (GetAPIType() == APIType::D3D) + ss << " ocol0 = min(ocol0, tex0.Load(coords, i).r);\n"; + else + ss << " ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n"; + + ss << "}\n"; + return ss.str(); +} + +std::string GenerateClearVertexShader() +{ + std::stringstream ss; + EmitUniformBufferDeclaration(ss); + ss << "{\n"; + ss << " float4 clear_color;\n"; + ss << " float clear_depth;\n"; + ss << "};\n"; + + EmitVertexMainDeclaration(ss, 0, 0, false, 0, 1, + GetAPIType() == APIType::D3D ? 
"in uint id : SV_VertexID, " : + "#define id gl_VertexID\n"); + ss << "{\n"; + ss << " float2 coord = float2(float((id << 1) & 2), float(id & 2));\n"; + ss << " opos = float4(coord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), clear_depth, 1.0f);\n"; + ss << " v_col0 = clear_color;\n"; + + // NDC space is flipped in Vulkan + if (GetAPIType() == APIType::Vulkan) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + + return ss.str(); +} + +std::string GenerateEFBPokeVertexShader() +{ + std::stringstream ss; + EmitVertexMainDeclaration(ss, 0, 1, true, 0, 1); + ss << "{\n"; + ss << " v_col0 = rawcolor0;\n"; + ss << " opos = float4(rawpos.xyz, 1.0f);\n"; + if (g_ActiveConfig.backend_info.bSupportsLargePoints) + ss << " gl_PointSize = rawpos.w;\n"; + + // NDC space is flipped in Vulkan. + if (GetAPIType() == APIType::Vulkan) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + return ss.str(); +} + +std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples) +{ + std::stringstream ss; + EmitSamplerDeclarations(ss, 0, 1, samples > 1); + EmitPixelMainDeclaration(ss, 1, 0, "float4", + GetAPIType() == APIType::D3D ? + "in float4 ipos : SV_Position, in uint isample : SV_SampleIndex, " : + ""); + ss << "{\n"; + ss << " int layer = int(v_tex0.z);\n"; + if (GetAPIType() == APIType::D3D) + ss << " int3 coords = int3(int2(ipos.xy), layer);\n"; + else + ss << " int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"; + + if (samples == 1) + { + // No MSAA at all. + if (GetAPIType() == APIType::D3D) + ss << " float4 val = tex0.Load(int4(coords, 0));\n"; + else + ss << " float4 val = texelFetch(samp0, coords, 0);\n"; + } + else if (g_ActiveConfig.bSSAA) + { + // Sample shading, shader runs once per sample + if (GetAPIType() == APIType::D3D) + ss << " float4 val = tex0.Load(coords, isample);"; + else + ss << " float4 val = texelFetch(samp0, coords, gl_SampleID);"; + } + else + { + // MSAA without sample shading, average out all samples. 
+ ss << " float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"; + ss << " for (int i = 0; i < " << samples << "; i++)\n"; + if (GetAPIType() == APIType::D3D) + ss << " val += tex0.Load(coords, i);\n"; + else + ss << " val += texelFetch(samp0, coords, i);\n"; + ss << " val /= float(" << samples << ");\n"; + } + + switch (convtype) + { + case EFBReinterpretType::RGB8ToRGBA6: + ss << " int4 src8 = int4(round(val * 255.f));\n"; + ss << " int4 dst6;\n"; + ss << " dst6.r = src8.r >> 2;\n"; + ss << " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"; + ss << " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"; + ss << " dst6.a = src8.b & 0x3F;\n"; + ss << " ocol0 = float4(dst6) / 63.f;\n"; + break; + + case EFBReinterpretType::RGB8ToRGB565: + ss << " ocol0 = val;\n"; + break; + + case EFBReinterpretType::RGBA6ToRGB8: + ss << " int4 src6 = int4(round(val * 63.f));\n"; + ss << " int4 dst8;\n"; + ss << " dst8.r = (src6.r << 2) | (src6.g >> 4);\n"; + ss << " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"; + ss << " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"; + ss << " dst8.a = 255;\n"; + ss << " ocol0 = float4(dst8) / 255.f;\n"; + break; + + case EFBReinterpretType::RGBA6ToRGB565: + ss << " ocol0 = val;\n"; + break; + + case EFBReinterpretType::RGB565ToRGB8: + ss << " ocol0 = val;\n"; + break; + + case EFBReinterpretType::RGB565ToRGBA6: + // + ss << " ocol0 = val;\n"; + break; + } + + ss << "}\n"; + return ss.str(); +} + +} // namespace FramebufferShaderGen diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.h b/Source/Core/VideoCommon/FramebufferShaderGen.h new file mode 100644 index 0000000000..0e065521cf --- /dev/null +++ b/Source/Core/VideoCommon/FramebufferShaderGen.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include "VideoCommon/VideoCommon.h" + +enum class EFBReinterpretType; + +namespace FramebufferShaderGen +{ +struct Config +{ + Config(APIType api_type_, u32 efb_samples_, u32 efb_layers_, bool ssaa_) + : api_type(api_type_), efb_samples(efb_samples_), 
efb_layers(efb_layers_), ssaa(ssaa_) + { + } + + APIType api_type; + u32 efb_samples; + u32 efb_layers; + bool ssaa; +}; + +std::string GenerateScreenQuadVertexShader(); +std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors); +std::string GenerateTextureCopyVertexShader(); +std::string GenerateTextureCopyPixelShader(); +std::string GenerateResolveDepthPixelShader(u32 samples); +std::string GenerateClearVertexShader(); +std::string GenerateEFBPokeVertexShader(); +std::string GenerateColorPixelShader(); +std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples); + +} // namespace FramebufferShaderGen diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index 75cf841a32..b7c3477a60 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -104,17 +104,17 @@ struct hash class NativeVertexFormat { public: + NativeVertexFormat(const PortableVertexDeclaration& vtx_decl) : m_decl(vtx_decl) {} virtual ~NativeVertexFormat() {} + NativeVertexFormat(const NativeVertexFormat&) = delete; NativeVertexFormat& operator=(const NativeVertexFormat&) = delete; NativeVertexFormat(NativeVertexFormat&&) = default; NativeVertexFormat& operator=(NativeVertexFormat&&) = default; - u32 GetVertexStride() const { return vtx_decl.stride; } - const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; } + u32 GetVertexStride() const { return m_decl.stride; } + const PortableVertexDeclaration& GetVertexDeclaration() const { return m_decl; } protected: - // Let subclasses construct. 
- NativeVertexFormat() {} - PortableVertexDeclaration vtx_decl; + PortableVertexDeclaration m_decl; }; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 9fbbb2cda4..9936bcb85c 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -171,8 +171,7 @@ PixelShaderUid GetPixelShaderUid() uid_data->genMode_numindstages = bpmem.genMode.numindstages; uid_data->genMode_numtevstages = bpmem.genMode.numtevstages; uid_data->genMode_numtexgens = bpmem.genMode.numtexgens; - uid_data->bounding_box = g_ActiveConfig.BBoxUseFragmentShaderImplementation() && - g_ActiveConfig.bBBoxEnable && BoundingBox::active; + uid_data->bounding_box = g_ActiveConfig.bBBoxEnable && BoundingBox::active; uid_data->rgba6_format = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor; uid_data->dither = bpmem.blendmode.dither && uid_data->rgba6_format; @@ -456,10 +455,6 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg out.Write("globallycoherent RWBuffer bbox_data : register(u2);\n"); } } - - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, num_texgens, host_config, ""); - out.Write("};\n"); } static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, @@ -804,7 +799,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host } else { - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n"); @@ -818,7 +813,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host // Note: z-textures are not written to depth buffer if early depth test is used if (uid_data->per_pixel_depth && uid_data->early_ztest) { - if (ApiType == APIType::D3D || ApiType == 
APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); @@ -839,7 +834,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host if (uid_data->per_pixel_depth && uid_data->late_ztest) { - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); @@ -1316,7 +1311,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat if (per_pixel_depth) { out.Write("\t\tdepth = %s;\n", - (ApiType == APIType::D3D || ApiType == APIType::Vulkan) ? "0.0" : "1.0"); + !g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? "0.0" : "1.0"); } // ZCOMPLOC HACK: diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 3ff6f9f073..7799a0abcb 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -522,9 +522,7 @@ void PixelShaderManager::SetBlendModeChanged() void PixelShaderManager::SetBoundingBoxActive(bool active) { - const bool enable = - active && g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation(); - + const bool enable = active && g_ActiveConfig.bBBoxEnable; if (enable == (constants.bounding_box != 0)) return; diff --git a/Source/Core/VideoCommon/PostProcessing.cpp b/Source/Core/VideoCommon/PostProcessing.cpp index a83469e451..b1c9c42056 100644 --- a/Source/Core/VideoCommon/PostProcessing.cpp +++ b/Source/Core/VideoCommon/PostProcessing.cpp @@ -5,74 +5,44 @@ #include #include +#include "Common/Assert.h" #include "Common/CommonPaths.h" #include "Common/CommonTypes.h" #include "Common/FileSearch.h" #include "Common/FileUtil.h" #include "Common/IniFile.h" #include "Common/Logging/Log.h" 
+#include "Common/MsgHandler.h" #include "Common/StringUtil.h" +#include "VideoCommon/AbstractFramebuffer.h" +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractShader.h" +#include "VideoCommon/AbstractTexture.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/PostProcessing.h" +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/ShaderCache.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoConfig.h" +namespace VideoCommon +{ static const char s_default_shader[] = "void main() { SetOutput(Sample()); }\n"; -PostProcessingShaderImplementation::PostProcessingShaderImplementation() -{ - m_timer.Start(); -} +PostProcessingConfiguration::PostProcessingConfiguration() = default; -PostProcessingShaderImplementation::~PostProcessingShaderImplementation() -{ - m_timer.Stop(); -} +PostProcessingConfiguration::~PostProcessingConfiguration() = default; -static std::vector GetShaders(const std::string& sub_dir = "") -{ - std::vector paths = - Common::DoFileSearch({File::GetUserPath(D_SHADERS_IDX) + sub_dir, - File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir}, - {".glsl"}); - std::vector result; - for (std::string path : paths) - { - std::string name; - SplitPath(path, nullptr, &name, nullptr); - result.push_back(name); - } - return result; -} - -std::vector PostProcessingShaderImplementation::GetShaderList(APIType api_type) -{ - // Currently there is no differentiation between API types and shader languages. - // This could change in the future, hence the api_type parameter, but ideally, - // shaders should be compatible across backends. 
- if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - return GetShaders(); - - return {}; -} - -std::vector PostProcessingShaderImplementation::GetAnaglyphShaderList(APIType api_type) -{ - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - return GetShaders(ANAGLYPH_DIR DIR_SEP); - - return {}; -} - -PostProcessingShaderConfiguration::PostProcessingShaderConfiguration() = default; - -PostProcessingShaderConfiguration::~PostProcessingShaderConfiguration() = default; - -std::string PostProcessingShaderConfiguration::LoadShader(std::string shader) +void PostProcessingConfiguration::LoadShader(const std::string& shader) { // Load the shader from the configuration if there isn't one sent to us. - if (shader.empty()) - shader = g_ActiveConfig.sPostProcessingShader; m_current_shader = shader; + if (shader.empty()) + { + LoadDefaultShader(); + return; + } const std::string sub_dir = (g_Config.stereo_mode == StereoMode::Anaglyph) ? ANAGLYPH_DIR DIR_SEP : ""; @@ -81,32 +51,32 @@ std::string PostProcessingShaderConfiguration::LoadShader(std::string shader) std::string code; std::string path = File::GetUserPath(D_SHADERS_IDX) + sub_dir + shader + ".glsl"; - if (shader.empty()) + if (!File::Exists(path)) { - code = s_default_shader; + // Fallback to shared user dir + path = File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir + shader + ".glsl"; } - else - { - if (!File::Exists(path)) - { - // Fallback to shared user dir - path = File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir + shader + ".glsl"; - } - if (!File::ReadFileToString(path, code)) - { - ERROR_LOG(VIDEO, "Post-processing shader not found: %s", path.c_str()); - code = s_default_shader; - } + if (!File::ReadFileToString(path, code)) + { + ERROR_LOG(VIDEO, "Post-processing shader not found: %s", path.c_str()); + LoadDefaultShader(); + return; } LoadOptions(code); LoadOptionsConfiguration(); - - return code; + m_current_shader_code = code; } -void 
PostProcessingShaderConfiguration::LoadOptions(const std::string& code) +void PostProcessingConfiguration::LoadDefaultShader() +{ + m_options.clear(); + m_any_options_dirty = false; + m_current_shader_code = s_default_shader; +} + +void PostProcessingConfiguration::LoadOptions(const std::string& code) { const std::string config_start_delimiter = "[configuration]"; const std::string config_end_delimiter = "[/configuration]"; @@ -254,7 +224,7 @@ void PostProcessingShaderConfiguration::LoadOptions(const std::string& code) } } -void PostProcessingShaderConfiguration::LoadOptionsConfiguration() +void PostProcessingConfiguration::LoadOptionsConfiguration() { IniFile ini; ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX)); @@ -288,7 +258,7 @@ void PostProcessingShaderConfiguration::LoadOptionsConfiguration() } } -void PostProcessingShaderConfiguration::SaveOptionsConfiguration() +void PostProcessingConfiguration::SaveOptionsConfiguration() { IniFile ini; ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX)); @@ -331,13 +301,7 @@ void PostProcessingShaderConfiguration::SaveOptionsConfiguration() ini.Save(File::GetUserPath(F_DOLPHINCONFIG_IDX)); } -void PostProcessingShaderConfiguration::ReloadShader() -{ - m_current_shader = ""; -} - -void PostProcessingShaderConfiguration::SetOptionf(const std::string& option, int index, - float value) +void PostProcessingConfiguration::SetOptionf(const std::string& option, int index, float value) { auto it = m_options.find(option); @@ -346,7 +310,7 @@ void PostProcessingShaderConfiguration::SetOptionf(const std::string& option, in m_any_options_dirty = true; } -void PostProcessingShaderConfiguration::SetOptioni(const std::string& option, int index, s32 value) +void PostProcessingConfiguration::SetOptioni(const std::string& option, int index, s32 value) { auto it = m_options.find(option); @@ -355,7 +319,7 @@ void PostProcessingShaderConfiguration::SetOptioni(const std::string& option, in m_any_options_dirty = true; } -void 
PostProcessingShaderConfiguration::SetOptionb(const std::string& option, bool value) +void PostProcessingConfiguration::SetOptionb(const std::string& option, bool value) { auto it = m_options.find(option); @@ -363,3 +327,384 @@ void PostProcessingShaderConfiguration::SetOptionb(const std::string& option, bo it->second.m_dirty = true; m_any_options_dirty = true; } + +PostProcessing::PostProcessing() +{ + m_timer.Start(); +} + +PostProcessing::~PostProcessing() +{ + m_timer.Stop(); +} + +static std::vector GetShaders(const std::string& sub_dir = "") +{ + std::vector paths = + Common::DoFileSearch({File::GetUserPath(D_SHADERS_IDX) + sub_dir, + File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir}, + {".glsl"}); + std::vector result; + for (std::string path : paths) + { + std::string name; + SplitPath(path, nullptr, &name, nullptr); + result.push_back(name); + } + return result; +} + +std::vector PostProcessing::GetShaderList() +{ + return GetShaders(); +} + +std::vector PostProcessing::GetAnaglyphShaderList() +{ + return GetShaders(ANAGLYPH_DIR DIR_SEP); +} + +bool PostProcessing::Initialize(AbstractTextureFormat format) +{ + m_framebuffer_format = format; + if (!CompileVertexShader() || !CompilePixelShader() || !CompilePipeline()) + return false; + + return true; +} + +void PostProcessing::RecompileShader() +{ + m_pipeline.reset(); + m_pixel_shader.reset(); + if (!CompilePixelShader()) + return; + + CompilePipeline(); +} + +void PostProcessing::RecompilePipeline() +{ + m_pipeline.reset(); + CompilePipeline(); +} + +void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, + const MathUtil::Rectangle& src, + const AbstractTexture* src_tex, int src_layer) +{ + if (g_renderer->GetCurrentFramebuffer()->GetColorFormat() != m_framebuffer_format) + { + m_framebuffer_format = g_renderer->GetCurrentFramebuffer()->GetColorFormat(); + RecompilePipeline(); + } + + if (!m_pipeline) + return; + + FillUniformBuffer(src, src_tex, src_layer); + 
g_vertex_manager->UploadUtilityUniforms(m_uniform_staging_buffer.data(), + static_cast(m_uniform_staging_buffer.size())); + + g_renderer->SetViewportAndScissor( + g_renderer->ConvertFramebufferRectangle(dst, g_renderer->GetCurrentFramebuffer())); + g_renderer->SetPipeline(m_pipeline.get()); + g_renderer->SetTexture(0, src_tex); + g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState()); + g_renderer->Draw(0, 3); +} + +std::string PostProcessing::GetUniformBufferHeader() const +{ + std::stringstream ss; + u32 unused_counter = 1; + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + ss << "cbuffer PSBlock : register(b0) {\n"; + else + ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; + + // Builtin uniforms + ss << " float4 resolution;\n"; + ss << " float4 src_rect;\n"; + ss << " uint time;\n"; + ss << " int layer;\n"; + for (u32 i = 0; i < 2; i++) + ss << " uint ubo_align_" << unused_counter++ << "_;\n"; + ss << "\n"; + + // Custom options/uniforms + for (const auto& it : m_config.GetOptions()) + { + if (it.second.m_type == + PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_BOOL) + { + ss << StringFromFormat(" int %s;\n", it.first.c_str()); + for (u32 i = 0; i < 3; i++) + ss << " int ubo_align_" << unused_counter++ << "_;\n"; + } + else if (it.second.m_type == + PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER) + { + u32 count = static_cast(it.second.m_integer_values.size()); + if (count == 1) + ss << StringFromFormat(" int %s;\n", it.first.c_str()); + else + ss << StringFromFormat(" int%u %s;\n", count, it.first.c_str()); + + for (u32 i = count; i < 4; i++) + ss << " int ubo_align_" << unused_counter++ << "_;\n"; + } + else if (it.second.m_type == + PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT) + { + u32 count = static_cast(it.second.m_float_values.size()); + if (count == 1) + ss << StringFromFormat(" float %s;\n", it.first.c_str()); + else + ss << StringFromFormat(" 
float%u %s;\n", count, it.first.c_str()); + + for (u32 i = count; i < 4; i++) + ss << " float ubo_align_" << unused_counter++ << "_;\n"; + } + } + + ss << "};\n\n"; + return ss.str(); +} + +std::string PostProcessing::GetHeader() const +{ + std::stringstream ss; + ss << GetUniformBufferHeader(); + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + { + ss << "Texture2DArray samp0 : register(t0);\n"; + ss << "SamplerState samp0_ss : register(s0);\n"; + } + else + { + ss << "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"; + ss << "VARYING_LOCATION(0) in float3 v_tex0;\n"; + ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"; + } + + // Rename main, since we need to set up globals + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + { + ss << R"( +#define main real_main +static float3 v_tex0; +static float4 ocol0; + +// Wrappers for sampling functions. +#define texture(sampler, coords) sampler.Sample(sampler##_ss, coords) +#define textureOffset(sampler, coords, offset) sampler.Sample(sampler##_ss, coords, offset) +)"; + } + + ss << R"( +float4 Sample() { return texture(samp0, float3(v_tex0.xy, float(layer))); } +float4 SampleLocation(float2 location) { return texture(samp0, float3(location, float(layer))); } +float4 SampleLayer(int layer) { return texture(samp0, float3(v_tex0.xy, float(layer))); } +#define SampleOffset(offset) textureOffset(samp0, float3(v_tex0.xy, float(layer)), offset) + +float2 GetResolution() +{ + return resolution.xy; +} + +float2 GetInvResolution() +{ + return resolution.zw; +} + +float2 GetCoordinates() +{ + return v_tex0.xy; +} + +uint GetTime() +{ + return time; +} + +void SetOutput(float4 color) +{ + ocol0 = color; +} + +#define GetOption(x) (x) +#define OptionEnabled(x) ((x) != 0) + +)"; + return ss.str(); +} + +std::string PostProcessing::GetFooter() const +{ + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + { + return R"( + +#undef main +void main(in float3 v_tex0_ : TEXCOORD0, out float4 ocol0_ : 
SV_Target) +{ + v_tex0 = v_tex0_; + real_main(); + ocol0_ = ocol0; +})"; + } + else + { + return {}; + } +} + +bool PostProcessing::CompileVertexShader() +{ + std::stringstream ss; + ss << GetUniformBufferHeader(); + + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + { + ss << "void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"; + ss << " out float4 opos : SV_Position) {\n"; + } + else + { + ss << "VARYING_LOCATION(0) out float3 v_tex0;\n"; + ss << "#define id gl_VertexID\n"; + ss << "#define opos gl_Position\n"; + ss << "void main() {\n"; + } + ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"; + ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"; + ss << " v_tex0 = float3(src_rect.xy + (src_rect.zw * v_tex0.xy), 0.0f);\n"; + + if (g_ActiveConfig.backend_info.api_type == APIType::Vulkan) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + + m_vertex_shader = g_renderer->CreateShaderFromSource(ShaderStage::Vertex, ss.str()); + if (!m_vertex_shader) + { + PanicAlert("Failed to compile post-processing vertex shader"); + return false; + } + + return true; +} + +struct BuiltinUniforms +{ + float resolution[4]; + float src_rect[4]; + s32 time; + u32 layer; + u32 padding[2]; +}; + +size_t PostProcessing::CalculateUniformsSize() const +{ + // Allocate a vec4 for each uniform to simplify allocation. 
+ return sizeof(BuiltinUniforms) + m_config.GetOptions().size() * sizeof(float) * 4; +} + +void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle& src, + const AbstractTexture* src_tex, int src_layer) +{ + const float rcp_src_width = 1.0f / src_tex->GetWidth(); + const float rcp_src_height = 1.0f / src_tex->GetHeight(); + BuiltinUniforms builtin_uniforms = { + {static_cast(src_tex->GetWidth()), static_cast(src_tex->GetHeight()), + rcp_src_width, rcp_src_height}, + {static_cast(src.left) * rcp_src_width, static_cast(src.top) * rcp_src_height, + static_cast(src.GetWidth()) * rcp_src_width, + static_cast(src.GetHeight()) * rcp_src_height}, + static_cast(m_timer.GetTimeElapsed()), + static_cast(src_layer), + }; + + u8* buf = m_uniform_staging_buffer.data(); + std::memcpy(buf, &builtin_uniforms, sizeof(builtin_uniforms)); + buf += sizeof(builtin_uniforms); + + for (const auto& it : m_config.GetOptions()) + { + union + { + u32 as_bool[4]; + s32 as_int[4]; + float as_float[4]; + } value = {}; + + switch (it.second.m_type) + { + case PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_BOOL: + value.as_bool[0] = it.second.m_bool_value ? 1 : 0; + break; + + case PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER: + ASSERT(it.second.m_integer_values.size() < 4); + std::copy_n(it.second.m_integer_values.begin(), it.second.m_integer_values.size(), + value.as_int); + break; + + case PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT: + ASSERT(it.second.m_float_values.size() < 4); + std::copy_n(it.second.m_float_values.begin(), it.second.m_float_values.size(), + value.as_float); + break; + } + + std::memcpy(buf, &value, sizeof(value)); + buf += sizeof(value); + } +} + +bool PostProcessing::CompilePixelShader() +{ + m_pipeline.reset(); + m_pixel_shader.reset(); + + // Generate GLSL and compile the new shader. 
+ m_config.LoadShader(g_ActiveConfig.sPostProcessingShader); + m_pixel_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, GetHeader() + m_config.GetShaderCode() + GetFooter()); + if (!m_pixel_shader) + { + PanicAlert("Failed to compile post-processing shader %s", m_config.GetShader().c_str()); + + // Use default shader. + m_config.LoadDefaultShader(); + m_pixel_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, GetHeader() + m_config.GetShaderCode() + GetFooter()); + if (!m_pixel_shader) + return false; + } + + m_uniform_staging_buffer.resize(CalculateUniformsSize()); + return true; +} + +bool PostProcessing::CompilePipeline() +{ + AbstractPipelineConfig config = {}; + config.vertex_shader = m_vertex_shader.get(); + config.geometry_shader = g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer ? + g_shader_cache->GetTexcoordGeometryShader() : + nullptr; + config.pixel_shader = m_pixel_shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetColorFramebufferState(m_framebuffer_format); + config.usage = AbstractPipelineUsage::Utility; + m_pipeline = g_renderer->CreatePipeline(config); + if (!m_pipeline) + return false; + + return true; +} +} // namespace VideoCommon diff --git a/Source/Core/VideoCommon/PostProcessing.h b/Source/Core/VideoCommon/PostProcessing.h index e610b7fc38..ede8adaf6e 100644 --- a/Source/Core/VideoCommon/PostProcessing.h +++ b/Source/Core/VideoCommon/PostProcessing.h @@ -10,9 +10,16 @@ #include "Common/CommonTypes.h" #include "Common/Timer.h" +#include "VideoCommon/TextureConfig.h" #include "VideoCommon/VideoCommon.h" -class PostProcessingShaderConfiguration +class AbstractTexture; +class AbstractPipeline; +class AbstractShader; + +namespace VideoCommon +{ +class 
PostProcessingConfiguration { public: struct ConfigurationOption @@ -48,16 +55,17 @@ public: using ConfigMap = std::map; - PostProcessingShaderConfiguration(); - virtual ~PostProcessingShaderConfiguration(); + PostProcessingConfiguration(); + virtual ~PostProcessingConfiguration(); // Loads the configuration with a shader // If the argument is "" the class will load the shader from the g_activeConfig option. // Returns the loaded shader source from file - std::string LoadShader(std::string shader = ""); + void LoadShader(const std::string& shader); + void LoadDefaultShader(); void SaveOptionsConfiguration(); - void ReloadShader(); const std::string& GetShader() const { return m_current_shader; } + const std::string& GetShaderCode() const { return m_current_shader_code; } bool IsDirty() const { return m_any_options_dirty; } void SetDirty(bool dirty) { m_any_options_dirty = dirty; } bool HasOptions() const { return m_options.size() > 0; } @@ -72,26 +80,53 @@ public: private: bool m_any_options_dirty = false; std::string m_current_shader; + std::string m_current_shader_code; ConfigMap m_options; void LoadOptions(const std::string& code); void LoadOptionsConfiguration(); }; -class PostProcessingShaderImplementation +class PostProcessing { public: - PostProcessingShaderImplementation(); - virtual ~PostProcessingShaderImplementation(); + PostProcessing(); + virtual ~PostProcessing(); - static std::vector GetShaderList(APIType api_type); - static std::vector GetAnaglyphShaderList(APIType api_type); + static std::vector GetShaderList(); + static std::vector GetAnaglyphShaderList(); - PostProcessingShaderConfiguration* GetConfig() { return &m_config; } + PostProcessingConfiguration* GetConfig() { return &m_config; } + + bool Initialize(AbstractTextureFormat format); + + void RecompileShader(); + void RecompilePipeline(); + + void BlitFromTexture(const MathUtil::Rectangle& dst, const MathUtil::Rectangle& src, + const AbstractTexture* src_tex, int src_layer); protected: + 
std::string GetUniformBufferHeader() const; + std::string GetHeader() const; + std::string GetFooter() const; + + bool CompileVertexShader(); + bool CompilePixelShader(); + bool CompilePipeline(); + + size_t CalculateUniformsSize() const; + void FillUniformBuffer(const MathUtil::Rectangle& src, const AbstractTexture* src_tex, + int src_layer); + // Timer for determining our time value Common::Timer m_timer; + PostProcessingConfiguration m_config; - PostProcessingShaderConfiguration m_config; + std::unique_ptr m_vertex_shader; + std::unique_ptr m_pixel_shader; + std::unique_ptr m_pipeline; + AbstractTextureFormat m_framebuffer_format = AbstractTextureFormat::Undefined; + std::vector m_uniform_staging_buffer; }; +} // namespace VideoCommon diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 5a5b1c5aba..132ce4bd6c 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -50,14 +50,15 @@ #include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" +#include "VideoCommon/BPFunctions.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/CPMemory.h" #include "VideoCommon/CommandProcessor.h" -#include "VideoCommon/Debugger.h" #include "VideoCommon/FPSCounter.h" -#include "VideoCommon/FramebufferManagerBase.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/ImageWrite.h" #include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/PostProcessing.h" #include "VideoCommon/ShaderCache.h" @@ -68,12 +69,10 @@ #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" +#include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" -// TODO: Move these out of here. 
-int frameCount; - std::unique_ptr g_renderer; static float AspectToWidescreen(float aspect) @@ -97,7 +96,14 @@ Renderer::~Renderer() = default; bool Renderer::Initialize() { - return InitializeImGui(); + if (!InitializeImGui()) + return false; + + m_post_processor = std::make_unique(); + if (!m_post_processor->Initialize(m_backbuffer_format)) + return false; + + return true; } void Renderer::Shutdown() @@ -106,6 +112,142 @@ void Renderer::Shutdown() // can require additional graphics sub-systems so it needs to be done first ShutdownFrameDumping(); ShutdownImGui(); + m_post_processor.reset(); +} + +void Renderer::BeginUtilityDrawing() +{ + g_vertex_manager->Flush(); +} + +void Renderer::EndUtilityDrawing() +{ + // Reset framebuffer/scissor/viewport. Pipeline will be reset at next draw. + g_framebuffer_manager->BindEFBFramebuffer(); + BPFunctions::SetScissor(); + BPFunctions::SetViewport(); +} + +void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) +{ + m_current_framebuffer = framebuffer; +} + +void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) +{ + m_current_framebuffer = framebuffer; +} + +void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, + const ClearColor& color_value, float depth_value) +{ + m_current_framebuffer = framebuffer; +} + +std::unique_ptr Renderer::CreateShaderFromSource(ShaderStage stage, + const std::string& source) +{ + return CreateShaderFromSource(stage, source.c_str(), source.size()); +} + +void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, + u32 color, u32 z) +{ + g_framebuffer_manager->ClearEFB(rc, colorEnable, alphaEnable, zEnable, color, z); +} + +void Renderer::ReinterpretPixelData(EFBReinterpretType convtype) +{ + g_framebuffer_manager->ReinterpretPixelData(convtype); +} + +u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) +{ + if (type == EFBAccessType::PeekColor) + { + u32 color = 
g_framebuffer_manager->PeekEFBColor(x, y); + + // a little-endian value is expected to be returned + color = ((color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000)); + + // check what to do with the alpha channel (GX_PokeAlphaRead) + PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); + + if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) + { + color = RGBA8ToRGBA6ToRGBA8(color); + } + else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) + { + color = RGBA8ToRGB565ToRGBA8(color); + } + if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) + { + color |= 0xFF000000; + } + + if (alpha_read_mode.ReadMode == 2) + { + return color; // GX_READ_NONE + } + else if (alpha_read_mode.ReadMode == 1) + { + return color | 0xFF000000; // GX_READ_FF + } + else /*if(alpha_read_mode.ReadMode == 0)*/ + { + return color & 0x00FFFFFF; // GX_READ_00 + } + } + else // if (type == EFBAccessType::PeekZ) + { + // Depth buffer is inverted for improved precision near far plane + float depth = g_framebuffer_manager->PeekEFBDepth(x, y); + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + depth = 1.0f - depth; + + u32 ret = 0; + if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) + { + // if Z is in 16 bit format you must return a 16 bit integer + ret = MathUtil::Clamp(static_cast(depth * 65536.0f), 0, 0xFFFF); + } + else + { + ret = MathUtil::Clamp(static_cast(depth * 16777216.0f), 0, 0xFFFFFF); + } + + return ret; + } +} + +void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) +{ + if (type == EFBAccessType::PokeColor) + { + for (size_t i = 0; i < num_points; i++) + { + // Convert to expected format (BGRA->RGBA) + // TODO: Check alpha, depending on mode? 
+ const EfbPokeData& point = points[i]; + u32 color = ((point.data & 0xFF00FF00) | ((point.data >> 16) & 0xFF) | + ((point.data << 16) & 0xFF0000)); + g_framebuffer_manager->PokeEFBColor(point.x, point.y, color); + } + } + else // if (type == EFBAccessType::PokeZ) + { + for (size_t i = 0; i < num_points; i++) + { + // Convert to floating-point depth. + const EfbPokeData& point = points[i]; + float depth = float(point.data & 0xFFFFFF) / 16777216.0f; + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + depth = 1.0f - depth; + + g_framebuffer_manager->PokeEFBDepth(point.x, point.y, depth); + } + } } void Renderer::RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight, @@ -169,6 +311,8 @@ bool Renderer::CalculateTargetSize() int new_efb_width = 0; int new_efb_height = 0; std::tie(new_efb_width, new_efb_height) = CalculateTargetScale(EFB_WIDTH, EFB_HEIGHT); + new_efb_width = std::max(new_efb_width, 1); + new_efb_height = std::max(new_efb_height, 1); if (new_efb_width != m_target_width || new_efb_height != m_target_height) { @@ -251,6 +395,11 @@ void Renderer::CheckForConfigChanges() // Update texture cache settings with any changed options. g_texture_cache->OnConfigChanged(g_ActiveConfig); + // Check for post-processing shader changes. Done up here as it doesn't affect anything outside + // the post-processor. Note that options are applied every frame, so no need to check those. + if (m_post_processor->GetConfig()->GetShader() != g_ActiveConfig.sPostProcessingShader) + m_post_processor->RecompileShader(); + // Determine which (if any) settings have changed. ShaderHostConfig new_host_config = ShaderHostConfig::GetCurrent(); u32 changed_bits = 0; @@ -278,13 +427,30 @@ void Renderer::CheckForConfigChanges() // Notify the backend of the changes, if any. OnConfigChanged(changed_bits); + // Framebuffer changed? 
+ if (changed_bits & (CONFIG_CHANGE_BIT_MULTISAMPLES | CONFIG_CHANGE_BIT_STEREO_MODE | + CONFIG_CHANGE_BIT_TARGET_SIZE)) + { + g_framebuffer_manager->RecreateEFBFramebuffer(); + } + // Reload shaders if host config has changed. if (changed_bits & (CONFIG_CHANGE_BIT_HOST_CONFIG | CONFIG_CHANGE_BIT_MULTISAMPLES)) { OSD::AddMessage("Video config changed, reloading shaders.", OSD::Duration::NORMAL); + WaitForGPUIdle(); SetPipeline(nullptr); g_vertex_manager->InvalidatePipelineObject(); - g_shader_cache->SetHostConfig(new_host_config, g_ActiveConfig.iMultisamples); + g_shader_cache->SetHostConfig(new_host_config); + g_shader_cache->Reload(); + g_framebuffer_manager->RecompileShaders(); + } + + // Viewport and scissor rect have to be reset since they will be scaled differently. + if (changed_bits & CONFIG_CHANGE_BIT_TARGET_SIZE) + { + BPFunctions::SetViewport(); + BPFunctions::SetScissor(); } } @@ -389,6 +555,86 @@ void Renderer::ResizeSurface() m_surface_resized.Set(); } +void Renderer::SetViewportAndScissor(const MathUtil::Rectangle& rect, float min_depth, + float max_depth) +{ + SetViewport(static_cast(rect.left), static_cast(rect.top), + static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), min_depth, + max_depth); + SetScissorRect(rect); +} + +void Renderer::ScaleTexture(AbstractFramebuffer* dst_framebuffer, + const MathUtil::Rectangle& dst_rect, + const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect) +{ + ASSERT(dst_framebuffer->GetColorFormat() == AbstractTextureFormat::RGBA8); + + BeginUtilityDrawing(); + + // The shader needs to know the source rectangle. 
+ const auto converted_src_rect = g_renderer->ConvertFramebufferRectangle( + src_rect, src_texture->GetWidth(), src_texture->GetHeight()); + const float rcp_src_width = 1.0f / src_texture->GetWidth(); + const float rcp_src_height = 1.0f / src_texture->GetHeight(); + const std::array uniforms = {{converted_src_rect.left * rcp_src_width, + converted_src_rect.top * rcp_src_height, + converted_src_rect.GetWidth() * rcp_src_width, + converted_src_rect.GetHeight() * rcp_src_height}}; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + // Discard if we're overwriting the whole thing. + if (static_cast(dst_rect.GetWidth()) == dst_framebuffer->GetWidth() && + static_cast(dst_rect.GetHeight()) == dst_framebuffer->GetHeight()) + { + SetAndDiscardFramebuffer(dst_framebuffer); + } + else + { + SetFramebuffer(dst_framebuffer); + } + + SetViewportAndScissor(ConvertFramebufferRectangle(dst_rect, dst_framebuffer)); + SetPipeline(dst_framebuffer->GetLayers() > 1 ? g_shader_cache->GetRGBA8StereoCopyPipeline() : + g_shader_cache->GetRGBA8CopyPipeline()); + SetTexture(0, src_texture); + SetSamplerState(0, RenderState::GetLinearSamplerState()); + Draw(0, 3); + EndUtilityDrawing(); + if (dst_framebuffer->GetColorAttachment()) + dst_framebuffer->GetColorAttachment()->FinishedRendering(); +} + +MathUtil::Rectangle +Renderer::ConvertFramebufferRectangle(const MathUtil::Rectangle& rect, + const AbstractFramebuffer* framebuffer) +{ + return ConvertFramebufferRectangle(rect, framebuffer->GetWidth(), framebuffer->GetHeight()); +} + +MathUtil::Rectangle Renderer::ConvertFramebufferRectangle(const MathUtil::Rectangle& rect, + u32 fb_width, u32 fb_height) +{ + MathUtil::Rectangle ret = rect; + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + { + ret.top = fb_height - rect.bottom; + ret.bottom = fb_height - rect.top; + } + return ret; +} + +TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) +{ + TargetRectangle result; + result.left = 
EFBToScaledX(rc.left); + result.top = EFBToScaledY(rc.top); + result.right = EFBToScaledX(rc.right); + result.bottom = EFBToScaledY(rc.bottom); + return result; +} + std::tuple Renderer::ScaleToDisplayAspectRatio(const int width, const int height) const { @@ -700,7 +946,7 @@ bool Renderer::InitializeImGui() vdecl.texcoords[0] = {VAR_FLOAT, 2, offsetof(ImDrawVert, uv), true, false}; vdecl.colors[0] = {VAR_UNSIGNED_BYTE, 4, offsetof(ImDrawVert, col), true, false}; vdecl.stride = sizeof(ImDrawVert); - m_imgui_vertex_format = g_vertex_manager->CreateNativeVertexFormat(vdecl); + m_imgui_vertex_format = CreateNativeVertexFormat(vdecl); if (!m_imgui_vertex_format) { PanicAlert("Failed to create imgui vertex format"); @@ -723,10 +969,10 @@ bool Renderer::InitializeImGui() pconfig.vertex_format = m_imgui_vertex_format.get(); pconfig.vertex_shader = vertex_shader.get(); pconfig.pixel_shader = pixel_shader.get(); - pconfig.rasterization_state.hex = RenderState::GetNoCullRasterizationState().hex; - pconfig.rasterization_state.primitive = PrimitiveType::Triangles; - pconfig.depth_state.hex = RenderState::GetNoDepthTestingDepthStencilState().hex; - pconfig.blending_state.hex = RenderState::GetNoBlendingBlendState().hex; + pconfig.rasterization_state = + RenderState::GetCullBackFaceRasterizationState(PrimitiveType::Triangles); + pconfig.depth_state = RenderState::GetNoDepthTestingDepthState(); + pconfig.blending_state = RenderState::GetNoBlendingBlendState(); pconfig.blending_state.blendenable = true; pconfig.blending_state.srcfactor = BlendMode::SRCALPHA; pconfig.blending_state.dstfactor = BlendMode::INVSRCALPHA; @@ -752,7 +998,7 @@ bool Renderer::InitializeImGui() io.Fonts->GetTexDataAsRGBA32(&font_tex_pixels, &font_tex_width, &font_tex_height); TextureConfig font_tex_config(font_tex_width, font_tex_height, 1, 1, 1, - AbstractTextureFormat::RGBA8, false); + AbstractTextureFormat::RGBA8, 0); std::unique_ptr font_tex = CreateTexture(font_tex_config); if (!font_tex) { @@ -799,10 
+1045,8 @@ void Renderer::BeginImGuiFrame() ImGui::NewFrame(); } -void Renderer::RenderImGui() +void Renderer::DrawImGui() { - ImGui::Render(); - ImDrawData* draw_data = ImGui::GetDrawData(); if (!draw_data) return; @@ -842,9 +1086,11 @@ void Renderer::RenderImGui() continue; } - SetScissorRect(MathUtil::Rectangle( - static_cast(cmd.ClipRect.x), static_cast(cmd.ClipRect.y), - static_cast(cmd.ClipRect.z), static_cast(cmd.ClipRect.w))); + SetScissorRect(ConvertFramebufferRectangle( + MathUtil::Rectangle( + static_cast(cmd.ClipRect.x), static_cast(cmd.ClipRect.y), + static_cast(cmd.ClipRect.z), static_cast(cmd.ClipRect.w)), + m_current_framebuffer)); SetTexture(0, reinterpret_cast(cmd.TextureId)); DrawIndexed(base_index, cmd.ElemCount, base_vertex); base_index += cmd.ElemCount; @@ -859,7 +1105,10 @@ std::unique_lock Renderer::GetImGuiLock() void Renderer::BeginUIFrame() { - ResetAPIState(); + if (IsHeadless()) + return; + + BeginUtilityDrawing(); BindBackbuffer({0.0f, 0.0f, 0.0f, 1.0f}); } @@ -867,16 +1116,19 @@ void Renderer::EndUIFrame() { { auto lock = GetImGuiLock(); - RenderImGui(); + ImGui::Render(); } + if (!IsHeadless()) { + DrawImGui(); + std::lock_guard guard(m_swap_mutex); PresentBackbuffer(); + EndUtilityDrawing(); } BeginImGuiFrame(); - RestoreAPIState(); } void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, @@ -946,32 +1198,34 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const // with the loader, and it has not been unmapped yet. Force a pipeline flush to avoid this. g_vertex_manager->Flush(); - // Render the XFB to the screen. - ResetAPIState(); - BindBackbuffer({0.0f, 0.0f, 0.0f, 1.0f}); - UpdateDrawRectangle(); - RenderXFBToScreen(xfb_entry->texture.get(), xfb_rect); - - // Hold the imgui lock while we're presenting. - // It's only to prevent races on inputs anyway, at this point. + // Render any UI elements to the draw list. 
{ auto lock = GetImGuiLock(); DrawDebugText(); OSD::DrawMessages(); - - RenderImGui(); + ImGui::Render(); } - // Present to the window system. + // Render the XFB to the screen. + BeginUtilityDrawing(); + if (!IsHeadless()) { - std::lock_guard guard(m_swap_mutex); - PresentBackbuffer(); - } + BindBackbuffer({{0.0f, 0.0f, 0.0f, 1.0f}}); + UpdateDrawRectangle(); + RenderXFBToScreen(xfb_entry->texture.get(), xfb_rect); + DrawImGui(); - // Update the window size based on the frame that was just rendered. - // Due to depending on guest state, we need to call this every frame. - SetWindowSize(texture_config.width, texture_config.height); + // Present to the window system. + { + std::lock_guard guard(m_swap_mutex); + PresentBackbuffer(); + } + + // Update the window size based on the frame that was just rendered. + // Due to depending on guest state, we need to call this every frame. + SetWindowSize(texture_config.width, texture_config.height); + } m_fps_counter.Update(); @@ -984,12 +1238,11 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const if (IsFrameDumping()) DumpCurrentFrame(); - frameCount++; - GFX_DEBUGGER_PAUSE_AT(NEXT_FRAME, true); - // Begin new frame + m_frame_count++; stats.ResetFrame(); g_shader_cache->RetrieveAsyncShaders(); + g_vertex_manager->OnEndFrame(); BeginImGuiFrame(); // We invalidate the pipeline object at the start of the frame. @@ -1003,13 +1256,13 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const g_texture_cache->FlushEFBCopies(); // Remove stale EFB/XFB copies. - g_texture_cache->Cleanup(frameCount); + g_texture_cache->Cleanup(m_frame_count); // Handle any config changes, this gets propogated to the backend. 
CheckForConfigChanges(); g_Config.iSaveTargetId = 0; - RestoreAPIState(); + EndUtilityDrawing(); Core::Callback_VideoCopiedToXFB(true); } @@ -1028,6 +1281,24 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const } } +void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) +{ + const auto target_rc = GetTargetRectangle(); + if (g_ActiveConfig.stereo_mode == StereoMode::SBS || + g_ActiveConfig.stereo_mode == StereoMode::TAB) + { + TargetRectangle left_rc, right_rc; + std::tie(left_rc, right_rc) = ConvertStereoRectangle(target_rc); + + m_post_processor->BlitFromTexture(left_rc, rc, texture, 0); + m_post_processor->BlitFromTexture(right_rc, rc, texture, 1); + } + else + { + m_post_processor->BlitFromTexture(target_rc, rc, texture, 0); + } +} + bool Renderer::IsFrameDumping() { if (m_screenshot_request.IsSet()) @@ -1040,15 +1311,6 @@ bool Renderer::IsFrameDumping() } void Renderer::DumpCurrentFrame() -{ - // Scale/render to frame dump texture. - RenderFrameDump(); - - // Queue a readback for the next frame. - QueueFrameDumpReadback(); -} - -void Renderer::RenderFrameDump() { int target_width, target_height; if (!g_ActiveConfig.bInternalResolutionFrameDumps && !IsHeadless()) @@ -1063,50 +1325,76 @@ void Renderer::RenderFrameDump() m_last_xfb_texture->GetConfig().width, m_last_xfb_texture->GetConfig().height); } - // Ensure framebuffer exists (we lazily allocate it in case frame dumping isn't used). - // Or, resize texture if it isn't large enough to accommodate the current frame. - if (!m_frame_dump_render_texture || - m_frame_dump_render_texture->GetConfig().width != static_cast(target_width) || - m_frame_dump_render_texture->GetConfig().height != static_cast(target_height)) + // We only need to render a copy if we need to stretch/scale the XFB copy. 
+ const AbstractTexture* source_tex = m_last_xfb_texture; + MathUtil::Rectangle source_rect = m_last_xfb_region; + if (source_rect.GetWidth() != target_width || source_rect.GetHeight() != target_height) { - // Recreate texture objects. Release before creating so we don't temporarily use twice the RAM. - TextureConfig config(target_width, target_height, 1, 1, 1, AbstractTextureFormat::RGBA8, true); - m_frame_dump_render_texture.reset(); - m_frame_dump_render_texture = CreateTexture(config); - ASSERT(m_frame_dump_render_texture); + if (!CheckFrameDumpRenderTexture(target_width, target_height)) + return; + + source_tex = m_frame_dump_render_texture.get(); + source_rect = MathUtil::Rectangle(0, 0, target_width, target_height); + ScaleTexture(m_frame_dump_render_framebuffer.get(), source_rect, m_last_xfb_texture, + m_last_xfb_region); } - // Scaling is likely to occur here, but if possible, do a bit-for-bit copy. - if (m_last_xfb_region.GetWidth() != target_width || - m_last_xfb_region.GetHeight() != target_height) - { - m_frame_dump_render_texture->ScaleRectangleFromTexture( - m_last_xfb_texture, m_last_xfb_region, EFBRectangle{0, 0, target_width, target_height}); - } - else - { - m_frame_dump_render_texture->CopyRectangleFromTexture( - m_last_xfb_texture, m_last_xfb_region, 0, 0, - EFBRectangle{0, 0, target_width, target_height}, 0, 0); - } -} - -void Renderer::QueueFrameDumpReadback() -{ // Index 0 was just sent to AVI dump. Swap with the second texture. 
if (m_frame_dump_readback_textures[0]) std::swap(m_frame_dump_readback_textures[0], m_frame_dump_readback_textures[1]); - std::unique_ptr& rbtex = m_frame_dump_readback_textures[0]; - if (!rbtex || rbtex->GetConfig() != m_frame_dump_render_texture->GetConfig()) - { - rbtex = CreateStagingTexture(StagingTextureType::Readback, - m_frame_dump_render_texture->GetConfig()); - } + if (!CheckFrameDumpReadbackTexture(target_width, target_height)) + return; + const auto converted_region = + ConvertFramebufferRectangle(source_rect, source_tex->GetWidth(), source_tex->GetHeight()); + m_frame_dump_readback_textures[0]->CopyFromTexture( + source_tex, converted_region, 0, 0, + MathUtil::Rectangle(0, 0, target_width, target_height)); m_last_frame_state = AVIDump::FetchState(m_last_xfb_ticks); m_last_frame_exported = true; - rbtex->CopyFromTexture(m_frame_dump_render_texture.get(), 0, 0); +} + +bool Renderer::CheckFrameDumpRenderTexture(u32 target_width, u32 target_height) +{ + // Ensure framebuffer exists (we lazily allocate it in case frame dumping isn't used). + // Or, resize texture if it isn't large enough to accommodate the current frame. + if (m_frame_dump_render_texture && m_frame_dump_render_texture->GetWidth() == target_width && + m_frame_dump_render_texture->GetHeight() == target_height) + { + return true; + } + + // Recreate texture, but release before creating so we don't temporarily use twice the RAM. 
+ m_frame_dump_render_framebuffer.reset(); + m_frame_dump_render_texture.reset(); + m_frame_dump_render_texture = + CreateTexture(TextureConfig(target_width, target_height, 1, 1, 1, + AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget)); + if (!m_frame_dump_render_texture) + { + PanicAlert("Failed to allocate frame dump render texture"); + return false; + } + m_frame_dump_render_framebuffer = CreateFramebuffer(m_frame_dump_render_texture.get(), nullptr); + ASSERT(m_frame_dump_render_framebuffer); + return true; +} + +bool Renderer::CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height) +{ + std::unique_ptr& rbtex = m_frame_dump_readback_textures[0]; + if (rbtex && rbtex->GetWidth() == target_width && rbtex->GetHeight() == target_height) + return true; + + rbtex.reset(); + rbtex = CreateStagingTexture( + StagingTextureType::Readback, + TextureConfig(target_width, target_height, 1, 1, 1, AbstractTextureFormat::RGBA8, 0)); + if (!rbtex) + return false; + + return true; } void Renderer::FlushFrameDump() @@ -1151,6 +1439,7 @@ void Renderer::ShutdownFrameDumping() m_frame_dump_start.Set(); if (m_frame_dump_thread.joinable()) m_frame_dump_thread.join(); + m_frame_dump_render_framebuffer.reset(); m_frame_dump_render_texture.reset(); for (auto& tex : m_frame_dump_readback_textures) tex.reset(); diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 45a82c1968..deaaa89aa9 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -41,22 +41,26 @@ class AbstractShader; class AbstractTexture; class AbstractStagingTexture; class NativeVertexFormat; -class PostProcessingShaderImplementation; struct TextureConfig; struct ComputePipelineConfig; struct AbstractPipelineConfig; +struct PortableVertexDeclaration; enum class ShaderStage; enum class EFBAccessType; +enum class EFBReinterpretType; enum class StagingTextureType; +namespace VideoCommon +{ +class PostProcessing; +} + struct 
EfbPokeData { u16 x, y; u32 data; }; -extern int frameCount; - // Renderer really isn't a very good name for this class - it's more like "Misc". // The long term goal is to get rid of this class and replace it with others that make // more sense. @@ -78,37 +82,38 @@ public: virtual void SetScissorRect(const MathUtil::Rectangle& rc) {} virtual void SetTexture(u32 index, const AbstractTexture* texture) {} virtual void SetSamplerState(u32 index, const SamplerState& state) {} + virtual void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) {} virtual void UnbindTexture(const AbstractTexture* texture) {} - virtual void SetInterlacingMode() {} virtual void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) { } virtual void SetFullscreen(bool enable_fullscreen) {} virtual bool IsFullscreen() const { return false; } - virtual void ApplyState() {} - virtual void RestoreState() {} - virtual void ResetAPIState() {} - virtual void RestoreAPIState() {} + virtual void BeginUtilityDrawing(); + virtual void EndUtilityDrawing(); virtual std::unique_ptr CreateTexture(const TextureConfig& config) = 0; virtual std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) = 0; virtual std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) = 0; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) = 0; // Framebuffer operations. 
- virtual void SetFramebuffer(const AbstractFramebuffer* framebuffer) {} - virtual void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) {} - virtual void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, - const ClearColor& color_value = {}, float depth_value = 0.0f) - { - } + virtual void SetFramebuffer(AbstractFramebuffer* framebuffer); + virtual void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer); + virtual void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, + const ClearColor& color_value = {}, float depth_value = 0.0f); // Drawing with currently-bound pipeline state. virtual void Draw(u32 base_vertex, u32 num_vertices) {} virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {} + // Dispatching compute shaders with currently-bound state. + virtual void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) + { + } + // Binds the backbuffer for rendering. The buffer will be cleared immediately after binding. // This is where any window size changes are detected, therefore m_backbuffer_width and/or // m_backbuffer_height may change after this function returns. 
@@ -122,12 +127,15 @@ public: CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) = 0; virtual std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) = 0; + virtual std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) = 0; virtual std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) = 0; + std::unique_ptr CreateShaderFromSource(ShaderStage stage, + const std::string& source); + + AbstractFramebuffer* GetCurrentFramebuffer() const { return m_current_framebuffer; } - const AbstractFramebuffer* GetCurrentFramebuffer() const { return m_current_framebuffer; } - u32 GetCurrentFramebufferWidth() const { return m_current_framebuffer_width; } - u32 GetCurrentFramebufferHeight() const { return m_current_framebuffer_height; } // Ideal internal resolution - multiple of the native EFB resolution int GetTargetWidth() const { return m_target_width; } int GetTargetHeight() const { return m_target_height; } @@ -137,10 +145,27 @@ public: float GetBackbufferScale() const { return m_backbuffer_scale; } void SetWindowSize(int width, int height); - // EFB coordinate conversion functions + // Sets viewport and scissor to the specified rectangle. rect is assumed to be in framebuffer + // coordinates, i.e. lower-left origin in OpenGL. + void SetViewportAndScissor(const MathUtil::Rectangle& rect, float min_depth = 0.0f, + float max_depth = 1.0f); + // Scales a GPU texture using a copy shader. + virtual void ScaleTexture(AbstractFramebuffer* dst_framebuffer, + const MathUtil::Rectangle& dst_rect, + const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect); + + // Converts an upper-left to lower-left if required by the backend, optionally + // clamping to the framebuffer size. 
+ MathUtil::Rectangle ConvertFramebufferRectangle(const MathUtil::Rectangle& rect, + u32 fb_width, u32 fb_height); + MathUtil::Rectangle ConvertFramebufferRectangle(const MathUtil::Rectangle& rect, + const AbstractFramebuffer* framebuffer); + + // EFB coordinate conversion functions // Use this to convert a whole native EFB rect to backbuffer coordinates - virtual TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) = 0; + TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc); const TargetRectangle& GetTargetRectangle() const { return m_target_rectangle; } float CalculateDrawAspectRatio() const; @@ -170,18 +195,20 @@ public: bool InitializeImGui(); virtual void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, - u32 color, u32 z) = 0; - virtual void ReinterpretPixelData(unsigned int convtype) = 0; + u32 color, u32 z); + virtual void ReinterpretPixelData(EFBReinterpretType convtype); void RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight, float Gamma = 1.0f); - virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) = 0; - virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) = 0; + virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data); + virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points); virtual u16 BBoxRead(int index) = 0; virtual void BBoxWrite(int index, u16 value) = 0; + virtual void BBoxFlush() {} virtual void Flush() {} + virtual void WaitForGPUIdle() {} // Finish up the current frame, print some stats void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, @@ -189,14 +216,14 @@ public: // Draws the specified XFB buffer to the screen, performing any post-processing. // Assumes that the backbuffer has already been bound and cleared. 
- virtual void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) {} + virtual void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc); // Called when the configuration changes, and backend structures need to be updated. virtual void OnConfigChanged(u32 bits) {} PEControl::PixelFormat GetPrevPixelFormat() const { return m_prev_efb_format; } void StorePixelFormat(PEControl::PixelFormat new_format) { m_prev_efb_format = new_format; } - PostProcessingShaderImplementation* GetPostProcessor() const { return m_post_processor.get(); } + VideoCommon::PostProcessing* GetPostProcessor() const { return m_post_processor.get(); } // Final surface changing // This is called when the surface is resized (WX) or the window changes (Android). void ChangeSurface(void* new_surface_handle); @@ -246,12 +273,10 @@ protected: // Renders ImGui windows to the currently-bound framebuffer. // Should be called with the ImGui lock held. - void RenderImGui(); + void DrawImGui(); - // TODO: Remove the width/height parameters once we make the EFB an abstract framebuffer. 
- const AbstractFramebuffer* m_current_framebuffer = nullptr; - u32 m_current_framebuffer_width = 1; - u32 m_current_framebuffer_height = 1; + AbstractFramebuffer* m_current_framebuffer = nullptr; + const AbstractPipeline* m_current_pipeline = nullptr; Common::Flag m_screenshot_request; Common::Event m_screenshot_completed; @@ -260,8 +285,8 @@ protected: bool m_aspect_wide = false; // The framebuffer size - int m_target_width = 0; - int m_target_height = 0; + int m_target_width = 1; + int m_target_height = 1; // Backbuffer (window) size and render area int m_backbuffer_width = 0; @@ -269,10 +294,11 @@ protected: float m_backbuffer_scale = 1.0f; AbstractTextureFormat m_backbuffer_format = AbstractTextureFormat::Undefined; TargetRectangle m_target_rectangle = {}; + int m_frame_count = 0; FPSCounter m_fps_counter; - std::unique_ptr m_post_processor; + std::unique_ptr m_post_processor; void* m_new_surface_handle = nullptr; Common::Flag m_surface_changed; @@ -315,6 +341,7 @@ private: // Texture used for screenshot/frame dumping std::unique_ptr m_frame_dump_render_texture; + std::unique_ptr m_frame_dump_render_framebuffer; std::array, 2> m_frame_dump_readback_textures; AVIDump::Frame m_last_frame_state; bool m_last_frame_exported = false; @@ -340,15 +367,15 @@ private: bool IsFrameDumping(); - // Asynchronously encodes the current staging texture to the frame dump. + // Checks that the frame dump render texture exists and is the correct size. + bool CheckFrameDumpRenderTexture(u32 target_width, u32 target_height); + + // Checks that the frame dump readback texture exists and is the correct size. + bool CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height); + + // Fills the frame dump staging texture with the current XFB texture. void DumpCurrentFrame(); - // Fills the frame dump render texture with the current XFB texture. - void RenderFrameDump(); - - // Queues the current frame for readback, which will be written to AVI next frame. 
- void QueueFrameDumpReadback(); - // Asynchronously encodes the specified pointer of frame data to the frame dump. void DumpFrameData(const u8* data, int w, int h, int stride, const AVIDump::Frame& state); diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp index b61b031b04..04d8804f0c 100644 --- a/Source/Core/VideoCommon/RenderState.cpp +++ b/Source/Core/VideoCommon/RenderState.cpp @@ -6,6 +6,7 @@ #include #include #include "VideoCommon/SamplerCommon.h" +#include "VideoCommon/TextureConfig.h" void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_type) { @@ -23,6 +24,12 @@ RasterizationState& RasterizationState::operator=(const RasterizationState& rhs) return *this; } +FramebufferState& FramebufferState::operator=(const FramebufferState& rhs) +{ + hex = rhs.hex; + return *this; +} + void DepthState::Generate(const BPMemory& bp) { testenable = bp.zmode.testenable.Value(); @@ -206,10 +213,19 @@ RasterizationState GetInvalidRasterizationState() return state; } -RasterizationState GetNoCullRasterizationState() +RasterizationState GetNoCullRasterizationState(PrimitiveType primitive) { RasterizationState state = {}; state.cullmode = GenMode::CULL_NONE; + state.primitive = primitive; + return state; +} + +RasterizationState GetCullBackFaceRasterizationState(PrimitiveType primitive) +{ + RasterizationState state = {}; + state.cullmode = GenMode::CULL_BACK; + state.primitive = primitive; return state; } @@ -220,7 +236,7 @@ DepthState GetInvalidDepthState() return state; } -DepthState GetNoDepthTestingDepthStencilState() +DepthState GetNoDepthTestingDepthState() { DepthState state = {}; state.testenable = false; @@ -229,6 +245,15 @@ DepthState GetNoDepthTestingDepthStencilState() return state; } +DepthState GetAlwaysWriteDepthState() +{ + DepthState state = {}; + state.testenable = true; + state.updateenable = true; + state.func = ZMode::ALWAYS; + return state; +} + BlendingState GetInvalidBlendingState() { 
BlendingState state; @@ -251,6 +276,21 @@ BlendingState GetNoBlendingBlendState() return state; } +BlendingState GetNoColorWriteBlendState() +{ + BlendingState state = {}; + state.usedualsrc = false; + state.blendenable = false; + state.srcfactor = BlendMode::ONE; + state.srcfactoralpha = BlendMode::ONE; + state.dstfactor = BlendMode::ZERO; + state.dstfactoralpha = BlendMode::ZERO; + state.logicopenable = false; + state.colorupdate = false; + state.alphaupdate = false; + return state; +} + SamplerState GetInvalidSamplerState() { SamplerState state; @@ -287,4 +327,20 @@ SamplerState GetLinearSamplerState() state.anisotropic_filtering = false; return state; } + +FramebufferState GetColorFramebufferState(AbstractTextureFormat format) +{ + FramebufferState state = {}; + state.color_texture_format = format; + state.depth_texture_format = AbstractTextureFormat::Undefined; + state.per_sample_shading = false; + state.samples = 1; + return state; } + +FramebufferState GetRGBA8FramebufferState() +{ + return GetColorFramebufferState(AbstractTextureFormat::RGBA8); +} + +} // namespace RenderState diff --git a/Source/Core/VideoCommon/RenderState.h b/Source/Core/VideoCommon/RenderState.h index 5cd0ba7470..dc0676ccc4 100644 --- a/Source/Core/VideoCommon/RenderState.h +++ b/Source/Core/VideoCommon/RenderState.h @@ -9,6 +9,8 @@ #include "VideoCommon/BPMemory.h" #include "VideoCommon/BPStructs.h" +enum class AbstractTextureFormat : u32; + enum class PrimitiveType : u32 { Points, @@ -32,6 +34,20 @@ union RasterizationState u32 hex; }; +union FramebufferState +{ + BitField<0, 8, AbstractTextureFormat> color_texture_format; + BitField<8, 8, AbstractTextureFormat> depth_texture_format; + BitField<16, 8, u32> samples; + BitField<24, 1, u32> per_sample_shading; + + bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; } + bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; } + FramebufferState& operator=(const FramebufferState& rhs); + + u32 
hex; +}; + union DepthState { void Generate(const BPMemory& bp); @@ -114,12 +130,17 @@ union SamplerState namespace RenderState { RasterizationState GetInvalidRasterizationState(); -RasterizationState GetNoCullRasterizationState(); +RasterizationState GetNoCullRasterizationState(PrimitiveType primitive); +RasterizationState GetCullBackFaceRasterizationState(PrimitiveType primitive); DepthState GetInvalidDepthState(); -DepthState GetNoDepthTestingDepthStencilState(); +DepthState GetNoDepthTestingDepthState(); +DepthState GetAlwaysWriteDepthState(); BlendingState GetInvalidBlendingState(); BlendingState GetNoBlendingBlendState(); +BlendingState GetNoColorWriteBlendState(); SamplerState GetInvalidSamplerState(); SamplerState GetPointSamplerState(); SamplerState GetLinearSamplerState(); -} +FramebufferState GetColorFramebufferState(AbstractTextureFormat format); +FramebufferState GetRGBA8FramebufferState(); +} // namespace RenderState diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 3b0c110f96..b42587a2d4 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -9,7 +9,8 @@ #include "Common/MsgHandler.h" #include "Core/ConfigManager.h" -#include "VideoCommon/FramebufferManagerBase.h" +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/FramebufferShaderGen.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderManager.h" @@ -22,17 +23,26 @@ std::unique_ptr g_shader_cache; namespace VideoCommon { ShaderCache::ShaderCache() = default; -ShaderCache::~ShaderCache() = default; +ShaderCache::~ShaderCache() +{ + ClearShaderCaches(); + ClearPipelineCaches(); +} bool ShaderCache::Initialize() { m_api_type = g_ActiveConfig.backend_info.api_type; m_host_config = ShaderHostConfig::GetCurrent(); - m_efb_depth_format = FramebufferManagerBase::GetEFBDepthFormat(); - m_efb_multisamples = g_ActiveConfig.iMultisamples; - // 
Create the async compiler, and start the worker threads. + if (!CompileSharedPipelines()) + return false; + m_async_shader_compiler = g_renderer->CreateAsyncShaderCompiler(); + return true; +} + +void ShaderCache::InitializeShaderCache() +{ m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads()); // Load shader and UID caches. @@ -53,17 +63,6 @@ bool ShaderCache::Initialize() // Switch to the runtime shader compiler thread configuration. m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); - return true; -} - -void ShaderCache::SetHostConfig(const ShaderHostConfig& host_config, u32 efb_multisamples) -{ - if (m_host_config.bits == host_config.bits && m_efb_multisamples == efb_multisamples) - return; - - m_host_config = host_config; - m_efb_multisamples = efb_multisamples; - Reload(); } void ShaderCache::Reload() @@ -99,8 +98,6 @@ void ShaderCache::Shutdown() // until everything has finished compiling. m_async_shader_compiler->StopWorkerThreads(); ClosePipelineUIDCache(); - ClearShaderCaches(); - ClearPipelineCaches(); } const AbstractPipeline* ShaderCache::GetPipelineForUid(const GXPipelineUid& uid) @@ -445,6 +442,11 @@ bool ShaderCache::NeedsGeometryShader(const GeometryShaderUid& uid) const return m_host_config.backend_geometry_shaders && !uid.GetUidData()->IsPassthrough(); } +bool ShaderCache::UseGeometryShaderForEFBCopies() const +{ + return m_host_config.backend_geometry_shaders && m_host_config.stereo; +} + AbstractPipelineConfig ShaderCache::GetGXPipelineConfig( const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader, const AbstractShader* geometry_shader, const AbstractShader* pixel_shader, @@ -460,10 +462,7 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig( config.rasterization_state = rasterization_state; config.depth_state = depth_state; config.blending_state = blending_state; - config.framebuffer_state.color_texture_format = AbstractTextureFormat::RGBA8; 
- config.framebuffer_state.depth_texture_format = m_efb_depth_format; - config.framebuffer_state.per_sample_shading = m_host_config.ssaa; - config.framebuffer_state.samples = m_efb_multisamples; + config.framebuffer_state = g_framebuffer_manager->GetEFBFramebufferState(); return config; } @@ -967,8 +966,9 @@ void ShaderCache::QueueUberShaderPipelines() config.vs_uid = vs_uid; config.gs_uid = gs_uid; config.ps_uid = ps_uid; - config.rasterization_state = RenderState::GetNoCullRasterizationState(); - config.depth_state = RenderState::GetNoDepthTestingDepthStencilState(); + config.rasterization_state = + RenderState::GetCullBackFaceRasterizationState(PrimitiveType::TriangleStrip); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); config.blending_state = RenderState::GetNoBlendingBlendState(); auto iter = m_gx_uber_pipeline_cache.find(config); @@ -998,24 +998,172 @@ void ShaderCache::QueueUberShaderPipelines() }); } -std::string ShaderCache::GetUtilityShaderHeader() const +const AbstractPipeline* +ShaderCache::GetEFBCopyToVRAMPipeline(const TextureConversionShaderGen::TCShaderUid& uid) { - std::stringstream ss; + auto iter = m_efb_copy_to_vram_pipelines.find(uid); + if (iter != m_efb_copy_to_vram_pipelines.end()) + return iter->second.get(); - ss << "#define API_D3D " << (m_api_type == APIType::D3D ? 1 : 0) << "\n"; - ss << "#define API_OPENGL " << (m_api_type == APIType::OpenGL ? 1 : 0) << "\n"; - ss << "#define API_VULKAN " << (m_api_type == APIType::Vulkan ? 
1 : 0) << "\n"; - - if (m_efb_multisamples > 1) + auto shader_code = TextureConversionShaderGen::GeneratePixelShader(m_api_type, uid.GetUidData()); + auto shader = g_renderer->CreateShaderFromSource(ShaderStage::Pixel, shader_code.GetBuffer()); + if (!shader) { - ss << "#define MSAA_ENABLED 1" << std::endl; - ss << "#define MSAA_SAMPLES " << m_efb_multisamples << std::endl; - if (m_host_config.ssaa) - ss << "#define SSAA_ENABLED 1" << std::endl; + m_efb_copy_to_vram_pipelines.emplace(uid, nullptr); + return nullptr; } - ss << "#define EFB_LAYERS " << (m_host_config.stereo ? 2 : 1) << std::endl; - - return ss.str(); + AbstractPipelineConfig config = {}; + config.vertex_format = nullptr; + config.vertex_shader = m_efb_copy_vertex_shader.get(); + config.geometry_shader = + UseGeometryShaderForEFBCopies() ? m_texcoord_geometry_shader.get() : nullptr; + config.pixel_shader = shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetRGBA8FramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + auto iiter = m_efb_copy_to_vram_pipelines.emplace(uid, g_renderer->CreatePipeline(config)); + return iiter.first->second.get(); } + +const AbstractPipeline* ShaderCache::GetEFBCopyToRAMPipeline(const EFBCopyParams& uid) +{ + auto iter = m_efb_copy_to_ram_pipelines.find(uid); + if (iter != m_efb_copy_to_ram_pipelines.end()) + return iter->second.get(); + + auto shader_code = TextureConversionShaderTiled::GenerateEncodingShader(uid, m_api_type); + auto shader = + g_renderer->CreateShaderFromSource(ShaderStage::Pixel, shader_code, std::strlen(shader_code)); + if (!shader) + { + m_efb_copy_to_ram_pipelines.emplace(uid, nullptr); + return nullptr; + } + + AbstractPipelineConfig config = {}; + config.vertex_format = nullptr; + 
config.vertex_shader = m_screen_quad_vertex_shader.get(); + config.geometry_shader = + UseGeometryShaderForEFBCopies() ? m_texcoord_geometry_shader.get() : nullptr; + config.pixel_shader = shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetColorFramebufferState(AbstractTextureFormat::BGRA8); + config.usage = AbstractPipelineUsage::Utility; + auto iiter = m_efb_copy_to_ram_pipelines.emplace(uid, g_renderer->CreatePipeline(config)); + return iiter.first->second.get(); +} + +bool ShaderCache::CompileSharedPipelines() +{ + m_screen_quad_vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, FramebufferShaderGen::GenerateScreenQuadVertexShader()); + m_texture_copy_vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, FramebufferShaderGen::GenerateTextureCopyVertexShader()); + m_efb_copy_vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, + TextureConversionShaderGen::GenerateVertexShader(m_api_type).GetBuffer()); + if (!m_screen_quad_vertex_shader || !m_texture_copy_vertex_shader || !m_efb_copy_vertex_shader) + return false; + + if (UseGeometryShaderForEFBCopies()) + { + m_texcoord_geometry_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Geometry, FramebufferShaderGen::GeneratePassthroughGeometryShader(1, 0)); + m_color_geometry_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Geometry, FramebufferShaderGen::GeneratePassthroughGeometryShader(0, 1)); + if (!m_texcoord_geometry_shader || !m_color_geometry_shader) + return false; + } + + m_texture_copy_pixel_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, FramebufferShaderGen::GenerateTextureCopyPixelShader()); + m_color_pixel_shader = g_renderer->CreateShaderFromSource( + 
ShaderStage::Pixel, FramebufferShaderGen::GenerateColorPixelShader()); + if (!m_texture_copy_pixel_shader || !m_color_pixel_shader) + return false; + + AbstractPipelineConfig config; + config.vertex_format = nullptr; + config.vertex_shader = m_texture_copy_vertex_shader.get(); + config.geometry_shader = nullptr; + config.pixel_shader = m_texture_copy_pixel_shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetRGBA8FramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + m_copy_rgba8_pipeline = g_renderer->CreatePipeline(config); + if (!m_copy_rgba8_pipeline) + return false; + + if (UseGeometryShaderForEFBCopies()) + { + config.geometry_shader = m_texcoord_geometry_shader.get(); + m_rgba8_stereo_copy_pipeline = g_renderer->CreatePipeline(config); + if (!m_rgba8_stereo_copy_pipeline) + return false; + } + + if (m_host_config.backend_palette_conversion) + { + config.vertex_shader = m_screen_quad_vertex_shader.get(); + config.geometry_shader = nullptr; + + for (size_t i = 0; i < NUM_PALETTE_CONVERSION_SHADERS; i++) + { + auto shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, TextureConversionShaderTiled::GeneratePaletteConversionShader( + static_cast(i), m_api_type)); + if (!shader) + return false; + + config.pixel_shader = shader.get(); + m_palette_conversion_pipelines[i] = g_renderer->CreatePipeline(config); + if (!m_palette_conversion_pipelines[i]) + return false; + } + } + + return true; +} + +const AbstractPipeline* ShaderCache::GetPaletteConversionPipeline(TLUTFormat format) +{ + ASSERT(static_cast(format) < NUM_PALETTE_CONVERSION_SHADERS); + return m_palette_conversion_pipelines[static_cast(format)].get(); +} + +const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format, + 
TLUTFormat palette_format) +{ + const auto key = std::make_pair(static_cast(format), static_cast(palette_format)); + auto iter = m_texture_decoding_shaders.find(key); + if (iter != m_texture_decoding_shaders.end()) + return iter->second.get(); + + std::string shader_source = + TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::OpenGL); + if (shader_source.empty()) + { + m_texture_decoding_shaders.emplace(key, nullptr); + return nullptr; + } + + std::unique_ptr shader = + g_renderer->CreateShaderFromSource(ShaderStage::Compute, shader_source); + if (!shader) + { + m_texture_decoding_shaders.emplace(key, nullptr); + return nullptr; + } + + auto iiter = m_texture_decoding_shaders.emplace(key, std::move(shader)); + return iiter.first->second.get(); +} + } // namespace VideoCommon diff --git a/Source/Core/VideoCommon/ShaderCache.h b/Source/Core/VideoCommon/ShaderCache.h index a91559c965..66caad93ac 100644 --- a/Source/Core/VideoCommon/ShaderCache.h +++ b/Source/Core/VideoCommon/ShaderCache.h @@ -25,12 +25,16 @@ #include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/PixelShaderGen.h" #include "VideoCommon/RenderState.h" +#include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/TextureConversionShader.h" +#include "VideoCommon/TextureConverterShaderGen.h" #include "VideoCommon/UberShaderPixel.h" #include "VideoCommon/UberShaderVertex.h" #include "VideoCommon/VertexShaderGen.h" class NativeVertexFormat; enum class AbstractTextureFormat : u32; +enum class TLUTFormat; namespace VideoCommon { @@ -44,8 +48,11 @@ public: bool Initialize(); void Shutdown(); - // Changes the shader host config. Shaders will be reloaded if there are changes. - void SetHostConfig(const ShaderHostConfig& host_config, u32 efb_multisamples); + // Compiles/loads cached shaders. + void InitializeShaderCache(); + + // Changes the shader host config. Shaders should be reloaded afterwards. 
+ void SetHostConfig(const ShaderHostConfig& host_config) { m_host_config = host_config; } // Reloads/recreates all shaders and pipelines. void Reload(); @@ -53,9 +60,6 @@ public: // Retrieves all pending shaders/pipelines from the async compiler. void RetrieveAsyncShaders(); - // Get utility shader header based on current config. - std::string GetUtilityShaderHeader() const; - // Accesses ShaderGen shader caches const AbstractPipeline* GetPipelineForUid(const GXPipelineUid& uid); const AbstractPipeline* GetUberPipelineForUid(const GXUberPipelineUid& uid); @@ -64,7 +68,48 @@ public: // The optional will be empty if this pipeline is now background compiling. std::optional GetPipelineForUidAsync(const GXPipelineUid& uid); + // Shared shaders + const AbstractShader* GetScreenQuadVertexShader() const + { + return m_screen_quad_vertex_shader.get(); + } + const AbstractShader* GetTextureCopyVertexShader() const + { + return m_texture_copy_vertex_shader.get(); + } + const AbstractShader* GetEFBCopyVertexShader() const { return m_efb_copy_vertex_shader.get(); } + const AbstractShader* GetTexcoordGeometryShader() const + { + return m_texcoord_geometry_shader.get(); + } + const AbstractShader* GetTextureCopyPixelShader() const + { + return m_texture_copy_pixel_shader.get(); + } + const AbstractShader* GetColorGeometryShader() const { return m_color_geometry_shader.get(); } + const AbstractShader* GetColorPixelShader() const { return m_color_pixel_shader.get(); } + + // EFB copy to RAM/VRAM pipelines + const AbstractPipeline* + GetEFBCopyToVRAMPipeline(const TextureConversionShaderGen::TCShaderUid& uid); + const AbstractPipeline* GetEFBCopyToRAMPipeline(const EFBCopyParams& uid); + + // RGBA8 framebuffer copy pipelines + const AbstractPipeline* GetRGBA8CopyPipeline() const { return m_copy_rgba8_pipeline.get(); } + const AbstractPipeline* GetRGBA8StereoCopyPipeline() const + { + return m_rgba8_stereo_copy_pipeline.get(); + } + + // Palette texture conversion pipelines + const 
AbstractPipeline* GetPaletteConversionPipeline(TLUTFormat format); + + // Texture decoding compute shaders + const AbstractShader* GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format); + private: + static constexpr size_t NUM_PALETTE_CONVERSION_SHADERS = 3; + void WaitForAsyncCompiler(); void LoadShaderCaches(); void ClearShaderCaches(); @@ -74,6 +119,7 @@ private: void InvalidateCachedPipelines(); void ClearPipelineCaches(); void QueueUberShaderPipelines(); + bool CompileSharedPipelines(); // GX shader compiler methods std::unique_ptr CompileVertexShader(const VertexShaderUid& uid) const; @@ -93,6 +139,9 @@ private: const AbstractShader* CreateGeometryShader(const GeometryShaderUid& uid); bool NeedsGeometryShader(const GeometryShaderUid& uid) const; + // Should we use geometry shaders for EFB copies? + bool UseGeometryShaderForEFBCopies() const; + // GX pipeline compiler methods AbstractPipelineConfig GetGXPipelineConfig(const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader, @@ -130,10 +179,17 @@ private: // Configuration bits. 
APIType m_api_type = APIType::Nothing; ShaderHostConfig m_host_config = {}; - AbstractTextureFormat m_efb_depth_format; - u32 m_efb_multisamples = 1; std::unique_ptr m_async_shader_compiler; + // Shared shaders + std::unique_ptr m_screen_quad_vertex_shader; + std::unique_ptr m_texture_copy_vertex_shader; + std::unique_ptr m_efb_copy_vertex_shader; + std::unique_ptr m_texcoord_geometry_shader; + std::unique_ptr m_color_geometry_shader; + std::unique_ptr m_texture_copy_pixel_shader; + std::unique_ptr m_color_pixel_shader; + // GX Shader Caches template struct ShaderModuleCache @@ -157,6 +213,22 @@ private: std::map, bool>> m_gx_uber_pipeline_cache; File::IOFile m_gx_pipeline_uid_cache_file; + + // EFB copy to VRAM/RAM pipelines + std::map> + m_efb_copy_to_vram_pipelines; + std::map> m_efb_copy_to_ram_pipelines; + + // Copy pipeline for RGBA8 textures + std::unique_ptr m_copy_rgba8_pipeline; + std::unique_ptr m_rgba8_stereo_copy_pipeline; + + // Palette conversion pipelines + std::array, NUM_PALETTE_CONVERSION_SHADERS> + m_palette_conversion_pipelines; + + // Texture decoding shaders + std::map, std::unique_ptr> m_texture_decoding_shaders; }; } // namespace VideoCommon diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 54d87691de..ee4c3d32f8 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -34,6 +34,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing; bits.backend_shader_framebuffer_fetch = g_ActiveConfig.backend_info.bSupportsFramebufferFetch; bits.backend_logic_op = g_ActiveConfig.backend_info.bSupportsLogicOp; + bits.backend_palette_conversion = g_ActiveConfig.backend_info.bSupportsPaletteConversion; return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 83416d06d4..216f791df5 100644 --- 
a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -181,7 +181,8 @@ union ShaderHostConfig u32 backend_dynamic_sampler_indexing : 1; u32 backend_shader_framebuffer_fetch : 1; u32 backend_logic_op : 1; - u32 pad : 10; + u32 backend_palette_conversion : 1; + u32 pad : 9; }; static ShaderHostConfig GetCurrent(); @@ -216,7 +217,7 @@ template inline void GenerateVSOutputMembers(T& object, APIType api_type, u32 texgens, const ShaderHostConfig& host_config, const char* qualifier) { - DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "POSITION"); + DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "SV_Position"); DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, "COLOR", 0); DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, "COLOR", 1); diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index a42438e0b1..3beaeedbf8 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -28,16 +28,21 @@ #include "Core/FifoPlayer/FifoRecorder.h" #include "Core/HW/Memmap.h" +#include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/BPMemory.h" -#include "VideoCommon/Debugger.h" -#include "VideoCommon/FramebufferManagerBase.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/HiresTextures.h" +#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/SamplerCommon.h" +#include "VideoCommon/ShaderCache.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/TextureConversionShader.h" +#include "VideoCommon/TextureConverterShaderGen.h" #include "VideoCommon/TextureDecoder.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -50,8 
+55,9 @@ std::unique_ptr g_texture_cache; std::bitset<8> TextureCacheBase::valid_bind_points; -TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr tex) - : texture(std::move(tex)) +TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr tex, + std::unique_ptr fb) + : texture(std::move(tex)), framebuffer(std::move(fb)) { } @@ -88,6 +94,25 @@ TextureCacheBase::TextureCacheBase() InvalidateAllBindPoints(); } +TextureCacheBase::~TextureCacheBase() +{ + HiresTexture::Shutdown(); + Invalidate(); + Common::FreeAlignedMemory(temp); + temp = nullptr; +} + +bool TextureCacheBase::Initialize() +{ + if (!CreateUtilityTextures()) + { + PanicAlert("Failed to create utility textures."); + return false; + } + + return true; +} + void TextureCacheBase::Invalidate() { FlushEFBCopies(); @@ -107,14 +132,6 @@ void TextureCacheBase::Invalidate() texture_pool.clear(); } -TextureCacheBase::~TextureCacheBase() -{ - HiresTexture::Shutdown(); - Invalidate(); - Common::FreeAlignedMemory(temp); - temp = nullptr; -} - void TextureCacheBase::OnConfigChanged(VideoConfig& config) { if (config.bHiresTextures != backup_config.hires_textures || @@ -138,14 +155,6 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config) g_ActiveConfig.bTexFmtOverlayCenter); } - if ((config.stereo_mode != StereoMode::Off) != backup_config.stereo_3d || - config.bStereoEFBMonoDepth != backup_config.efb_mono_depth) - { - g_texture_cache->DeleteShaders(); - if (!g_texture_cache->CompileShaders()) - PanicAlert("Failed to recompile one or more texture conversion shaders."); - } - SetBackupConfig(config); } @@ -242,7 +251,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma { TextureConfig new_config = entry->texture->GetConfig(); new_config.levels = 1; - new_config.rendertarget = true; + new_config.flags |= AbstractTextureFlag_RenderTarget; TCacheEntry* decoded_entry = AllocateCacheEntry(new_config); if (!decoded_entry) @@ -278,29 +287,27 @@ void 
TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e return; } - TextureConfig newconfig; - newconfig.width = new_width; - newconfig.height = new_height; - newconfig.layers = entry->GetNumLayers(); - newconfig.rendertarget = true; - - std::unique_ptr new_texture = AllocateTexture(newconfig); - if (new_texture) + const TextureConfig newconfig(new_width, new_height, 1, entry->GetNumLayers(), 1, + AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget); + std::optional new_texture = AllocateTexture(newconfig); + if (!new_texture) { - new_texture->ScaleRectangleFromTexture(entry->texture.get(), - entry->texture->GetConfig().GetRect(), - new_texture->GetConfig().GetRect()); - entry->texture.swap(new_texture); + ERROR_LOG(VIDEO, "Scaling failed due to texture allocation failure"); + return; + } - auto config = new_texture->GetConfig(); - // At this point new_texture has the old texture in it, - // we can potentially reuse this, so let's move it back to the pool - texture_pool.emplace(config, TexPoolEntry(std::move(new_texture))); - } - else - { - ERROR_LOG(VIDEO, "Scaling failed"); - } + // No need to convert the coordinates here since they'll be the same. 
+ g_renderer->ScaleTexture(new_texture->framebuffer.get(), + new_texture->texture->GetConfig().GetRect(), entry->texture.get(), + entry->texture->GetConfig().GetRect()); + entry->texture.swap(new_texture->texture); + entry->framebuffer.swap(new_texture->framebuffer); + + // At this point new_texture has the old texture in it, + // we can potentially reuse this, so let's move it back to the pool + auto config = new_texture->texture->GetConfig(); + texture_pool.emplace( + config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer))); } TextureCacheBase::TCacheEntry* @@ -383,6 +390,17 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale dst_y = 0; } + // If the source rectangle is outside of what we actually have in VRAM, skip the copy. + // The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates + // to the graphics driver, which can cause GPU resets. + if (static_cast(src_x) >= entry->native_width || + static_cast(src_y) >= entry->native_height || + static_cast(dst_x) >= entry_to_update->native_width || + static_cast(dst_y) >= entry_to_update->native_height) + { + continue; + } + u32 copy_width = std::min(entry->native_width - src_x, entry_to_update->native_width - dst_x); u32 copy_height = @@ -482,12 +500,79 @@ static u32 CalculateLevelSize(u32 level_0_size, u32 level) return std::max(level_0_size >> level, 1u); } +static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, + bool has_arbitrary_mips) +{ + const FourTexUnits& tex = bpmem.tex[index / 4]; + const TexMode0& tm0 = tex.texMode0[index % 4]; + + SamplerState state = {}; + state.Generate(bpmem, index); + + // Force texture filtering config option. + if (g_ActiveConfig.bForceFiltering) + { + state.min_filter = SamplerState::Filter::Linear; + state.mag_filter = SamplerState::Filter::Linear; + state.mipmap_filter = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? 
+ SamplerState::Filter::Linear : + SamplerState::Filter::Point; + } + + // Custom textures may have a greater number of mips + if (custom_tex) + state.max_lod = 255; + + // Anisotropic filtering option. + if (g_ActiveConfig.iMaxAnisotropy != 0 && !SamplerCommon::IsBpTexMode0PointFiltering(tm0)) + { + // https://www.opengl.org/registry/specs/EXT/texture_filter_anisotropic.txt + // For predictable results on all hardware/drivers, only use one of: + // GL_LINEAR + GL_LINEAR (No Mipmaps [Bilinear]) + // GL_LINEAR + GL_LINEAR_MIPMAP_LINEAR (w/ Mipmaps [Trilinear]) + // Letting the game set other combinations will have varying arbitrary results; + // possibly being interpreted as equal to bilinear/trilinear, implicitly + // disabling anisotropy, or changing the anisotropic algorithm employed. + state.min_filter = SamplerState::Filter::Linear; + state.mag_filter = SamplerState::Filter::Linear; + if (SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) + state.mipmap_filter = SamplerState::Filter::Linear; + state.anisotropic_filtering = 1; + } + else + { + state.anisotropic_filtering = 0; + } + + if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) + { + // Apply a secondary bias calculated from the IR scale to pull inwards mipmaps + // that have arbitrary contents, eg. are used for fog effects where the + // distance they kick in at is important to preserve at any resolution. + // Correct this with the upscaling factor of custom textures. 
+ s64 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f; + state.lod_bias = MathUtil::Clamp(state.lod_bias + lod_offset, -32768, 32767); + + // Anisotropic also pushes mips farther away so it cannot be used either + state.anisotropic_filtering = 0; + } + + g_renderer->SetSamplerState(index, state); +} + void TextureCacheBase::BindTextures() { for (u32 i = 0; i < bound_textures.size(); i++) { - if (IsValidBindPoint(i) && bound_textures[i]) - g_renderer->SetTexture(i, bound_textures[i]->texture.get()); + const TCacheEntry* tentry = bound_textures[i]; + if (IsValidBindPoint(i) && tentry) + { + g_renderer->SetTexture(i, tentry->texture.get()); + PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height); + + const float custom_tex_scale = tentry->GetWidth() / float(tentry->native_width); + SetSamplerState(i, custom_tex_scale, tentry->is_custom_tex, tentry->has_arbitrary_mips); + } } } @@ -668,8 +753,6 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) entry->frameCount = FRAMECOUNT_INVALID; bound_textures[stage] = entry; - GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true); - // We need to keep track of invalided textures until they have actually been replaced or // re-loaded valid_bind_points.set(stage); @@ -957,25 +1040,17 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo // banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since // there's no conversion between formats. In the future this could be extended with a separate // shader, however. 
- bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() && - g_texture_cache->SupportsGPUTextureDecode(texformat, tlutfmt) && - !(from_tmem && texformat == TextureFormat::RGBA8); + const bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() && + !(from_tmem && texformat == TextureFormat::RGBA8); // create the entry/texture - TextureConfig config; - config.width = width; - config.height = height; - config.levels = texLevels; - config.format = hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8; - - ArbitraryMipmapDetector arbitrary_mip_detector; - + const TextureConfig config(width, height, texLevels, 1, 1, + hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8, 0); TCacheEntry* entry = AllocateCacheEntry(config); - GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); - if (!entry) return nullptr; + ArbitraryMipmapDetector arbitrary_mip_detector; const u8* tlut = &texMem[tlutaddr]; if (hires_tex) { @@ -989,14 +1064,10 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo if (!hires_tex) { - if (decode_on_gpu) - { - u32 row_stride = bytes_per_block * (expandedWidth / bsw); - g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, - height, expandedWidth, expandedHeight, row_stride, tlut, - tlutfmt); - } - else + if (!decode_on_gpu || + !DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, height, + expandedWidth, expandedHeight, bytes_per_block * (expandedWidth / bsw), + tlut, tlutfmt)) { size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight; @@ -1089,20 +1160,16 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh); const u8*& mip_src_data = from_tmem ? ((level % 2) ? 
ptr_odd : ptr_even) : src_data; - size_t mip_size = + const u32 mip_size = TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat); - if (decode_on_gpu) - { - u32 row_stride = bytes_per_block * (expanded_mip_width / bsw); - g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat, - mip_width, mip_height, expanded_mip_width, - expanded_mip_height, row_stride, tlut, tlutfmt); - } - else + if (!decode_on_gpu || + !DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat, mip_width, + mip_height, expanded_mip_width, expanded_mip_height, + bytes_per_block * (expanded_mip_width / bsw), tlut, tlutfmt)) { // No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning - size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height; + const u32 decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height; TexDecoder_Decode(dst_buffer, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlut, tlutfmt); entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, dst_buffer, @@ -1133,6 +1200,8 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt); + // This should only be needed if the texture was updated, or used GPU decoding. + entry->texture->FinishedRendering(); return entry; } @@ -1300,7 +1369,7 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati // or as a container for overlapping textures, never need to be combined // with other textures TCacheEntry* stitched_entry = - CreateNormalTexture(tex_info, FramebufferManagerBase::GetEFBLayers()); + CreateNormalTexture(tex_info, g_framebuffer_manager->GetEFBLayers()); stitched_entry->may_have_overlapping_textures = false; // It is possible that some of the overlapping textures overlap each other. 
@@ -1385,6 +1454,17 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati dst_y = 0; } + // If the source rectangle is outside of what we actually have in VRAM, skip the copy. + // The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates + // to the graphics driver, which can cause GPU resets. + if (static_cast(src_x) >= entry->native_width || + static_cast(src_y) >= entry->native_height || + static_cast(dst_x) >= stitched_entry->native_width || + static_cast(dst_y) >= stitched_entry->native_height) + { + continue; + } + u32 copy_width = std::min(entry->native_width - src_x, stitched_entry->native_width - dst_x); u32 copy_height = std::min(entry->native_height - src_y, stitched_entry->native_height - dst_y); @@ -1450,6 +1530,7 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati return nullptr; } + stitched_entry->texture->FinishedRendering(); return stitched_entry; } @@ -1457,17 +1538,10 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers) { // create the entry/texture - TextureConfig config; - config.width = tex_info.native_width; - config.height = tex_info.native_height; - config.levels = tex_info.computed_levels; - config.format = AbstractTextureFormat::RGBA8; - config.rendertarget = true; - config.layers = layers; - + const TextureConfig config(tex_info.native_width, tex_info.native_height, + tex_info.computed_levels, layers, 1, AbstractTextureFormat::RGBA8, + AbstractTextureFlag_RenderTarget); TCacheEntry* entry = AllocateCacheEntry(config); - GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); - if (!entry) return nullptr; @@ -1500,15 +1574,15 @@ TextureCacheBase::GetTextureFromMemory(const TextureLookupInformation& tex_info) // banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since // there's no conversion between formats. 
In the future this could be extended with a separate // shader, however. - bool decode_on_gpu = g_ActiveConfig.UseGPUTextureDecoding() && - g_texture_cache->SupportsGPUTextureDecode(tex_info.full_format.texfmt, - tex_info.full_format.tlutfmt) && - !(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8); + const bool decode_on_gpu = + g_ActiveConfig.UseGPUTextureDecoding() && + !(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8); // Since it's coming from RAM, it can only have one layer (no stereo). TCacheEntry* entry = CreateNormalTexture(tex_info, 1); entry->may_have_overlapping_textures = false; LoadTextureLevelZeroFromMemory(entry, tex_info, decode_on_gpu); + entry->texture->FinishedRendering(); return entry; } @@ -1518,15 +1592,13 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda { const u8* tlut = &texMem[tex_info.tlut_address]; - if (decode_on_gpu) - { - u32 row_stride = tex_info.bytes_per_block * (tex_info.expanded_width / tex_info.block_width); - g_texture_cache->DecodeTextureOnGPU( - entry_to_update, 0, tex_info.src_data, tex_info.total_bytes, tex_info.full_format.texfmt, - tex_info.native_width, tex_info.native_height, tex_info.expanded_width, - tex_info.expanded_height, row_stride, tlut, tex_info.full_format.tlutfmt); - } - else + if (!decode_on_gpu || + !DecodeTextureOnGPU(entry_to_update, 0, tex_info.src_data, tex_info.total_bytes, + tex_info.full_format.texfmt, tex_info.native_width, tex_info.native_height, + tex_info.expanded_width, tex_info.expanded_height, + tex_info.bytes_per_block * + (tex_info.expanded_width / tex_info.block_width), + tlut, tex_info.full_format.tlutfmt)) { size_t decoded_texture_size = tex_info.expanded_width * sizeof(u32) * tex_info.expanded_height; CheckTempSize(decoded_texture_size); @@ -1547,12 +1619,12 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda } } -TextureCacheBase::CopyFilterCoefficientArray
TextureCacheBase::GetRAMCopyFilterCoefficients( - const CopyFilterCoefficients::Values& coefficients) const +EFBCopyFilterCoefficients +TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) { // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below. - return {{ + return EFBCopyFilterCoefficients{ static_cast(static_cast(coefficients[0]) + static_cast(coefficients[1])) / 64.0f, static_cast(static_cast(coefficients[2]) + static_cast(coefficients[3]) + @@ -1560,31 +1632,31 @@ TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterC 64.0f, static_cast(static_cast(coefficients[5]) + static_cast(coefficients[6])) / 64.0f, - }}; + }; } -TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients( - const CopyFilterCoefficients::Values& coefficients) const +EFBCopyFilterCoefficients +TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) { // If the user disables the copy filter, only apply it to the VRAM copy. // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected. - CopyFilterCoefficientArray res = GetRAMCopyFilterCoefficients(coefficients); + EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients); if (!g_ActiveConfig.bDisableCopyFilter) return res; // Disabling the copy filter in options should not ignore the values the game sets completely, // as some games use the filter coefficients to control the brightness of the screen. Instead, // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect. 
- res[1] += res[0] + res[2]; - res[0] = 0; - res[2] = 0; + res.middle = res.upper + res.middle + res.lower; + res.upper = 0.0f; + res.lower = 0.0f; return res; } -bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const +bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients) { // If the top/bottom coefficients are zero, no point sampling/blending from these rows. - return coefficients[0] != 0 || coefficients[2] != 0; + return coefficients.upper != 0 || coefficients.lower != 0; } void TextureCacheBase::CopyRenderTargetToTexture( @@ -1726,12 +1798,8 @@ void TextureCacheBase::CopyRenderTargetToTexture( if (copy_to_vram) { // create the texture - TextureConfig config; - config.rendertarget = true; - config.width = scaled_tex_w; - config.height = scaled_tex_h; - config.layers = FramebufferManagerBase::GetEFBLayers(); - + const TextureConfig config(scaled_tex_w, scaled_tex_h, 1, g_framebuffer_manager->GetEFBLayers(), + 1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget); entry = AllocateCacheEntry(config); if (entry) { @@ -1776,7 +1844,7 @@ void TextureCacheBase::CopyRenderTargetToTexture( if (copy_to_ram) { - CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); + EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format; EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, NeedsCopyFilterInShader(coefficients)); @@ -1916,11 +1984,6 @@ void TextureCacheBase::FlushEFBCopies() m_pending_efb_copies.clear(); } -TextureConfig TextureCacheBase::GetEncodingTextureConfig() -{ - return TextureConfig(EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, true); -} - void TextureCacheBase::WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride, std::unique_ptr staging_texture) { @@ -1979,8 +2042,8 @@ std::unique_ptr 
TextureCacheBase::GetEFBCopyStagingTextu return ptr; } - std::unique_ptr tex = - g_renderer->CreateStagingTexture(StagingTextureType::Readback, GetEncodingTextureConfig()); + std::unique_ptr tex = g_renderer->CreateStagingTexture( + StagingTextureType::Readback, m_efb_encoding_texture->GetConfig()); if (!tex) WARN_LOG(VIDEO, "Failed to create EFB copy staging texture"); @@ -2037,37 +2100,50 @@ void TextureCacheBase::UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_ TextureCacheBase::TCacheEntry* TextureCacheBase::AllocateCacheEntry(const TextureConfig& config) { - std::unique_ptr texture = AllocateTexture(config); - - if (!texture) - { + std::optional alloc = AllocateTexture(config); + if (!alloc) return nullptr; - } - TCacheEntry* cacheEntry = new TCacheEntry(std::move(texture)); + + TCacheEntry* cacheEntry = + new TCacheEntry(std::move(alloc->texture), std::move(alloc->framebuffer)); cacheEntry->textures_by_hash_iter = textures_by_hash.end(); cacheEntry->id = last_entry_id++; return cacheEntry; } -std::unique_ptr TextureCacheBase::AllocateTexture(const TextureConfig& config) +std::optional +TextureCacheBase::AllocateTexture(const TextureConfig& config) { TexPool::iterator iter = FindMatchingTextureFromPool(config); - std::unique_ptr entry; if (iter != texture_pool.end()) { - entry = std::move(iter->second.texture); + auto entry = std::move(iter->second); texture_pool.erase(iter); + return std::move(entry); } - else + + std::unique_ptr texture = g_renderer->CreateTexture(config); + if (!texture) { - entry = g_renderer->CreateTexture(config); - if (!entry) - return nullptr; - - INCSTAT(stats.numTexturesCreated); + WARN_LOG(VIDEO, "Failed to allocate a %ux%ux%u texture", config.width, config.height, + config.layers); + return {}; } - return entry; + std::unique_ptr framebuffer; + if (config.IsRenderTarget()) + { + framebuffer = g_renderer->CreateFramebuffer(texture.get(), nullptr); + if (!framebuffer) + { + WARN_LOG(VIDEO, "Failed to allocate a %ux%ux%u 
framebuffer", config.width, config.height, + config.layers); + return {}; + } + } + + INCSTAT(stats.numTexturesCreated); + return TexPoolEntry(std::move(texture), std::move(framebuffer)); } TextureCacheBase::TexPool::iterator @@ -2080,7 +2156,7 @@ TextureCacheBase::FindMatchingTextureFromPool(const TextureConfig& config) // As non-render-target textures are usually static, this should not matter much. auto range = texture_pool.equal_range(config); auto matching_iter = std::find_if(range.first, range.second, [](const auto& iter) { - return iter.first.rendertarget || iter.second.frameCount != FRAMECOUNT_INVALID; + return iter.first.IsRenderTarget() || iter.second.frameCount != FRAMECOUNT_INVALID; }); return matching_iter != range.second ? matching_iter : texture_pool.end(); } @@ -2171,7 +2247,8 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe } auto config = entry->texture->GetConfig(); - texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture))); + texture_pool.emplace(config, + TexPoolEntry(std::move(entry->texture), std::move(entry->framebuffer))); // Don't delete if there's a pending EFB copy, as we need the TCacheEntry alive. 
if (!entry->pending_efb_copy) @@ -2180,6 +2257,283 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe return textures_by_address.erase(iter); } +bool TextureCacheBase::CreateUtilityTextures() +{ + constexpr TextureConfig encoding_texture_config( + EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, AbstractTextureFlag_RenderTarget); + m_efb_encoding_texture = g_renderer->CreateTexture(encoding_texture_config); + if (!m_efb_encoding_texture) + return false; + + m_efb_encoding_framebuffer = g_renderer->CreateFramebuffer(m_efb_encoding_texture.get(), nullptr); + if (!m_efb_encoding_framebuffer) + return false; + + if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding) + { + constexpr TextureConfig decoding_texture_config( + 1024, 1024, 1, 1, 1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_ComputeImage); + m_decoding_texture = g_renderer->CreateTexture(decoding_texture_config); + if (!m_decoding_texture) + return false; + } + + return true; +} + +void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, + const EFBRectangle& src_rect, bool scale_by_half, + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const EFBCopyFilterCoefficients& filter_coefficients) +{ + // Flush EFB pokes first, as they're expected to be included. + g_framebuffer_manager->FlushEFBPokes(); + + // Get the pipeline which we will be using. If the compilation failed, this will be null. + const AbstractPipeline* copy_pipeline = + g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid( + dst_format, is_depth_copy, is_intensity, scale_by_half, + NeedsCopyFilterInShader(filter_coefficients))); + if (!copy_pipeline) + { + WARN_LOG(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline."); + return; + } + + const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); + AbstractTexture* src_texture = + is_depth_copy ? 
g_framebuffer_manager->ResolveEFBDepthTexture(scaled_src_rect) : + g_framebuffer_manager->ResolveEFBColorTexture(scaled_src_rect); + + g_renderer->BeginUtilityDrawing(); + + // Fill uniform buffer. + struct Uniforms + { + float src_left, src_top, src_width, src_height; + float filter_coefficients[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float pixel_height; + u32 padding; + }; + Uniforms uniforms; + const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle( + scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer()); + const float rcp_efb_width = 1.0f / static_cast(g_framebuffer_manager->GetEFBWidth()); + const float rcp_efb_height = 1.0f / static_cast(g_framebuffer_manager->GetEFBHeight()); + uniforms.src_left = framebuffer_rect.left * rcp_efb_width; + uniforms.src_top = framebuffer_rect.top * rcp_efb_height; + uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width; + uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height; + uniforms.filter_coefficients[0] = filter_coefficients.upper; + uniforms.filter_coefficients[1] = filter_coefficients.middle; + uniforms.filter_coefficients[2] = filter_coefficients.lower; + uniforms.gamma_rcp = 1.0f / gamma; + uniforms.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f; + uniforms.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f; + uniforms.pixel_height = g_ActiveConfig.bCopyEFBScaled ? rcp_efb_height : 1.0f / EFB_HEIGHT; + uniforms.padding = 0; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + // Use the copy pipeline to render the VRAM copy. + g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get()); + g_renderer->SetViewportAndScissor(entry->framebuffer->GetRect()); + g_renderer->SetPipeline(copy_pipeline); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, scale_by_half ? 
RenderState::GetLinearSamplerState() : + RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); + entry->texture->FinishedRendering(); +} + +void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, + u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const EFBCopyFilterCoefficients& filter_coefficients) +{ + // Flush EFB pokes first, as they're expected to be included. + g_framebuffer_manager->FlushEFBPokes(); + + // Get the pipeline which we will be using. If the compilation failed, this will be null. + const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToRAMPipeline(params); + if (!copy_pipeline) + { + WARN_LOG(VIDEO, "Skipping EFB copy to RAM due to missing pipeline."); + return; + } + + const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); + AbstractTexture* src_texture = + params.depth ? g_framebuffer_manager->ResolveEFBDepthTexture(scaled_src_rect) : + g_framebuffer_manager->ResolveEFBColorTexture(scaled_src_rect); + + g_renderer->BeginUtilityDrawing(); + + // Fill uniform buffer. + struct Uniforms + { + std::array position_uniform; + float y_scale; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float filter_coefficients[3]; + u32 padding; + }; + Uniforms encoder_params = {}; + const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle( + scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer()); + const float rcp_efb_height = 1.0f / static_cast(g_framebuffer_manager->GetEFBHeight()); + encoder_params.position_uniform[0] = scaled_src_rect.left; + encoder_params.position_uniform[1] = scaled_src_rect.top; + encoder_params.position_uniform[2] = static_cast(native_width); + encoder_params.position_uniform[3] = scale_by_half ?
2 : 1; + encoder_params.y_scale = y_scale; + encoder_params.gamma_rcp = 1.0f / gamma; + encoder_params.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f; + encoder_params.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f; + encoder_params.filter_coefficients[0] = filter_coefficients.upper; + encoder_params.filter_coefficients[1] = filter_coefficients.middle; + encoder_params.filter_coefficients[2] = filter_coefficients.lower; + g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params)); + + // We also need linear filtering for both box filtering and downsampling higher resolutions to 1x + // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more + // complex down filtering to average all pixels and produce the correct result. + const bool linear_filter = + (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f; + + // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left. + const u32 render_width = bytes_per_row / sizeof(u32); + const u32 render_height = num_blocks_y; + const auto encode_rect = MathUtil::Rectangle(0, 0, render_width, render_height); + + // Render to GPU texture, and then copy to CPU-accessible texture. + g_renderer->SetAndDiscardFramebuffer(m_efb_encoding_framebuffer.get()); + g_renderer->SetViewportAndScissor(encode_rect); + g_renderer->SetPipeline(copy_pipeline); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, linear_filter ? RenderState::GetLinearSamplerState() : + RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + dst->CopyFromTexture(m_efb_encoding_texture.get(), encode_rect, 0, 0, encode_rect); + g_renderer->EndUtilityDrawing(); + + // Flush if there are sufficient draws between this copy and the last.
+ g_vertex_manager->OnEFBCopyToRAM(); +} + +bool TextureCacheBase::ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, + const void* palette, TLUTFormat format) +{ + DEBUG_ASSERT(entry->texture->GetConfig().IsRenderTarget() && entry->framebuffer); + if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion) + { + ERROR_LOG(VIDEO, "Backend does not support palette conversion!"); + return false; + } + + g_renderer->BeginUtilityDrawing(); + + const u32 palette_size = unconverted->format == TextureFormat::I4 ? 32 : 512; + u32 texel_buffer_offset; + if (!g_vertex_manager->UploadTexelBuffer(palette, palette_size, + TexelBufferFormat::TEXEL_BUFFER_FORMAT_R16_UINT, + &texel_buffer_offset)) + { + ERROR_LOG(VIDEO, "Texel buffer upload failed"); + g_renderer->EndUtilityDrawing(); + return false; + } + + struct Uniforms + { + float multiplier; + u32 texel_buffer_offset; + u32 pad[2]; + }; + static_assert(std::is_standard_layout::value); + Uniforms uniforms = {}; + uniforms.multiplier = unconverted->format == TextureFormat::I4 ? 
15.0f : 255.0f; + uniforms.texel_buffer_offset = texel_buffer_offset; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get()); + g_renderer->SetViewportAndScissor(entry->texture->GetRect()); + g_renderer->SetPipeline(g_shader_cache->GetPaletteConversionPipeline(format)); + g_renderer->SetTexture(1, unconverted->texture.get()); + g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); + entry->texture->FinishedRendering(); + return true; +} + +bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, + u32 data_size, TextureFormat format, u32 width, + u32 height, u32 aligned_width, u32 aligned_height, + u32 row_stride, const u8* palette, + TLUTFormat palette_format) +{ + const auto* info = TextureConversionShaderTiled::GetDecodingShaderInfo(format); + if (!info) + return false; + + const AbstractShader* shader = g_shader_cache->GetTextureDecodingShader(format, palette_format); + if (!shader) + return false; + + // Copy to GPU-visible buffer, aligned to the data type. + const u32 bytes_per_buffer_elem = + VertexManagerBase::GetTexelBufferElementSize(info->buffer_format); + + // Allocate space in stream buffer, and copy texture + palette across. + u32 src_offset = 0, palette_offset = 0; + if (info->palette_size > 0) + { + if (!g_vertex_manager->UploadTexelBuffer(data, data_size, info->buffer_format, &src_offset, + palette, info->palette_size, + TEXEL_BUFFER_FORMAT_R16_UINT, &palette_offset)) + { + return false; + } + } + else + { + if (!g_vertex_manager->UploadTexelBuffer(data, data_size, info->buffer_format, &src_offset)) + return false; + } + + // Set up uniforms. 
+ struct Uniforms + { + u32 dst_width, dst_height; + u32 src_width, src_height; + u32 src_offset, src_row_stride; + u32 palette_offset, unused; + } uniforms = {width, height, aligned_width, + aligned_height, src_offset, row_stride / bytes_per_buffer_elem, + palette_offset}; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + g_renderer->SetComputeImageTexture(m_decoding_texture.get(), false, true); + + auto dispatch_groups = + TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height); + g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1); + + // Copy from decoding texture -> final texture + // This is because we don't want to have to create compute view for every layer + const auto copy_rect = entry->texture->GetConfig().GetMipRect(dst_level); + entry->texture->CopyRectangleFromTexture(m_decoding_texture.get(), copy_rect, 0, 0, copy_rect, 0, + dst_level); + entry->texture->FinishedRendering(); + return true; +} + u32 TextureCacheBase::TCacheEntry::BytesPerRow() const { const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt); @@ -2272,3 +2626,9 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const return temp_hash; } } + +TextureCacheBase::TexPoolEntry::TexPoolEntry(std::unique_ptr tex, + std::unique_ptr fb) + : texture(std::move(tex)), framebuffer(std::move(fb)) +{ +} diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index fc9c49311b..bd3ceab046 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -23,6 +23,7 @@ #include "VideoCommon/VideoCommon.h" struct VideoConfig; +class AbstractFramebuffer; class AbstractStagingTexture; struct TextureAndTLUTFormat @@ -68,6 +69,14 @@ struct EFBCopyParams bool copy_filter; }; +// Reduced version of the full coefficient array, with a single value for each row. 
+struct EFBCopyFilterCoefficients +{ + float upper; + float middle; + float lower; +}; + struct TextureLookupInformation { u32 address; @@ -110,13 +119,11 @@ private: static const int FRAMECOUNT_INVALID = 0; public: - // Reduced version of the full coefficient array, reduced to a single value for each row. - using CopyFilterCoefficientArray = std::array; - struct TCacheEntry { // common members std::unique_ptr texture; + std::unique_ptr framebuffer; u32 addr; u32 size_in_bytes; u64 base_hash; @@ -157,7 +164,8 @@ public: u32 pending_efb_copy_height = 0; bool pending_efb_copy_invalidated = false; - explicit TCacheEntry(std::unique_ptr tex); + explicit TCacheEntry(std::unique_ptr tex, + std::unique_ptr fb); ~TCacheEntry(); @@ -214,7 +222,10 @@ public: AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; } }; - virtual ~TextureCacheBase(); // needs virtual for DX11 dtor + TextureCacheBase(); + virtual ~TextureCacheBase(); + + bool Initialize(); void OnConfigChanged(VideoConfig& config); @@ -224,15 +235,6 @@ public: void Invalidate(); - virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) = 0; - - virtual bool CompileShaders() = 0; - virtual void DeleteShaders() = 0; - TCacheEntry* Load(const u32 stage); static void InvalidateAllBindPoints() { valid_bind_points.reset(); } static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); } @@ -262,39 +264,39 @@ public: bool clamp_top, bool clamp_bottom, const CopyFilterCoefficients::Values& filter_coefficients); - virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, - TLUTFormat format) = 0; - - // Returns true if the texture data and palette formats are supported by the GPU 
decoder. - virtual bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) - { - return false; - } - - // Decodes the specified data to the GPU texture specified by entry. - // width, height are the size of the image in pixels. - // aligned_width, aligned_height are the size of the image in pixels, aligned to the block size. - // row_stride is the number of bytes for a row of blocks, not pixels. - virtual void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, - size_t data_size, TextureFormat format, u32 width, u32 height, - u32 aligned_width, u32 aligned_height, u32 row_stride, - const u8* palette, TLUTFormat palette_format) - { - } - void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height); // Flushes all pending EFB copies to emulated RAM. void FlushEFBCopies(); - // Returns a texture config suitable for drawing a RAM EFB copy into. - static TextureConfig GetEncodingTextureConfig(); + // Returns false if the top/bottom row coefficients are zero. + static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients); protected: - TextureCacheBase(); + // Applies a palette to an EFB copy/texture. + bool ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, + TLUTFormat format); - // Returns false if the top/bottom row coefficients are zero. - bool NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const; + // Decodes the specified data to the GPU texture specified by entry. + // Returns false if the configuration is not supported. + // width, height are the size of the image in pixels. + // aligned_width, aligned_height are the size of the image in pixels, aligned to the block size. + // row_stride is the number of bytes for a row of blocks, not pixels. 
+ bool DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, u32 data_size, + TextureFormat format, u32 width, u32 height, u32 aligned_width, + u32 aligned_height, u32 row_stride, const u8* palette, + TLUTFormat palette_format); + + virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, + const EFBCopyFilterCoefficients& filter_coefficients); + virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, + const EFBRectangle& src_rect, bool scale_by_half, + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const EFBCopyFilterCoefficients& filter_coefficients); alignas(16) u8* temp = nullptr; size_t temp_size = 0; @@ -307,13 +309,17 @@ private: struct TexPoolEntry { std::unique_ptr texture; + std::unique_ptr framebuffer; int frameCount = FRAMECOUNT_INVALID; - TexPoolEntry(std::unique_ptr tex) : texture(std::move(tex)) {} + + TexPoolEntry(std::unique_ptr tex, std::unique_ptr fb); }; using TexAddrCache = std::multimap; using TexHashCache = std::multimap; using TexPool = std::unordered_multimap; + bool CreateUtilityTextures(); + void SetBackupConfig(const VideoConfig& config); TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt); @@ -325,7 +331,7 @@ private: void CheckTempSize(size_t required_size); TCacheEntry* AllocateCacheEntry(const TextureConfig& config); - std::unique_ptr AllocateTexture(const TextureConfig& config); + std::optional AllocateTexture(const TextureConfig& config); TexPool::iterator FindMatchingTextureFromPool(const TextureConfig& config); TexAddrCache::iterator GetTexCacheIter(TCacheEntry* entry); @@ -334,12 +340,6 @@ private: std::pair FindOverlappingTextures(u32 addr, u32 size_in_bytes); - virtual void 
CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, - const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) = 0; - // Removes and unlinks texture from texture cache and returns it to the pool TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter, bool discard_pending_efb_copy = false); @@ -347,10 +347,10 @@ private: void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y); // Precomputing the coefficients for the previous, current, and next lines for the copy filter. - CopyFilterCoefficientArray - GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const; - CopyFilterCoefficientArray - GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const; + static EFBCopyFilterCoefficients + GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); + static EFBCopyFilterCoefficients + GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); // Flushes a pending EFB copy to RAM from the host to the guest RAM. void WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride, @@ -385,6 +385,13 @@ private: }; BackupConfig backup_config = {}; + // Encoding texture used for EFB copies to RAM. + std::unique_ptr m_efb_encoding_texture; + std::unique_ptr m_efb_encoding_framebuffer; + + // Decoding texture used for GPU texture decoding. + std::unique_ptr m_decoding_texture; + // Pool of readback textures used for deferred EFB copies. 
std::vector> m_efb_copy_staging_texture_pool; diff --git a/Source/Core/VideoCommon/TextureConfig.cpp b/Source/Core/VideoCommon/TextureConfig.cpp index 0407576af6..98a9dbc4f3 100644 --- a/Source/Core/VideoCommon/TextureConfig.cpp +++ b/Source/Core/VideoCommon/TextureConfig.cpp @@ -9,8 +9,8 @@ bool TextureConfig::operator==(const TextureConfig& o) const { - return std::tie(width, height, levels, layers, samples, format, rendertarget) == - std::tie(o.width, o.height, o.levels, o.layers, o.samples, o.format, o.rendertarget); + return std::tie(width, height, levels, layers, samples, format, flags) == + std::tie(o.width, o.height, o.levels, o.layers, o.samples, o.format, o.flags); } bool TextureConfig::operator!=(const TextureConfig& o) const @@ -38,8 +38,3 @@ size_t TextureConfig::GetMipStride(u32 level) const { return AbstractTexture::CalculateStrideForFormat(format, std::max(width >> level, 1u)); } - -bool TextureConfig::IsMultisampled() const -{ - return samples > 1; -} diff --git a/Source/Core/VideoCommon/TextureConfig.h b/Source/Core/VideoCommon/TextureConfig.h index a2358ff3b0..ee52537285 100644 --- a/Source/Core/VideoCommon/TextureConfig.h +++ b/Source/Core/VideoCommon/TextureConfig.h @@ -34,13 +34,19 @@ enum class StagingTextureType Mutable // Optimize for CPU reads, GPU writes, allow slow CPU reads }; +enum AbstractTextureFlag : u32 +{ + AbstractTextureFlag_RenderTarget = (1 << 0), // Texture is used as a framebuffer. + AbstractTextureFlag_ComputeImage = (1 << 1), // Texture is used as a compute image. 
+}; + struct TextureConfig { constexpr TextureConfig() = default; constexpr TextureConfig(u32 width_, u32 height_, u32 levels_, u32 layers_, u32 samples_, - AbstractTextureFormat format_, bool rendertarget_) + AbstractTextureFormat format_, u32 flags_) : width(width_), height(height_), levels(levels_), layers(layers_), samples(samples_), - format(format_), rendertarget(rendertarget_) + format(format_), flags(flags_) { } @@ -50,7 +56,10 @@ struct TextureConfig MathUtil::Rectangle GetMipRect(u32 level) const; size_t GetStride() const; size_t GetMipStride(u32 level) const; - bool IsMultisampled() const; + + bool IsMultisampled() const { return samples > 1; } + bool IsRenderTarget() const { return (flags & AbstractTextureFlag_RenderTarget) != 0; } + bool IsComputeImage() const { return (flags & AbstractTextureFlag_ComputeImage) != 0; } u32 width = 0; u32 height = 0; @@ -58,7 +67,7 @@ struct TextureConfig u32 layers = 1; u32 samples = 1; AbstractTextureFormat format = AbstractTextureFormat::RGBA8; - bool rendertarget = false; + u32 flags = 0; }; namespace std @@ -71,7 +80,7 @@ struct hash result_type operator()(const argument_type& c) const noexcept { - const u64 id = static_cast(c.rendertarget) << 63 | static_cast(c.format) << 50 | + const u64 id = static_cast(c.flags) << 58 | static_cast(c.format) << 50 | static_cast(c.layers) << 48 | static_cast(c.levels) << 32 | static_cast(c.height) << 16 | static_cast(c.width); return std::hash{}(id); diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index e57cbdf603..7359c2aea3 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -15,7 +15,9 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureConversionShader.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" +#include "VideoCommon/VideoConfig.h" 
#define WRITE p += sprintf @@ -59,21 +61,10 @@ u16 GetEncodedSampleCount(EFBCopyFormat format) static void WriteHeader(char*& p, APIType ApiType) { - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { // left, top, of source rectangle within source texture // width of the destination rectangle, scale_factor (1 or 2) - WRITE(p, "uniform int4 position;\n"); - WRITE(p, "uniform float y_scale;\n"); - WRITE(p, "uniform float gamma_rcp;\n"); - WRITE(p, "uniform float2 clamp_tb;\n"); - WRITE(p, "uniform float3 filter_coefficients;\n"); - WRITE(p, "#define samp0 samp9\n"); - WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n"); - WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); - } - else if (ApiType == APIType::Vulkan) - { WRITE(p, "UBO_BINDING(std140, 1) uniform PSBlock {\n"); WRITE(p, " int4 position;\n"); WRITE(p, " float y_scale;\n"); @@ -81,8 +72,9 @@ static void WriteHeader(char*& p, APIType ApiType) WRITE(p, " float2 clamp_tb;\n"); WRITE(p, " float3 filter_coefficients;\n"); WRITE(p, "};\n"); + WRITE(p, "VARYING_LOCATION(0) in float3 v_tex0;\n"); WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"); } else // D3D { @@ -147,7 +139,7 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A else { // Handle D3D depth inversion. 
- if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) WRITE(p, "1.0 - ("); else WRITE(p, "("); @@ -225,7 +217,9 @@ static void WriteSwizzler(char*& p, const EFBCopyParams& params, EFBCopyFormat f else // D3D { WRITE(p, "void main(\n"); - WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n"); + WRITE(p, " in float3 v_tex0 : TEXCOORD0,\n"); + WRITE(p, " in float4 rawpos : SV_Position,\n"); + WRITE(p, " out float4 ocol0 : SV_Target)\n"); WRITE(p, "{\n" " int2 sampleUv;\n" " int2 uv1 = int2(rawpos.xy);\n"); @@ -846,38 +840,65 @@ const char* GenerateEncodingShader(const EFBCopyParams& params, APIType api_type // NOTE: In these uniforms, a row refers to a row of blocks, not texels. static const char decoding_shader_header[] = R"( -#ifdef VULKAN +#if defined(PALETTE_FORMAT_IA8) || defined(PALETTE_FORMAT_RGB565) || defined(PALETTE_FORMAT_RGB5A3) +#define HAS_PALETTE 1 +#endif -layout(std140, push_constant) uniform PushConstants { - uvec2 dst_size; - uvec2 src_size; - uint src_offset; - uint src_row_stride; - uint palette_offset; -} push_constants; -#define u_dst_size (push_constants.dst_size) -#define u_src_size (push_constants.src_size) -#define u_src_offset (push_constants.src_offset) -#define u_src_row_stride (push_constants.src_row_stride) -#define u_palette_offset (push_constants.palette_offset) +#ifdef API_D3D +cbuffer UBO : register(b0) { +#else +UBO_BINDING(std140, 1) uniform UBO { +#endif + uint2 u_dst_size; + uint2 u_src_size; + uint u_src_offset; + uint u_src_row_stride; + uint u_palette_offset; +}; -TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; -TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; +#ifdef API_D3D -IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image; +Buffer s_input_buffer : register(t0); +#ifdef HAS_PALETTE +Buffer s_palette_buffer : register(t1); +#endif + +RWTexture2DArray output_image : register(u0); + 
+// Helpers for reading/writing. +#define texelFetch(buffer, pos) buffer.Load(pos) +#define imageStore(image, coords, value) image[coords] = value +#define GROUP_MEMORY_BARRIER_WITH_SYNC GroupMemoryBarrierWithGroupSync(); +#define GROUP_SHARED groupshared + +#define DEFINE_MAIN(lx, ly) \ + [numthreads(lx, ly, 1)] \ + void main(uint3 gl_WorkGroupID : SV_GroupId, \ + uint3 gl_LocalInvocationID : SV_GroupThreadID, \ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID) + +uint bitfieldExtract(uint val, int off, int size) +{ + // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n" + // Microsoft's HLSL compiler automatically optimises this to a bitfield extract instruction. + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; +} #else -uniform uvec2 u_dst_size; -uniform uvec2 u_src_size; -uniform uint u_src_offset; -uniform uint u_src_row_stride; -uniform uint u_palette_offset; +TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; +#ifdef HAS_PALETTE +TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; +#endif +IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image; -SAMPLER_BINDING(9) uniform usamplerBuffer s_input_buffer; -SAMPLER_BINDING(10) uniform usamplerBuffer s_palette_buffer; +#define GROUP_MEMORY_BARRIER_WITH_SYNC memoryBarrierShared(); barrier(); +#define GROUP_SHARED shared -layout(rgba8, binding = 0) uniform writeonly image2DArray output_image; +#define DEFINE_MAIN(lx, ly) \ + layout(local_size_x = lx, local_size_y = ly) in; \ + void main() #endif @@ -908,10 +929,10 @@ uint Convert6To8(uint v) return (v << 2) | (v >> 4); } -uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords) +uint GetTiledTexelOffset(uint2 block_size, uint2 coords) { - uvec2 block = coords / block_size; - uvec2 offset = coords % block_size; + uint2 block = coords / block_size; + uint2 offset = coords % block_size; uint buffer_pos = u_src_offset; buffer_pos += block.y * u_src_row_stride; buffer_pos += block.x * 
(block_size.x * block_size.y); @@ -920,16 +941,16 @@ uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords) return buffer_pos; } -uvec4 GetPaletteColor(uint index) +uint4 GetPaletteColor(uint index) { // Fetch and swap BE to LE. uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x); - uvec4 color; + uint4 color; #if defined(PALETTE_FORMAT_IA8) uint a = bitfieldExtract(val, 8, 8); uint i = bitfieldExtract(val, 0, 8); - color = uvec4(i, i, i, a); + color = uint4(i, i, i, a); #elif defined(PALETTE_FORMAT_RGB565) color.x = Convert5To8(bitfieldExtract(val, 11, 5)); color.y = Convert6To8(bitfieldExtract(val, 5, 6)); @@ -953,29 +974,27 @@ uvec4 GetPaletteColor(uint index) } #else // Not used. - color = uvec4(0, 0, 0, 0); + color = uint4(0, 0, 0, 0); #endif return color; } -vec4 GetPaletteColorNormalized(uint index) +float4 GetPaletteColorNormalized(uint index) { - uvec4 color = GetPaletteColor(index); - return vec4(color) / 255.0; + uint4 color = GetPaletteColor(index); + return float4(color) / 255.0; } )"; static const std::map s_decoding_shader_info{ {TextureFormat::I4, - {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x8 blocks, 4 bits per pixel // We need to do the tiling manually here because the texel size is smaller than @@ -996,108 +1015,98 @@ static const std::map s_decoding_shader_info{ else i = Convert4To8((val & 0x0Fu)); - uvec4 color = uvec4(i, i, i, i); - vec4 norm_color = vec4(color) / 255.0; + uint4 color = uint4(i, i, i, i); + float4 norm_color = float4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::IA4, - {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 
8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x4 blocks, 8 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; uint i = Convert4To8((val & 0x0Fu)); uint a = Convert4To8((val >> 4)); - uvec4 color = uvec4(i, i, i, a); - vec4 norm_color = vec4(color) / 255.0; + uint4 color = uint4(i, i, i, a); + float4 norm_color = float4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::I8, - {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x4 blocks, 8 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); uint i = texelFetch(s_input_buffer, int(buffer_pos)).x; - uvec4 color = uvec4(i, i, i, i); - vec4 norm_color = vec4(color) / 255.0; + uint4 color = uint4(i, i, i, i); + float4 norm_color = float4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::IA8, - {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks, 16 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords); + uint buffer_pos = 
GetTiledTexelOffset(uint2(4u, 4u), coords); uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; uint a = (val & 0xFFu); uint i = (val >> 8); - uvec4 color = uvec4(i, i, i, a); - vec4 norm_color = vec4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + uint4 color = uint4(i, i, i, a); + float4 norm_color = float4(color) / 255.0; + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::RGB565, - {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks - uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x); - uvec4 color; + uint4 color; color.x = Convert5To8(bitfieldExtract(val, 11, 5)); color.y = Convert6To8(bitfieldExtract(val, 5, 6)); color.z = Convert5To8(bitfieldExtract(val, 0, 5)); color.a = 255u; - vec4 norm_color = vec4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = float4(color) / 255.0; + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::RGB5A3, - {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks - uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x); - uvec4 color; + uint4 color; if ((val & 0x8000u) != 0u) { color.x = Convert5To8(bitfieldExtract(val, 10, 5)); @@ 
-1113,19 +1122,17 @@ static const std::map s_decoding_shader_info{ color.b = Convert4To8(bitfieldExtract(val, 0, 4)); } - vec4 norm_color = vec4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = float4(color) / 255.0; + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::RGBA8, - {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks // We can't use the normal calculation function, as these are packed as the AR channels @@ -1144,18 +1151,18 @@ static const std::map s_decoding_shader_info{ uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x; uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x; - uvec4 color; + uint4 color; color.a = (val1 & 0xFFu); color.r = (val1 >> 8); color.g = (val2 & 0xFFu); color.b = (val2 >> 8); - vec4 norm_color = vec4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = float4(color) / 255.0; + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::CMPR, - {BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true, + {TEXEL_BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true, R"( // In the compute version of this decoder, we flatten the blocks to a one-dimension array. // Each group is subdivided into 16, and the first thread in each group fetches the DXT data. 
@@ -1167,17 +1174,15 @@ static const std::map s_decoding_shader_info{ #define BLOCK_SIZE (BLOCK_SIZE_X * BLOCK_SIZE_Y) #define BLOCKS_PER_GROUP (GROUP_SIZE / BLOCK_SIZE) - layout(local_size_x = GROUP_SIZE, local_size_y = 1) in; - - shared uvec2 shared_temp[BLOCKS_PER_GROUP]; - uint DXTBlend(uint v1, uint v2) { // 3/8 blend, which is close to 1/3 return ((v1 * 3u + v2 * 5u) >> 3); } - void main() + GROUP_SHARED uint2 shared_temp[BLOCKS_PER_GROUP]; + + DEFINE_MAIN(GROUP_SIZE, 8) { uint local_thread_id = gl_LocalInvocationID.x; uint block_in_group = local_thread_id / BLOCK_SIZE; @@ -1188,7 +1193,7 @@ static const std::map s_decoding_shader_info{ // from the block size of the overall texture (4 vs 8). We can however use a multiply and // subtraction to avoid the modulo for calculating the block's X coordinate. uint blocks_wide = u_src_size.x / BLOCK_SIZE_X; - uvec2 block_coords; + uint2 block_coords; block_coords.y = block_index / blocks_wide; block_coords.x = block_index - (block_coords.y * blocks_wide); @@ -1196,8 +1201,8 @@ static const std::map s_decoding_shader_info{ if (thread_in_block == 0u) { // Calculate tiled block coordinates. - uvec2 tile_block_coords = block_coords / 2u; - uvec2 subtile_block_coords = block_coords % 2u; + uint2 tile_block_coords = block_coords / 2u; + uint2 subtile_block_coords = block_coords % 2u; uint buffer_pos = u_src_offset; buffer_pos += tile_block_coords.y * u_src_row_stride; buffer_pos += tile_block_coords.x * 4u; @@ -1205,16 +1210,15 @@ static const std::map s_decoding_shader_info{ buffer_pos += subtile_block_coords.x; // Read the entire DXT block to shared memory. - uvec2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy; + uint2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy; shared_temp[block_in_group] = raw_data; } // Ensure store is completed before the remaining threads in the block continue. - memoryBarrierShared(); - barrier(); + GROUP_MEMORY_BARRIER_WITH_SYNC; // Unpack colors and swap BE to LE. 
- uvec2 raw_data = shared_temp[block_in_group]; + uint2 raw_data = shared_temp[block_in_group]; uint swapped = ((raw_data.x & 0xFF00FF00u) >> 8) | ((raw_data.x & 0x00FF00FFu) << 8); uint c1 = swapped & 0xFFFFu; uint c2 = swapped >> 16; @@ -1230,18 +1234,18 @@ static const std::map s_decoding_shader_info{ // Determine the four colors the block can use. // It's quicker to just precalculate all four colors rather than branching on the index. // NOTE: These must be masked with 0xFF. This is done at the normalization stage below. - uvec4 color0, color1, color2, color3; - color0 = uvec4(red1, green1, blue1, 255u); - color1 = uvec4(red2, green2, blue2, 255u); + uint4 color0, color1, color2, color3; + color0 = uint4(red1, green1, blue1, 255u); + color1 = uint4(red2, green2, blue2, 255u); if (c1 > c2) { - color2 = uvec4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u); - color3 = uvec4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u); + color2 = uint4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u); + color3 = uint4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u); } else { - color2 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u); - color3 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u); + color2 = uint4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u); + color3 = uint4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u); } // Calculate the texel coordinates that we will write to. @@ -1257,7 +1261,7 @@ static const std::map s_decoding_shader_info{ // Select the un-normalized color from the precalculated color array. // Using a switch statement here removes the need for dynamic indexing of an array. 
- uvec4 color; + uint4 color; switch (index) { case 0u: color = color0; break; @@ -1268,19 +1272,17 @@ static const std::map s_decoding_shader_info{ } // Normalize and write to the output image. - vec4 norm_color = vec4(color & 0xFFu) / 255.0; - imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color); + float4 norm_color = float4(color & 0xFFu) / 255.0; + imageStore(output_image, int3(int2(uint2(global_x, global_y)), 0), norm_color); } )"}}, {TextureFormat::C4, - {BUFFER_FORMAT_R8_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C4)), 8, 8, - false, + {TEXEL_BUFFER_FORMAT_R8_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C4)), + 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x8 blocks, 4 bits per pixel // We need to do the tiling manually here because the texel size is smaller than @@ -1296,58 +1298,52 @@ static const std::map s_decoding_shader_info{ // Select high nibble for odd texels, low for even. uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; uint index = ((coords.x & 1u) == 0u) ? 
(val >> 4) : (val & 0x0Fu); - vec4 norm_color = GetPaletteColorNormalized(index); - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = GetPaletteColorNormalized(index); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::C8, - {BUFFER_FORMAT_R8_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C8)), 8, 8, - false, + {TEXEL_BUFFER_FORMAT_R8_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C8)), + 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x4 blocks, 8 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); uint index = texelFetch(s_input_buffer, int(buffer_pos)).x; - vec4 norm_color = GetPaletteColorNormalized(index); - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = GetPaletteColorNormalized(index); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::C14X2, - {BUFFER_FORMAT_R16_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C14X2)), 8, - 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, + static_cast(TexDecoder_GetPaletteSize(TextureFormat::C14X2)), 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks, 16 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu; - vec4 norm_color = GetPaletteColorNormalized(index); - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = GetPaletteColorNormalized(index); + 
imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, // We do the inverse BT.601 conversion for YCbCr to RGB // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion {TextureFormat::XFB, - {BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 uv = gl_GlobalInvocationID.xy; + uint2 uv = gl_GlobalInvocationID.xy; int buffer_pos = int(u_src_offset + (uv.y * u_src_row_stride) + (uv.x / 2u)); - vec4 yuyv = vec4(texelFetch(s_input_buffer, buffer_pos)); + float4 yuyv = float4(texelFetch(s_input_buffer, buffer_pos)); float y = mix(yuyv.r, yuyv.b, (uv.x & 1u) == 1u); @@ -1355,33 +1351,21 @@ static const std::map s_decoding_shader_info{ float uComp = yuyv.g - 128.0; float vComp = yuyv.a - 128.0; - vec4 rgb = vec4(yComp + (1.596 * vComp), + float4 rgb = float4(yComp + (1.596 * vComp), yComp - (0.813 * vComp) - (0.391 * uComp), yComp + (2.018 * uComp), 255.0); - vec4 rgba_norm = rgb / 255.0; - imageStore(output_image, ivec3(ivec2(uv), 0), rgba_norm); + float4 rgba_norm = rgb / 255.0; + imageStore(output_image, int3(int2(uv), 0), rgba_norm); } )"}}}; -static const std::array s_buffer_bytes_per_texel = {{ - 1, // BUFFER_FORMAT_R8_UINT - 2, // BUFFER_FORMAT_R16_UINT - 8, // BUFFER_FORMAT_R32G32_UINT - 4, // BUFFER_FORMAT_RGBA8_UINT -}}; - const DecodingShaderInfo* GetDecodingShaderInfo(TextureFormat format) { auto iter = s_decoding_shader_info.find(format); return iter != s_decoding_shader_info.end() ? &iter->second : nullptr; } -u32 GetBytesPerBufferElement(BufferFormat buffer_format) -{ - return s_buffer_bytes_per_texel[buffer_format]; -} - std::pair GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height) { // Flatten to a single dimension? 
@@ -1419,4 +1403,126 @@ std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_form return ss.str(); } +std::string GeneratePaletteConversionShader(TLUTFormat palette_format, APIType api_type) +{ + std::stringstream ss; + + ss << R"( +int Convert3To8(int v) +{ + // Swizzle bits: 00000123 -> 12312312 + return (v << 5) | (v << 2) | (v >> 1); +} +int Convert4To8(int v) +{ + // Swizzle bits: 00001234 -> 12341234 + return (v << 4) | v; +} +int Convert5To8(int v) +{ + // Swizzle bits: 00012345 -> 12345123 + return (v << 3) | (v >> 2); +} +int Convert6To8(int v) +{ + // Swizzle bits: 00123456 -> 12345612 + return (v << 2) | (v >> 4); +})"; + + switch (palette_format) + { + case TLUTFormat::IA8: + ss << R"( +float4 DecodePixel(int val) +{ + int i = val & 0xFF; + int a = val >> 8; + return float4(i, i, i, a) / 255.0; +})"; + break; + + case TLUTFormat::RGB565: + ss << R"( +float4 DecodePixel(int val) +{ + int r, g, b, a; + r = Convert5To8((val >> 11) & 0x1f); + g = Convert6To8((val >> 5) & 0x3f); + b = Convert5To8((val) & 0x1f); + a = 0xFF; + return float4(r, g, b, a) / 255.0; +})"; + break; + + case TLUTFormat::RGB5A3: + ss << R"( +float4 DecodePixel(int val) +{ + int r,g,b,a; + if ((val&0x8000) > 0) + { + r=Convert5To8((val>>10) & 0x1f); + g=Convert5To8((val>>5 ) & 0x1f); + b=Convert5To8((val ) & 0x1f); + a=0xFF; + } + else + { + a=Convert3To8((val>>12) & 0x7); + r=Convert4To8((val>>8 ) & 0xf); + g=Convert4To8((val>>4 ) & 0xf); + b=Convert4To8((val ) & 0xf); + } + return float4(r, g, b, a) / 255.0; +})"; + break; + + default: + PanicAlert("Unknown format"); + break; + } + + ss << "\n"; + + if (api_type == APIType::D3D) + { + ss << "Buffer tex0 : register(t0);\n"; + ss << "Texture2DArray tex1 : register(t1);\n"; + ss << "SamplerState samp1 : register(s1);\n"; + ss << "cbuffer PSBlock : register(b0) {\n"; + } + else + { + ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n"; + ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n"; + ss 
<< "UBO_BINDING(std140, 1) uniform PSBlock {\n"; + } + + ss << " float multiplier;\n"; + ss << " int texel_buffer_offset;\n"; + ss << "};\n"; + + if (api_type == APIType::D3D) + { + ss << "void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target) {\n"; + ss << " int src = int(round(tex1.Sample(samp1, v_tex0).r * multiplier));\n"; + ss << " src = int(tex0.Load(src + texel_buffer_offset).r);\n"; + } + else + { + ss << "VARYING_LOCATION(0) in float3 v_tex0;\n"; + ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"; + ss << "void main() {\n"; + ss << " float3 coords = v_tex0;\n"; + ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n"; + ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n"; + } + + ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n"; + ss << " ocol0 = DecodePixel(src);\n"; + ss << "}\n"; + + return ss.str(); +} + } // namespace TextureConversionShaderTiled diff --git a/Source/Core/VideoCommon/TextureConversionShader.h b/Source/Core/VideoCommon/TextureConversionShader.h index 804d59b193..f6c266bd63 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.h +++ b/Source/Core/VideoCommon/TextureConversionShader.h @@ -13,6 +13,7 @@ enum class APIType; enum class TextureFormat; enum class EFBCopyFormat; enum class TLUTFormat; +enum TexelBufferFormat : u32; struct EFBCopyParams; namespace TextureConversionShaderTiled @@ -21,20 +22,10 @@ u16 GetEncodedSampleCount(EFBCopyFormat format); const char* GenerateEncodingShader(const EFBCopyParams& params, APIType ApiType); -// View format of the input data to the texture decoding shader. -enum BufferFormat -{ - BUFFER_FORMAT_R8_UINT, - BUFFER_FORMAT_R16_UINT, - BUFFER_FORMAT_R32G32_UINT, - BUFFER_FORMAT_RGBA8_UINT, - BUFFER_FORMAT_COUNT -}; - // Information required to compile and dispatch a texture decoding shader. 
struct DecodingShaderInfo { - BufferFormat buffer_format; + TexelBufferFormat buffer_format; u32 palette_size; u32 group_size_x; u32 group_size_y; @@ -46,10 +37,6 @@ struct DecodingShaderInfo // If this format does not have a shader written for it, returns nullptr. const DecodingShaderInfo* GetDecodingShaderInfo(TextureFormat format); -// Determine how many bytes there are in each element of the texel buffer. -// Needed for alignment and stride calculations. -u32 GetBytesPerBufferElement(BufferFormat buffer_format); - // Determine how many thread groups should be dispatched for an image of the specified width/height. // First is the number of X groups, second is the number of Y groups, Z is always one. std::pair GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height); @@ -58,4 +45,7 @@ std::pair GetDispatchCount(const DecodingShaderInfo* info, u32 width, std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_format, APIType api_type); +// Returns the GLSL string containing the palette conversion shader for the specified format. 
+std::string GeneratePaletteConversionShader(TLUTFormat palette_format, APIType api_type); + } // namespace TextureConversionShaderTiled diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index 5cfb3e7c76..15de7ba9a7 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -31,72 +31,99 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i return out; } -ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) +static void WriteHeader(APIType api_type, ShaderCode& out) { - const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth; - - ShaderCode out; - if (api_type == APIType::OpenGL) + if (api_type == APIType::D3D) { - out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" - "uniform float3 filter_coefficients;\n" - "uniform float gamma_rcp;\n" - "uniform float2 clamp_tb;\n" - "uniform float pixel_height;\n"); - out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" - " return texture(samp9, float3(uv.x, clamp(uv.y - (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), %s));\n" - "}\n", - mono_depth ? 
"0.0" : "uv.z"); - out.Write("#define uv0 f_uv0\n" - "in vec3 uv0;\n" - "out vec4 ocol0;\n" - "void main(){\n"); + out.Write("cbuffer PSBlock : register(b0) {\n" + " float2 src_offset, src_size;\n" + " float3 filter_coefficients;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float pixel_height;\n" + "};\n\n"); } - else if (api_type == APIType::Vulkan) + else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n" + " float2 src_offset, src_size;\n" " float3 filter_coefficients;\n" " float gamma_rcp;\n" " float2 clamp_tb;\n" " float pixel_height;\n" "};\n"); + } +} + +ShaderCode GenerateVertexShader(APIType api_type) +{ + ShaderCode out; + WriteHeader(api_type, out); + + if (api_type == APIType::D3D) + { + out.Write("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n" + " out float4 opos : SV_Position) {\n"); + } + else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { + out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n" + "#define id gl_VertexID\n" + "#define opos gl_Position\n" + "void main() {\n"); + } + out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"); + out.Write( + " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"); + out.Write(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n"); + + // NDC space is flipped in Vulkan + if (api_type == APIType::Vulkan) + out.Write(" opos.y = -opos.y;\n"); + + out.Write("}\n"); + + return out; +} + +ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) +{ + const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth; + + ShaderCode out; + WriteHeader(api_type, out); + + if (api_type == APIType::D3D) + { + out.Write("Texture2DArray tex0 : register(t0);\n" + "SamplerState samp0 : register(s0);\n" + "float4 SampleEFB(float3 uv, float y_offset) {\n" + " return tex0.Sample(samp0, float3(uv.x, 
clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), %s));\n" + "}\n\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{\n"); + } + else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " "clamp_tb.x, clamp_tb.y), %s));\n" "}\n", mono_depth ? "0.0" : "uv.z"); - out.Write("layout(location = 0) in vec3 uv0;\n" - "layout(location = 1) in vec4 col0;\n" - "layout(location = 0) out vec4 ocol0;" - "void main(){\n"); - } - else if (api_type == APIType::D3D) - { - out.Write("Texture2DArray tex0 : register(t0);\n" - "SamplerState samp0 : register(s0);\n" - "uniform float3 filter_coefficients;\n" - "uniform float gamma_rcp;\n" - "uniform float2 clamp_tb;\n" - "uniform float pixel_height;\n\n"); - out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" - " return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), %s));\n" - "}\n", - mono_depth ? "0.0" : "uv.z"); - out.Write("void main(out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0) {\n"); + out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n" + "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;" + "void main()\n{\n"); } // The copy filter applies to both color and depth copies. This has been verified on hardware. // The filter is only applied to the RGB channels, the alpha channel is left intact. 
if (uid_data->copy_filter) { - out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n" - " float4 current_row = SampleEFB(uv0, 0.0f);\n" - " float4 next_row = SampleEFB(uv0, 1.0f);\n" + out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n" + " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" + " float4 next_row = SampleEFB(v_tex0, 1.0f);\n" " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n" " current_row.rgb * filter_coefficients[1] +\n" " next_row.rgb * filter_coefficients[2], \n" @@ -105,14 +132,14 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) else { out.Write( - " float4 current_row = SampleEFB(uv0, 0.0f);\n" + " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" " current_row.a);\n"); } if (uid_data->is_depth_copy) { - if (api_type == APIType::D3D || api_type == APIType::Vulkan) + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) out.Write("texcol.x = 1.0 - texcol.x;\n"); out.Write(" int depth = int(texcol.x * 16777216.0);\n" @@ -268,8 +295,8 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) break; case EFBCopyFormat::XFB: - out.Write(" ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), " - "texcol.a);\n"); + out.Write( + " ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), 1.0f);\n"); break; default: diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.h b/Source/Core/VideoCommon/TextureConverterShaderGen.h index 1f231f53c3..af8e7af0bc 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.h +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.h @@ -28,7 +28,8 @@ struct UidData using TCShaderUid = ShaderUid; -ShaderCode GenerateShader(APIType api_type, const UidData* uid_data); +ShaderCode GenerateVertexShader(APIType api_type); +ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data); TCShaderUid 
GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, bool scale_by_half, bool copy_filter); diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 27e69808dc..1b9695f5ce 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -52,8 +52,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch; const bool early_depth = uid_data->early_depth != 0; const bool per_pixel_depth = uid_data->per_pixel_depth != 0; - const bool bounding_box = - host_config.bounding_box && g_ActiveConfig.BBoxUseFragmentShaderImplementation(); + const bool bounding_box = host_config.bounding_box; const u32 numTexgen = uid_data->num_texgens; ShaderCode out; @@ -1058,7 +1057,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, if (host_config.fast_depth_calc) { - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else out.Write(" int zCoord = int(rawpos.z * 16777216.0);\n"); @@ -1113,7 +1112,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write(" // If early depth is enabled, write to zbuffer before depth textures\n"); out.Write(" // If early depth isn't enabled, we write to the zbuffer here\n"); out.Write(" int zbuffer_zCoord = bpmem_late_ztest ? 
zCoord : early_zCoord;\n"); - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n"); else out.Write(" depth = float(zbuffer_zCoord) / 16777216.0;\n"); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 9cb7d23089..673569f4d9 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -19,6 +19,7 @@ #include "VideoCommon/DataReader.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderManager.h" @@ -131,7 +132,7 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d auto iter = s_native_vertex_map.find(decl); if (iter == s_native_vertex_map.end()) { - std::unique_ptr fmt = g_vertex_manager->CreateNativeVertexFormat(decl); + std::unique_ptr fmt = g_renderer->CreateNativeVertexFormat(decl); auto ipair = s_native_vertex_map.emplace(decl, std::move(fmt)); iter = ipair.first; } @@ -228,9 +229,7 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal const PortableVertexDeclaration& format = loader->m_native_vtx_decl; std::unique_ptr& native = s_native_vertex_map[format]; if (!native) - { - native = g_vertex_manager->CreateNativeVertexFormat(format); - } + native = g_renderer->CreateNativeVertexFormat(format); loader->m_native_vertex_format = native.get(); } state->vertex_loaders[vtx_attr_group] = loader; diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 0e178e4d9b..bbc355a98c 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -17,8 +17,9 @@ #include "Core/ConfigManager.h" 
#include "VideoCommon/BPMemory.h" +#include "VideoCommon/BoundingBox.h" #include "VideoCommon/DataReader.h" -#include "VideoCommon/Debugger.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" @@ -79,11 +80,15 @@ static bool AspectIs16_9(float width, float height) } VertexManagerBase::VertexManagerBase() + : m_cpu_vertex_buffer(MAXVBUFFERSIZE), m_cpu_index_buffer(MAXIBUFFERSIZE) { } -VertexManagerBase::~VertexManagerBase() +VertexManagerBase::~VertexManagerBase() = default; + +bool VertexManagerBase::Initialize() { + return true; } u32 VertexManagerBase::GetRemainingSize() const @@ -94,6 +99,10 @@ u32 VertexManagerBase::GetRemainingSize() const DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall) { + // Flush all EFB pokes and invalidate the peek cache. + g_framebuffer_manager->InvalidatePeekCache(); + g_framebuffer_manager->FlushEFBPokes(); + // The SSE vertex loader can write up to 4 bytes past the end u32 const needed_vertex_bytes = count * stride + 4; @@ -132,7 +141,18 @@ DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, // need to alloc new buffer if (m_is_flushed) { - g_vertex_manager->ResetBuffer(stride, cullall); + if (cullall) + { + // This buffer isn't getting sent to the GPU. Just allocate it on the cpu. 
+ m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data(); + m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); + IndexGenerator::Start(m_cpu_index_buffer.data()); + } + else + { + ResetBuffer(stride); + } + m_is_flushed = false; } @@ -210,6 +230,48 @@ std::pair VertexManagerBase::ResetFlushAspectRatioCount() return val; } +void VertexManagerBase::ResetBuffer(u32 vertex_stride) +{ + m_base_buffer_pointer = m_cpu_vertex_buffer.data(); + m_cur_buffer_pointer = m_cpu_vertex_buffer.data(); + m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); + IndexGenerator::Start(m_cpu_index_buffer.data()); +} + +void VertexManagerBase::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) +{ + *out_base_vertex = 0; + *out_base_index = 0; +} + +void VertexManagerBase::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) +{ + // If bounding box is enabled, we need to flush any changes first, then invalidate what we have. + if (::BoundingBox::active && g_ActiveConfig.bBBoxEnable && + g_ActiveConfig.backend_info.bSupportsBBox) + { + g_renderer->BBoxFlush(); + } + + g_renderer->DrawIndexed(base_index, num_indices, base_vertex); +} + +void VertexManagerBase::UploadUniforms() +{ +} + +void VertexManagerBase::InvalidateConstants() +{ + VertexShaderManager::dirty = true; + GeometryShaderManager::dirty = true; + PixelShaderManager::dirty = true; +} + +void VertexManagerBase::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) +{ +} + void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_stride, u32 num_vertices, const u16* indices, u32 num_indices, u32* out_base_vertex, u32* out_base_index) @@ -218,7 +280,7 @@ void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_s ASSERT(m_is_flushed); // Copy into the buffers usually used for GX drawing. 
- ResetBuffer(std::max(vertex_stride, 1u), false); + ResetBuffer(std::max(vertex_stride, 1u)); if (vertices) { const u32 copy_size = vertex_stride * num_vertices; @@ -232,64 +294,42 @@ void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_s CommitBuffer(num_vertices, vertex_stride, num_indices, out_base_vertex, out_base_index); } -static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, - bool has_arbitrary_mips) +u32 VertexManagerBase::GetTexelBufferElementSize(TexelBufferFormat buffer_format) { - const FourTexUnits& tex = bpmem.tex[index / 4]; - const TexMode0& tm0 = tex.texMode0[index % 4]; + // R8 - 1, R16 - 2, RGBA8 - 4, R32G32 - 8 + return 1u << static_cast(buffer_format); +} - SamplerState state = {}; - state.Generate(bpmem, index); +bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) +{ + return false; +} - // Force texture filtering config option. - if (g_ActiveConfig.bForceFiltering) - { - state.min_filter = SamplerState::Filter::Linear; - state.mag_filter = SamplerState::Filter::Linear; - state.mipmap_filter = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? - SamplerState::Filter::Linear : - SamplerState::Filter::Point; - } +bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* palette_data, + u32 palette_size, TexelBufferFormat palette_format, + u32* palette_offset) +{ + return false; +} - // Custom textures may have a greater number of mips - if (custom_tex) - state.max_lod = 255; +void VertexManagerBase::LoadTextures() +{ + BitSet32 usedtextures; + for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) + if (bpmem.tevorders[i / 2].getEnable(i & 1)) + usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true; - // Anisotropic filtering option. 
- if (g_ActiveConfig.iMaxAnisotropy != 0 && !SamplerCommon::IsBpTexMode0PointFiltering(tm0)) - { - // https://www.opengl.org/registry/specs/EXT/texture_filter_anisotropic.txt - // For predictable results on all hardware/drivers, only use one of: - // GL_LINEAR + GL_LINEAR (No Mipmaps [Bilinear]) - // GL_LINEAR + GL_LINEAR_MIPMAP_LINEAR (w/ Mipmaps [Trilinear]) - // Letting the game set other combinations will have varying arbitrary results; - // possibly being interpreted as equal to bilinear/trilinear, implicitly - // disabling anisotropy, or changing the anisotropic algorithm employed. - state.min_filter = SamplerState::Filter::Linear; - state.mag_filter = SamplerState::Filter::Linear; - if (SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) - state.mipmap_filter = SamplerState::Filter::Linear; - state.anisotropic_filtering = 1; - } - else - { - state.anisotropic_filtering = 0; - } + if (bpmem.genMode.numindstages > 0) + for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; - if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) - { - // Apply a secondary bias calculated from the IR scale to pull inwards mipmaps - // that have arbitrary contents, eg. are used for fog effects where the - // distance they kick in at is important to preserve at any resolution. - // Correct this with the upscaling factor of custom textures. 
- s64 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f; - state.lod_bias = MathUtil::Clamp(state.lod_bias + lod_offset, -32768, 32767); + for (unsigned int i : usedtextures) + g_texture_cache->Load(i); - // Anisotropic also pushes mips farther away so it cannot be used either - state.anisotropic_filtering = 0; - } - - g_renderer->SetSamplerState(index, state); + g_texture_cache->BindTextures(); } void VertexManagerBase::Flush() @@ -297,6 +337,8 @@ void VertexManagerBase::Flush() if (m_is_flushed) return; + m_is_flushed = true; + // loading a state will invalidate BP, so check for it g_video_backend->CheckInvalidState(); @@ -340,41 +382,6 @@ void VertexManagerBase::Flush() (bpmem.alpha_test.hex >> 16) & 0xff); #endif - // If the primitave is marked CullAll. All we need to do is update the vertex constants and - // calculate the zfreeze refrence slope - if (!m_cull_all) - { - BitSet32 usedtextures; - for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) - if (bpmem.tevorders[i / 2].getEnable(i & 1)) - usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true; - - if (bpmem.genMode.numindstages > 0) - for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; - - for (unsigned int i : usedtextures) - { - const auto* tentry = g_texture_cache->Load(i); - - if (tentry) - { - float custom_tex_scale = tentry->GetWidth() / float(tentry->native_width); - SetSamplerState(i, custom_tex_scale, tentry->is_custom_tex, tentry->has_arbitrary_mips); - PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height); - } - else - { - ERROR_LOG(VIDEO, "error loading texture"); - } - } - g_texture_cache->BindTextures(); - } - - // set global vertex constants - VertexShaderManager::SetConstants(); - // Track some stats used elsewhere by the anamorphic widescreen heuristic. 
if (!SConfig::GetInstance().bWii) { @@ -394,6 +401,7 @@ void VertexManagerBase::Flush() } // Calculate ZSlope for zfreeze + VertexShaderManager::SetConstants(); if (!bpmem.genMode.zfreeze) { // Must be done after VertexShaderManager::SetConstants() @@ -407,20 +415,24 @@ void VertexManagerBase::Flush() if (!m_cull_all) { - // Update and upload constants. Note for the Vulkan backend, this must occur before the - // vertex/index buffer is committed, otherwise the data will be associated with the - // previous command buffer, instead of the one with the draw if there is an overflow. - GeometryShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); - UploadConstants(); - - // Now the vertices can be flushed to the GPU. + // Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call + // must be careful to not upload any utility vertices, as the binding will be lost otherwise. const u32 num_indices = IndexGenerator::GetIndexLen(); u32 base_vertex, base_index; CommitBuffer(IndexGenerator::GetNumVerts(), VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices, &base_vertex, &base_index); + // Texture loading can cause palettes to be applied (-> uniforms -> draws). + // Palette application does not use vertices, only a full-screen quad, so this is okay. + // Same with GPU texture decoding, which uses compute shaders. + LoadTextures(); + + // Now we can upload uniforms, as nothing else will override them. + GeometryShaderManager::SetConstants(); + PixelShaderManager::SetConstants(); + UploadUniforms(); + // Update the pipeline, or compile one if needed. UpdatePipelineConfig(); UpdatePipelineObject(); @@ -435,18 +447,17 @@ void VertexManagerBase::Flush() if (PerfQueryBase::ShouldEmulate()) g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? 
PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); + + OnDraw(); } } - GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); - if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens) + { ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value()); - - m_is_flushed = true; - m_cull_all = false; + } } void VertexManagerBase::DoState(PointerWrap& p) @@ -649,3 +660,109 @@ void VertexManagerBase::UpdatePipelineObject() break; } } + +void VertexManagerBase::OnDraw() +{ + m_draw_counter++; + + // If we didn't have any CPU access last frame, do nothing. + if (m_scheduled_command_buffer_kicks.empty() || !m_allow_background_execution) + return; + + // Check if this draw is scheduled to kick a command buffer. + // The draw counters will always be sorted so a binary search is possible here. + if (std::binary_search(m_scheduled_command_buffer_kicks.begin(), + m_scheduled_command_buffer_kicks.end(), m_draw_counter)) + { + // Kick a command buffer on the background thread. + g_renderer->Flush(); + } +} + +void VertexManagerBase::OnCPUEFBAccess() +{ + // Check this isn't another access without any draws inbetween. + if (!m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter) + return; + + // Store the current draw counter for scheduling in OnEndFrame. + m_cpu_accesses_this_frame.emplace_back(m_draw_counter); +} + +void VertexManagerBase::OnEFBCopyToRAM() +{ + // If we're not deferring, try to preempt it next frame. + if (!g_ActiveConfig.bDeferEFBCopies) + { + OnCPUEFBAccess(); + return; + } + + // Otherwise, only execute if we have at least 10 objects between us and the last copy. 
+ const u32 diff = m_draw_counter - m_last_efb_copy_draw_counter; + m_last_efb_copy_draw_counter = m_draw_counter; + if (diff < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK) + return; + + g_renderer->Flush(); +} + +void VertexManagerBase::OnEndFrame() +{ + m_draw_counter = 0; + m_last_efb_copy_draw_counter = 0; + m_scheduled_command_buffer_kicks.clear(); + + // If we have no CPU access at all, leave everything in the one command buffer for maximum + // parallelism between CPU/GPU, at the cost of slightly higher latency. + if (m_cpu_accesses_this_frame.empty()) + return; + + // In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway + // between the draw counters that invoked the readback, or every 250 draws, whichever is smaller. + if (g_ActiveConfig.iCommandBufferExecuteInterval > 0) + { + u32 last_draw_counter = 0; + u32 interval = static_cast(g_ActiveConfig.iCommandBufferExecuteInterval); + for (u32 draw_counter : m_cpu_accesses_this_frame) + { + // We don't want to waste executing command buffers for only a few draws, so set a minimum. + // Leave last_draw_counter as-is, so we get the correct number of draws between submissions. 
+ u32 draw_count = draw_counter - last_draw_counter; + if (draw_count < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK) + continue; + + if (draw_count <= interval) + { + u32 mid_point = draw_count / 2; + m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + mid_point); + } + else + { + u32 counter = interval; + while (counter < draw_count) + { + m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + counter); + counter += interval; + } + } + + last_draw_counter = draw_counter; + } + } + +#if 0 + { + std::stringstream ss; + std::for_each(m_cpu_accesses_this_frame.begin(), m_cpu_accesses_this_frame.end(), [&ss](u32 idx) { ss << idx << ","; }); + WARN_LOG(VIDEO, "CPU EFB accesses in last frame: %s", ss.str().c_str()); + } + { + std::stringstream ss; + std::for_each(m_scheduled_command_buffer_kicks.begin(), m_scheduled_command_buffer_kicks.end(), [&ss](u32 idx) { ss << idx << ","; }); + WARN_LOG(VIDEO, "Scheduled command buffer kicks: %s", ss.str().c_str()); + } +#endif + + m_cpu_accesses_this_frame.clear(); +} diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index cd3e4ed552..9a657bd7f3 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -25,6 +25,16 @@ struct Slope bool dirty; }; +// View format of the input data to the texture decoding shader. +enum TexelBufferFormat : u32 +{ + TEXEL_BUFFER_FORMAT_R8_UINT, + TEXEL_BUFFER_FORMAT_R16_UINT, + TEXEL_BUFFER_FORMAT_RGBA8_UINT, + TEXEL_BUFFER_FORMAT_R32G32_UINT, + NUM_TEXEL_BUFFER_FORMATS +}; + class VertexManagerBase { private: @@ -42,19 +52,24 @@ public: // We may convert triangle-fans to triangle-lists, almost 3x as many indices. static constexpr u32 MAXIBUFFERSIZE = MathUtil::NextPowerOf2(MAX_PRIMITIVES_PER_COMMAND * 3); + // Streaming buffer sizes. + // Texel buffer will fit the maximum size of an encoded GX texture. 1024x1024, RGBA8 = 4MB. 
+ static constexpr u32 VERTEX_STREAM_BUFFER_SIZE = 40 * 1024 * 1024; + static constexpr u32 INDEX_STREAM_BUFFER_SIZE = 4 * 1024 * 1024; + static constexpr u32 UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; + static constexpr u32 TEXEL_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; + VertexManagerBase(); - // needs to be virtual for DX11's dtor virtual ~VertexManagerBase(); + virtual bool Initialize(); + PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall); void FlushData(u32 count, u32 stride); void Flush(); - virtual std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) = 0; - void DoState(PointerWrap& p); std::pair ResetFlushAspectRatioCount(); @@ -70,38 +85,69 @@ public: } // Utility pipeline drawing (e.g. EFB copies, post-processing, UI). - virtual void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) = 0; + virtual void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size); void UploadUtilityVertices(const void* vertices, u32 vertex_stride, u32 num_vertices, const u16* indices, u32 num_indices, u32* out_base_vertex, u32* out_base_index); + // Determine how many bytes there are in each element of the texel buffer. + // Needed for alignment and stride calculations. + static u32 GetTexelBufferElementSize(TexelBufferFormat buffer_format); + + // Texel buffer, used for palette conversion. + virtual bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset); + + // The second set of parameters uploads a second blob in the same buffer, used for GPU texture + // decoding for palette textures, as both the texture data and palette must be uploaded. 
+ virtual bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset); + + // CPU access tracking - call after a draw call is made. + void OnDraw(); + + // Call after CPU access is requested. + void OnCPUEFBAccess(); + + // Call after an EFB copy to RAM. Declared void, so no result is reported to the caller. + void OnEFBCopyToRAM(); + + // Call at the end of a frame. + void OnEndFrame(); + protected: - // Vertex buffers/index buffer creation. - virtual void CreateDeviceObjects() {} - virtual void DestroyDeviceObjects() {} + // When utility uniforms are used, the GX uniforms need to be re-written afterwards. + static void InvalidateConstants(); // Prepares the buffer for the next batch of vertices. - virtual void ResetBuffer(u32 vertex_stride, bool cull_all) = 0; + virtual void ResetBuffer(u32 vertex_stride); // Commits/uploads the current batch of vertices. virtual void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, - u32* out_base_vertex, u32* out_base_index) = 0; + u32* out_base_vertex, u32* out_base_index); // Uploads uniform buffers for GX draws. - virtual void UploadConstants() = 0; + virtual void UploadUniforms(); // Issues the draw call for the current batch in the backend. - virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) = 0; + virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex); + + u32 GetRemainingSize() const; + static u32 GetRemainingIndices(int primitive); + + void CalculateZSlope(NativeVertexFormat* format); + void LoadTextures(); u8* m_cur_buffer_pointer = nullptr; u8* m_base_buffer_pointer = nullptr; u8* m_end_buffer_pointer = nullptr; - u32 GetRemainingSize() const; - static u32 GetRemainingIndices(int primitive); + // Alternative buffers in CPU memory for primitives we are going to discard. 
+ std::vector m_cpu_vertex_buffer; + std::vector m_cpu_index_buffer; Slope m_zslope = {}; - void CalculateZSlope(NativeVertexFormat* format); VideoCommon::GXPipelineUid m_current_pipeline_config; VideoCommon::GXUberPipelineUid m_current_uber_pipeline_config; @@ -114,12 +160,22 @@ protected: bool m_cull_all = false; private: + // Minimum draws per command buffer to preempt a readback; shorter spans get no scheduled kick. + static constexpr u32 MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK = 10; + + void UpdatePipelineConfig(); + void UpdatePipelineObject(); + bool m_is_flushed = true; size_t m_flush_count_4_3 = 0; size_t m_flush_count_anamorphic = 0; - void UpdatePipelineConfig(); - void UpdatePipelineObject(); + // CPU access tracking + u32 m_draw_counter = 0; + u32 m_last_efb_copy_draw_counter = 0; + std::vector m_cpu_accesses_this_frame; + std::vector m_scheduled_command_buffer_kicks; + bool m_allow_background_execution = true; }; extern std::unique_ptr g_vertex_manager; diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index aa8b3d110f..268a3b70b7 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -282,7 +282,6 @@ void VideoBackendBase::InitializeShared() m_initialized = true; m_invalid = false; - frameCount = 0; CommandProcessor::Init(); Fifo::Init(); diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index e2a8d60b1b..f4ab78c032 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -48,11 +48,11 @@ - - + + @@ -114,11 +114,11 @@ - - + + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index 3ce3543380..f804839f54 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -29,12 +29,6 @@ - - Base - - - 
Base - Base @@ -197,6 +191,12 @@ Shader Generators + + Shader Generators + + + Base + @@ -206,12 +206,6 @@ - - Base - - - Base - Base @@ -374,7 +368,6 @@ Base - Base @@ -384,6 +377,15 @@ Shader Generators + + Shader Generators + + + Base + + + Shader Generators + diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index a849c7c784..3b1b22dc89 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -145,8 +145,6 @@ void VideoConfig::Refresh() bEFBAccessEnable = Config::Get(Config::GFX_HACK_EFB_ACCESS_ENABLE); bBBoxEnable = Config::Get(Config::GFX_HACK_BBOX_ENABLE); - bBBoxPreferStencilImplementation = - Config::Get(Config::GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION); bForceProgressive = Config::Get(Config::GFX_HACK_FORCE_PROGRESSIVE); bSkipEFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM); bSkipXFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index eace03b4a2..9271762015 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -114,7 +114,6 @@ struct VideoConfig final bool bEFBAccessEnable; bool bPerfQueriesEnable; bool bBBoxEnable; - bool bBBoxPreferStencilImplementation; // OpenGL-only, to see how slow it is compared to SSBOs bool bForceProgressive; bool bEFBEmulateFormatChanges; @@ -186,6 +185,7 @@ struct VideoConfig final std::string AdapterName; // for OpenGL u32 MaxTextureSize; + bool bUsesLowerLeftOrigin; bool bSupportsExclusiveFullscreen; bool bSupportsDualSourceBlend; @@ -215,6 +215,7 @@ struct VideoConfig final bool bSupportsBPTCTextures; bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES bool bSupportsBackgroundCompiling; + bool bSupportsLargePoints; } backend_info; // Utility @@ -223,12 +224,6 @@ struct VideoConfig final { return 
backend_info.bSupportsExclusiveFullscreen && !bBorderlessFullscreen; } - bool BBoxUseFragmentShaderImplementation() const - { - if (backend_info.api_type == APIType::OpenGL && bBBoxPreferStencilImplementation) - return false; - return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics; - } bool UseGPUTextureDecoding() const { return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;